+#endif
+#include <ctype.h>
+
+#include "index.h"
+#include <zebra_xpath.h>
+
+#include <charmap.h>
+#include <rset.h>
+
+
+/* Key-control table for struct it_key, initialised positionally with
+ * sizeof(struct it_key) and the key_compare_it, key_logdump_txt and
+ * key_get_seq functions. */
+static const struct key_control it_ctrl={
+ sizeof(struct it_key),
+ key_compare_it,
+ key_logdump_txt, /* FIXME - clean up these functions */
+ key_get_seq,
+};
+
+/* Exported pointer to the table above; term_trunc() passes it to
+ * rset_trunc(). */
+const struct key_control *key_it_ctrl=&it_ctrl;
+
+/* Context registered with dict_grep_cmap() by rpn_char_map_prepare() and
+ * read back in rpn_char_map_handler(). */
+struct rpn_char_map_info {
+ ZebraMaps zm; /* maps handed to zebra_maps_input() */
+ int reg_type; /* register type handed to zebra_maps_input() */
+};
+
+/* Cursor over the attribute list of an attributes-plus-term; initialised
+ * by attr_init() and advanced by attr_find_ex(). */
+typedef struct {
+ int type; /* attribute type being searched for */
+ int major; /* index of the next attribute element to examine */
+ int minor; /* index of the next entry in a complex attribute's list */
+ Z_AttributesPlusTerm *zapt; /* term whose attribute list is scanned */
+} AttrType;
+
+
+/* Character-map callback installed by rpn_char_map_prepare().
+ * vp is the struct rpn_char_map_info registered there; the input is run
+ * through zebra_maps_input() and that result is returned as is. */
+static const char **rpn_char_map_handler (void *vp, const char **from, int len)
+{
+    struct rpn_char_map_info *info = vp;
+    const char **mapped;
+
+    mapped = zebra_maps_input (info->zm, info->reg_type, from, len);
+#if 0
+    /* disabled debugging aid: dump the mapped bytes */
+    if (mapped && *mapped)
+    {
+        const char *cp;
+
+        yaz_log (LOG_LOG, "---");
+        for (cp = *mapped; *cp; cp++)
+            yaz_log (LOG_LOG, "%02X", *cp);
+    }
+#endif
+    return mapped;
+}
+
+/* Fill in map_info from the register and install rpn_char_map_handler()
+ * as the character-map callback of the register's dictionary. */
+static void rpn_char_map_prepare (struct zebra_register *reg, int reg_type,
+                                  struct rpn_char_map_info *map_info)
+{
+    map_info->reg_type = reg_type;
+    map_info->zm = reg->zebra_maps;
+
+    dict_grep_cmap (reg->dict, map_info, rpn_char_map_handler);
+}
+
+/* attr_find_ex: return the next value of the attribute type selected by
+ * attr_init().
+ *
+ * src            cursor; major/minor are advanced past the value returned
+ * attributeSetP  if non-null, receives the attribute set of the element
+ *                when the element names one
+ * string_value   if non-null, receives string values of complex attributes
+ *
+ * Returns the numeric value, -2 when a string was stored in *string_value,
+ * or -1 when no further matching attribute exists.
+ */
+static int attr_find_ex (AttrType *src, oid_value *attributeSetP,
+                         const char **string_value)
+{
+    const int count = src->zapt->attributes->num_attributes;
+
+    /* every "continue" below moves on to the next attribute element */
+    for (; src->major < count; ++(src->major))
+    {
+        Z_AttributeElement *elem =
+            src->zapt->attributes->attributes[src->major];
+
+        if (src->type != *elem->attributeType)
+            continue;
+
+        if (elem->which == Z_AttributeValue_numeric)
+        {
+            ++(src->major);
+            if (elem->attributeSet && attributeSetP)
+                *attributeSetP = oid_getentbyoid (elem->attributeSet)->value;
+            return *elem->value.numeric;
+        }
+        else if (elem->which == Z_AttributeValue_complex)
+        {
+            int at = src->minor;
+
+            if (at >= elem->value.complex->num_list)
+                continue;
+            if (elem->attributeSet && attributeSetP)
+                *attributeSetP = oid_getentbyoid (elem->attributeSet)->value;
+
+            if (elem->value.complex->list[at]->which ==
+                Z_StringOrNumeric_numeric)
+            {
+                ++(src->minor);
+                return *elem->value.complex->list[at]->u.numeric;
+            }
+            if (elem->value.complex->list[at]->which ==
+                Z_StringOrNumeric_string && string_value)
+            {
+                ++(src->minor);
+                *string_value = elem->value.complex->list[at]->u.string;
+                return -2;
+            }
+            /* string not wanted, or unknown entry kind: skip element */
+        }
+        else
+            assert (0);
+    }
+    return -1;
+}
+
+/* attr_find: attr_find_ex() for callers that only want numeric values
+ * (no string destination is supplied). */
+static int attr_find (AttrType *src, oid_value *attributeSetP)
+{
+    const char **no_string_dest = 0;
+
+    return attr_find_ex (src, attributeSetP, no_string_dest);
+}
+
+/* attr_init: point the cursor at the first attribute of zapt and select
+ * the attribute type that attr_find()/attr_find_ex() will look for. */
+static void attr_init (AttrType *src, Z_AttributesPlusTerm *zapt,
+                       int type)
+{
+    src->major = src->minor = 0;
+    src->type = type;
+    src->zapt = zapt;
+}
+
+/* When defined, struct grep_info also carries the term_no array. */
+#define TERM_COUNT
+
+/* Accumulator handed to dict_lookup_grep(); grep_handle()/add_isam_p()
+ * append one ISAMC_P per dictionary hit. */
+struct grep_info {
+#ifdef TERM_COUNT
+ int *term_no; /* array resized together with isam_p_buf */
+#endif
+ ISAMC_P *isam_p_buf; /* collected entries, isam_p_indx of them in use */
+ int isam_p_size; /* allocated number of entries */
+ int isam_p_indx; /* number of entries stored so far */
+ ZebraHandle zh; /* handle used for term/explain lookups */
+ int reg_type; /* register type used when un-mapping terms */
+ ZebraSet termset; /* if non-null, every hit term is added to this set */
+};
+
+/* term_untrans: translate the mapped term src back through
+ * zebra_maps_output() into dst.
+ *
+ * dst must have room for IT_MAX_WORD bytes; output beyond IT_MAX_WORD-1
+ * characters is dropped and dst is always NUL-terminated.
+ */
+static void term_untrans (ZebraHandle zh, int reg_type,
+                          char *dst, const char *src)
+{
+    int len = 0;
+    while (*src)
+    {
+        const char *cp = zebra_maps_output (zh->reg->zebra_maps,
+                                            reg_type, &src);
+        if (!cp)
+        {
+            /* no mapping: copy the raw byte if it fits, but always
+             * consume it.  Previously a full buffer sent this case into
+             * the branch below, which dereferenced the null cp. */
+            if (len < IT_MAX_WORD-1)
+                dst[len++] = *src;
+            src++;
+        }
+        else
+            while (*cp && len < IT_MAX_WORD-1)
+                dst[len++] = *cp++;
+    }
+    dst[len] = '\0';
+}
+
+/* add_isam_p: append one dictionary hit to the grep_info accumulator.
+ *
+ * name  dictionary key: an ordinal readable by key_SU_decode(), one byte
+ *       that is passed on as register type, then the mapped term
+ * info  first byte must equal sizeof(ISAMC_P); the ISAMC_P value follows
+ * p     accumulator; its arrays are grown (2*size + 100) when full
+ *
+ * When p->termset is set the un-mapped term is also added to that set.
+ */
+static void add_isam_p (const char *name, const char *info,
+                        struct grep_info *p)
+{
+    if (p->isam_p_indx == p->isam_p_size)
+    {
+        ISAMC_P *new_isam_p_buf;
+#ifdef TERM_COUNT
+        int *new_term_no;
+#endif
+        p->isam_p_size = 2*p->isam_p_size + 100;
+        new_isam_p_buf = (ISAMC_P *) xmalloc (sizeof(*new_isam_p_buf) *
+                                              p->isam_p_size);
+        if (p->isam_p_buf)
+        {
+            memcpy (new_isam_p_buf, p->isam_p_buf,
+                    p->isam_p_indx * sizeof(*p->isam_p_buf));
+            xfree (p->isam_p_buf);
+        }
+        p->isam_p_buf = new_isam_p_buf;
+
+#ifdef TERM_COUNT
+        new_term_no = (int *) xmalloc (sizeof(*new_term_no) *
+                                       p->isam_p_size);
+        if (p->term_no)
+        {
+            /* copy the old term numbers (the source used to be
+             * p->isam_p_buf, which filled term_no with ISAM data) */
+            memcpy (new_term_no, p->term_no,
+                    p->isam_p_indx * sizeof(*p->term_no));
+            xfree (p->term_no);
+        }
+        p->term_no = new_term_no;
+#endif
+    }
+    assert (*info == sizeof(*p->isam_p_buf));
+    memcpy (p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
+
+    if (p->termset)
+    {
+        const char *db;
+        int set, use;
+        char term_tmp[IT_MAX_WORD];
+        int su_code = 0;
+        int len = key_SU_decode (&su_code, name);
+
+        term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
+        logf (LOG_LOG, "grep: %d %c %s", su_code, name[len], term_tmp);
+        zebraExplain_lookup_ord (p->zh->reg->zei,
+                                 su_code, &db, &set, &use);
+        logf (LOG_LOG, "grep: set=%d use=%d db=%s", set, use, db);
+
+        resultSetAddTerm (p->zh, p->termset, name[len], db,
+                          set, use, term_tmp);
+    }
+    (p->isam_p_indx)++;
+}
+
+/* grep_handle: dict_lookup_grep() callback; p is the struct grep_info
+ * that receives the hit.  Always returns 0. */
+static int grep_handle (char *name, const char *info, void *p)
+{
+    struct grep_info *accum = p;
+
+    add_isam_p (name, info, accum);
+    return 0;
+}
+
+/* term_pre: advance *src past leading characters that map to space.
+ * Scanning also stops at any character listed in ct1 or ct2 (either may
+ * be NULL).  Returns the character *src ends up on, 0 at end of string. */
+static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src,
+                     const char *ct1, const char *ct2)
+{
+    const char *cur = *src;
+
+    while (*cur)
+    {
+        const char *next = cur;
+        const char **map;
+
+        if ((ct1 && strchr (ct1, *cur)) || (ct2 && strchr (ct2, *cur)))
+            break;
+        map = zebra_maps_input (zebra_maps, reg_type, &next, strlen(next));
+        if (**map != *CHR_SPACE)
+            break;
+        /* mapped to space: skip it */
+        cur = next;
+    }
+    *src = cur;
+    return *cur;
+}
+
+#define REGEX_CHARS " []()|.*+?!"
+
+/* Copy [from,to) verbatim to dst_term and to dst with every character of
+ * REGEX_CHARS preceded by a backslash; *i and *j are the write offsets. */
+static void term_100_copy (const char *from, const char *to,
+                           char *dst, int *i, char *dst_term, int *j)
+{
+    for (; from < to; from++)
+    {
+        if (strchr (REGEX_CHARS, *from))
+            dst[(*i)++] = '\\';
+        dst_term[(*j)++] = *from;
+        dst[(*i)++] = *from;
+    }
+}
+
+/* term_100: handle term, where trunc=none (no operators at all)
+ *
+ * With space_split the term ends at the first character mapping to
+ * space.  Otherwise (complete subfield) a space is held back and only
+ * written when more text follows, so trailing space is dropped and of a
+ * run of spaces only the last is kept.
+ * Returns the length written to dst, 0 if there is no term.
+ */
+static int term_100 (ZebraMaps zebra_maps, int reg_type,
+                     const char **src, char *dst, int space_split,
+                     char *dst_term)
+{
+    const char *cur;
+    const char *held_from = 0;
+    const char *held_to = 0;
+    int i = 0;
+    int j = 0;
+
+    if (!term_pre (zebra_maps, reg_type, src, NULL, NULL))
+        return 0;
+    cur = *src;
+    while (*cur)
+    {
+        const char *tok = cur;
+        const char **map = zebra_maps_input (zebra_maps, reg_type,
+                                             &cur, strlen(cur));
+        int is_space = (**map == *CHR_SPACE);
+
+        if (space_split)
+        {
+            if (is_space)
+                break;
+        }
+        else if (is_space)
+        {
+            /* remember the space; emit it only if text follows */
+            held_from = tok;
+            held_to = cur;
+            continue;
+        }
+        else if (held_from)
+        {
+            term_100_copy (held_from, held_to, dst, &i, dst_term, &j);
+            held_from = held_to = 0;
+        }
+        term_100_copy (tok, cur, dst, &i, dst_term, &j);
+    }
+    dst[i] = '\0';
+    dst_term[j] = '\0';
+    *src = cur;
+    return i;
+}
+
+/* term_101: handle term, where trunc=Process #
+ *
+ * Each '#' becomes ".*" in dst; all other characters are copied with
+ * REGEX_CHARS escaped.  dst_term receives the term as typed.
+ * Returns the length written to dst, 0 if there is no term.
+ */
+static int term_101 (ZebraMaps zebra_maps, int reg_type,
+                     const char **src, char *dst, int space_split,
+                     char *dst_term)
+{
+    const char *cur;
+    int out = 0;
+    int out_term = 0;
+
+    if (!term_pre (zebra_maps, reg_type, src, "#", "#"))
+        return 0;
+    for (cur = *src; *cur; )
+    {
+        const char *tok;
+        const char **map;
+
+        if (*cur == '#')
+        {
+            dst[out++] = '.';
+            dst[out++] = '*';
+            dst_term[out_term++] = *cur++;
+            continue;
+        }
+        tok = cur;
+        map = zebra_maps_input (zebra_maps, reg_type, &cur, strlen(cur));
+        if (space_split && **map == *CHR_SPACE)
+            break;
+        for (; tok < cur; tok++)
+        {
+            if (strchr (REGEX_CHARS, *tok))
+                dst[out++] = '\\';
+            dst_term[out_term++] = *tok;
+            dst[out++] = *tok;
+        }
+    }
+    dst[out] = '\0';
+    dst_term[out_term++] = '\0';
+    *src = cur;
+    return out;
+}
+
+/* term_103: handle term, where trunc=re-2 (regular expressions)
+ *
+ * Regular-expression operators are passed through to dst unescaped.
+ * If errors is non-null and the term starts with "+d+" (d a digit)
+ * followed by more text, *errors is set to d (at most 3) and the prefix
+ * is skipped.  space_split is accepted but not consulted: a character
+ * mapping to space always ends the term.
+ * Returns the length written to dst, 0 if there is no term.
+ */
+static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src,
+                     char *dst, int *errors, int space_split,
+                     char *dst_term)
+{
+    int i = 0;
+    int j = 0;
+    const char *s0, *s1;
+    const char **map;
+
+    if (!term_pre (zebra_maps, reg_type, src, "^\\()[].*+?|", "("))
+        return 0;
+    s0 = *src;
+    /* cast: isdigit() is undefined for negative char values */
+    if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
+        isdigit ((unsigned char) s0[1]))
+    {
+        *errors = s0[1] - '0';
+        s0 += 3;
+        if (*errors > 3)
+            *errors = 3;
+    }
+    while (*s0)
+    {
+        if (strchr ("^\\()[].*+?|-", *s0))
+        {
+            dst_term[j++] = *s0;
+            dst[i++] = *s0++;
+        }
+        else
+        {
+            s1 = s0;
+            map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
+            if (**map == *CHR_SPACE)
+                break;
+            while (s1 < s0)
+            {
+                if (strchr(REGEX_CHARS, *s1))
+                    dst[i++] = '\\';
+                dst_term[j++] = *s1;
+                dst[i++] = *s1++;
+            }
+        }
+    }
+    dst[i] = '\0';
+    dst_term[j] = '\0';
+    *src = s0;
+    return i;
+}
+
+/* term_102: handle term, where trunc=re-1 (regular expressions)
+ * Same as term_103() but without an error-tolerance destination. */
+static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
+                     char *dst, int space_split, char *dst_term)
+{
+    int *no_errors = NULL;
+
+    return term_103 (zebra_maps, reg_type, src, dst, no_errors,
+                     space_split, dst_term);
+}
+
+
+/* term_104: handle term, where trunc=Process ?, * and #
+ *
+ *   ?N  (N decimal digits) -> N times ".?", N limited to 20
+ *   ?   (no digits)        -> ".*"
+ *   *                      -> ".*"
+ *   #                      -> "."
+ * Other characters are copied with REGEX_CHARS escaped.  dst_term
+ * receives the term as typed.
+ * Returns the length written to dst, 0 if there is no term.
+ */
+static int term_104 (ZebraMaps zebra_maps, int reg_type,
+                     const char **src, char *dst, int space_split,
+                     char *dst_term)
+{
+    const char *s0, *s1;
+    const char **map;
+    int i = 0;
+    int j = 0;
+
+    if (!term_pre (zebra_maps, reg_type, src, "?*#", "?*#"))
+        return 0;
+    s0 = *src;
+    while (*s0)
+    {
+        if (*s0 == '?')
+        {
+            dst_term[j++] = *s0++;
+            if (*s0 >= '0' && *s0 <= '9')
+            {
+                int limit = 0;
+                while (*s0 >= '0' && *s0 <= '9')
+                {
+                    limit = limit * 10 + (*s0 - '0');
+                    dst_term[j++] = *s0++;
+                }
+                if (limit > 20)
+                    limit = 20;
+                while (--limit >= 0)
+                {
+                    dst[i++] = '.';
+                    dst[i++] = '?';
+                }
+            }
+            else
+            {
+                dst[i++] = '.';
+                dst[i++] = '*';
+            }
+        }
+        else if (*s0 == '*')
+        {
+            dst[i++] = '.';
+            dst[i++] = '*';
+            dst_term[j++] = *s0++;
+        }
+        else if (*s0 == '#')
+        {
+            dst[i++] = '.';
+            dst_term[j++] = *s0++;
+        }
+        else
+        {
+            /* Literal text.  This must be an else branch (as in
+             * term_101): without it the character following a wildcard
+             * was always copied literally, and the character map was
+             * consulted on an empty string when a wildcard ended the
+             * term. */
+            s1 = s0;
+            map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
+            if (space_split && **map == *CHR_SPACE)
+                break;
+            while (s1 < s0)
+            {
+                if (strchr(REGEX_CHARS, *s1))
+                    dst[i++] = '\\';
+                dst_term[j++] = *s1;
+                dst[i++] = *s1++;
+            }
+        }
+    }
+    dst[i] = '\0';
+    dst_term[j++] = '\0';
+    *src = s0;
+    return i;
+}
+
+/* term_105/106: handle term, where trunc=Process * and ! and right trunc
+ *
+ *   *  -> ".*"
+ *   !  -> "."
+ * Other characters are copied with REGEX_CHARS escaped.  When
+ * right_truncate is non-zero ".*" is appended to the expression.
+ * dst_term receives the term as typed.
+ * Returns the length written to dst, 0 if there is no term.
+ */
+static int term_105 (ZebraMaps zebra_maps, int reg_type,
+                     const char **src, char *dst, int space_split,
+                     char *dst_term, int right_truncate)
+{
+    const char *s0, *s1;
+    const char **map;
+    int i = 0;
+    int j = 0;
+
+    if (!term_pre (zebra_maps, reg_type, src, "*!", "*!"))
+        return 0;
+    s0 = *src;
+    while (*s0)
+    {
+        if (*s0 == '*')
+        {
+            dst[i++] = '.';
+            dst[i++] = '*';
+            dst_term[j++] = *s0++;
+        }
+        else if (*s0 == '!')
+        {
+            dst[i++] = '.';
+            dst_term[j++] = *s0++;
+        }
+        else
+        {
+            /* Literal text.  This must be an else branch (as in
+             * term_101): without it the character following a wildcard
+             * was always copied literally, and the character map was
+             * consulted on an empty string when a wildcard ended the
+             * term. */
+            s1 = s0;
+            map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
+            if (space_split && **map == *CHR_SPACE)
+                break;
+            while (s1 < s0)
+            {
+                if (strchr(REGEX_CHARS, *s1))
+                    dst[i++] = '\\';
+                dst_term[j++] = *s1;
+                dst[i++] = *s1++;
+            }
+        }
+    }
+    if (right_truncate)
+    {
+        dst[i++] = '.';
+        dst[i++] = '*';
+    }
+    dst[i] = '\0';
+
+    dst_term[j++] = '\0';
+    *src = s0;
+    return i;
+}
+
+
+/* gen_regular_rel - generate regular expression from relation
+ * dst: receives the NUL-terminated expression; no size is passed, so
+ * the caller must provide a buffer large enough for it
+ * val: border value (inclusive)
+ * islt: 1 if <=; 0 if >=.
+ */
+static void gen_regular_rel (char *dst, int val, int islt)
+{
+ int dst_p;
+ int w, d, i;
+ int pos = 0;
+ char numstr[20];
+
+ logf (LOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
+ if (val >= 0)
+ {
+ /* for <= a non-negative border, any negative number is also accepted */
+ if (islt)
+ strcpy (dst, "(-[0-9]+|(");
+ else
+ strcpy (dst, "((");
+ }
+ else
+ {
+ /* negative border: work on -val with the direction flipped */
+ if (!islt)
+ {
+ /* for >= a negative border, any unsigned number is also accepted */
+ strcpy (dst, "([0-9]+|-(");
+ dst_p = strlen (dst);
+ islt = 1;
+ }
+ else
+ {
+ strcpy (dst, "(-(");
+ islt = 0;
+ }
+ val = -val;
+ }
+ dst_p = strlen (dst);
+ sprintf (numstr, "%d", val);
+ /* walk the digits from last to first; pos counts the digits to the
+ right of the current one */
+ for (w = strlen(numstr); --w >= 0; pos++)
+ {
+ d = numstr[w];
+ if (pos > 0)
+ {
+ /* every digit but the last is stepped by one; a digit that
+ cannot be stepped contributes no alternative */
+ if (islt)
+ {
+ if (d == '0')
+ continue;
+ d--;
+ }
+ else
+ {
+ if (d == '9')
+ continue;
+ d++;
+ }
+ }
+
+ /* write the whole number, then back up to the current digit so
+ that only the digits before it are kept */
+ strcpy (dst + dst_p, numstr);
+ dst_p = strlen(dst) - pos - 1;
+
+ if (islt)
+ {
+ if (d != '0')
+ {
+ dst[dst_p++] = '[';
+ dst[dst_p++] = '0';
+ dst[dst_p++] = '-';
+ dst[dst_p++] = d;
+ dst[dst_p++] = ']';
+ }
+ else
+ dst[dst_p++] = d;
+ }
+ else
+ {
+ if (d != '9')
+ {
+ dst[dst_p++] = '[';
+ dst[dst_p++] = d;
+ dst[dst_p++] = '-';
+ dst[dst_p++] = '9';
+ dst[dst_p++] = ']';
+ }
+ else
+ dst[dst_p++] = d;
+ }
+ /* the digits to the right may be anything */
+ for (i = 0; i<pos; i++)
+ {
+ dst[dst_p++] = '[';
+ dst[dst_p++] = '0';
+ dst[dst_p++] = '-';
+ dst[dst_p++] = '9';
+ dst[dst_p++] = ']';
+ }
+ dst[dst_p++] = '|';
+ }
+ dst[dst_p] = '\0';
+ /* final alternative: numbers with fewer (<=) or more (>=) digits */
+ if (islt)
+ {
+ /* match everything less than 10^(pos-1) */
+ strcat (dst, "0*");
+ for (i=1; i<pos; i++)
+ strcat (dst, "[0-9]?");
+ }
+ else
+ {
+ /* match everything greater than 10^pos */
+ for (i = 0; i <= pos; i++)
+ strcat (dst, "[0-9]");
+ strcat (dst, "[0-9]*");
+ }
+ strcat (dst, "))");
+}
+
+/* string_rel_add_char: copy one logical character from src[*indx] to
+ * *term_p.  A backslash is copied together with the character after it.
+ * Both *term_p and *indx are advanced past what was copied. */
+void string_rel_add_char (char **term_p, const char *src, int *indx)
+{
+    char *out = *term_p;
+    int at = *indx;
+
+    if (src[at] == '\\')
+        *out++ = src[at++];
+    *out++ = src[at++];
+
+    *term_p = out;
+    *indx = at;
+}
+
+/*
+ * Regular expressions for string relations, shown for the term "abc".
+ * Two equivalent forms are listed per relation; the code below emits the
+ * second ([^...]) form.
+ *
+ * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
+ * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
+ * >= abc ([b-].*|a[c-].*|ab[c-].*)
+ * ([^-a].*|a[^-b].*|ab[c-].*)
+ * < abc ([-0].*|a[-a].*|ab[-b].*)
+ * ([^a-].*|a[^b-].*|ab[^c-].*)
+ * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
+ * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
+ *
+ * string_relation: read the relation attribute (type 2) of zapt, take the
+ * next term component from *term_sub with term_100() and append the
+ * matching expression to the NUL-terminated term_dict.
+ * The attributeSet parameter is not used in the body.
+ * Returns 0 when term_100() finds no term, 1 otherwise.
+ */
+static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+ const char **term_sub, char *term_dict,
+ oid_value attributeSet,
+ int reg_type, int space_split, char *term_dst)
+{
+ AttrType relation;
+ int relation_value;
+ int i;
+ char *term_tmp = term_dict + strlen(term_dict);
+ char term_component[2*IT_MAX_WORD+20];
+
+ attr_init (&relation, zapt, 2);
+ relation_value = attr_find (&relation, NULL);
+
+ logf (LOG_DEBUG, "string relation value=%d", relation_value);
+ switch (relation_value)
+ {
+ case 1:
+ if (!term_100 (zh->reg->zebra_maps, reg_type,
+ term_sub, term_component,
+ space_split, term_dst))
+ return 0;
+ logf (LOG_DEBUG, "Relation <");
+
+ *term_tmp++ = '(';
+ /* one alternative per character: the prefix before it, then
+ [^c-].* for the character itself */
+ for (i = 0; term_component[i]; )
+ {
+ int j = 0;
+
+ if (i)
+ *term_tmp++ = '|';
+ while (j < i)
+ string_rel_add_char (&term_tmp, term_component, &j);
+
+ *term_tmp++ = '[';
+
+ *term_tmp++ = '^';
+ string_rel_add_char (&term_tmp, term_component, &i);
+ *term_tmp++ = '-';
+
+ *term_tmp++ = ']';
+ *term_tmp++ = '.';
+ *term_tmp++ = '*';
+
+ /* stop adding alternatives once the expression gets long */
+ if ((term_tmp - term_dict) > IT_MAX_WORD)
+ break;
+ }
+ *term_tmp++ = ')';
+ *term_tmp = '\0';
+ break;
+ case 2:
+ if (!term_100 (zh->reg->zebra_maps, reg_type,
+ term_sub, term_component,
+ space_split, term_dst))
+ return 0;
+ logf (LOG_DEBUG, "Relation <=");
+
+ *term_tmp++ = '(';
+ /* as for <, with every alternative followed by '|' ... */
+ for (i = 0; term_component[i]; )
+ {
+ int j = 0;
+
+ while (j < i)
+ string_rel_add_char (&term_tmp, term_component, &j);
+ *term_tmp++ = '[';
+
+ *term_tmp++ = '^';
+ string_rel_add_char (&term_tmp, term_component, &i);
+ *term_tmp++ = '-';
+
+ *term_tmp++ = ']';
+ *term_tmp++ = '.';
+ *term_tmp++ = '*';
+
+ *term_tmp++ = '|';
+
+ if ((term_tmp - term_dict) > IT_MAX_WORD)
+ break;
+ }
+ /* ... and the term itself as the last alternative */
+ for (i = 0; term_component[i]; )
+ string_rel_add_char (&term_tmp, term_component, &i);
+ *term_tmp++ = ')';
+ *term_tmp = '\0';
+ break;
+ case 5:
+ if (!term_100 (zh->reg->zebra_maps, reg_type,
+ term_sub, term_component, space_split, term_dst))
+ return 0;
+ logf (LOG_DEBUG, "Relation >");
+
+ *term_tmp++ = '(';
+ /* one alternative per character: prefix, then [^-c].* */
+ for (i = 0; term_component[i];)
+ {
+ int j = 0;
+
+ while (j < i)
+ string_rel_add_char (&term_tmp, term_component, &j);
+ *term_tmp++ = '[';
+
+ *term_tmp++ = '^';
+ *term_tmp++ = '-';
+ string_rel_add_char (&term_tmp, term_component, &i);
+
+ *term_tmp++ = ']';
+ *term_tmp++ = '.';
+ *term_tmp++ = '*';
+
+ *term_tmp++ = '|';
+
+ if ((term_tmp - term_dict) > IT_MAX_WORD)
+ break;
+ }
+ /* last alternative: the term followed by at least one character */
+ for (i = 0; term_component[i];)
+ string_rel_add_char (&term_tmp, term_component, &i);
+ *term_tmp++ = '.';
+ *term_tmp++ = '+';
+ *term_tmp++ = ')';
+ *term_tmp = '\0';
+ break;
+ case 4:
+ if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
+ term_component, space_split, term_dst))
+ return 0;
+ logf (LOG_DEBUG, "Relation >=");
+
+ *term_tmp++ = '(';
+ for (i = 0; term_component[i];)
+ {
+ int j = 0;
+
+ if (i)
+ *term_tmp++ = '|';
+ while (j < i)
+ string_rel_add_char (&term_tmp, term_component, &j);
+ *term_tmp++ = '[';
+
+ /* [^-c] while another byte follows in term_component,
+ [c-] otherwise */
+ if (term_component[i+1])
+ {
+ *term_tmp++ = '^';
+ *term_tmp++ = '-';
+ string_rel_add_char (&term_tmp, term_component, &i);
+ }
+ else
+ {
+ string_rel_add_char (&term_tmp, term_component, &i);
+ *term_tmp++ = '-';
+ }
+ *term_tmp++ = ']';
+ *term_tmp++ = '.';
+ *term_tmp++ = '*';
+
+ if ((term_tmp - term_dict) > IT_MAX_WORD)
+ break;
+ }
+ *term_tmp++ = ')';
+ *term_tmp = '\0';
+ break;
+ case 3:
+ default:
+ /* equality, also used when the relation is absent or unknown */
+ logf (LOG_DEBUG, "Relation =");
+ if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
+ term_component, space_split, term_dst))
+ return 0;
+ strcat (term_tmp, "(");
+ strcat (term_tmp, term_component);
+ strcat (term_tmp, ")");
+ }
+ return 1;
+}
+
+/* Forward declaration: term_trunc() below calls string_term() ahead of
+ * its definition. */
+static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+ const char **term_sub,
+ oid_value attributeSet, NMEM stream,
+ struct grep_info *grep_info,
+ int reg_type, int complete_flag,
+ int num_bases, char **basenames,
+ char *term_dst, int xpath_use);
+
+/* term_trunc: look up the next term component with string_term() and
+ * turn the collected hits into a result set with rset_trunc().
+ * The hit counter in grep_info is reset first.  Returns 0 (no set) when
+ * string_term() reports an error or finds no term. */
+static RSET term_trunc (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                        const char **term_sub,
+                        oid_value attributeSet, NMEM stream,
+                        struct grep_info *grep_info,
+                        int reg_type, int complete_flag,
+                        int num_bases, char **basenames,
+                        char *term_dst,
+                        const char *rank_type, int xpath_use,
+                        NMEM rset_nmem)
+{
+    grep_info->isam_p_indx = 0;
+
+    if (string_term (zh, zapt, term_sub, attributeSet, stream, grep_info,
+                     reg_type, complete_flag, num_bases, basenames,
+                     term_dst, xpath_use) < 1)
+        return 0;
+
+    logf (LOG_DEBUG, "term: %s", term_dst);
+    return rset_trunc (zh, grep_info->isam_p_buf,
+                       grep_info->isam_p_indx, term_dst,
+                       strlen(term_dst), rank_type, 1 /* preserve pos */,
+                       zapt->term->which, rset_nmem,key_it_ctrl);
+}
+
+
+/* string_term: look up one term component in the dictionary.
+ *
+ * For every database in basenames the use attribute is resolved to index
+ * ordinals, a dictionary expression "(ord|ord..)<reg_type><term regex>"
+ * is built according to the truncation attribute (type 5), and
+ * dict_lookup_grep() feeds the hits to grep_info through grep_handle().
+ *
+ * term_sub   in: start of the term text; out: position after the
+ *            component that was consumed
+ * term_dst   receives the component as typed
+ * xpath_use  use value substituted when the use attribute is a string
+ *            and xpath_use is positive
+ *
+ * Returns 1 on success, 0 when no term component is left, and -1 on
+ * error with zh->errCode (and possibly zh->errString) set.
+ */
+static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                        const char **term_sub,
+                        oid_value attributeSet, NMEM stream,
+                        struct grep_info *grep_info,
+                        int reg_type, int complete_flag,
+                        int num_bases, char **basenames,
+                        char *term_dst, int xpath_use)
+{
+    char term_dict[2*IT_MAX_WORD+4000];
+    int j, r, base_no;
+    AttrType truncation;
+    int truncation_value;
+    AttrType use;
+    int use_value;
+    const char *use_string = 0;
+    oid_value curAttributeSet = attributeSet;
+    const char *termp;
+    struct rpn_char_map_info rcmi;
+    int space_split = complete_flag ? 0 : 1;
+
+    int bases_ok = 0;     /* no of databases with OK attribute */
+    int errCode = 0;      /* err code (if any is not OK) */
+    char *errString = 0;  /* addinfo */
+
+    rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
+    attr_init (&use, zapt, 1);
+    use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
+    logf (LOG_DEBUG, "string_term, use value %d", use_value);
+    attr_init (&truncation, zapt, 5);
+    truncation_value = attr_find (&truncation, NULL);
+    logf (LOG_DEBUG, "truncation value %d", truncation_value);
+
+    if (use_value == -1)    /* no attribute - assume "any" */
+        use_value = 1016;
+    for (base_no = 0; base_no < num_bases; base_no++)
+    {
+        attent attp;
+        data1_local_attribute id_xpath_attr;
+        data1_local_attribute *local_attr;
+        int max_pos, prefix_len = 0;
+
+        /* every database starts from the same position in the term */
+        termp = *term_sub;
+
+        if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
+        {
+            zh->errCode = 109; /* Database unavailable */
+            zh->errString = basenames[base_no];
+            return -1;
+        }
+        if (xpath_use > 0 && use_value == -2)
+        {
+            use_value = xpath_use;
+            attp.local_attributes = &id_xpath_attr;
+            attp.attset_ordinal = VAL_IDXPATH;
+            id_xpath_attr.next = 0;
+            id_xpath_attr.local = use_value;
+        }
+        else if (curAttributeSet == VAL_IDXPATH)
+        {
+            attp.local_attributes = &id_xpath_attr;
+            attp.attset_ordinal = VAL_IDXPATH;
+            id_xpath_attr.next = 0;
+            id_xpath_attr.local = use_value;
+        }
+        else
+        {
+            if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
+                                    use_string)))
+            {
+                logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
+                      curAttributeSet, use_value, r);
+                if (r == -1)
+                {
+                    /* set was found, but value wasn't defined */
+                    errCode = 114;
+                    if (use_string)
+                        errString = nmem_strdup(stream, use_string);
+                    else
+                    {
+                        char val_str[32];
+                        sprintf (val_str, "%d", use_value);
+                        errString = nmem_strdup (stream, val_str);
+                    }
+                }
+                else
+                {
+                    int oid[OID_SIZE];
+                    struct oident oident;
+
+                    oident.proto = PROTO_Z3950;
+                    oident.oclass = CLASS_ATTSET;
+                    oident.value = curAttributeSet;
+                    oid_ent_to_oid (&oident, oid);
+
+                    errCode = 121;
+                    errString = nmem_strdup (stream, oident.desc);
+                }
+                /* remember the error; it is only reported when no
+                 * database at all accepts the attribute */
+                continue;
+            }
+        }
+        /* build "(ord|ord|..." from the ordinals of the local attributes;
+         * every ordinal byte is preceded by a byte with value 1 */
+        for (local_attr = attp.local_attributes; local_attr;
+             local_attr = local_attr->next)
+        {
+            int ord;
+            char ord_buf[32];
+            int i, ord_len;
+
+            ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
+                                         local_attr->local);
+            if (ord < 0)
+                continue;
+            if (prefix_len)
+                term_dict[prefix_len++] = '|';
+            else
+                term_dict[prefix_len++] = '(';
+
+            ord_len = key_SU_encode (ord, ord_buf);
+            for (i = 0; i<ord_len; i++)
+            {
+                term_dict[prefix_len++] = 1;
+                term_dict[prefix_len++] = ord_buf[i];
+            }
+        }
+        if (!prefix_len)
+        {
+            /* no ordinal found in this database: counted as OK, but
+             * nothing is looked up */
+            bases_ok++;
+            continue;
+        }
+        bases_ok++; /* this has OK attributes */
+
+        term_dict[prefix_len++] = ')';
+        term_dict[prefix_len++] = 1;
+        term_dict[prefix_len++] = reg_type;
+        logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
+        term_dict[prefix_len] = '\0';
+        j = prefix_len;
+        switch (truncation_value)
+        {
+        case -1:         /* not specified */
+        case 100:        /* do not truncate */
+            if (!string_relation (zh, zapt, &termp, term_dict,
+                                  attributeSet,
+                                  reg_type, space_split, term_dst))
+                return 0;
+            logf (LOG_LOG, "dict_lookup_grep: %s", term_dict+prefix_len);
+            r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
+                                  grep_info, &max_pos, 0, grep_handle);
+            if (r)
+                logf (LOG_WARN, "dict_lookup_grep fail %d", r);
+            break;
+        case 1:          /* right truncation */
+            term_dict[j++] = '(';
+            if (!term_100 (zh->reg->zebra_maps, reg_type,
+                           &termp, term_dict + j, space_split, term_dst))
+                return 0;
+            strcat (term_dict, ".*)");
+            dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
+                              &max_pos, 0, grep_handle);
+            break;
+        case 2:          /* left truncation */
+            term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
+            if (!term_100 (zh->reg->zebra_maps, reg_type,
+                           &termp, term_dict + j, space_split, term_dst))
+                return 0;
+            strcat (term_dict, ")");
+            dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
+                              &max_pos, 0, grep_handle);
+            break;
+        case 3:          /* left&right truncation */
+            term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
+            if (!term_100 (zh->reg->zebra_maps, reg_type,
+                           &termp, term_dict + j, space_split, term_dst))
+                return 0;
+            strcat (term_dict, ".*)");
+            dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
+                              &max_pos, 0, grep_handle);
+            break;
+        case 101:        /* process # in term */
+            term_dict[j++] = '(';
+            if (!term_101 (zh->reg->zebra_maps, reg_type,
+                           &termp, term_dict + j, space_split, term_dst))
+                return 0;
+            strcat (term_dict, ")");
+            r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
+                                  &max_pos, 0, grep_handle);
+            if (r)
+                logf (LOG_WARN, "dict_lookup_grep err, trunc=#: %d", r);
+            break;
+        case 102:        /* Regexp-1 */
+            term_dict[j++] = '(';
+            if (!term_102 (zh->reg->zebra_maps, reg_type,
+                           &termp, term_dict + j, space_split, term_dst))
+                return 0;
+            strcat (term_dict, ")");
+            /* Regexp-1 always searches with tolerance 0; r holds no
+             * meaningful value at this point, so it is not logged */
+            logf (LOG_DEBUG, "Regexp-1 tolerance=%d", 0);
+            r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
+                                  &max_pos, 0, grep_handle);
+            if (r)
+                logf (LOG_WARN, "dict_lookup_grep err, trunc=regular: %d",
+                      r);
+            break;
+        case 103:        /* Regexp-2 */
+            r = 1;       /* default tolerance; term_103 may override it */
+            term_dict[j++] = '(';
+            if (!term_103 (zh->reg->zebra_maps, reg_type,
+                           &termp, term_dict + j, &r, space_split, term_dst))
+                return 0;
+            strcat (term_dict, ")");
+            logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r);
+            r = dict_lookup_grep (zh->reg->dict, term_dict, r, grep_info,
+                                  &max_pos, 2, grep_handle);
+            if (r)
+                logf (LOG_WARN, "dict_lookup_grep err, trunc=eregular: %d",
+                      r);
+            break;
+        case 104:        /* process ?, * and # in term */
+            term_dict[j++] = '(';
+            if (!term_104 (zh->reg->zebra_maps, reg_type,
+                           &termp, term_dict + j, space_split, term_dst))
+                return 0;
+            strcat (term_dict, ")");
+            r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
+                                  &max_pos, 0, grep_handle);
+            if (r)
+                logf (LOG_WARN, "dict_lookup_grep err, trunc=#/!: %d", r);
+            break;
+        case 105:        /* process * and ! in term, right truncated */
+            term_dict[j++] = '(';
+            if (!term_105 (zh->reg->zebra_maps, reg_type,
+                           &termp, term_dict + j, space_split, term_dst, 1))
+                return 0;
+            strcat (term_dict, ")");
+            r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
+                                  &max_pos, 0, grep_handle);
+            if (r)
+                logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
+            break;
+        case 106:        /* process * and ! in term */
+            term_dict[j++] = '(';
+            if (!term_105 (zh->reg->zebra_maps, reg_type,
+                           &termp, term_dict + j, space_split, term_dst, 0))
+                return 0;
+            strcat (term_dict, ")");
+            r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
+                                  &max_pos, 0, grep_handle);
+            if (r)
+                logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
+            break;
+        default:
+            /* Unsupported truncation attribute.  These two statements
+             * used to sit unreachable after the break of case 3, so an
+             * unknown value silently produced an empty result. */
+            zh->errCode = 120;
+            return -1;
+        }
+    }
+    if (!bases_ok)
+    {
+        zh->errCode = errCode;
+        zh->errString = errString;
+        return -1;
+    }
+    *term_sub = termp;
+    logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
+    return 1;
+}
+
+
+/* convert APT search term to UTF8
+ *
+ * A general term is converted with zh->iconv_to_utf8 when that is set;
+ * otherwise, and for character-string terms, the bytes are copied as
+ * they are.  At most IT_MAX_WORD-1 bytes are stored in termz, followed
+ * by a NUL.
+ * Returns 0 on success; -1 with zh->errCode 125 when conversion fails
+ * or 124 for any other kind of term.
+ */
+static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                              char *termz)
+{
+    Z_Term *term = zapt->term;
+    const void *raw;
+    size_t raw_len;
+
+    if (term->which == Z_Term_general)
+    {
+        if (zh->iconv_to_utf8 != 0)
+        {
+            char *in = term->u.general->buf;
+            size_t in_left = term->u.general->len;
+            char *out = termz;
+            size_t out_left = IT_MAX_WORD-1;
+
+            if (yaz_iconv(zh->iconv_to_utf8, &in, &in_left,
+                          &out, &out_left) == (size_t)(-1))
+            {
+                /* all-null call as in the original error path */
+                yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
+                zh->errCode = 125;
+                return -1;
+            }
+            *out = 0;
+            return 0;
+        }
+        raw = term->u.general->buf;
+        raw_len = term->u.general->len;
+    }
+    else if (term->which == Z_Term_characterString)
+    {
+        raw = term->u.characterString;
+        raw_len = strlen(term->u.characterString);
+    }
+    else
+    {
+        zh->errCode = 124;
+        return -1;
+    }
+
+    /* plain copy, truncated to what termz can hold */
+    if (raw_len > IT_MAX_WORD-1)
+        raw_len = IT_MAX_WORD-1;
+    memcpy (termz, raw, raw_len);
+    termz[raw_len] = '\0';
+    return 0;
+}
+
+/* convert APT SCAN term to internal cmap
+ *
+ * The term is fetched with zapt_term_to_utf8() and mapped piece by piece
+ * with zebra_maps_input().  Space mappings are only written between two
+ * non-space pieces: leading and trailing space is dropped and a run of
+ * spaces yields the last mapping only.
+ * Returns 0 on success, -1 if the term could not be fetched.
+ */
+static int trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                            char *termz, int reg_type)
+{
+    char utf8_term[IT_MAX_WORD];
+    const char *cur, *end;
+    const char *held_space = NULL;
+    int out = 0;
+    int left;
+
+    if (zapt_term_to_utf8(zh, zapt, utf8_term))
+        return -1;    /* error */
+
+    cur = utf8_term;
+    end = cur + strlen(cur);
+    while ((left = (end - cur)) > 0)
+    {
+        const char **map = zebra_maps_input (zh->reg->zebra_maps, reg_type,
+                                             &cur, left);
+        const char *from;
+
+        if (**map == *CHR_SPACE)
+        {
+            held_space = *map;
+            continue;
+        }
+        if (out && held_space)
+        {
+            from = held_space;
+            while (*from)
+                termz[out++] = *from++;
+        }
+        held_space = NULL;
+        from = *map;
+        while (*from)
+            termz[out++] = *from++;
+    }
+    termz[out] = '\0';
+    return 0;
+}
+
+/* normalize_term: return a copy of termz, allocated from stream, after
+ * passing it through zebra_replace().
+ *
+ * The list handed to zebra_replace() depends on the truncation attribute
+ * (type 5): "#" for 101, "!#" for 104, "!*" for 105 and "" for anything
+ * else, except 102 and 103 for which zebra_replace() is not called.
+ * When zebra_replace() is skipped or yields nothing, termz is duplicated
+ * unchanged.
+ */
+char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                     const char *termz, NMEM stream, unsigned reg_id)
+{
+    AttrType truncation;
+    char *ex_list;
+    WRBUF replaced = 0;
+    char *copy;
+
+    attr_init (&truncation, zapt, 5);
+    switch (attr_find (&truncation, NULL))
+    {
+    case 101:
+        ex_list = "#";
+        break;
+    case 102:
+    case 103:
+        ex_list = 0;
+        break;
+    case 104:
+        ex_list = "!#";
+        break;
+    case 105:
+        ex_list = "!*";
+        break;
+    default:
+        ex_list = "";
+        break;
+    }
+
+    if (ex_list)
+        replaced = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
+                                 termz, strlen(termz));
+    if (!replaced)
+        return nmem_strdup(stream, termz);
+
+    copy = (char*) nmem_malloc (stream, wrbuf_len(replaced)+1);
+    memcpy (copy, wrbuf_buf(replaced), wrbuf_len(replaced));
+    copy[wrbuf_len(replaced)] = '\0';
+    return copy;
+}
+
+/* grep_info_delete: free the arrays owned by grep_info; the structure
+ * itself is not freed. */
+static void grep_info_delete (struct grep_info *grep_info)
+{
+    xfree (grep_info->isam_p_buf);
+#ifdef TERM_COUNT
+    xfree (grep_info->term_no);
+#endif
+}
+
+/* grep_info_prepare: initialise grep_info for dictionary lookups.
+ *
+ * The hit arrays start out empty.  If zapt is non-null and carries an
+ * attribute of type 8, a result set is created with resultSetAdd() under
+ * that attribute's value (numeric values are printed in decimal) and
+ * stored in grep_info->termset.
+ *
+ * Returns 0 on success; -1 with zh->errCode 128 and zh->errString set
+ * to the set name when the set cannot be created.
+ */
+static int grep_info_prepare (ZebraHandle zh,
+                              Z_AttributesPlusTerm *zapt,
+                              struct grep_info *grep_info,
+                              int reg_type,
+                              NMEM stream)
+{
+    AttrType termset;
+    int termset_value_numeric;
+    const char *termset_value_string = 0;
+
+#ifdef TERM_COUNT
+    grep_info->term_no = 0;
+#endif
+    grep_info->isam_p_size = 0;
+    /* add_isam_p() compares isam_p_indx with isam_p_size, so it has to
+     * start out defined as well */
+    grep_info->isam_p_indx = 0;
+    grep_info->isam_p_buf = NULL;
+    grep_info->zh = zh;
+    grep_info->reg_type = reg_type;
+    grep_info->termset = 0;
+
+    if (!zapt)
+        return 0;
+    attr_init (&termset, zapt, 8);
+    termset_value_numeric =
+        attr_find_ex (&termset, NULL, &termset_value_string);
+    if (termset_value_numeric != -1)
+    {
+        char resname[32];
+        const char *termset_name = 0;
+        if (termset_value_numeric != -2)
+        {
+            sprintf (resname, "%d", termset_value_numeric);
+            termset_name = resname;
+        }
+        else
+            termset_name = termset_value_string;
+        logf (LOG_LOG, "creating termset set %s", termset_name);
+        grep_info->termset = resultSetAdd (zh, termset_name, 1);
+        if (!grep_info->termset)
+        {
+            zh->errCode = 128;
+            zh->errString = nmem_strdup (stream, termset_name);
+            return -1;
+        }
+    }
+    return 0;
+}
+