1 /* $Id: zrpn.c,v 1.200 2005-06-14 20:28:54 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* Context handed to rpn_char_map_handler; rpn_char_map_prepare fills in its
 * zm and reg_type members. */
40 struct rpn_char_map_info
/* NOTE(review): the declaration enclosing this member is not visible in this
 * excerpt; presumably it belongs to AttrType, since attr_find_ex reads
 * src->zapt -- confirm. */
51 Z_AttributesPlusTerm *zapt;
/* File-local logging state. log_level_rpn is the level used for the "rpn"
 * log module; add_isam_p assigns it from yaz_log_module_level("rpn").
 * log_level_set presumably guards that one-time lookup -- confirm. */
54 static int log_level_set = 0;
55 static int log_level_rpn = 0;
/* Character-map callback installed by rpn_char_map_prepare via
 * dict_grep_cmap. vp is the struct rpn_char_map_info registered there; the
 * input at *from (len bytes) is mapped with zebra_maps_input using that
 * context's maps and register type. */
57 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
59 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
60 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
/* Diagnostic dump of the mapped bytes in hex; the condition guarding this
 * logging is not visible in this excerpt. */
64 const char *outp = *out;
65 yaz_log(YLOG_LOG, "---");
68 yaz_log(YLOG_LOG, "%02X", *outp);
/* Prepare map_info for dictionary grep on register reg: record the
 * register's zebra_maps and the register type, then install
 * rpn_char_map_handler (with map_info as its context) on reg->dict. */
76 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
77 struct rpn_char_map_info *map_info)
79 map_info->zm = reg->zebra_maps;
80 map_info->reg_type = reg_type;
81 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Find the next attribute of type src->type in the attribute list of
 * src->zapt, starting at index src->major.
 *
 * attributeSetP, when non-null, receives the attribute set of the matching
 * element if that element carries one. string_value is used for string
 * attribute values (attr_find passes 0 for it).
 *
 * Callers in this file treat a result of -1 as "attribute absent" and -2 as
 * "string-valued attribute"; the statements producing those values are not
 * visible in this excerpt. */
84 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
85 const char **string_value)
89 num_attributes = src->zapt->attributes->num_attributes;
90 while (src->major < num_attributes)
92 Z_AttributeElement *element;
94 element = src->zapt->attributes->attributes[src->major];
95 if (src->type == *element->attributeType)
97 switch (element->which)
/* Plain numeric attribute value. */
99 case Z_AttributeValue_numeric:
101 if (element->attributeSet && attributeSetP)
105 attrset = oid_getentbyoid(element->attributeSet);
106 *attributeSetP = attrset->value;
108 return *element->value.numeric;
/* Complex value: a list of string-or-numeric entries indexed by src->minor. */
110 case Z_AttributeValue_complex:
111 if (src->minor >= element->value.complex->num_list)
113 if (element->attributeSet && attributeSetP)
117 attrset = oid_getentbyoid(element->attributeSet);
118 *attributeSetP = attrset->value;
/* NOTE(review): the reads below index with src->minor-1 after testing
 * list[src->minor]; presumably src->minor is advanced on an elided line in
 * between -- confirm. */
120 if (element->value.complex->list[src->minor]->which ==
121 Z_StringOrNumeric_numeric)
125 *element->value.complex->list[src->minor-1]->u.numeric;
127 else if (element->value.complex->list[src->minor]->which ==
128 Z_StringOrNumeric_string)
134 element->value.complex->list[src->minor-1]->u.string;
/* Convenience wrapper around attr_find_ex for callers that do not want a
 * string attribute value (passes 0 for string_value). */
148 static int attr_find(AttrType *src, oid_value *attributeSetP)
150 return attr_find_ex(src, attributeSetP, 0);
/* Initialise the attribute cursor src for searching zapt. Only the first
 * signature line is visible here. Callers in this file pass an integer as
 * the third argument (1, 2, 5 and 8 for the cursors they name use, relation,
 * truncation and termset) -- presumably the attribute type to look for;
 * confirm against the full definition. */
153 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
/* Convert the internally mapped term src back to its output form in dst,
 * using zebra_maps_output for register type reg_type. Writing into dst is
 * bounded by IT_MAX_WORD-1 characters. */
176 void zebra_term_untrans(ZebraHandle zh, int reg_type,
177 char *dst, const char *src)
182 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
/* NOTE(review): if the elided structure is if/else, a null cp combined with
 * len already at the limit would reach the *cp loop below -- confirm. */
184 if (!cp && len < IT_MAX_WORD-1)
187 while (*cp && len < IT_MAX_WORD-1)
/* Record one dictionary hit in the grep_info p.
 *
 * name is the dictionary key of the hit and info is its payload: info[0]
 * holds the payload size (asserted to equal sizeof(ISAM_P)) and the ISAM
 * position follows at info+1. The position is stored in p->isam_p_buf,
 * growing that array and the parallel p->term_no array when they are full.
 * The tail of the function decodes the key and registers the term with
 * resultSetAddTerm; the condition guarding that part is not visible in this
 * excerpt. */
193 static void add_isam_p(const char *name, const char *info,
198 log_level_rpn = yaz_log_module_level("rpn");
/* Arrays full: grow both to the same new capacity. */
201 if (p->isam_p_indx == p->isam_p_size)
203 ISAM_P *new_isam_p_buf;
207 p->isam_p_size = 2*p->isam_p_size + 100;
208 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
212 memcpy(new_isam_p_buf, p->isam_p_buf,
213 p->isam_p_indx * sizeof(*p->isam_p_buf));
214 xfree(p->isam_p_buf);
216 p->isam_p_buf = new_isam_p_buf;
219 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* Preserve the existing term numbers. The source must be the old term_no
 * array; copying from isam_p_buf (as before) filled the new array with
 * ISAM position bytes. */
222 memcpy(new_term_no, p->term_no,
223 p->isam_p_indx * sizeof(*p->term_no));
226 p->term_no = new_term_no;
229 assert(*info == sizeof(*p->isam_p_buf));
230 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* Decode the key: an encoded ordinal, one byte that is logged and passed on
 * as name[len], then the mapped term text starting at name+len+1. */
237 char term_tmp[IT_MAX_WORD];
239 int len = key_SU_decode (&su_code, name);
241 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
242 yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
243 zebraExplain_lookup_ord (p->zh->reg->zei,
244 su_code, &db, &set, &use);
245 yaz_log(log_level_rpn, "grep: set=%d use=%d db=%s", set, use, db);
247 resultSetAddTerm(p->zh, p->termset, name[len], db,
/* Per-hit callback with an untyped context pointer: p is the struct
 * grep_info collecting the hits, and each hit is forwarded to add_isam_p. */
254 static int grep_handle(char *name, const char *info, void *p)
256 add_isam_p(name, info, (struct grep_info *) p);
/* Pre-scan the term at *src before one of the term_1xx translators runs.
 * ct1 and ct2 are optional sets of characters that are tested against the
 * current character; otherwise the input is mapped with zebra_maps_input and
 * compared with the space mapping. Callers treat a zero return as "no term
 * left". The exact skipping rules are on lines not visible here. */
260 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
261 const char *ct1, const char *ct2, int first)
263 const char *s1, *s0 = *src;
266 /* skip white space */
269 if (ct1 && strchr(ct1, *s0))
271 if (ct2 && strchr(ct2, *s0))
274 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
275 if (**map != *CHR_SPACE)
/* Format in_buf (in_size bytes) into out_buf as a readable dump for logging:
 * each byte is appended as "%02X:%c " (hex value, then a character).
 * out_size must exceed 20; once the output comes within 20 bytes of
 * out_size, ".." is appended (presumably followed by an early exit on an
 * elided line). */
284 static void esc_str(char *out_buf, int out_size,
285 const char *in_buf, int in_size)
291 assert(out_size > 20);
293 for (k = 0; k<in_size; k++)
295 int c = in_buf[k] & 0xff;
/* Bytes outside printable ASCII get special handling for the %c field; the
 * substitution itself is on an elided line. */
297 if (c < 32 || c > 126)
301 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
302 if (strlen(out_buf) > out_size-20)
304 strcat(out_buf, "..");
/* Characters that the term_1xx translators test for (via strchr) in mapped
 * term text before copying it into the dictionary regular expression; the
 * action taken on a match is on elided lines (presumably escaping). */
310 #define REGEX_CHARS " []()|.*+?!"
312 /* term_100: handle term, where trunc = none(no operators at all) */
/* Reads the next term from *src. The mapped form is written to dst (for the
 * dictionary lookup) and the raw source characters to dst_term. Callers
 * treat a zero return as "no term". With space_split set, a mapped space is
 * tested in the first branch below; otherwise spaces are remembered and
 * re-inserted when more text follows. */
313 static int term_100(ZebraMaps zebra_maps, int reg_type,
314 const char **src, char *dst, int space_split,
322 const char *space_start = 0;
323 const char *space_end = 0;
325 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
332 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
336 if (**map == *CHR_SPACE)
339 else /* complete subfield only. */
341 if (**map == *CHR_SPACE)
342 { /* save space mapping for later .. */
347 else if (space_start)
348 { /* reload last space */
349 while (space_start < space_end)
351 if (strchr(REGEX_CHARS, *space_start))
353 dst_term[j++] = *space_start;
354 dst[i++] = *space_start++;
357 space_start = space_end = 0;
360 /* add non-space char */
361 memcpy(dst_term+j, s1, s0 - s1);
367 if (strchr(REGEX_CHARS, *s1))
375 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
377 strcpy(dst + i, map[0]);
387 /* term_101: handle term, where trunc = Process # */
/* Same buffer roles as term_100: dst receives the dictionary regex text and
 * dst_term the raw term, which is NUL-terminated at the end. '#' is passed
 * to term_pre as the special character set. */
388 static int term_101(ZebraMaps zebra_maps, int reg_type,
389 const char **src, char *dst, int space_split,
397 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
406 dst_term[j++] = *s0++;
412 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
414 if (space_split && **map == *CHR_SPACE)
417 /* add non-space char */
418 memcpy(dst_term+j, s1, s0 - s1);
424 if (strchr(REGEX_CHARS, *s1))
432 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
434 strcpy(dst + i, map[0]);
440 dst_term[j++] = '\0';
445 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* Regular-expression characters in the term are recognised rather than
 * treated as plain text. When errors is non-null and the term starts with
 * '+', a digit and another '+' (with more text after it), that digit is
 * stored in *errors; string_term passes the value on to dict_lookup_grep. */
446 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
447 char *dst, int *errors, int space_split,
455 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
458 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
459 isdigit(((const unsigned char *)s0)[1]))
461 *errors = s0[1] - '0';
468 if (strchr("^\\()[].*+?|-", *s0))
477 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
479 if (space_split && **map == *CHR_SPACE)
482 /* add non-space char */
483 memcpy(dst_term+j, s1, s0 - s1);
489 if (strchr(REGEX_CHARS, *s1))
497 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
499 strcpy(dst + i, map[0]);
511 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Implemented as term_103 with a null errors pointer, so the "+N+" prefix
 * recognised by term_103 is not parsed here. */
512 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
513 char *dst, int space_split, char *dst_term)
515 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
520 /* term_104: handle term, where trunc = Process # and ! */
/* NOTE(review): the special-character set actually passed to term_pre below
 * is "?*#", which differs from the heading above -- confirm which is
 * intended. Buffer roles are as in term_100; dst_term is NUL-terminated at
 * the end. */
521 static int term_104(ZebraMaps zebra_maps, int reg_type,
522 const char **src, char *dst, int space_split,
530 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
537 dst_term[j++] = *s0++;
/* Optional decimal count following the special character; its digits are
 * accumulated into limit and also copied to dst_term. */
538 if (*s0 >= '0' && *s0 <= '9')
541 while (*s0 >= '0' && *s0 <= '9')
543 limit = limit * 10 + (*s0 - '0');
544 dst_term[j++] = *s0++;
564 dst_term[j++] = *s0++;
569 dst_term[j++] = *s0++;
575 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
577 if (space_split && **map == *CHR_SPACE)
580 /* add non-space char */
581 memcpy(dst_term+j, s1, s0 - s1);
587 if (strchr(REGEX_CHARS, *s1))
595 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
597 strcpy(dst + i, map[0]);
603 dst_term[j++] = '\0';
608 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* Shared by truncation values 105 and 106: string_term calls this with
 * right_truncate = 1 for 105 and 0 for 106. '*' and '!' are passed to
 * term_pre as the special characters. Buffer roles are as in term_100;
 * dst_term is NUL-terminated at the end. */
609 static int term_105(ZebraMaps zebra_maps, int reg_type,
610 const char **src, char *dst, int space_split,
611 char *dst_term, int right_truncate)
618 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
627 dst_term[j++] = *s0++;
632 dst_term[j++] = *s0++;
638 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
640 if (space_split && **map == *CHR_SPACE)
643 /* add non-space char */
644 memcpy(dst_term+j, s1, s0 - s1);
650 if (strchr(REGEX_CHARS, *s1))
658 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
660 strcpy(dst + i, map[0]);
672 dst_term[j++] = '\0';
678 /* gen_regular_rel - generate regular expression from relation
679 * val: border value (inclusive)
680 * islt: 1 if <=; 0 if >=.
/* Write into dst a regular expression matching the decimal integers on one
 * side of val (val itself included): those not above val when islt is 1,
 * those not below val when islt is 0. The expression starts with an
 * alternative covering every number of the opposite sign class
 * ("-[0-9]+" or "[0-9]+") and is then built digit by digit from the decimal
 * text of val. */
682 static void gen_regular_rel(char *dst, int val, int islt)
689 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
693 strcpy(dst, "(-[0-9]+|(");
701 strcpy(dst, "([0-9]+|-(");
713 sprintf(numstr, "%d", val);
714 for (w = strlen(numstr); --w >= 0; pos++)
733 strcpy(dst + dst_p, numstr);
734 dst_p = strlen(dst) - pos - 1;
762 for (i = 0; i<pos; i++)
775 /* match everything less than 10^(pos-1) */
777 for (i = 1; i<pos; i++)
778 strcat(dst, "[0-9]?");
782 /* match everything greater than 10^pos */
783 for (i = 0; i <= pos; i++)
784 strcat(dst, "[0-9]");
785 strcat(dst, "[0-9]*");
/* Copy one logical character from src at position *indx to the output
 * cursor *term_p, advancing both. A backslash is copied together with the
 * character that follows it, so an escaped pair is never split. */
790 void string_rel_add_char(char **term_p, const char *src, int *indx)
792 if (src[*indx] == '\\')
793 *(*term_p)++ = src[(*indx)++];
794 *(*term_p)++ = src[(*indx)++];
798 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
799 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
800 * >= abc ([b-].*|a[c-].*|ab[c-].*)
801 * ([^-a].*|a[^-b].*|ab[c-].*)
802 * < abc ([-0].*|a[-a].*|ab[-b].*)
803 * ([^a-].*|a[^b-].*|ab[^c-].*)
804 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
805 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Append to term_dict the regular expression for the next term of
 * *term_sub, according to the relation attribute (type 2) of zapt.
 *
 * Every case first obtains the mapped term in term_component via term_100
 * and then builds the expression at term_tmp, the current end of term_dict.
 * The case labels are elided in this excerpt; the log messages name the
 * relation each section handles. An unsupported relation stores
 * YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in *error_code. Callers treat a zero
 * return as "no term / failure". */
807 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
808 const char **term_sub, char *term_dict,
809 oid_value attributeSet,
810 int reg_type, int space_split, char *term_dst,
816 char *term_tmp = term_dict + strlen(term_dict);
817 char term_component[2*IT_MAX_WORD+20];
819 attr_init(&relation, zapt, 2);
820 relation_value = attr_find(&relation, NULL);
823 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
824 switch (relation_value)
827 if (!term_100(zh->reg->zebra_maps, reg_type,
828 term_sub, term_component,
829 space_split, term_dst))
831 yaz_log(log_level_rpn, "Relation <");
834 for (i = 0; term_component[i]; )
841 string_rel_add_char(&term_tmp, term_component, &j);
846 string_rel_add_char(&term_tmp, term_component, &i);
/* Length check on the expression generated so far. */
853 if ((term_tmp - term_dict) > IT_MAX_WORD)
860 if (!term_100(zh->reg->zebra_maps, reg_type,
861 term_sub, term_component,
862 space_split, term_dst))
864 yaz_log(log_level_rpn, "Relation <=");
867 for (i = 0; term_component[i]; )
872 string_rel_add_char(&term_tmp, term_component, &j);
876 string_rel_add_char(&term_tmp, term_component, &i);
885 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* Final alternative: the term itself. */
888 for (i = 0; term_component[i]; )
889 string_rel_add_char(&term_tmp, term_component, &i);
894 if (!term_100 (zh->reg->zebra_maps, reg_type,
895 term_sub, term_component, space_split, term_dst))
897 yaz_log(log_level_rpn, "Relation >");
900 for (i = 0; term_component[i];)
905 string_rel_add_char(&term_tmp, term_component, &j);
910 string_rel_add_char(&term_tmp, term_component, &i);
918 if ((term_tmp - term_dict) > IT_MAX_WORD)
921 for (i = 0; term_component[i];)
922 string_rel_add_char(&term_tmp, term_component, &i);
929 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
930 term_component, space_split, term_dst))
932 yaz_log(log_level_rpn, "Relation >=");
935 for (i = 0; term_component[i];)
942 string_rel_add_char(&term_tmp, term_component, &j);
945 if (term_component[i+1])
949 string_rel_add_char(&term_tmp, term_component, &i);
953 string_rel_add_char(&term_tmp, term_component, &i);
960 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* Equality: the mapped term wrapped in a group. */
969 yaz_log(log_level_rpn, "Relation =");
970 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
971 term_component, space_split, term_dst))
973 strcat(term_tmp, "(");
974 strcat(term_tmp, term_component);
975 strcat(term_tmp, ")");
978 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
/* Forward declaration: term_trunc calls string_term before its definition. */
984 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
985 const char **term_sub,
986 oid_value attributeSet, NMEM stream,
987 struct grep_info *grep_info,
988 int reg_type, int complete_flag,
989 int num_bases, char **basenames,
990 char *term_dst, int xpath_use,
991 struct ord_list **ol);
/* Build the result set for the next term of *term_sub.
 *
 * Resets the hit count in grep_info, lets string_term collect the ISAM
 * positions for the term across the given databases, and then turns those
 * positions into a result set via rset_trunc, stored in *rset. A result
 * other than ZEBRA_OK from string_term is tested right after the call; the
 * statements taken on that path and on the "no more terms" path are not
 * visible in this excerpt. */
993 static ZEBRA_RES term_trunc(ZebraHandle zh,
994 Z_AttributesPlusTerm *zapt,
995 const char **term_sub,
996 oid_value attributeSet, NMEM stream,
997 struct grep_info *grep_info,
998 int reg_type, int complete_flag,
999 int num_bases, char **basenames,
1001 const char *rank_type, int xpath_use,
1004 struct rset_key_control *kc)
1007 struct ord_list *ol;
1009 grep_info->isam_p_indx = 0;
1010 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
1011 reg_type, complete_flag, num_bases, basenames,
1012 term_dst, xpath_use, &ol);
1013 if (res != ZEBRA_OK)
1015 if (!*term_sub) /* no more terms ? */
1017 yaz_log(log_level_rpn, "term: %s", term_dst);
1018 *rset = rset_trunc(zh, grep_info->isam_p_buf,
1019 grep_info->isam_p_indx, term_dst,
1020 strlen(term_dst), rank_type, 1 /* preserve pos */,
1021 zapt->term->which, rset_nmem,
1022 kc, kc->scope, ol, reg_type);
/* Return the decimal text of v as a string allocated from nmem. */
1028 static char *nmem_strdup_i(NMEM nmem, int v)
1031 sprintf(val_str, "%d", v);
1032 return nmem_strdup(nmem, val_str);
/* Look up the next term of *term_sub in the dictionary for every database in
 * basenames and collect the matching ISAM positions in grep_info.
 *
 * For each database a dictionary regular expression is assembled in
 * term_dict: a group of the index ordinals selected by the use attribute
 * (each ordinal byte preceded by a byte of value 1), then a byte of value 1
 * and the register type, then the term pattern produced according to the
 * truncation attribute (type 5). The ordinals used are also appended to *ol.
 * Attribute problems are remembered in errCode / errString and reported with
 * zebra_setError; term_dst receives the term text as produced by the
 * term_1xx helpers. */
1035 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1036 const char **term_sub,
1037 oid_value attributeSet, NMEM stream,
1038 struct grep_info *grep_info,
1039 int reg_type, int complete_flag,
1040 int num_bases, char **basenames,
1041 char *term_dst, int xpath_use,
1042 struct ord_list **ol)
1044 char term_dict[2*IT_MAX_WORD+4000];
1046 AttrType truncation;
1047 int truncation_value;
1050 const char *use_string = 0;
1051 oid_value curAttributeSet = attributeSet;
1053 struct rpn_char_map_info rcmi;
1054 int space_split = complete_flag ? 0 : 1;
1056 int bases_ok = 0; /* no of databases with OK attribute */
1057 int errCode = 0; /* err code (if any is not OK) */
1058 char *errString = 0; /* addinfo */
1061 *ol = ord_list_create(stream);
1063 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1064 attr_init(&use, zapt, 1);
1065 use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1066 yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1067 attr_init(&truncation, zapt, 5);
1068 truncation_value = attr_find(&truncation, NULL);
1069 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1071 if (use_value == -1) /* no attribute - assume "any" */
1073 for (base_no = 0; base_no < num_bases; base_no++)
1077 int regex_range = 0;
1080 data1_local_attribute id_xpath_attr;
1081 data1_local_attribute *local_attr;
1082 int max_pos, prefix_len = 0;
1087 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1089 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1090 basenames[base_no]);
1093 if (xpath_use > 0 && use_value == -2)
1095 /* xpath mode and we have a string attribute */
1096 attp.local_attributes = &id_xpath_attr;
1097 attp.attset_ordinal = VAL_IDXPATH;
1098 id_xpath_attr.next = 0;
1100 use_value = xpath_use; /* xpath_use as use-attribute now */
1101 id_xpath_attr.local = use_value;
1103 else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1105 /* X-Path attribute, use numeric value directly */
1106 attp.local_attributes = &id_xpath_attr;
1107 attp.attset_ordinal = VAL_IDXPATH;
1108 id_xpath_attr.next = 0;
1109 id_xpath_attr.local = use_value;
1111 else if (use_string &&
1112 (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1115 /* we have a match for a raw string attribute */
1120 term_dict[prefix_len++] = '|';
1122 term_dict[prefix_len++] = '(';
1124 ord_len = key_SU_encode (ord, ord_buf);
1125 for (i = 0; i<ord_len; i++)
1127 term_dict[prefix_len++] = 1;
1128 term_dict[prefix_len++] = ord_buf[i];
1130 attp.local_attributes = 0; /* no more attributes */
1131 *ol = ord_list_append(stream, *ol, ord);
1135 /* lookup in the .att files . Allow string as well */
1136 if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1139 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1140 curAttributeSet, use_value, r);
1143 /* set was found, but value wasn't defined */
1144 errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1146 errString = nmem_strdup(stream, use_string);
1148 errString = nmem_strdup_i (stream, use_value);
1153 struct oident oident;
1155 oident.proto = PROTO_Z3950;
1156 oident.oclass = CLASS_ATTSET;
1157 oident.value = curAttributeSet;
1158 oid_ent_to_oid (&oident, oid);
1160 errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1161 errString = nmem_strdup(stream, oident.desc);
/* One alternative per local attribute that resolves to an index ordinal. */
1166 for (local_attr = attp.local_attributes; local_attr;
1167 local_attr = local_attr->next)
1172 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1173 attp.attset_ordinal,
1177 *ol = ord_list_append(stream, *ol, ord);
1179 term_dict[prefix_len++] = '|';
1181 term_dict[prefix_len++] = '(';
1183 ord_len = key_SU_encode (ord, ord_buf);
1184 for (i = 0; i<ord_len; i++)
1186 term_dict[prefix_len++] = 1;
1187 term_dict[prefix_len++] = ord_buf[i];
/* Close the ordinal group and append the register type. */
1194 term_dict[prefix_len++] = ')';
1195 term_dict[prefix_len++] = 1;
1196 term_dict[prefix_len++] = reg_type;
1197 yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1198 term_dict[prefix_len] = '\0';
1200 switch (truncation_value)
1202 case -1: /* not specified */
1203 case 100: /* do not truncate */
1204 if (!string_relation (zh, zapt, &termp, term_dict,
1206 reg_type, space_split, term_dst,
1211 zebra_setError(zh, relation_error, 0);
1218 case 1: /* right truncation */
1219 term_dict[j++] = '(';
1220 if (!term_100(zh->reg->zebra_maps, reg_type,
1221 &termp, term_dict + j, space_split, term_dst))
1226 strcat(term_dict, ".*)");
1228 case 2: /* left truncation */
1229 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1230 if (!term_100(zh->reg->zebra_maps, reg_type,
1231 &termp, term_dict + j, space_split, term_dst))
1236 strcat(term_dict, ")");
1238 case 3: /* left&right truncation */
1239 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1240 if (!term_100(zh->reg->zebra_maps, reg_type,
1241 &termp, term_dict + j, space_split, term_dst))
1246 strcat(term_dict, ".*)");
1248 case 101: /* process # in term */
1249 term_dict[j++] = '(';
1250 if (!term_101(zh->reg->zebra_maps, reg_type,
1251 &termp, term_dict + j, space_split, term_dst))
1256 strcat(term_dict, ")");
1258 case 102: /* Regexp-1 */
1259 term_dict[j++] = '(';
1260 if (!term_102(zh->reg->zebra_maps, reg_type,
1261 &termp, term_dict + j, space_split, term_dst))
1266 strcat(term_dict, ")");
1268 case 103: /* Regexp-2 */
1270 term_dict[j++] = '(';
/* term_103 may store an error count in regex_range, which is then passed on
 * to dict_lookup_grep below. */
1272 if (!term_103(zh->reg->zebra_maps, reg_type,
1273 &termp, term_dict + j, &regex_range,
1274 space_split, term_dst))
1279 strcat(term_dict, ")");
1281 case 104: /* process # and ! in term */
1282 term_dict[j++] = '(';
1283 if (!term_104(zh->reg->zebra_maps, reg_type,
1284 &termp, term_dict + j, space_split, term_dst))
1289 strcat(term_dict, ")");
1291 case 105: /* process * and ! in term */
1292 term_dict[j++] = '(';
1293 if (!term_105(zh->reg->zebra_maps, reg_type,
1294 &termp, term_dict + j, space_split, term_dst, 1))
1299 strcat(term_dict, ")");
1301 case 106: /* process * and ! in term */
1302 term_dict[j++] = '(';
1303 if (!term_105(zh->reg->zebra_maps, reg_type,
1304 &termp, term_dict + j, space_split, term_dst, 0))
1309 strcat(term_dict, ")");
1312 zebra_setError_zint(zh,
1313 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1320 const char *input = term_dict + prefix_len;
1321 esc_str(buf, sizeof(buf), input, strlen(input));
1325 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1326 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1327 grep_info, &max_pos, init_pos,
1330 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1335 zebra_setError(zh, errCode, errString);
1339 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1344 /* convert APT search term to UTF8 */
/* The converted term is written to termz as a NUL-terminated string of at
 * most IT_MAX_WORD-1 bytes. General terms go through zh->iconv_to_utf8 when
 * a converter is configured and are otherwise copied (truncated if too
 * long); character-string terms are copied likewise. Any other term kind
 * sets YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM. */
1345 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1349 Z_Term *term = zapt->term;
1351 switch (term->which)
1353 case Z_Term_general:
1354 if (zh->iconv_to_utf8 != 0)
1356 char *inbuf = term->u.general->buf;
1357 size_t inleft = term->u.general->len;
1358 char *outbuf = termz;
1359 size_t outleft = IT_MAX_WORD-1;
1362 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
/* Conversion failed: the converter is called again with null arguments
 * (presumably to reset its state) and a diagnostic is set. */
1364 if (ret == (size_t)(-1))
1366 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1369 YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1377 sizez = term->u.general->len;
1378 if (sizez > IT_MAX_WORD-1)
1379 sizez = IT_MAX_WORD-1;
1380 memcpy (termz, term->u.general->buf, sizez);
1381 termz[sizez] = '\0';
1384 case Z_Term_characterString:
1385 sizez = strlen(term->u.characterString);
1386 if (sizez > IT_MAX_WORD-1)
1387 sizez = IT_MAX_WORD-1;
1388 memcpy (termz, term->u.characterString, sizez);
1389 termz[sizez] = '\0';
1392 zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1398 /* convert APT SCAN term to internal cmap */
/* The term is first converted with zapt_term_to_utf8 into a local buffer
 * (a failure there is returned as ZEBRA_FAIL) and then mapped piece by piece
 * with zebra_maps_input for reg_type. Space mappings are tracked separately
 * in space_map; the statements that write the mapped bytes to termz are on
 * lines not visible here. */
1399 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1400 char *termz, int reg_type)
1402 char termz0[IT_MAX_WORD];
1404 if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1405 return ZEBRA_FAIL; /* error */
1409 const char *cp = (const char *) termz0;
1410 const char *cp_end = cp + strlen(cp);
1413 const char *space_map = NULL;
1416 while ((len = (cp_end - cp)) > 0)
1418 map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1419 if (**map == *CHR_SPACE)
1424 for (src = space_map; *src; src++)
1427 for (src = *map; *src; src++)
/* Release the arrays owned by grep_info (term_no and isam_p_buf). */
1436 static void grep_info_delete(struct grep_info *grep_info)
1439 xfree(grep_info->term_no);
1441 xfree(grep_info->isam_p_buf);
/* Initialise grep_info for a search on register reg_type: empty hit arrays
 * and no term set. If zapt carries an attribute of type 8, a term set is
 * created with resultSetAdd, named after the attribute's numeric value (as
 * decimal text) or its string value; failure to create it sets
 * YAZ_BIB1_ILLEGAL_RESULT_SET_NAME. */
1444 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1445 Z_AttributesPlusTerm *zapt,
1446 struct grep_info *grep_info,
1450 int termset_value_numeric;
1451 const char *termset_value_string;
1454 grep_info->term_no = 0;
1456 grep_info->isam_p_size = 0;
1457 grep_info->isam_p_buf = NULL;
1459 grep_info->reg_type = reg_type;
1460 grep_info->termset = 0;
1464 attr_init(&termset, zapt, 8);
1465 termset_value_numeric =
1466 attr_find_ex(&termset, NULL, &termset_value_string);
/* -1 means the attribute is absent; -2 means it has a string value. */
1467 if (termset_value_numeric != -1)
1470 const char *termset_name = 0;
1471 if (termset_value_numeric != -2)
1474 sprintf(resname, "%d", termset_value_numeric);
1475 termset_name = resname;
1478 termset_name = termset_value_string;
1479 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1480 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1481 if (!grep_info->termset)
1483 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1491 \brief Create result set(s) for list of terms
1492 \param zh Zebra Handle
1493 \param termz term as used in query but converted to UTF-8
1494 \param attributeSet default attribute set
1495 \param stream memory for result
1496 \param reg_type register type ('w', 'p',..)
1497 \param complete_flag whether it's phrases or not
1498 \param rank_type term flags for ranking
1499 \param xpath_use use attribute for X-Path (-1 for no X-path)
1500 \param num_bases number of databases
1501 \param basenames array of databases
1502 \param rset_nmem memory for result sets
1503 \param result_sets output result set for each term in list (output)
1504 \param num_result_sets number of output result sets (output)
1505 \param kc rset key control to be used for created result sets
/* Split termz into terms and build one result set per term with term_trunc.
 * The array *result_sets is allocated from stream and grown as needed;
 * *num_result_sets counts the entries filled in. If a term fails, the result
 * sets created so far are deleted and grep_info is released before the
 * error path continues. */
1507 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1508 Z_AttributesPlusTerm *zapt,
1510 oid_value attributeSet,
1512 int reg_type, int complete_flag,
1513 const char *rank_type, int xpath_use,
1514 int num_bases, char **basenames,
1516 RSET **result_sets, int *num_result_sets,
1517 struct rset_key_control *kc)
1519 char term_dst[IT_MAX_WORD+1];
1520 struct grep_info grep_info;
1521 const char *termp = termz;
1524 *num_result_sets = 0;
1526 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* Array full: allocate a larger one and copy the existing entries over. */
1532 if (alloc_sets == *num_result_sets)
1535 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1538 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1539 alloc_sets = alloc_sets + add;
1540 *result_sets = rnew;
1542 res = term_trunc(zh, zapt, &termp, attributeSet,
1544 reg_type, complete_flag,
1545 num_bases, basenames,
1546 term_dst, rank_type,
1547 xpath_use, rset_nmem,
1548 &(*result_sets)[*num_result_sets],
1550 if (res != ZEBRA_OK)
1553 for (i = 0; i < *num_result_sets; i++)
1554 rset_delete((*result_sets)[i]);
1555 grep_info_delete (&grep_info);
/* A null result set from term_trunc is tested here; presumably it marks the
 * end of the term list -- confirm. */
1558 if ((*result_sets)[*num_result_sets] == 0)
1560 (*num_result_sets)++;
1562 grep_info_delete(&grep_info);
/* Phrase search: build per-term result sets with term_list_trunc and combine
 * them. No terms gives a null result set, a single term is used as is, and
 * several terms are joined with rsprox_create using the ordered / exclusion /
 * relation / distance arguments annotated below. */
1566 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1567 Z_AttributesPlusTerm *zapt,
1568 const char *termz_org,
1569 oid_value attributeSet,
1571 int reg_type, int complete_flag,
1572 const char *rank_type, int xpath_use,
1573 int num_bases, char **basenames,
1576 struct rset_key_control *kc)
1578 RSET *result_sets = 0;
1579 int num_result_sets = 0;
1581 term_list_trunc(zh, zapt, termz_org, attributeSet,
1582 stream, reg_type, complete_flag,
1583 rank_type, xpath_use,
1584 num_bases, basenames,
1586 &result_sets, &num_result_sets, kc);
1587 if (res != ZEBRA_OK)
1589 if (num_result_sets == 0)
1590 *rset = rsnull_create (rset_nmem, kc, 0);
1591 else if (num_result_sets == 1)
1592 *rset = result_sets[0];
1594 *rset = rsprox_create(rset_nmem, kc, kc->scope,
1595 num_result_sets, result_sets,
1596 1 /* ordered */, 0 /* exclusion */,
1597 3 /* relation */, 1 /* distance */);
/* Or-list search: build per-term result sets with term_list_trunc and
 * combine them. No terms gives a null result set, a single term is used as
 * is, and several terms are joined with rsmulti_or_create. */
1603 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1604 Z_AttributesPlusTerm *zapt,
1605 const char *termz_org,
1606 oid_value attributeSet,
1608 int reg_type, int complete_flag,
1609 const char *rank_type,
1611 int num_bases, char **basenames,
1614 struct rset_key_control *kc)
1616 RSET *result_sets = 0;
1617 int num_result_sets = 0;
1619 term_list_trunc(zh, zapt, termz_org, attributeSet,
1620 stream, reg_type, complete_flag,
1621 rank_type, xpath_use,
1622 num_bases, basenames,
1624 &result_sets, &num_result_sets, kc);
1625 if (res != ZEBRA_OK)
1627 if (num_result_sets == 0)
1628 *rset = rsnull_create (rset_nmem, kc, 0);
1629 else if (num_result_sets == 1)
1630 *rset = result_sets[0];
1632 *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */,
1633 num_result_sets, result_sets);
/* And-list search: build per-term result sets with term_list_trunc and
 * combine them. No terms gives a null result set, a single term is used as
 * is, and several terms are joined with rsmulti_and_create. */
1639 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1640 Z_AttributesPlusTerm *zapt,
1641 const char *termz_org,
1642 oid_value attributeSet,
1644 int reg_type, int complete_flag,
1645 const char *rank_type,
1647 int num_bases, char **basenames,
1650 struct rset_key_control *kc)
1652 RSET *result_sets = 0;
1653 int num_result_sets = 0;
1655 term_list_trunc(zh, zapt, termz_org, attributeSet,
1656 stream, reg_type, complete_flag,
1657 rank_type, xpath_use,
1658 num_bases, basenames,
1660 &result_sets, &num_result_sets,
1662 if (res != ZEBRA_OK)
1664 if (num_result_sets == 0)
1665 *rset = rsnull_create (rset_nmem, kc, 0);
1666 else if (num_result_sets == 1)
1667 *rset = result_sets[0];
1669 *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1670 num_result_sets, result_sets);
/* Numeric counterpart of string_relation: read the next term with term_100,
 * convert it with atoi, and overwrite the end of term_dict (term_tmp) with a
 * regular expression for the relation attribute (type 2) of zapt. Range
 * relations use gen_regular_rel; equality matches the number with optional
 * leading zeros. The dictionary is then searched with dict_lookup_grep,
 * collecting hits in grep_info. An unsupported relation stores
 * YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in *error_code. The case labels are
 * elided here; the log messages name each relation. */
1676 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1677 const char **term_sub,
1679 oid_value attributeSet,
1680 struct grep_info *grep_info,
1690 char *term_tmp = term_dict + strlen(term_dict);
1693 attr_init(&relation, zapt, 2);
1694 relation_value = attr_find(&relation, NULL);
1696 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1698 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1701 term_value = atoi (term_tmp);
1702 switch (relation_value)
/* Strict relations are expressed as inclusive ones on the neighbouring
 * value. */
1705 yaz_log(log_level_rpn, "Relation <");
1706 gen_regular_rel(term_tmp, term_value-1, 1);
1709 yaz_log(log_level_rpn, "Relation <=");
1710 gen_regular_rel(term_tmp, term_value, 1);
1713 yaz_log(log_level_rpn, "Relation >=");
1714 gen_regular_rel(term_tmp, term_value, 0);
1717 yaz_log(log_level_rpn, "Relation >");
1718 gen_regular_rel(term_tmp, term_value+1, 0);
1722 yaz_log(log_level_rpn, "Relation =");
1723 sprintf(term_tmp, "(0*%d)", term_value);
1726 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1729 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1730 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1733 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1734 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
/* Numeric counterpart of string_term: for every database in basenames,
 * resolve the use attribute to index ordinals, assemble the ordinal group
 * and register type at the start of term_dict, and let numeric_relation
 * append the number pattern and search the dictionary. Attribute problems
 * are remembered in errCode / errString and reported with zebra_setError. */
1738 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1739 const char **term_sub,
1740 oid_value attributeSet,
1741 struct grep_info *grep_info,
1742 int reg_type, int complete_flag,
1743 int num_bases, char **basenames,
1744 char *term_dst, int xpath_use, NMEM stream)
/* NOTE(review): this buffer is much smaller than the one in string_term
 * (2*IT_MAX_WORD+4000) although the same kind of ordinal prefix is written
 * into it -- confirm the bound is sufficient. */
1746 char term_dict[2*IT_MAX_WORD+2];
1750 const char *use_string = 0;
1751 oid_value curAttributeSet = attributeSet;
1753 struct rpn_char_map_info rcmi;
1755 int bases_ok = 0; /* no of databases with OK attribute */
1756 int errCode = 0; /* err code (if any is not OK) */
1757 char *errString = 0; /* addinfo */
1759 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1760 attr_init(&use, zapt, 1);
1761 use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1763 if (use_value == -1)
1766 for (base_no = 0; base_no < num_bases; base_no++)
1769 data1_local_attribute id_xpath_attr;
1770 data1_local_attribute *local_attr;
1771 int max_pos, prefix_len = 0;
1772 int relation_error = 0;
1775 if (use_value == -2) /* string attribute (assume IDXPATH/any) */
1777 use_value = xpath_use;
1778 attp.local_attributes = &id_xpath_attr;
1779 attp.attset_ordinal = VAL_IDXPATH;
1780 id_xpath_attr.next = 0;
1781 id_xpath_attr.local = use_value;
1783 else if (curAttributeSet == VAL_IDXPATH)
1785 attp.local_attributes = &id_xpath_attr;
1786 attp.attset_ordinal = VAL_IDXPATH;
1787 id_xpath_attr.next = 0;
1788 id_xpath_attr.local = use_value;
1792 if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1795 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1796 curAttributeSet, use_value, r);
1799 errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1801 errString = nmem_strdup(stream, use_string);
1803 errString = nmem_strdup_i (stream, use_value);
1806 errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1810 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1812 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1813 basenames[base_no]);
/* One alternative per local attribute that resolves to an index ordinal;
 * each ordinal byte is preceded by a byte of value 1. */
1816 for (local_attr = attp.local_attributes; local_attr;
1817 local_attr = local_attr->next)
1823 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1824 attp.attset_ordinal,
1829 term_dict[prefix_len++] = '|';
1831 term_dict[prefix_len++] = '(';
1833 ord_len = key_SU_encode (ord, ord_buf);
1834 for (i = 0; i<ord_len; i++)
1836 term_dict[prefix_len++] = 1;
1837 term_dict[prefix_len++] = ord_buf[i];
1842 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, use_value);
/* Close the ordinal group and append the register type. */
1846 term_dict[prefix_len++] = ')';
1847 term_dict[prefix_len++] = 1;
1848 term_dict[prefix_len++] = reg_type;
1849 yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1850 term_dict[prefix_len] = '\0';
1851 if (!numeric_relation(zh, zapt, &termp, term_dict,
1852 attributeSet, grep_info, &max_pos, reg_type,
1853 term_dst, &relation_error))
1857 zebra_setError(zh, relation_error, 0);
1866 zebra_setError(zh, errCode, errString);
1870 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric search strategy for one attributes-plus-term (APT).
 *
 * Visible flow: log the term, prepare a grep_info, then call numeric_term()
 * on the remaining term text (termp) and turn each set of dictionary hits
 * into an RSET with rset_trunc().  The collected sets are finally reduced
 * to a single *rset: a null set when there are none, the set itself when
 * there is exactly one, or an AND over all of them.
 *
 * NOTE(review): this listing omits short lines (braces, loop header,
 * returns, some parameters such as termz/stream/rset_nmem/rset), so the
 * comments below describe only the statements that are visible.
 */
1874 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1875 Z_AttributesPlusTerm *zapt,
1877 oid_value attributeSet,
1879 int reg_type, int complete_flag,
1880 const char *rank_type, int xpath_use,
1881 int num_bases, char **basenames,
1884 struct rset_key_control *kc)
1886 char term_dst[IT_MAX_WORD+1];
1887 const char *termp = termz;
1888 RSET *result_sets = 0;
1889 int num_result_sets = 0;
1891 struct grep_info grep_info;
1894 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1895 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* result_sets is full: allocate a larger array from stream and copy the
   existing entries into it */
1899 if (alloc_sets == num_result_sets)
1902 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1905 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1906 alloc_sets = alloc_sets + add;
1909 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start each term with an empty hit list */
1910 grep_info.isam_p_indx = 0;
1911 res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1912 reg_type, complete_flag, num_bases, basenames,
1913 term_dst, xpath_use,
/* failure, or numeric_term left termp NULL; presumably this ends the
   term loop (the statement taken is not visible here) */
1915 if (res == ZEBRA_FAIL || termp == 0)
1917 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
/* build a result set from the ISAM positions gathered for this term */
1918 result_sets[num_result_sets] =
1919 rset_trunc(zh, grep_info.isam_p_buf,
1920 grep_info.isam_p_indx, term_dst,
1921 strlen(term_dst), rank_type,
1922 0 /* preserve position */,
1923 zapt->term->which, rset_nmem,
1924 kc, kc->scope, 0, reg_type);
1925 if (!result_sets[num_result_sets])
1929 grep_info_delete(&grep_info);
/* release every collected set (the guarding condition is not visible in
   this listing; presumably the failure path) */
1933 for (i = 0; i<num_result_sets; i++)
1934 rset_delete(result_sets[i]);
/* reduce the collected sets to the single output set */
1937 if (num_result_sets == 0)
1938 *rset = rsnull_create(rset_nmem, kc, 0);
1939 if (num_result_sets == 1)
1940 *rset = result_sets[0];
1942 *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1943 num_result_sets, result_sets);
/* "local" search strategy for one attributes-plus-term.
 *
 * Creates a temporary result set with rstemp_create() (its directory is
 * taken from the "setTmpDir" resource), opens it for writing and writes a
 * key into it.  How `key` is filled in, and how the set is closed, is not
 * visible in this listing.
 */
1949 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1950 Z_AttributesPlusTerm *zapt,
1952 oid_value attributeSet,
1954 const char *rank_type, NMEM rset_nmem,
1956 struct rset_key_control *kc)
1961 *rset = rstemp_create(rset_nmem, kc, kc->scope,
1962 res_get (zh->res, "setTmpDir"),0 );
1963 rsfd = rset_open(*rset, RSETF_WRITE);
1971 rset_write (rsfd, &key);
/* Turn a "sort" attributes-plus-term into an entry of sort_sequence.
 *
 * The term text is parsed as an integer slot index i; a Z_SortKeySpec is
 * built from the APT's attribute list and stored in
 * sort_sequence->specs[i].  The result set produced is a null set, since
 * the term only carries sort information.
 *
 * NOTE(review): short lines (braces, else, returns) are missing from this
 * listing; error handling for the visible checks cannot be seen.
 */
1976 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1977 oid_value attributeSet, NMEM stream,
1978 Z_SortKeySpecList *sort_sequence,
1979 const char *rank_type,
1982 struct rset_key_control *kc)
1985 int sort_relation_value;
1986 AttrType sort_relation_type;
/* attribute type 7 carries the sort relation */
1993 attr_init(&sort_relation_type, zapt, 7);
1994 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first use: allocate 10 spec slots from stream and clear them */
1996 if (!sort_sequence->specs)
1998 sort_sequence->num_specs = 10;
1999 sort_sequence->specs = (Z_SortKeySpec **)
2000 nmem_malloc(stream, sort_sequence->num_specs *
2001 sizeof(*sort_sequence->specs));
2002 for (i = 0; i<sort_sequence->num_specs; i++)
2003 sort_sequence->specs[i] = 0;
2005 if (zapt->term->which != Z_Term_general)
/* the general term is read as the slot number */
2008 i = atoi_n ((char *) zapt->term->u.general->buf,
2009 zapt->term->u.general->len);
/* slot beyond the allocated specs; NOTE(review): no check for a negative
   value of i is visible in this listing */
2010 if (i >= sort_sequence->num_specs)
2012 sprintf(termz, "%d", i);
/* resolve the attribute set to an OID for the sort attributes */
2014 oe.proto = PROTO_Z3950;
2015 oe.oclass = CLASS_ATTSET;
2016 oe.value = attributeSet;
2017 if (!oid_ent_to_oid (&oe, oid))
/* build the sort key spec: generic element -> sortAttributes key */
2020 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2021 sks->sortElement = (Z_SortElement *)
2022 nmem_malloc(stream, sizeof(*sks->sortElement));
2023 sks->sortElement->which = Z_SortElement_generic;
2024 sk = sks->sortElement->u.generic = (Z_SortKey *)
2025 nmem_malloc(stream, sizeof(*sk));
2026 sk->which = Z_SortKey_sortAttributes;
2027 sk->u.sortAttributes = (Z_SortAttributes *)
2028 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2030 sk->u.sortAttributes->id = oid;
2031 sk->u.sortAttributes->list = zapt->attributes;
/* relation 1 -> ascending, 2 -> descending; the last assignment
   (presumably the fallback branch) is ascending as well */
2033 sks->sortRelation = (int *)
2034 nmem_malloc(stream, sizeof(*sks->sortRelation));
2035 if (sort_relation_value == 1)
2036 *sks->sortRelation = Z_SortKeySpec_ascending;
2037 else if (sort_relation_value == 2)
2038 *sks->sortRelation = Z_SortKeySpec_descending;
2040 *sks->sortRelation = Z_SortKeySpec_ascending;
2042 sks->caseSensitivity = (int *)
2043 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2044 *sks->caseSensitivity = 0;
2046 sks->which = Z_SortKeySpec_null;
2047 sks->u.null = odr_nullval ();
2048 sort_sequence->specs[i] = sks;
/* a sort term contributes no hits of its own */
2049 *rset = rsnull_create (rset_nmem, kc, 0);
/* Parse an XPath expression given as a string-valued use attribute.
 *
 * Looks up attribute type 1 (use) of the APT; when its string value starts
 * with '/', the string is handed to zebra_parse_xpath_str(), which fills
 * at most `max` entries of `xpath` and whose return value is returned.
 * The value returned when there is no such string is not visible in this
 * listing.
 */
2054 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2055 oid_value attributeSet,
2056 struct xpath_location_step *xpath, int max, NMEM mem)
2058 oid_value curAttributeSet = attributeSet;
2060 const char *use_string = 0;
2062 attr_init(&use, zapt, 1);
2063 attr_find_ex(&use, &curAttributeSet, &use_string);
/* not an XPath use attribute unless it is a string beginning with '/' */
2065 if (!use_string || *use_string != '/')
2068 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Look up `term` (a dictionary regular expression) under the index given
 * by (curAttributeSet, use) and return the matching positions as an RSET.
 *
 * The dictionary key is the encoded index ordinal, wrapped in '(' ... ')'
 * with each ordinal byte preceded by the byte 1, followed by 1 and
 * reg_type, followed by the term itself.  A null result set is returned
 * when grep_info_prepare() fails (and on a second early exit whose
 * condition is not visible in this listing).
 */
2073 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2074 int reg_type, const char *term, int use,
2075 oid_value curAttributeSet, NMEM rset_nmem,
2076 struct rset_key_control *kc)
2079 struct grep_info grep_info;
2080 char term_dict[2048];
2083 int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
2084 int ord_len, i, r, max_pos;
2085 int term_type = Z_Term_characterString;
2086 const char *flags = "void";
2088 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2089 return rsnull_create(rset_nmem, kc, 0);
2092 return rsnull_create(rset_nmem, kc, 0);
/* build the key prefix: optional '|', then "(" <ordinal bytes> ")" */
2094 term_dict[prefix_len++] = '|';
2096 term_dict[prefix_len++] = '(';
2098 ord_len = key_SU_encode (ord, ord_buf);
2099 for (i = 0; i<ord_len; i++)
/* each ordinal byte is preceded by the byte 1 */
2101 term_dict[prefix_len++] = 1;
2102 term_dict[prefix_len++] = ord_buf[i];
2104 term_dict[prefix_len++] = ')';
2105 term_dict[prefix_len++] = 1;
2106 term_dict[prefix_len++] = reg_type;
/* NOTE(review): no length check on `term` is visible before this copy
   into the 2048-byte term_dict; confirm callers bound the term length */
2108 strcpy(term_dict+prefix_len, term);
2110 grep_info.isam_p_indx = 0;
2111 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2112 &grep_info, &max_pos, 0, grep_handle);
2113 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2114 grep_info.isam_p_indx);
/* convert the collected ISAM positions into a result set */
2115 rset = rset_trunc(zh, grep_info.isam_p_buf,
2116 grep_info.isam_p_indx, term, strlen(term),
2117 flags, 1, term_type,rset_nmem,
2118 kc, kc->scope, 0, reg_type);
2119 grep_info_delete(&grep_info);
/* Restrict an existing result set `rset` to hits that lie inside the
 * elements named by a parsed XPath (`xpath`, `xpath_len` steps).
 *
 * For every database and every XPath level (last step first) the visible
 * code builds a regular expression for the reversed tag path, fetches the
 * start-tag (use 1) and end-tag (use 2) positions with xpath_trunc(),
 * optionally an attribute set (use 3) for a [@name = value] predicate, and
 * wraps the current set with rsbetween_create().
 *
 * NOTE(review): this listing omits short lines, so loop bodies and the
 * final assignment/return are only partly visible.
 */
2124 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2125 oid_value attributeSet,
2126 int num_bases, char **basenames,
2127 NMEM stream, const char *rank_type, RSET rset,
2128 int xpath_len, struct xpath_location_step *xpath,
2131 struct rset_key_control *kc)
2133 oid_value curAttributeSet = attributeSet;
2143 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2144 for (i = 0; i<xpath_len; i++)
2146 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
/* XPath lookups use the IDXPATH attribute set */
2150 curAttributeSet = VAL_IDXPATH;
/* NOTE(review): the next seven listing lines read as examples of how
   XPath expressions map to nested ranges; they appear to be the body of a
   block comment whose delimiter lines are missing from this listing */
2160 a[@attr = value]/b[@other = othervalue]
2162 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2163 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2164 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2165 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2166 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2167 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* clear the dictionary character-map handler */
2171 dict_grep_cmap (zh->reg->dict, 0, 0);
2173 for (base_no = 0; base_no < num_bases; base_no++)
2175 int level = xpath_len;
2178 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2180 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2181 basenames[base_no]);
/* walk the location steps from the last one back to the first */
2185 while (--level >= 0)
2187 char xpath_rev[128];
2189 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* build the reversed path (steps level..1) as a dictionary regex */
2193 for (i = level; i >= 1; --i)
2195 const char *cp = xpath[i].part;
/* emits a pattern matching any single path component */
2201 memcpy (xpath_rev + len, "[^/]*", 5);
2204 else if (*cp == ' ')
2207 xpath_rev[len++] = 1;
2208 xpath_rev[len++] = ' ';
2212 xpath_rev[len++] = *cp;
2213 xpath_rev[len++] = '/';
2215 else if (i == 1) /* // case */
2217 xpath_rev[len++] = '.';
2218 xpath_rev[len++] = '*';
/* predicate of the form [@name = value] on this step */
2223 if (xpath[level].predicate &&
2224 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2225 xpath[level].predicate->u.relation.name[0])
2227 WRBUF wbuf = wrbuf_alloc();
/* name+1 skips the first character of the relation name (presumably the
   leading '@') */
2228 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2229 if (xpath[level].predicate->u.relation.value)
2231 const char *cp = xpath[level].predicate->u.relation.value;
2232 wrbuf_putc(wbuf, '=');
/* backslash-escape characters listed in REGEX_CHARS */
2236 if (strchr(REGEX_CHARS, *cp))
2237 wrbuf_putc(wbuf, '\\');
2238 wrbuf_putc(wbuf, *cp);
2242 wrbuf_puts(wbuf, "");
/* use 3: attribute name/value index */
2243 rset_attr = xpath_trunc(
2244 zh, stream, '0', wrbuf_buf(wbuf), 3,
2245 curAttributeSet, rset_nmem, kc);
2246 wrbuf_free(wbuf, 1);
2253 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2254 if (strlen(xpath_rev))
/* use 1: start-tag positions, use 2: end-tag positions */
2256 rset_start_tag = xpath_trunc(zh, stream, '0',
2257 xpath_rev, 1, curAttributeSet, rset_nmem, kc);
2259 rset_end_tag = xpath_trunc(zh, stream, '0',
2260 xpath_rev, 2, curAttributeSet, rset_nmem, kc);
/* keep only hits located between start and end tag (and, when given,
   matching the attribute set) */
2262 rset = rsbetween_create(rset_nmem, kc, kc->scope,
2263 rset_start_tag, rset,
2264 rset_end_tag, rset_attr);
/* Search for a single attributes-plus-term (APT) node of an RPN query.
 *
 * Visible flow:
 *  - zebra_maps_attr() derives the register type (reg_id), the search
 *    strategy name (search_type), rank_type and the complete/sort flags
 *    from the APT's attributes;
 *  - the term is converted to UTF-8 into termz;
 *  - a sort term is delegated to rpn_sort_spec();
 *  - an XPath given as use attribute is parsed into at most 10 steps;
 *  - the term is searched with the strategy named by search_type
 *    ("phrase", "and-list", "or-list", "local", "numeric"); any other
 *    name sets YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE;
 *  - the hits are finally narrowed by rpn_search_xpath().
 *
 * Fixes relative to the previous text of this listing:
 *  - the out-parameter passed to zebra_maps_attr() is "&reg_id"; it had
 *    been mangled into a registered-trademark sign followed by "_id";
 *  - the block comment describing the strategy arguments is terminated,
 *    so it no longer swallows the strategy dispatch that follows it.
 *
 * NOTE(review): short lines (braces, else, returns, a few declarations and
 * arguments) are missing from this listing and are not reconstructed.
 */
2273 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2274 oid_value attributeSet, NMEM stream,
2275 Z_SortKeySpecList *sort_sequence,
2276 int num_bases, char **basenames,
2279 struct rset_key_control *kc)
2281 ZEBRA_RES res = ZEBRA_OK;
2283 char *search_type = NULL;
2284 char rank_type[128];
2287 char termz[IT_MAX_WORD+1];
2290 struct xpath_location_step xpath[10];
2294 log_level_rpn = yaz_log_module_level("rpn");
/* map the APT attributes to register type, strategy and flags */
2297 zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2298 rank_type, &complete_flag, &sort_flag);
2300 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2301 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2302 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2303 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2305 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
/* sort terms only register a sort key (guard line not visible here) */
2309 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2310 rank_type, rset_nmem, rset, kc);
2311 /* consider if an X-Path query is used */
2312 xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2315 xpath_use = 1016; /* searching for element by default */
2316 if (xpath[xpath_len-1].part[0] == '@')
2317 xpath_use = 1015; /* last step an attribute .. */
2320 /* search using one of the various search type strategies
2321 termz is our UTF-8 search term
2322 attributeSet is top-level default attribute set
2323 stream is ODR for search
2324 reg_id is the register type
2325 complete_flag is 1 for complete subfield, 0 for incomplete
2326 xpath_use is use-attribute to be used for X-Path search, 0 for none */
2328 if (!strcmp(search_type, "phrase"))
2330 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2331 reg_id, complete_flag, rank_type,
2333 num_bases, basenames, rset_nmem,
2336 else if (!strcmp(search_type, "and-list"))
2338 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2339 reg_id, complete_flag, rank_type,
2341 num_bases, basenames, rset_nmem,
2344 else if (!strcmp(search_type, "or-list"))
2346 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2347 reg_id, complete_flag, rank_type,
2349 num_bases, basenames, rset_nmem,
2352 else if (!strcmp(search_type, "local"))
2354 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2355 rank_type, rset_nmem, rset, kc);
2357 else if (!strcmp(search_type, "numeric"))
2359 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2360 reg_id, complete_flag, rank_type,
2362 num_bases, basenames, rset_nmem,
/* no strategy matched the structure attribute */
2367 zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
2370 if (res != ZEBRA_OK)
/* narrow the hits to the requested XPath location */
2374 return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2375 stream, rank_type, *rset,
2376 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top() calls rpn_search_structure()
   before its definition appears in this file. */
2379 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2380 oid_value attributeSet,
2381 NMEM stream, NMEM rset_nmem,
2382 Z_SortKeySpecList *sort_sequence,
2383 int num_bases, char **basenames,
2384 RSET **result_sets, int *num_result_sets,
2385 Z_Operator *parent_op,
2386 struct rset_key_control *kc);
/* Entry point for evaluating a whole RPN query tree.
 *
 * Creates a key control with zebra_key_control_create(), evaluates the
 * tree rooted at zs with rpn_search_structure() (no parent operator) and,
 * when that does not return ZEBRA_OK, deletes every result set that was
 * produced.  On success exactly one result set is expected (asserted) and
 * it is stored in *result_set.
 *
 * NOTE(review): short lines, including the result_set parameter and the
 * return statements, are missing from this listing.
 */
2388 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2389 oid_value attributeSet,
2390 NMEM stream, NMEM rset_nmem,
2391 Z_SortKeySpecList *sort_sequence,
2392 int num_bases, char **basenames,
2395 RSET *result_sets = 0;
2396 int num_result_sets = 0;
2398 struct rset_key_control *kc = zebra_key_control_create(zh);
2400 res = rpn_search_structure(zh, zs, attributeSet,
2403 num_bases, basenames,
2404 &result_sets, &num_result_sets,
2405 0 /* no parent op */,
2407 if (res != ZEBRA_OK)
/* failure: drop whatever partial result sets were built */
2410 for (i = 0; i<num_result_sets; i++)
2411 rset_delete(result_sets[i]);
/* the top-level node has no parent, so it must have combined its
   children into a single set */
2416 assert(num_result_sets == 1);
2417 assert(result_sets);
2418 assert(*result_sets);
2419 *result_set = *result_sets;
/* Recursively evaluate one node of an RPN query tree.
 *
 * Output is a list of result sets (*result_sets, *num_result_sets)
 * allocated from stream.
 *
 * Complex node: both operands are evaluated and their result lists are
 * concatenated.  If this node has no parent operator, or its operator
 * differs from the parent's, or it is neither AND nor OR, the list is
 * combined here into a single set (AND, OR, AND-NOT or proximity).
 * Otherwise the list is handed up as is, so a run of identical AND/OR
 * operators ends up in one multi-way set created by an ancestor.
 *
 * Simple node: an APT is searched with rpn_search_APT(); a result set id
 * is resolved with resultSetRef(); anything else sets
 * YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): short lines (braces, switch header, returns, some
 * arguments) are missing from this listing.
 */
2425 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2426 oid_value attributeSet,
2427 NMEM stream, NMEM rset_nmem,
2428 Z_SortKeySpecList *sort_sequence,
2429 int num_bases, char **basenames,
2430 RSET **result_sets, int *num_result_sets,
2431 Z_Operator *parent_op,
2432 struct rset_key_control *kc)
2434 *num_result_sets = 0;
2435 if (zs->which == Z_RPNStructure_complex)
2438 Z_Operator *zop = zs->u.complex->roperator;
2439 RSET *result_sets_l = 0;
2440 int num_result_sets_l = 0;
2441 RSET *result_sets_r = 0;
2442 int num_result_sets_r = 0;
/* left operand */
2444 res = rpn_search_structure(zh, zs->u.complex->s1,
2445 attributeSet, stream, rset_nmem,
2447 num_bases, basenames,
2448 &result_sets_l, &num_result_sets_l,
2450 if (res != ZEBRA_OK)
2453 for (i = 0; i<num_result_sets_l; i++)
2454 rset_delete(result_sets_l[i]);
/* right operand */
2457 res = rpn_search_structure(zh, zs->u.complex->s2,
2458 attributeSet, stream, rset_nmem,
2460 num_bases, basenames,
2461 &result_sets_r, &num_result_sets_r,
2463 if (res != ZEBRA_OK)
/* right side failed: release the sets of both sides */
2466 for (i = 0; i<num_result_sets_l; i++)
2467 rset_delete(result_sets_l[i]);
2468 for (i = 0; i<num_result_sets_r; i++)
2469 rset_delete(result_sets_r[i]);
2473 /* make a new list of result for all children */
2474 *num_result_sets = num_result_sets_l + num_result_sets_r;
2475 *result_sets = nmem_malloc(stream, *num_result_sets *
2476 sizeof(**result_sets));
2477 memcpy(*result_sets, result_sets_l,
2478 num_result_sets_l * sizeof(**result_sets));
2479 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2480 num_result_sets_r * sizeof(**result_sets));
2482 if (!parent_op || parent_op->which != zop->which
2483 || (zop->which != Z_Operator_and &&
2484 zop->which != Z_Operator_or))
2486 /* parent node different from this one (or non-present) */
2487 /* we must combine result sets now */
2491 case Z_Operator_and:
2492 rset = rsmulti_and_create(rset_nmem, kc,
2494 *num_result_sets, *result_sets);
2497 rset = rsmulti_or_create(rset_nmem, kc,
2498 kc->scope, 0, /* termid */
2499 *num_result_sets, *result_sets);
2501 case Z_Operator_and_not:
2502 rset = rsbool_create_not(rset_nmem, kc,
2507 case Z_Operator_prox:
/* only the "known" proximity unit form is handled */
2508 if (zop->u.prox->which != Z_ProximityOperator_known)
2511 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* and of the known units only "word" */
2515 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2517 zebra_setError_zint(zh,
2518 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2519 *zop->u.prox->u.known);
2524 rset = rsprox_create(rset_nmem, kc,
2526 *num_result_sets, *result_sets,
2527 *zop->u.prox->ordered,
2528 (!zop->u.prox->exclusion ?
2529 0 : *zop->u.prox->exclusion),
2530 *zop->u.prox->relationType,
2531 *zop->u.prox->distance );
2535 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the child list by the single combined set */
2538 *num_result_sets = 1;
2539 *result_sets = nmem_malloc(stream, *num_result_sets *
2540 sizeof(**result_sets));
2541 (*result_sets)[0] = rset;
2544 else if (zs->which == Z_RPNStructure_simple)
2549 if (zs->u.simple->which == Z_Operand_APT)
2551 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2552 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2553 attributeSet, stream, sort_sequence,
2554 num_bases, basenames, rset_nmem, &rset,
2556 if (res != ZEBRA_OK)
2559 else if (zs->u.simple->which == Z_Operand_resultSetId)
2561 yaz_log(YLOG_DEBUG, "rpn_search_ref");
/* operand refers to a previously named result set */
2562 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2566 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2567 zs->u.simple->u.resultSetId);
2574 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple node always yields exactly one set */
2577 *num_result_sets = 1;
2578 *result_sets = nmem_malloc(stream, *num_result_sets *
2579 sizeof(**result_sets));
2580 (*result_sets)[0] = rset;
2584 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* One term found during a dictionary scan.  scan_handle() fills its
   `term` and `isam_p` members; the member declarations themselves are not
   visible in this listing. */
2590 struct scan_info_entry {
/* Member of struct scan_info (its opening line is not visible here): the
   array of entries that scan_handle() indexes into. */
2596 struct scan_info_entry *list;
/* Dictionary scan callback; `client` is a struct scan_info.
 *
 * Checks that `name` starts with the scan prefix, maps the scan position
 * `pos` to a slot in scan_info->list, and stores there a copy of the term
 * (the part of `name` after the prefix, allocated from scan_info->odr)
 * together with the ISAM_P that follows the length byte in `info`.
 * Return values are not visible in this listing.
 */
2602 static int scan_handle (char *name, const char *info, int pos, void *client)
2604 int len_prefix, idx;
2605 struct scan_info *scan_info = (struct scan_info *) client;
2607 len_prefix = strlen(scan_info->prefix);
/* entry does not carry the expected prefix */
2608 if (memcmp (name, scan_info->prefix, len_prefix))
/* slot index derived from the scan position */
2611 idx = scan_info->after - pos + scan_info->before;
2617 scan_info->list[idx].term = (char *)
2618 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2619 strcpy(scan_info->list[idx].term, name + len_prefix);
/* first byte of info is the payload length; the ISAM_P follows it */
2620 assert (*info == sizeof(ISAM_P));
2621 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/* Convert a dictionary term back to its display form and store a copy,
 * allocated from stream, in *dst.
 *
 * The term is first un-translated with zebra_term_untrans() into term_src.
 * When zh->iconv_from_utf8 is set, term_src is additionally converted with
 * yaz_iconv() into term_dst and that result is copied; otherwise term_src
 * is duplicated as is.  Handling of a failed conversion is not visible in
 * this listing.
 */
2625 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2626 char **dst, const char *src)
2628 char term_src[IT_MAX_WORD];
2629 char term_dst[IT_MAX_WORD];
2631 zebra_term_untrans (zh, reg_type, term_src, src);
2633 if (zh->iconv_from_utf8 != 0)
2636 char *inbuf = term_src;
2637 size_t inleft = strlen(term_src);
2638 char *outbuf = term_dst;
/* one byte of term_dst is kept back from the converter */
2639 size_t outleft = sizeof(term_dst)-1;
2642 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2644 if (ret == (size_t)(-1))
/* number of bytes the converter produced */
2647 len = outbuf - term_dst;
2648 *dst = nmem_malloc(stream, len + 1);
2650 memcpy (*dst, term_dst, len);
/* no converter configured: keep the un-translated term */
2654 *dst = nmem_strdup(stream, term_src);
/* Count the hits of `rset` and store the number in *count.
 *
 * Sets the set's hits_limit from zh->approx_limit, reads the set key by
 * key while tracking changes of key.mem[0] (psysno), compares the number
 * of counted items with the limit, and finally reports rset->hits_count.
 * The statements executed inside the two inner conditions are not visible
 * in this listing.
 */
2657 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2663 yaz_log(YLOG_DEBUG, "count_set");
2665 rset->hits_limit = zh->approx_limit;
2668 rfd = rset_open(rset, RSETF_READ);
2669 while (rset_read(rfd, &key,0 /* never mind terms */))
/* key.mem[0] differs from the previously seen value */
2671 if (key.mem[0] != psysno)
2673 psysno = key.mem[0];
/* limit reached (presumably stops reading; statement not visible) */
2674 if (rfd->counted_items >= rset->hits_limit)
2679 *count = rset->hits_count;
/* Scan the dictionary around a term and return the neighbouring terms
 * with their occurrence counts.
 *
 * Visible flow:
 *  - the attribute set defaults to VAL_BIB1 when none is given;
 *  - attribute type 8 may name a result set that limits the counts
 *    (limit_set);
 *  - attribute type 1 (use) and zebra_maps_attr() select the index and
 *    register type; for each database up to 32 index ordinals are
 *    collected in ords[];
 *  - for each ordinal the dictionary is scanned with dict_scan(), with
 *    scan_handle() collecting `before` + `after` terms;
 *  - the per-ordinal lists are merged term by term, first the terms after
 *    the scan position, then the terms before it; for every output term a
 *    result set is built (OR over ordinals holding the same term, AND
 *    with limit_set when given) and counted with count_set();
 *  - *list is pointed at the first real entry of the merged list.
 *
 * Fix relative to the previous text of this listing: the register-type
 * out-parameter passed to zebra_maps_attr() is "&reg_id"; it had been
 * mangled into a registered-trademark sign followed by "_id".
 *
 * NOTE(review): short lines (braces, else, returns, several declarations
 * and arguments) are missing from this listing and are not reconstructed.
 */
2682 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2683 oid_value attributeset,
2684 int num_bases, char **basenames,
2685 int *position, int *num_entries, ZebraScanEntry **list,
2686 int *is_partial, RSET limit_set, int return_zero)
2689 int pos = *position;
2690 int num = *num_entries;
2694 char termz[IT_MAX_WORD+20];
2697 const char *use_string = 0;
2698 struct scan_info *scan_info_array;
2699 ZebraScanEntry *glist;
2700 int ords[32], ord_no = 0;
2703 int bases_ok = 0; /* no of databases with OK attribute */
2704 int errCode = 0; /* err code (if any is not OK) */
2705 char *errString = 0; /* addinfo */
2708 char *search_type = NULL;
2709 char rank_type[128];
2712 NMEM rset_nmem = NULL;
2713 struct rset_key_control *kc = 0;
2718 if (attributeset == VAL_NONE)
2719 attributeset = VAL_BIB1;
2724 int termset_value_numeric;
2725 const char *termset_value_string;
/* attribute type 8 names a result set used to limit the counts */
2726 attr_init(&termset, zapt, 8);
2727 termset_value_numeric =
2728 attr_find_ex(&termset, NULL, &termset_value_string);
2729 if (termset_value_numeric != -1)
2732 const char *termset_name = 0;
/* a value other than -2 is numeric and is formatted as the set name;
   otherwise the string value is used */
2734 if (termset_value_numeric != -2)
2737 sprintf(resname, "%d", termset_value_numeric);
2738 termset_name = resname;
2741 termset_name = termset_value_string;
2743 limit_set = resultSetRef (zh, termset_name);
2747 yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2748 pos, num, attributeset);
2750 attr_init(&use, zapt, 1);
2751 use_value = attr_find_ex(&use, &attributeset, &use_string);
2753 if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2754 rank_type, &complete_flag, &sort_flag))
2757 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2760 yaz_log(YLOG_DEBUG, "use_value = %d", use_value);
2762 if (use_value == -1)
/* collect index ordinals for every database, at most 32 in total */
2764 for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2766 data1_local_attribute *local_attr;
2770 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2772 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2773 basenames[base_no]);
2779 (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2782 /* we have a match for a raw string attribute */
2784 ords[ord_no++] = ord;
2785 attp.local_attributes = 0; /* no more attributes */
2791 if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2794 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2795 attributeset, use_value);
2798 errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
2800 zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2803 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2808 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
2814 for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2815 local_attr = local_attr->next)
2817 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
2818 attp.attset_ordinal,
2821 ords[ord_no++] = ord;
/* no database accepted the attribute: report the recorded error */
2824 if (!bases_ok && errCode)
2826 zebra_setError(zh, errCode, errString);
2835 /* prepare dictionary scanning */
2847 yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2848 "after=%d before+after=%d",
2849 pos, num, before, after, before+after);
2850 scan_info_array = (struct scan_info *)
2851 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
/* one dictionary scan per index ordinal */
2852 for (i = 0; i < ord_no; i++)
2854 int j, prefix_len = 0;
2855 int before_tmp = before, after_tmp = after;
2856 struct scan_info *scan_info = scan_info_array + i;
2857 struct rpn_char_map_info rcmi;
2859 rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2861 scan_info->before = before;
2862 scan_info->after = after;
2863 scan_info->odr = stream;
2865 scan_info->list = (struct scan_info_entry *)
2866 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
/* a NULL term marks a slot that the scan did not fill */
2867 for (j = 0; j<before+after; j++)
2868 scan_info->list[j].term = NULL;
/* dictionary prefix: encoded ordinal followed by the register type */
2870 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2871 termz[prefix_len++] = reg_id;
2872 termz[prefix_len] = 0;
2873 strcpy(scan_info->prefix, termz);
2875 if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL)
2878 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2879 scan_info, scan_handle);
2881 glist = (ZebraScanEntry *)
2882 odr_malloc(stream, (before+after)*sizeof(*glist));
2884 rset_nmem = nmem_create();
2885 kc = zebra_key_control_create(zh);
2887 /* consider terms after main term */
2888 for (i = 0; i < ord_no; i++)
2892 for (i = 0; i<after; i++)
2895 const char *mterm = NULL;
2898 int lo = i + pos-1; /* offset in result list */
2900 /* find: j0 is the first of the minimal values */
2901 for (j = 0; j < ord_no; j++)
2903 if (ptr[j] < before+after && ptr[j] >= 0 &&
2904 (tst = scan_info_array[j].list[ptr[j]].term) &&
2905 (!mterm || strcmp (tst, mterm) < 0))
2912 break; /* no value found, stop */
2914 /* get result set for first one , but only if it's within bounds */
2917 /* get result set for first term */
2918 zebra_term_untrans_iconv(zh, stream->mem, reg_id,
2919 &glist[lo].term, mterm);
2920 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2921 glist[lo].term, strlen(glist[lo].term),
2922 NULL, 0, zapt->term->which, rset_nmem,
2923 kc, kc->scope, 0, reg_id);
2925 ptr[j0]++; /* move index for this set .. */
2926 /* get result set for remaining scan terms */
2927 for (j = j0+1; j<ord_no; j++)
/* another ordinal holds the same term: OR its hits into rset */
2929 if (ptr[j] < before+after && ptr[j] >= 0 &&
2930 (tst = scan_info_array[j].list[ptr[j]].term) &&
2931 !strcmp (tst, mterm))
2940 zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2942 strlen(glist[lo].term), NULL, 0,
2943 zapt->term->which,rset_nmem,
2944 kc, kc->scope, 0, reg_id);
2945 rset = rsmulti_or_create(rset_nmem, kc,
2946 kc->scope, 0 /* termid */,
2955 /* merge with limit_set if given */
2960 rsets[1] = rset_dup(limit_set);
2962 rset = rsmulti_and_create(rset_nmem, kc,
2967 count_set(zh, rset, &count);
2968 glist[lo].occurrences = count;
/* fewer terms than requested were found after the scan position */
2974 *num_entries -= (after-i);
2976 if (*num_entries < 0)
2979 nmem_destroy(rset_nmem);
2984 /* consider terms before main term */
2985 for (i = 0; i<ord_no; i++)
2988 for (i = 0; i<before; i++)
2991 const char *mterm = NULL;
2994 int lo = before-1-i; /* offset in result list */
/* pick the largest term still pending among the ordinals */
2997 for (j = 0; j <ord_no; j++)
2999 if (ptr[j] < before && ptr[j] >= 0 &&
3000 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3001 (!mterm || strcmp (tst, mterm) > 0))
3010 zebra_term_untrans_iconv(zh, stream->mem, reg_id,
3011 &glist[lo].term, mterm);
3014 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
3015 glist[lo].term, strlen(glist[lo].term),
3016 NULL, 0, zapt->term->which, rset_nmem,
3017 kc, kc->scope, 0, reg_id);
3021 for (j = j0+1; j<ord_no; j++)
/* another ordinal holds the same term: OR its hits into rset */
3023 if (ptr[j] < before && ptr[j] >= 0 &&
3024 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3025 !strcmp (tst, mterm))
3030 rsets[1] = rset_trunc(
3032 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
3034 strlen(glist[lo].term), NULL, 0,
3035 zapt->term->which, rset_nmem,
3036 kc, kc->scope, 0, reg_id);
3037 rset = rsmulti_or_create(rset_nmem, kc,
3038 kc->scope, 0 /* termid */, 2, rsets);
3047 rsets[1] = rset_dup(limit_set);
3049 rset = rsmulti_and_create(rset_nmem, kc,
3050 kc->scope, 2, rsets);
3052 count_set(zh, rset, &count);
3053 glist[lo].occurrences = count;
3057 nmem_destroy(rset_nmem);
3064 if (*num_entries <= 0)
3071 *list = glist + i; /* list is set to first 'real' entry */
3073 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3074 *position, *num_entries);