1 /* $Id: zrpn.c,v 1.194 2005-06-02 11:59:54 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
32 #include <yaz/diagbib1.h>
34 #include <zebra_xpath.h>
/* Context passed as the opaque argument of rpn_char_map_handler(); that
 * handler reads its zm and reg_type members (the member list itself is not
 * visible in this listing). */
39 struct rpn_char_map_info
/* NOTE(review): this member follows a gap in the listing; it presumably
 * belongs to the AttrType cursor (attr_find_ex dereferences src->zapt)
 * rather than to rpn_char_map_info - confirm against the full file. */
50 Z_AttributesPlusTerm *zapt;
/* NOTE(review): presumably flags that log_level_rpn has been looked up;
 * only the declaration is visible here. */
54 static int log_level_set = 0;
/* Log level used for "rpn" diagnostics; add_isam_p() assigns it from
 * yaz_log_module_level("rpn"). */
55 static int log_level_rpn = 0;
/* Character-map callback registered with dict_grep_cmap() by
 * rpn_char_map_prepare().
 * vp   - the struct rpn_char_map_info given at registration
 * from - in/out cursor into the input text
 * len  - number of bytes available at *from
 * The input is mapped with zebra_maps_input() for the register type stored
 * in the context. */
57 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
59 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
60 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
/* diagnostic dump: a separator, then mapped output bytes as two hex digits */
64 const char *outp = *out;
65 yaz_log(YLOG_LOG, "---");
68 yaz_log(YLOG_LOG, "%02X", *outp);
/* Fill map_info with the register's zebra_maps and the given reg_type, then
 * install rpn_char_map_handler on reg->dict with map_info as its context.
 * map_info is stored by pointer, so it must stay valid while the dictionary
 * may invoke the handler. */
76 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
77 struct rpn_char_map_info *map_info)
79 map_info->zm = reg->zebra_maps;
80 map_info->reg_type = reg_type;
81 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Find the next attribute of type src->type in src->zapt, scanning from
 * position src->major.
 * attributeSetP - if non-NULL and the element names an attribute set, receives
 *                 that set's oid value
 * string_value  - used for string members of complex values
 * Numeric attributes return the numeric value. Callers in this file treat a
 * return of -1 as "attribute absent" and -2 as "value is a string". */
84 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
85 const char **string_value)
89 num_attributes = src->zapt->attributes->num_attributes;
90 while (src->major < num_attributes)
92 Z_AttributeElement *element;
94 element = src->zapt->attributes->attributes[src->major];
95 if (src->type == *element->attributeType)
97 switch (element->which)
99 case Z_AttributeValue_numeric:
/* report the attribute set of the matching element when requested */
101 if (element->attributeSet && attributeSetP)
105 attrset = oid_getentbyoid(element->attributeSet);
106 *attributeSetP = attrset->value;
108 return *element->value.numeric;
110 case Z_AttributeValue_complex:
/* src->minor indexes the members of a complex value */
111 if (src->minor >= element->value.complex->num_list)
113 if (element->attributeSet && attributeSetP)
117 attrset = oid_getentbyoid(element->attributeSet);
118 *attributeSetP = attrset->value;
/* NOTE(review): the value is read at index minor-1 after the kind was tested
 * at index minor; presumably src->minor is advanced on a line not shown. */
120 if (element->value.complex->list[src->minor]->which ==
121 Z_StringOrNumeric_numeric)
125 *element->value.complex->list[src->minor-1]->u.numeric;
127 else if (element->value.complex->list[src->minor]->which ==
128 Z_StringOrNumeric_string)
134 element->value.complex->list[src->minor-1]->u.string;
/* Convenience wrapper around attr_find_ex() for callers that do not need a
 * string value: passes 0 for string_value. */
148 static int attr_find(AttrType *src, oid_value *attributeSetP)
150 return attr_find_ex(src, attributeSetP, 0);
/* Initialise an AttrType cursor over zapt (body not visible in this listing).
 * Callers in this file pass a third argument of 1 for the "use" cursor, 2 for
 * "relation", 5 for "truncation" and 8 for "termset". */
153 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
/* Convert an internal (mapped) term in src back to displayable form in dst
 * using zebra_maps_output() for the given register type. Both visible copy
 * conditions stop once len reaches IT_MAX_WORD-1, so dst needs room for
 * IT_MAX_WORD bytes (add_isam_p passes a char[IT_MAX_WORD] buffer). */
176 void zebra_term_untrans(ZebraHandle zh, int reg_type,
177 char *dst, const char *src)
182 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
/* no output mapping for this position */
184 if (!cp && len < IT_MAX_WORD-1)
/* copy the mapped output string */
187 while (*cp && len < IT_MAX_WORD-1)
/* Record one dictionary hit in the grep_info accumulator p.
 * name - dictionary key of the hit; key_SU_decode() reads an ordinal from its
 *        start, name[len] is logged as a character and the term text starts
 *        at name+len+1
 * info - dictionary payload: info[0] is asserted to equal sizeof(ISAM_P) and
 *        the ISAM_P value is copied from info+1
 * The isam_p_buf and term_no arrays grow together (2*size + 100) when full.
 * When p->termset is set, the hit is also added to that term set. */
193 static void add_isam_p(const char *name, const char *info,
198 log_level_rpn = yaz_log_module_level("rpn");
/* buffers full: enlarge both parallel arrays */
201 if (p->isam_p_indx == p->isam_p_size)
203 ISAM_P *new_isam_p_buf;
207 p->isam_p_size = 2*p->isam_p_size + 100;
208 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
212 memcpy(new_isam_p_buf, p->isam_p_buf,
213 p->isam_p_indx * sizeof(*p->isam_p_buf));
214 xfree(p->isam_p_buf);
216 p->isam_p_buf = new_isam_p_buf;
219 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* carry over the existing term numbers; the source must be the old term_no
 * array, not isam_p_buf (which by now points at the new ISAM_P buffer) */
222 memcpy(new_term_no, p->term_no,
223 p->isam_p_indx * sizeof(*p->term_no));
226 p->term_no = new_term_no;
/* first payload byte is the length of the ISAM_P that follows */
229 assert(*info == sizeof(*p->isam_p_buf));
230 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* term-set bookkeeping: decode the key, log it and register the term */
237 char term_tmp[IT_MAX_WORD];
239 int len = key_SU_decode (&su_code, name);
241 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
242 yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
243 zebraExplain_lookup_ord (p->zh->reg->zei,
244 su_code, &db, &set, &use);
245 yaz_log(log_level_rpn, "grep: set=%d use=%d db=%s", set, use, db);
247 resultSetAddTerm(p->zh, p->termset, name[len], db,
/* Hit callback with an untyped context: p is a struct grep_info * and the hit
 * is forwarded to add_isam_p(). */
254 static int grep_handle(char *name, const char *info, void *p)
256 add_isam_p(name, info, (struct grep_info *) p);
/* Advance *src past leading white space before a term is parsed.
 * ct1, ct2 - optional sets of characters that are tested with strchr() before
 *            the character is run through the input map
 * first    - passed on to zebra_maps_input()
 * Callers test the result with '!' and bail out when it is zero. */
260 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
261 const char *ct1, const char *ct2, int first)
263 const char *s1, *s0 = *src;
266 /* skip white space */
269 if (ct1 && strchr(ct1, *s0))
271 if (ct2 && strchr(ct2, *s0))
/* map the next input unit and check whether it maps to a space */
274 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
275 if (**map != *CHR_SPACE)
/* Build a printable dump of in_buf (in_size bytes) in out_buf for logging.
 * Each byte is appended as "HH:c " (hex code, colon, a character, space).
 * out_size must exceed 20 (asserted); when the output gets within 20 bytes of
 * out_size, ".." is appended. */
284 static void esc_str(char *out_buf, int out_size,
285 const char *in_buf, int in_size)
291 assert(out_size > 20);
293 for (k = 0; k<in_size; k++)
295 int c = in_buf[k] & 0xff;
/* bytes outside 32..126 are treated as non-printable */
297 if (c < 32 || c > 126)
301 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
/* output nearly full: mark with ".." */
302 if (strlen(out_buf) > out_size-20)
304 strcat(out_buf, "..");
/* Characters that the term_10x functions look for with strchr() when copying
 * term bytes into a dictionary expression. */
310 #define REGEX_CHARS " []()|.*+?!"
/* Parameters as used in the visible code:
 * src         - in/out cursor into the query term
 * dst         - receives the mapped form used for dictionary lookup
 * space_split - selects between the two branches below (word-wise versus
 *               "complete subfield")
 * dst_term    - receives the raw bytes of the term as typed
 * Callers test the result with '!' to detect that no term was produced. */
312 /* term_100: handle term, where trunc = none(no operators at all) */
313 static int term_100(ZebraMaps zebra_maps, int reg_type,
314 const char **src, char *dst, int space_split,
/* remembered run of space-mapped input, emitted only if more text follows */
322 const char *space_start = 0;
323 const char *space_end = 0;
325 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
332 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
336 if (**map == *CHR_SPACE)
339 else /* complete subfield only. */
341 if (**map == *CHR_SPACE)
342 { /* save space mapping for later .. */
347 else if (space_start)
348 { /* reload last space */
349 while (space_start < space_end)
351 if (strchr(REGEX_CHARS, *space_start))
353 dst_term[j++] = *space_start;
354 dst[i++] = *space_start++;
357 space_start = space_end = 0;
360 /* add non-space char */
361 memcpy(dst_term+j, s1, s0 - s1);
367 if (strchr(REGEX_CHARS, *s1))
/* debug dump of the mapped string, then append it to dst */
375 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
377 strcpy(dst + i, map[0]);
/* Same parameter roles as term_100: src is the in/out term cursor, dst the
 * mapped lookup form, dst_term the raw term text (NUL-terminated at the end).
 * '#' is passed to term_pre() as the special-character set. */
387 /* term_101: handle term, where trunc = Process # */
388 static int term_101(ZebraMaps zebra_maps, int reg_type,
389 const char **src, char *dst, int space_split,
397 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
/* operator character is copied to dst_term verbatim */
406 dst_term[j++] = *s0++;
412 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
/* in word mode a space-mapped unit is tested for here */
414 if (space_split && **map == *CHR_SPACE)
417 /* add non-space char */
418 memcpy(dst_term+j, s1, s0 - s1);
424 if (strchr(REGEX_CHARS, *s1))
432 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
434 strcpy(dst + i, map[0]);
440 dst_term[j++] = '\0';
/* errors - optional output (may be NULL, as term_102 passes); when the term
 *          starts with '+', a digit, '+' and at least one more character,
 *          *errors is set to that digit's value
 * Other parameters have the same roles as in term_100. */
445 /* term_103: handle term, where trunc = re-2 (regular expressions) */
446 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
447 char *dst, int *errors, int space_split,
455 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
/* leading "+N+" prefix */
458 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
459 isdigit(((const unsigned char *)s0)[1]))
461 *errors = s0[1] - '0';
/* regular-expression operator characters in the query */
468 if (strchr("^\\()[].*+?|-", *s0))
477 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
479 if (space_split && **map == *CHR_SPACE)
482 /* add non-space char */
483 memcpy(dst_term+j, s1, s0 - s1);
489 if (strchr(REGEX_CHARS, *s1))
497 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
499 strcpy(dst + i, map[0]);
511 /* term_102: handle term, where trunc = re-1 (regular expressions); delegates to term_103 with errors == NULL */
512 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
513 char *dst, int space_split, char *dst_term)
515 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
/* Same parameter roles as term_100; dst_term is NUL-terminated at the end.
 * NOTE(review): the header below mentions '#' and '!', while the character
 * set passed to term_pre() is "?*#" - confirm which operators are intended. */
520 /* term_104: handle term, where trunc = Process # and ! */
521 static int term_104(ZebraMaps zebra_maps, int reg_type,
522 const char **src, char *dst, int space_split,
530 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
537 dst_term[j++] = *s0++;
/* optional decimal count following the operator, accumulated into limit */
538 if (*s0 >= '0' && *s0 <= '9')
541 while (*s0 >= '0' && *s0 <= '9')
543 limit = limit * 10 + (*s0 - '0');
544 dst_term[j++] = *s0++;
564 dst_term[j++] = *s0++;
569 dst_term[j++] = *s0++;
575 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
577 if (space_split && **map == *CHR_SPACE)
580 /* add non-space char */
581 memcpy(dst_term+j, s1, s0 - s1);
587 if (strchr(REGEX_CHARS, *s1))
595 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
597 strcpy(dst + i, map[0]);
603 dst_term[j++] = '\0';
/* Same parameter roles as term_100, plus:
 * right_truncate - string_term() passes 1 for truncation 105 and 0 for 106;
 *                  its use is on lines not visible in this listing. */
608 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
609 static int term_105(ZebraMaps zebra_maps, int reg_type,
610 const char **src, char *dst, int space_split,
611 char *dst_term, int right_truncate)
618 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
/* operator characters are copied to dst_term verbatim */
627 dst_term[j++] = *s0++;
632 dst_term[j++] = *s0++;
638 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
640 if (space_split && **map == *CHR_SPACE)
643 /* add non-space char */
644 memcpy(dst_term+j, s1, s0 - s1);
650 if (strchr(REGEX_CHARS, *s1))
658 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
660 strcpy(dst + i, map[0]);
672 dst_term[j++] = '\0';
678 /* gen_regular_rel - generate regular expression from relation
679 * val: border value (inclusive)
680 * islt: 1 if <=; 0 if >=.
/* Write into dst a regular expression over decimal digit strings for the
 * bound val. numeric_relation() calls it with islt = 1 for "<" and "<="
 * and islt = 0 for ">=" and ">", adjusting val by one for the strict forms.
 * dst must be large enough for the generated expression; no size is passed. */
682 static void gen_regular_rel(char *dst, int val, int islt)
689 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
/* the expression starts with an alternative covering all numbers of one sign */
693 strcpy(dst, "(-[0-9]+|(");
701 strcpy(dst, "([0-9]+|-(");
/* walk the decimal digits of val from the last one; pos counts digits */
713 sprintf(numstr, "%d", val);
714 for (w = strlen(numstr); --w >= 0; pos++)
733 strcpy(dst + dst_p, numstr);
734 dst_p = strlen(dst) - pos - 1;
762 for (i = 0; i<pos; i++)
775 /* match everything less than 10^(pos-1) */
777 for (i = 1; i<pos; i++)
778 strcat(dst, "[0-9]?");
782 /* match everything greater than 10^pos */
783 for (i = 0; i <= pos; i++)
784 strcat(dst, "[0-9]");
785 strcat(dst, "[0-9]*");
/* Copy one logical character from src at index *indx to **term_p, advancing
 * both. A backslash is copied together with the character that follows it,
 * so an escaped pair is never split. */
790 void string_rel_add_char(char **term_p, const char *src, int *indx)
792 if (src[*indx] == '\\')
793 *(*term_p)++ = src[(*indx)++];
794 *(*term_p)++ = src[(*indx)++];
798 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
799 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
800 * >= abc ([b-].*|a[c-].*|ab[c-].*)
801 * ([^-a].*|a[^-b].*|ab[c-].*)
802 * < abc ([-0].*|a[-a].*|ab[-b].*)
803 * ([^a-].*|a[^b-].*|ab[^c-].*)
804 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
805 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Append to term_dict a regular expression that realises the relation
 * attribute (type 2) of zapt for the next term in *term_sub.
 * term_dict  - expression buffer; output is appended at its current end
 * term_dst   - receives the raw term text (via term_100)
 * error_code - set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for an unsupported
 *              relation
 * Each supported relation first extracts the term with term_100() into
 * term_component and then emits its expression piece by piece. The caller
 * tests the result with '!'. */
807 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
808 const char **term_sub, char *term_dict,
809 oid_value attributeSet,
810 int reg_type, int space_split, char *term_dst,
/* write position: end of what the caller already put in term_dict */
816 char *term_tmp = term_dict + strlen(term_dict);
817 char term_component[2*IT_MAX_WORD+20];
819 attr_init(&relation, zapt, 2);
820 relation_value = attr_find(&relation, NULL);
823 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
824 switch (relation_value)
827 if (!term_100(zh->reg->zebra_maps, reg_type,
828 term_sub, term_component,
829 space_split, term_dst))
831 yaz_log(log_level_rpn, "Relation <");
834 for (i = 0; term_component[i]; )
841 string_rel_add_char(&term_tmp, term_component, &j);
846 string_rel_add_char(&term_tmp, term_component, &i);
/* guard against the generated expression growing past IT_MAX_WORD */
853 if ((term_tmp - term_dict) > IT_MAX_WORD)
860 if (!term_100(zh->reg->zebra_maps, reg_type,
861 term_sub, term_component,
862 space_split, term_dst))
864 yaz_log(log_level_rpn, "Relation <=");
867 for (i = 0; term_component[i]; )
872 string_rel_add_char(&term_tmp, term_component, &j);
876 string_rel_add_char(&term_tmp, term_component, &i);
885 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* copy the whole component once more, character by character */
888 for (i = 0; term_component[i]; )
889 string_rel_add_char(&term_tmp, term_component, &i);
894 if (!term_100 (zh->reg->zebra_maps, reg_type,
895 term_sub, term_component, space_split, term_dst))
897 yaz_log(log_level_rpn, "Relation >");
900 for (i = 0; term_component[i];)
905 string_rel_add_char(&term_tmp, term_component, &j);
910 string_rel_add_char(&term_tmp, term_component, &i);
918 if ((term_tmp - term_dict) > IT_MAX_WORD)
921 for (i = 0; term_component[i];)
922 string_rel_add_char(&term_tmp, term_component, &i);
929 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
930 term_component, space_split, term_dst))
932 yaz_log(log_level_rpn, "Relation >=");
935 for (i = 0; term_component[i];)
942 string_rel_add_char(&term_tmp, term_component, &j);
/* the last character of the component is handled differently */
945 if (term_component[i+1])
949 string_rel_add_char(&term_tmp, term_component, &i);
953 string_rel_add_char(&term_tmp, term_component, &i);
960 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* equality: the component wrapped in parentheses */
969 yaz_log(log_level_rpn, "Relation =");
970 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
971 term_component, space_split, term_dst))
973 strcat(term_tmp, "(");
974 strcat(term_tmp, term_component);
975 strcat(term_tmp, ")");
978 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
/* Forward declaration: term_trunc() calls string_term() before its
 * definition appears further down. */
984 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
985 const char **term_sub,
986 oid_value attributeSet, NMEM stream,
987 struct grep_info *grep_info,
988 int reg_type, int complete_flag,
989 int num_bases, char **basenames,
990 char *term_dst, int xpath_use);
/* Look up one term and build its result set.
 * Resets grep_info->isam_p_indx, runs string_term() to collect the matching
 * ISAM positions for the next term in *term_sub, and on success hands them to
 * rset_trunc(), storing the result in *rset. A non-OK result from
 * string_term() and an exhausted *term_sub are each checked before the result
 * set is created. */
992 static ZEBRA_RES term_trunc(ZebraHandle zh,
993 Z_AttributesPlusTerm *zapt,
994 const char **term_sub,
995 oid_value attributeSet, NMEM stream,
996 struct grep_info *grep_info,
997 int reg_type, int complete_flag,
998 int num_bases, char **basenames,
1000 const char *rank_type, int xpath_use,
1003 struct rset_key_control *kc)
/* start with an empty hit list for this term */
1007 grep_info->isam_p_indx = 0;
1008 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
1009 reg_type, complete_flag, num_bases, basenames,
1010 term_dst, xpath_use);
1011 if (res != ZEBRA_OK)
1013 if (!*term_sub) /* no more terms ? */
1015 yaz_log(log_level_rpn, "term: %s", term_dst);
1016 *rset = rset_trunc(zh, grep_info->isam_p_buf,
1017 grep_info->isam_p_indx, term_dst,
1018 strlen(term_dst), rank_type, 1 /* preserve pos */,
1019 zapt->term->which, rset_nmem,
/* Return the decimal text of v as a string duplicated in nmem. */
1026 static char *nmem_strdup_i(NMEM nmem, int v)
1029 sprintf(val_str, "%d", v);
1030 return nmem_strdup(nmem, val_str);
/* Resolve one string term against every database in basenames and collect
 * the matching dictionary entries in grep_info.
 * term_sub      - in/out cursor into the query text
 * attributeSet  - default attribute set; the use attribute may override it
 * stream        - memory for error strings
 * complete_flag - non-zero disables word splitting (space_split = 0)
 * term_dst      - receives the raw text of the term that was parsed
 * xpath_use     - use value substituted when the use attribute is a string
 *                 and X-Path mode is active
 * For each database a dictionary expression is built in term_dict: a prefix
 * selecting the index ordinal(s) and the register type, followed by the
 * pattern chosen by the truncation attribute (type 5). The expression is then
 * run through dict_lookup_grep(). */
1033 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1034 const char **term_sub,
1035 oid_value attributeSet, NMEM stream,
1036 struct grep_info *grep_info,
1037 int reg_type, int complete_flag,
1038 int num_bases, char **basenames,
1039 char *term_dst, int xpath_use)
1041 char term_dict[2*IT_MAX_WORD+4000];
1043 AttrType truncation;
1044 int truncation_value;
1047 const char *use_string = 0;
1048 oid_value curAttributeSet = attributeSet;
1050 struct rpn_char_map_info rcmi;
1051 int space_split = complete_flag ? 0 : 1;
1053 int bases_ok = 0; /* no of databases with OK attribute */
1054 int errCode = 0; /* err code (if any is not OK) */
1055 char *errString = 0; /* addinfo */
1057 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1058 attr_init(&use, zapt, 1);
1059 use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1060 yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1061 attr_init(&truncation, zapt, 5);
1062 truncation_value = attr_find(&truncation, NULL);
1063 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1065 if (use_value == -1) /* no attribute - assume "any" */
1067 for (base_no = 0; base_no < num_bases; base_no++)
1071 int regex_range = 0;
1074 data1_local_attribute id_xpath_attr;
1075 data1_local_attribute *local_attr;
1076 int max_pos, prefix_len = 0;
1081 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1083 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1084 basenames[base_no]);
1087 if (xpath_use > 0 && use_value == -2)
1089 /* xpath mode and we have a string attribute */
1090 attp.local_attributes = &id_xpath_attr;
1091 attp.attset_ordinal = VAL_IDXPATH;
1092 id_xpath_attr.next = 0;
1094 use_value = xpath_use; /* xpath_use as use-attribute now */
1095 id_xpath_attr.local = use_value;
1097 else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1099 /* X-Path attribute, use numeric value directly */
1100 attp.local_attributes = &id_xpath_attr;
1101 attp.attset_ordinal = VAL_IDXPATH;
1102 id_xpath_attr.next = 0;
1103 id_xpath_attr.local = use_value;
1105 else if (use_string &&
1106 (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1109 /* we have a match for a raw string attribute */
1114 term_dict[prefix_len++] = '|';
1116 term_dict[prefix_len++] = '(';
/* each byte of the encoded ordinal is preceded by a byte with value 1 */
1118 ord_len = key_SU_encode (ord, ord_buf);
1119 for (i = 0; i<ord_len; i++)
1121 term_dict[prefix_len++] = 1;
1122 term_dict[prefix_len++] = ord_buf[i];
1124 attp.local_attributes = 0; /* no more attributes */
1128 /* lookup in the .att files . Allow string as well */
1129 if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1132 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1133 curAttributeSet, use_value, r);
1136 /* set was found, but value wasn't defined */
1137 errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1139 errString = nmem_strdup(stream, use_string);
1141 errString = nmem_strdup_i (stream, use_value);
1146 struct oident oident;
1148 oident.proto = PROTO_Z3950;
1149 oident.oclass = CLASS_ATTSET;
1150 oident.value = curAttributeSet;
1151 oid_ent_to_oid (&oident, oid);
1153 errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1154 errString = nmem_strdup(stream, oident.desc);
/* one alternative per local attribute that has an index ordinal */
1159 for (local_attr = attp.local_attributes; local_attr;
1160 local_attr = local_attr->next)
1165 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1166 attp.attset_ordinal,
1171 term_dict[prefix_len++] = '|';
1173 term_dict[prefix_len++] = '(';
1175 ord_len = key_SU_encode (ord, ord_buf);
1176 for (i = 0; i<ord_len; i++)
1178 term_dict[prefix_len++] = 1;
1179 term_dict[prefix_len++] = ord_buf[i];
/* close the ordinal group and append the register type */
1186 term_dict[prefix_len++] = ')';
1187 term_dict[prefix_len++] = 1;
1188 term_dict[prefix_len++] = reg_type;
1189 yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1190 term_dict[prefix_len] = '\0';
1192 switch (truncation_value)
1194 case -1: /* not specified */
1195 case 100: /* do not truncate */
1196 if (!string_relation (zh, zapt, &termp, term_dict,
1198 reg_type, space_split, term_dst,
1203 zebra_setError(zh, relation_error, 0);
1210 case 1: /* right truncation */
1211 term_dict[j++] = '(';
1212 if (!term_100(zh->reg->zebra_maps, reg_type,
1213 &termp, term_dict + j, space_split, term_dst))
1218 strcat(term_dict, ".*)");
1220 case 2: /* left truncation */
1221 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1222 if (!term_100(zh->reg->zebra_maps, reg_type,
1223 &termp, term_dict + j, space_split, term_dst))
1228 strcat(term_dict, ")");
1230 case 3: /* left&right truncation */
1231 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1232 if (!term_100(zh->reg->zebra_maps, reg_type,
1233 &termp, term_dict + j, space_split, term_dst))
1238 strcat(term_dict, ".*)");
1240 case 101: /* process # in term */
1241 term_dict[j++] = '(';
1242 if (!term_101(zh->reg->zebra_maps, reg_type,
1243 &termp, term_dict + j, space_split, term_dst))
1248 strcat(term_dict, ")");
1250 case 102: /* Regexp-1 */
1251 term_dict[j++] = '(';
1252 if (!term_102(zh->reg->zebra_maps, reg_type,
1253 &termp, term_dict + j, space_split, term_dst))
1258 strcat(term_dict, ")");
1260 case 103: /* Regexp-2 */
1262 term_dict[j++] = '(';
/* term_103 may store a value parsed from a "+N+" prefix in regex_range, which
 * is later passed to dict_lookup_grep() */
1264 if (!term_103(zh->reg->zebra_maps, reg_type,
1265 &termp, term_dict + j, &regex_range,
1266 space_split, term_dst))
1271 strcat(term_dict, ")");
1273 case 104: /* process # and ! in term */
1274 term_dict[j++] = '(';
1275 if (!term_104(zh->reg->zebra_maps, reg_type,
1276 &termp, term_dict + j, space_split, term_dst))
1281 strcat(term_dict, ")");
1283 case 105: /* process * and ! in term (right_truncate = 1) */
1284 term_dict[j++] = '(';
1285 if (!term_105(zh->reg->zebra_maps, reg_type,
1286 &termp, term_dict + j, space_split, term_dst, 1))
1291 strcat(term_dict, ")");
1293 case 106: /* process * and ! in term (right_truncate = 0) */
1294 term_dict[j++] = '(';
1295 if (!term_105(zh->reg->zebra_maps, reg_type,
1296 &termp, term_dict + j, space_split, term_dst, 0))
1301 strcat(term_dict, ")");
1304 zebra_setError_zint(zh,
1305 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* printable dump of the pattern part (after the binary prefix) */
1312 const char *input = term_dict + prefix_len;
1313 esc_str(buf, sizeof(buf), input, strlen(input));
1317 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1318 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1319 grep_info, &max_pos, init_pos,
1322 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
/* report the attribute error recorded while scanning the databases */
1327 zebra_setError(zh, errCode, errString);
1331 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* termz receives the NUL-terminated term, at most IT_MAX_WORD-1 bytes plus
 * the terminator. General terms are converted with zh->iconv_to_utf8 when
 * that converter is set, otherwise copied (and truncated) as-is; character
 * string terms are copied likewise. Any other term kind raises
 * YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM. */
1336 /* convert APT search term to UTF8 */
1337 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1341 Z_Term *term = zapt->term;
1343 switch (term->which)
1345 case Z_Term_general:
1346 if (zh->iconv_to_utf8 != 0)
1348 char *inbuf = term->u.general->buf;
1349 size_t inleft = term->u.general->len;
1350 char *outbuf = termz;
1351 size_t outleft = IT_MAX_WORD-1;
1354 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
/* conversion failed: call the converter again with null arguments, then report */
1356 if (ret == (size_t)(-1))
1358 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1361 YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
/* no converter configured: bounded raw copy */
1369 sizez = term->u.general->len;
1370 if (sizez > IT_MAX_WORD-1)
1371 sizez = IT_MAX_WORD-1;
1372 memcpy (termz, term->u.general->buf, sizez);
1373 termz[sizez] = '\0';
1376 case Z_Term_characterString:
1377 sizez = strlen(term->u.characterString);
1378 if (sizez > IT_MAX_WORD-1)
1379 sizez = IT_MAX_WORD-1;
1380 memcpy (termz, term->u.characterString, sizez);
1381 termz[sizez] = '\0';
1384 zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
/* The term is first converted with zapt_term_to_utf8() into a local buffer
 * (ZEBRA_FAIL is returned if that fails) and then run unit by unit through
 * zebra_maps_input() for reg_type; the mapped strings are emitted to termz.
 * A space-mapped unit is remembered in space_map rather than handled like
 * other units. */
1390 /* convert APT SCAN term to internal cmap */
1391 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1392 char *termz, int reg_type)
1394 char termz0[IT_MAX_WORD];
1396 if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1397 return ZEBRA_FAIL; /* error */
1401 const char *cp = (const char *) termz0;
1402 const char *cp_end = cp + strlen(cp);
1405 const char *space_map = NULL;
/* consume the input until cp reaches cp_end */
1408 while ((len = (cp_end - cp)) > 0)
1410 map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1411 if (**map == *CHR_SPACE)
/* walk the pending space mapping, then the current mapping */
1416 for (src = space_map; *src; src++)
1419 for (src = *map; *src; src++)
/* Return a copy of termz, allocated in stream, after applying
 * zebra_replace() for register reg_id. The truncation attribute (type 5) is
 * consulted first; its effect on ex_list is on lines not visible here. When
 * no replacement buffer is produced, a plain duplicate of termz is returned. */
1428 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1429 const char *termz, NMEM stream, unsigned reg_id)
1432 AttrType truncation;
1433 int truncation_value;
1436 attr_init(&truncation, zapt, 5);
1437 truncation_value = attr_find(&truncation, NULL);
1439 switch (truncation_value)
1459 wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1460 termz, strlen(termz));
1462 return nmem_strdup(stream, termz);
/* copy the replacement text out of the WRBUF and NUL-terminate it */
1465 char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1466 memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1467 buf[wrbuf_len(wrbuf)] = '\0';
/* Release the two arrays owned by grep_info (term_no and isam_p_buf). */
1472 static void grep_info_delete(struct grep_info *grep_info)
1475 xfree(grep_info->term_no);
1477 xfree(grep_info->isam_p_buf);
/* Initialise grep_info to an empty state (no buffers, size 0, no term set)
 * for the given register type. If zapt carries a termset attribute (type 8),
 * a result set for the term set is created via resultSetAdd(), named after
 * the numeric value in decimal or after the string value. Failure to create
 * it raises YAZ_BIB1_ILLEGAL_RESULT_SET_NAME; callers compare the result
 * with ZEBRA_FAIL. */
1480 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1481 Z_AttributesPlusTerm *zapt,
1482 struct grep_info *grep_info,
1486 int termset_value_numeric;
1487 const char *termset_value_string;
1490 grep_info->term_no = 0;
1492 grep_info->isam_p_size = 0;
1493 grep_info->isam_p_buf = NULL;
1495 grep_info->reg_type = reg_type;
1496 grep_info->termset = 0;
1500 attr_init(&termset, zapt, 8);
1501 termset_value_numeric =
1502 attr_find_ex(&termset, NULL, &termset_value_string);
/* -1: no termset attribute present */
1503 if (termset_value_numeric != -1)
1506 const char *termset_name = 0;
/* -2: the value is a string; otherwise format the number */
1507 if (termset_value_numeric != -2)
1510 sprintf(resname, "%d", termset_value_numeric);
1511 termset_name = resname;
1514 termset_name = termset_value_string;
1515 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1516 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1517 if (!grep_info->termset)
1519 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1527 \brief Create result set(s) for list of terms
1528 \param zh Zebra Handle
1529 \param termz_org term as used in query but converted to UTF-8
1530 \param attributeSet default attribute set
1531 \param stream memory for result
1532 \param reg_type register type ('w', 'p',..)
1533 \param complete_flag whether it's phrases or not
1534 \param rank_type term flags for ranking
1535 \param xpath_use use attribute for X-Path (-1 for no X-path)
1536 \param num_bases number of databases
1537 \param basenames array of databases
1538 \param rset_nmem memory for result sets
1539 \param result_sets output result set for each term in list (output)
1540 \param num_result_sets number of output result sets (output)
1541 \param kc rset key control to be used for created result sets
/* Split the (normalized) query term into individual terms and create one
 * result set per term with term_trunc(). *result_sets is a growable array
 * allocated from stream; *num_result_sets counts the sets stored in it. If
 * any term fails, every result set created so far is deleted and the
 * grep_info buffers are released before returning. */
1543 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1544 Z_AttributesPlusTerm *zapt,
1545 const char *termz_org,
1546 oid_value attributeSet,
1548 int reg_type, int complete_flag,
1549 const char *rank_type, int xpath_use,
1550 int num_bases, char **basenames,
1552 RSET **result_sets, int *num_result_sets,
1553 struct rset_key_control *kc)
1555 char term_dst[IT_MAX_WORD+1];
1556 struct grep_info grep_info;
1557 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1558 const char *termp = termz;
1561 *num_result_sets = 0;
1563 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* array full: allocate a larger one from stream and copy the entries over */
1569 if (alloc_sets == *num_result_sets)
1572 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1575 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1576 alloc_sets = alloc_sets + add;
1577 *result_sets = rnew;
1579 res = term_trunc(zh, zapt, &termp, attributeSet,
1581 reg_type, complete_flag,
1582 num_bases, basenames,
1583 term_dst, rank_type,
1584 xpath_use, rset_nmem,
1585 &(*result_sets)[*num_result_sets],
/* failure: undo everything built so far */
1587 if (res != ZEBRA_OK)
1590 for (i = 0; i < *num_result_sets; i++)
1591 rset_delete((*result_sets)[i]);
1592 grep_info_delete (&grep_info);
/* a null result set is checked for before the count is advanced */
1595 if ((*result_sets)[*num_result_sets] == 0)
1597 (*num_result_sets)++;
1599 grep_info_delete(&grep_info);
/* Phrase search: build one result set per term with term_list_trunc() and
 * combine them. No terms gives a null result set, one term gives its set
 * directly, and several terms are joined with rsprox_create() using the
 * ordered / exclusion / relation / distance arguments annotated below. */
1603 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1604 Z_AttributesPlusTerm *zapt,
1605 const char *termz_org,
1606 oid_value attributeSet,
1608 int reg_type, int complete_flag,
1609 const char *rank_type, int xpath_use,
1610 int num_bases, char **basenames,
1613 struct rset_key_control *kc)
1615 RSET *result_sets = 0;
1616 int num_result_sets = 0;
1618 term_list_trunc(zh, zapt, termz_org, attributeSet,
1619 stream, reg_type, complete_flag,
1620 rank_type, xpath_use,
1621 num_bases, basenames,
1623 &result_sets, &num_result_sets, kc);
1624 if (res != ZEBRA_OK)
1626 if (num_result_sets == 0)
1627 *rset = rsnull_create (rset_nmem, kc, 0);
1628 else if (num_result_sets == 1)
1629 *rset = result_sets[0];
1631 *rset = rsprox_create(rset_nmem, kc, kc->scope,
1632 num_result_sets, result_sets,
1633 1 /* ordered */, 0 /* exclusion */,
1634 3 /* relation */, 1 /* distance */);
/* OR-list search: build one result set per term with term_list_trunc() and
 * combine them. No terms gives a null result set, one term gives its set
 * directly, and several terms are joined with rsmulti_or_create(). */
1640 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1641 Z_AttributesPlusTerm *zapt,
1642 const char *termz_org,
1643 oid_value attributeSet,
1645 int reg_type, int complete_flag,
1646 const char *rank_type,
1648 int num_bases, char **basenames,
1651 struct rset_key_control *kc)
1653 RSET *result_sets = 0;
1654 int num_result_sets = 0;
1656 term_list_trunc(zh, zapt, termz_org, attributeSet,
1657 stream, reg_type, complete_flag,
1658 rank_type, xpath_use,
1659 num_bases, basenames,
1661 &result_sets, &num_result_sets, kc);
1662 if (res != ZEBRA_OK)
1664 if (num_result_sets == 0)
1665 *rset = rsnull_create (rset_nmem, kc, 0);
1666 else if (num_result_sets == 1)
1667 *rset = result_sets[0];
1669 *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */,
1670 num_result_sets, result_sets);
/* AND-list search: build one result set per term with term_list_trunc() and
 * combine them. No terms gives a null result set, one term gives its set
 * directly, and several terms are joined with rsmulti_and_create(). */
1676 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1677 Z_AttributesPlusTerm *zapt,
1678 const char *termz_org,
1679 oid_value attributeSet,
1681 int reg_type, int complete_flag,
1682 const char *rank_type,
1684 int num_bases, char **basenames,
1687 struct rset_key_control *kc)
1689 RSET *result_sets = 0;
1690 int num_result_sets = 0;
1692 term_list_trunc(zh, zapt, termz_org, attributeSet,
1693 stream, reg_type, complete_flag,
1694 rank_type, xpath_use,
1695 num_bases, basenames,
1697 &result_sets, &num_result_sets,
1699 if (res != ZEBRA_OK)
1701 if (num_result_sets == 0)
1702 *rset = rsnull_create (rset_nmem, kc, 0);
1703 else if (num_result_sets == 1)
1704 *rset = result_sets[0];
1706 *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1707 num_result_sets, result_sets);
/* Numeric counterpart of string_relation(): extract the next term with
 * term_100(), convert it with atoi(), and overwrite the tail of term_dict
 * with a regular expression for the relation attribute (type 2).
 * "<" and ">" are expressed as inclusive bounds on value-1 and value+1;
 * "=" becomes "(0*N)", i.e. the number with optional leading zeros.
 * An unsupported relation sets *error_code to
 * YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE. The finished expression is run through
 * dict_lookup_grep(), collecting hits in grep_info. */
1713 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1714 const char **term_sub,
1716 oid_value attributeSet,
1717 struct grep_info *grep_info,
/* write position: end of the prefix the caller placed in term_dict */
1727 char *term_tmp = term_dict + strlen(term_dict);
1730 attr_init(&relation, zapt, 2);
1731 relation_value = attr_find(&relation, NULL);
1733 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1735 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
/* NOTE(review): atoi() gives no error indication for non-numeric terms */
1738 term_value = atoi (term_tmp);
1739 switch (relation_value)
1742 yaz_log(log_level_rpn, "Relation <");
1743 gen_regular_rel(term_tmp, term_value-1, 1);
1746 yaz_log(log_level_rpn, "Relation <=");
1747 gen_regular_rel(term_tmp, term_value, 1);
1750 yaz_log(log_level_rpn, "Relation >=");
1751 gen_regular_rel(term_tmp, term_value, 0);
1754 yaz_log(log_level_rpn, "Relation >");
1755 gen_regular_rel(term_tmp, term_value+1, 0);
1759 yaz_log(log_level_rpn, "Relation =");
1760 sprintf(term_tmp, "(0*%d)", term_value);
1763 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1766 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1767 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
/* NOTE(review): this warning text mentions "rel = gt" but appears to follow
 * the lookup shared by every relation - confirm against the full file */
1770 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1771 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
/* Resolve one numeric term against every database in basenames and collect
 * the matching dictionary entries in grep_info.
 * term_sub  - in/out cursor into the query text
 * term_dst  - receives the raw text of the term that was parsed
 * xpath_use - use value substituted when the use attribute is a string
 * stream    - memory for error strings
 * For each database a prefix selecting the index ordinal(s) and the register
 * type is written to term_dict; numeric_relation() then appends the pattern
 * and performs the dictionary lookup. */
1775 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1776 const char **term_sub,
1777 oid_value attributeSet,
1778 struct grep_info *grep_info,
1779 int reg_type, int complete_flag,
1780 int num_bases, char **basenames,
1781 char *term_dst, int xpath_use, NMEM stream)
1783 char term_dict[2*IT_MAX_WORD+2];
1787 const char *use_string = 0;
1788 oid_value curAttributeSet = attributeSet;
1790 struct rpn_char_map_info rcmi;
1792 int bases_ok = 0; /* no of databases with OK attribute */
1793 int errCode = 0; /* err code (if any is not OK) */
1794 char *errString = 0; /* addinfo */
1796 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1797 attr_init(&use, zapt, 1);
1798 use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
/* -1: no use attribute was supplied */
1800 if (use_value == -1)
1803 for (base_no = 0; base_no < num_bases; base_no++)
1806 data1_local_attribute id_xpath_attr;
1807 data1_local_attribute *local_attr;
1808 int max_pos, prefix_len = 0;
1809 int relation_error = 0;
1812 if (use_value == -2) /* string attribute (assume IDXPATH/any) */
1814 use_value = xpath_use;
1815 attp.local_attributes = &id_xpath_attr;
1816 attp.attset_ordinal = VAL_IDXPATH;
1817 id_xpath_attr.next = 0;
1818 id_xpath_attr.local = use_value;
/* X-Path attribute set: use the numeric value directly */
1820 else if (curAttributeSet == VAL_IDXPATH)
1822 attp.local_attributes = &id_xpath_attr;
1823 attp.attset_ordinal = VAL_IDXPATH;
1824 id_xpath_attr.next = 0;
1825 id_xpath_attr.local = use_value;
/* otherwise resolve the attribute through att_getentbyatt() */
1829 if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1832 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1833 curAttributeSet, use_value, r);
1836 errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1838 errString = nmem_strdup(stream, use_string);
1840 errString = nmem_strdup_i (stream, use_value);
1843 errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1847 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1849 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1850 basenames[base_no]);
/* one alternative per local attribute that has an index ordinal */
1853 for (local_attr = attp.local_attributes; local_attr;
1854 local_attr = local_attr->next)
1860 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1861 attp.attset_ordinal,
1866 term_dict[prefix_len++] = '|';
1868 term_dict[prefix_len++] = '(';
/* each byte of the encoded ordinal is preceded by a byte with value 1 */
1870 ord_len = key_SU_encode (ord, ord_buf);
1871 for (i = 0; i<ord_len; i++)
1873 term_dict[prefix_len++] = 1;
1874 term_dict[prefix_len++] = ord_buf[i];
1879 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, use_value);
/* close the ordinal group and append the register type */
1883 term_dict[prefix_len++] = ')';
1884 term_dict[prefix_len++] = 1;
1885 term_dict[prefix_len++] = reg_type;
1886 yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1887 term_dict[prefix_len] = '\0';
1888 if (!numeric_relation(zh, zapt, &termp, term_dict,
1889 attributeSet, grep_info, &max_pos, reg_type,
1890 term_dst, &relation_error))
1894 zebra_setError(zh, relation_error, 0);
/* report the attribute error recorded while scanning the databases */
1903 zebra_setError(zh, errCode, errString);
1907 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/*
 * rpn_search_APT_numeric: build the result set for a term searched with the
 * "numeric" strategy.
 *
 * Visible behaviour:
 *  - prepares a grep_info for reg_type (grep_info_prepare);
 *  - calls numeric_term() on the term text, advancing termp, and turns the
 *    ISAM positions collected in grep_info into one result set per call with
 *    rset_trunc(); the result sets are kept in result_sets[], which is grown
 *    from the stream NMEM whenever alloc_sets == num_result_sets;
 *  - releases grep_info, and finally stores into *rset either a null set
 *    (no result sets), the single result set, or an AND over all of them.
 *
 * NOTE(review): this listing omits lines (braces, the loop header, returns,
 * some declarations), so the comments below describe only the statements
 * that are visible.
 */
1911 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1912 Z_AttributesPlusTerm *zapt,
1914 oid_value attributeSet,
1916 int reg_type, int complete_flag,
1917 const char *rank_type, int xpath_use,
1918 int num_bases, char **basenames,
1921 struct rset_key_control *kc)
1923 char term_dst[IT_MAX_WORD+1];
1924 const char *termp = termz;
1925 RSET *result_sets = 0;
1926 int num_result_sets = 0;
1928 struct grep_info grep_info;
1931 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1932 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* Grow result_sets[] by `add` slots when it is full; the old entries are
   copied into the new array. */
1936 if (alloc_sets == num_result_sets)
1939 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1942 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1943 alloc_sets = alloc_sets + add;
1946 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* Start each numeric_term() call with an empty ISAM position buffer. */
1947 grep_info.isam_p_indx = 0;
1948 res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1949 reg_type, complete_flag, num_bases, basenames,
1950 term_dst, xpath_use,
1952 if (res == ZEBRA_FAIL || termp == 0)
1954 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1955 result_sets[num_result_sets] =
1956 rset_trunc(zh, grep_info.isam_p_buf,
1957 grep_info.isam_p_indx, term_dst,
1958 strlen(term_dst), rank_type,
1959 0 /* preserve position */,
1960 zapt->term->which, rset_nmem,
1962 if (!result_sets[num_result_sets])
1966 grep_info_delete(&grep_info);
/* Releases every result set built so far (presumably the failure path; the
   guarding condition is not visible in this listing). */
1970 for (i = 0; i<num_result_sets; i++)
1971 rset_delete(result_sets[i]);
1974 if (num_result_sets == 0)
1975 *rset = rsnull_create(rset_nmem, kc, 0);
/* Chained with `else if` so that the zero-set case keeps the null result set
   assigned above instead of also reaching the branch that follows this test
   and overwriting *rset. */
1976 else if (num_result_sets == 1)
1977 *rset = result_sets[0];
1979 *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1980 num_result_sets, result_sets);
/*
 * rpn_search_APT_local: build the result set for the "local" search strategy.
 *
 * The visible statements create a temporary result set with rstemp_create()
 * (its directory comes from the "setTmpDir" resource), open it for writing
 * and write one key into it with rset_write().
 * NOTE(review): how the key is filled in from the term, and how the write
 * handle is closed, is not visible in this listing.
 */
1986 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1987 Z_AttributesPlusTerm *zapt,
1989 oid_value attributeSet,
1991 const char *rank_type, NMEM rset_nmem,
1993 struct rset_key_control *kc)
1998 *rset = rstemp_create(rset_nmem, kc, kc->scope,
1999 res_get (zh->res, "setTmpDir"),0 );
2000 rsfd = rset_open(*rset, RSETF_WRITE);
2008 rset_write (rsfd, &key);
/*
 * rpn_sort_spec: record a sort key specification taken from an
 * attributes-plus-term and hand back an empty result set.
 *
 * Visible behaviour:
 *  - reads attribute type 7 of zapt as the sort relation;
 *  - allocates sort_sequence->specs (10 zeroed slots, from the stream NMEM)
 *    when it does not exist yet;
 *  - parses the general term with atoi_n() to obtain the slot index i;
 *  - builds a Z_SortKeySpec whose sort attributes use the OID of attributeSet
 *    and the attribute list of zapt; relation value 1 selects ascending,
 *    2 selects descending, any other value falls back to ascending; case
 *    sensitivity is 0 and the missing-value action is "null";
 *  - stores the spec in sort_sequence->specs[i] and sets *rset to a null
 *    result set.
 *
 * NOTE(review): this listing omits lines (braces, `else`, returns, several
 * declarations); the comments describe only the visible statements.
 */
2013 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2014 oid_value attributeSet, NMEM stream,
2015 Z_SortKeySpecList *sort_sequence,
2016 const char *rank_type,
2019 struct rset_key_control *kc)
2022 int sort_relation_value;
2023 AttrType sort_relation_type;
2030 attr_init(&sort_relation_type, zapt, 7);
2031 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* Lazily create the spec array with every slot empty. */
2033 if (!sort_sequence->specs)
2035 sort_sequence->num_specs = 10;
2036 sort_sequence->specs = (Z_SortKeySpec **)
2037 nmem_malloc(stream, sort_sequence->num_specs *
2038 sizeof(*sort_sequence->specs));
2039 for (i = 0; i<sort_sequence->num_specs; i++)
2040 sort_sequence->specs[i] = 0;
2042 if (zapt->term->which != Z_Term_general)
2045 i = atoi_n ((char *) zapt->term->u.general->buf,
2046 zapt->term->u.general->len);
/* i comes from the term text and is used below to index specs[]; treat a
   negative value exactly like a too-large one so the store stays in bounds. */
2047 if (i < 0 || i >= sort_sequence->num_specs)
2049 sprintf(termz, "%d", i);
/* Translate the attribute set value into an OID for the sort attributes. */
2051 oe.proto = PROTO_Z3950;
2052 oe.oclass = CLASS_ATTSET;
2053 oe.value = attributeSet;
2054 if (!oid_ent_to_oid (&oe, oid))
2057 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2058 sks->sortElement = (Z_SortElement *)
2059 nmem_malloc(stream, sizeof(*sks->sortElement));
2060 sks->sortElement->which = Z_SortElement_generic;
2061 sk = sks->sortElement->u.generic = (Z_SortKey *)
2062 nmem_malloc(stream, sizeof(*sk));
2063 sk->which = Z_SortKey_sortAttributes;
2064 sk->u.sortAttributes = (Z_SortAttributes *)
2065 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2067 sk->u.sortAttributes->id = oid;
2068 sk->u.sortAttributes->list = zapt->attributes;
2070 sks->sortRelation = (int *)
2071 nmem_malloc(stream, sizeof(*sks->sortRelation));
2072 if (sort_relation_value == 1)
2073 *sks->sortRelation = Z_SortKeySpec_ascending;
2074 else if (sort_relation_value == 2)
2075 *sks->sortRelation = Z_SortKeySpec_descending;
2077 *sks->sortRelation = Z_SortKeySpec_ascending;
2079 sks->caseSensitivity = (int *)
2080 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2081 *sks->caseSensitivity = 0;
2083 sks->which = Z_SortKeySpec_null;
2084 sks->u.null = odr_nullval ();
2085 sort_sequence->specs[i] = sks;
/* A sort specification contributes no hits of its own. */
2086 *rset = rsnull_create (rset_nmem, kc, 0);
/*
 * parse_xpath: try to interpret the use attribute of zapt as an X-Path.
 *
 * Looks up attribute type 1 with attr_find_ex() to obtain its string value.
 * When that string exists and starts with '/', it is handed to
 * zebra_parse_xpath_str(), which fills xpath[] (at most max steps, using mem)
 * and whose return value is returned unchanged.
 * NOTE(review): the value returned when the string is missing or does not
 * start with '/' is not visible in this listing.
 */
2091 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2092 oid_value attributeSet,
2093 struct xpath_location_step *xpath, int max, NMEM mem)
2095 oid_value curAttributeSet = attributeSet;
2097 const char *use_string = 0;
2099 attr_init(&use, zapt, 1);
2100 attr_find_ex(&use, &curAttributeSet, &use_string);
2102 if (!use_string || *use_string != '/')
2105 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc: look a term up in the dictionary under one attribute ordinal
 * and return the matching result set.
 *
 * Visible behaviour:
 *  - resolves (curAttributeSet, use) to an ordinal with
 *    zebraExplain_lookup_attr_su();
 *  - returns a null result set when grep_info_prepare() fails (a second
 *    null-set return follows; its condition is not visible here);
 *  - builds the dictionary pattern in term_dict: the encoded ordinal bytes,
 *    each preceded by the byte 1, wrapped in parentheses, then byte 1, the
 *    register type, and finally the term text;
 *  - runs dict_lookup_grep() with grep_handle collecting ISAM positions into
 *    grep_info, and converts them with rset_trunc() using the "void" flags
 *    string and term type Z_Term_characterString;
 *  - releases grep_info.
 *
 * NOTE(review): the copy of term into term_dict[2048] is an unchecked
 * strcpy(); confirm that every caller bounds the length of term.
 */
2110 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2111 int reg_type, const char *term, int use,
2112 oid_value curAttributeSet, NMEM rset_nmem,
2113 struct rset_key_control *kc)
2116 struct grep_info grep_info;
2117 char term_dict[2048];
2120 int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
2121 int ord_len, i, r, max_pos;
2122 int term_type = Z_Term_characterString;
2123 const char *flags = "void";
2125 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2126 return rsnull_create(rset_nmem, kc, 0);
2129 return rsnull_create(rset_nmem, kc, 0);
/* Pattern prefix: '|' or '(' opens the group of encoded ordinal bytes. */
2131 term_dict[prefix_len++] = '|';
2133 term_dict[prefix_len++] = '(';
2135 ord_len = key_SU_encode (ord, ord_buf);
/* Each ordinal byte is preceded by the byte 1. */
2136 for (i = 0; i<ord_len; i++)
2138 term_dict[prefix_len++] = 1;
2139 term_dict[prefix_len++] = ord_buf[i];
2141 term_dict[prefix_len++] = ')';
2142 term_dict[prefix_len++] = 1;
2143 term_dict[prefix_len++] = reg_type;
/* The term itself follows the prefix and terminates the pattern. */
2145 strcpy(term_dict+prefix_len, term);
2147 grep_info.isam_p_indx = 0;
2148 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2149 &grep_info, &max_pos, 0, grep_handle);
2150 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2151 grep_info.isam_p_indx);
2152 rset = rset_trunc(zh, grep_info.isam_p_buf,
2153 grep_info.isam_p_indx, term, strlen(term),
2154 flags, 1, term_type,rset_nmem,
2156 grep_info_delete(&grep_info);
/*
 * rpn_search_xpath: restrict a result set to hits located inside the
 * elements named by a parsed X-Path.
 *
 * Visible behaviour:
 *  - logs every location step and switches curAttributeSet to VAL_IDXPATH;
 *  - clears the dictionary character map with dict_grep_cmap(dict, 0, 0);
 *  - for each database: selects it with zebraExplain_curDatabase() and
 *    reports YAZ_BIB1_DATABASE_UNAVAILABLE when that fails;
 *  - for each level, from the last step backwards: writes the step names of
 *    that level in reverse order into xpath_rev as a pattern (a wildcard
 *    becomes "[^/]*", a space is preceded by the byte 1, every step is
 *    followed by '/', and ".*" is appended in the branch marked as the
 *    double-slash case);
 *  - when the step has a relation predicate with a non-empty name, builds
 *    "name=value" in a WRBUF (skipping the first character of the name and
 *    backslash-escaping REGEX_CHARS in the value) and looks it up with
 *    xpath_trunc() using use value 3;
 *  - when xpath_rev is non-empty, looks up start tags (use value 1) and end
 *    tags (use value 2) with xpath_trunc() and wraps the current result set
 *    in rsbetween_create(start, rset, end, attr).
 *
 * The lines listing path forms and range(...) expressions are the text of an
 * explanatory comment whose delimiters are not part of this listing.
 *
 * NOTE(review): xpath_rev is a fixed 128-byte buffer filled from the step
 * names; no length check is visible here - confirm the step names are bounded.
 * NOTE(review): this listing omits lines (braces, loop bodies, returns); the
 * comments describe only the visible statements.
 */
2161 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2162 oid_value attributeSet,
2163 int num_bases, char **basenames,
2164 NMEM stream, const char *rank_type, RSET rset,
2165 int xpath_len, struct xpath_location_step *xpath,
2168 struct rset_key_control *kc)
2170 oid_value curAttributeSet = attributeSet;
2180 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2181 for (i = 0; i<xpath_len; i++)
2183 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2187 curAttributeSet = VAL_IDXPATH;
2197 a[@attr = value]/b[@other = othervalue]
2199 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2200 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2201 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2202 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2203 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2204 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2208 dict_grep_cmap (zh->reg->dict, 0, 0);
2210 for (base_no = 0; base_no < num_bases; base_no++)
2212 int level = xpath_len;
2215 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2217 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2218 basenames[base_no]);
/* Walk the location steps from the innermost one outwards. */
2222 while (--level >= 0)
2224 char xpath_rev[128];
2226 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* Emit the step names of this level in reverse order. */
2230 for (i = level; i >= 1; --i)
2232 const char *cp = xpath[i].part;
2238 memcpy (xpath_rev + len, "[^/]*", 5);
2241 else if (*cp == ' ')
2244 xpath_rev[len++] = 1;
2245 xpath_rev[len++] = ' ';
2249 xpath_rev[len++] = *cp;
2250 xpath_rev[len++] = '/';
2252 else if (i == 1) /* // case */
2254 xpath_rev[len++] = '.';
2255 xpath_rev[len++] = '*';
2260 if (xpath[level].predicate &&
2261 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2262 xpath[level].predicate->u.relation.name[0])
2264 WRBUF wbuf = wrbuf_alloc();
2265 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2266 if (xpath[level].predicate->u.relation.value)
2268 const char *cp = xpath[level].predicate->u.relation.value;
2269 wrbuf_putc(wbuf, '=');
2273 if (strchr(REGEX_CHARS, *cp))
2274 wrbuf_putc(wbuf, '\\');
2275 wrbuf_putc(wbuf, *cp);
2279 wrbuf_puts(wbuf, "");
2280 rset_attr = xpath_trunc(
2281 zh, stream, '0', wrbuf_buf(wbuf), 3,
2282 curAttributeSet, rset_nmem, kc);
2283 wrbuf_free(wbuf, 1);
2290 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2291 if (strlen(xpath_rev))
2293 rset_start_tag = xpath_trunc(zh, stream, '0',
2294 xpath_rev, 1, curAttributeSet, rset_nmem, kc);
2296 rset_end_tag = xpath_trunc(zh, stream, '0',
2297 xpath_rev, 2, curAttributeSet, rset_nmem, kc);
2299 rset = rsbetween_create(rset_nmem, kc, kc->scope,
2300 rset_start_tag, rset,
2301 rset_end_tag, rset_attr);
/*
 * rpn_search_APT: evaluate one attributes-plus-term operand.
 *
 * Visible behaviour:
 *  - asks zebra_maps_attr() for the register id, search type, rank type,
 *    complete flag and sort flag that apply to zapt;
 *  - converts the term to UTF-8 into termz (zapt_term_to_utf8);
 *  - may return rpn_sort_spec() directly (presumably when the sort flag is
 *    set; the guarding condition is not visible in this listing);
 *  - parses an X-Path from the use attribute (at most 10 steps); the X-Path
 *    use value is 1016 by default and 1015 when the last step starts
 *    with '@';
 *  - dispatches on search_type: "phrase", "and-list", "or-list", "local" and
 *    "numeric" each call their rpn_search_APT_* function; another branch
 *    reports YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE;
 *  - finally passes *rset through rpn_search_xpath().
 *
 * NOTE(review): the return value of zebra_maps_attr() is not checked here,
 * while search_type is later passed to strcmp(); confirm it cannot stay NULL.
 * NOTE(review): this listing omits lines (braces, `else`, returns, several
 * declarations); the comments describe only the visible statements.
 */
2310 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2311 oid_value attributeSet, NMEM stream,
2312 Z_SortKeySpecList *sort_sequence,
2313 int num_bases, char **basenames,
2316 struct rset_key_control *kc)
2318 ZEBRA_RES res = ZEBRA_OK;
2320 char *search_type = NULL;
2321 char rank_type[128];
2324 char termz[IT_MAX_WORD+1];
2327 struct xpath_location_step xpath[10];
2331 log_level_rpn = yaz_log_module_level("rpn");
/* The third argument is the address of reg_id (it had been corrupted into a
   registered-trademark character followed by "_id"). */
2334 zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2335 rank_type, &complete_flag, &sort_flag);
2337 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2338 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2339 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2340 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2342 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2346 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2347 rank_type, rset_nmem, rset, kc);
2348 /* consider if an X-Path query is used */
2349 xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2352 xpath_use = 1016; /* searching for element by default */
2353 if (xpath[xpath_len-1].part[0] == '@')
2354 xpath_use = 1015; /* last step an attribute .. */
2357 /* search using one of the various search type strategies
2358 termz is our UTF-8 search term
2359 attributeSet is top-level default attribute set
2360 stream is ODR for search
2361 reg_id is the register type
2362 complete_flag is 1 for complete subfield, 0 for incomplete
2363 xpath_use is use-attribute to be used for X-Path search, 0 for none
2365 if (!strcmp(search_type, "phrase"))
2367 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2368 reg_id, complete_flag, rank_type,
2370 num_bases, basenames, rset_nmem,
2373 else if (!strcmp(search_type, "and-list"))
2375 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2376 reg_id, complete_flag, rank_type,
2378 num_bases, basenames, rset_nmem,
2381 else if (!strcmp(search_type, "or-list"))
2383 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2384 reg_id, complete_flag, rank_type,
2386 num_bases, basenames, rset_nmem,
2389 else if (!strcmp(search_type, "local"))
2391 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2392 rank_type, rset_nmem, rset, kc);
2394 else if (!strcmp(search_type, "numeric"))
2396 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2397 reg_id, complete_flag, rank_type,
2399 num_bases, basenames, rset_nmem,
2404 zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
2407 if (res != ZEBRA_OK)
2411 return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2412 stream, rank_type, *rset,
2413 xpath_len, xpath, rset_nmem, rset, kc);
/*
 * Forward declaration of rpn_search_structure(): rpn_search_top() calls it
 * before its definition appears later in the file.
 */
2416 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2417 oid_value attributeSet,
2418 NMEM stream, NMEM rset_nmem,
2419 Z_SortKeySpecList *sort_sequence,
2420 int num_bases, char **basenames,
2421 RSET **result_sets, int *num_result_sets,
2422 Z_Operator *parent_op,
2423 struct rset_key_control *kc);
/*
 * rpn_search_top: entry point for evaluating a whole RPN query tree.
 *
 * Creates a key control object with zebra_key_control_create(), evaluates the
 * tree with rpn_search_structure() (passing no parent operator), and on a
 * non-OK result deletes every result set that was produced.  On success the
 * evaluation must have yielded exactly one non-null result set (asserted),
 * which is stored in *result_set.
 * NOTE(review): this listing omits lines (braces, returns, some parameters);
 * the comments describe only the visible statements.
 */
2425 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2426 oid_value attributeSet,
2427 NMEM stream, NMEM rset_nmem,
2428 Z_SortKeySpecList *sort_sequence,
2429 int num_bases, char **basenames,
2432 RSET *result_sets = 0;
2433 int num_result_sets = 0;
2435 struct rset_key_control *kc = zebra_key_control_create(zh);
2437 res = rpn_search_structure(zh, zs, attributeSet,
2440 num_bases, basenames,
2441 &result_sets, &num_result_sets,
2442 0 /* no parent op */,
2444 if (res != ZEBRA_OK)
/* Failure: release whatever result sets were already built. */
2447 for (i = 0; i<num_result_sets; i++)
2448 rset_delete(result_sets[i]);
/* Success: the top of the tree always collapses to a single result set. */
2453 assert(num_result_sets == 1);
2454 assert(result_sets);
2455 assert(*result_sets);
2456 *result_set = *result_sets;
/*
 * rpn_search_structure: recursively evaluate one node of an RPN query tree
 * and return its result set(s) through *result_sets / *num_result_sets.
 *
 * Complex node (operator with two operands):
 *  - evaluates s1, then s2, recursively; when either call is not ZEBRA_OK the
 *    result sets gathered so far are deleted;
 *  - concatenates the children's result sets into one array allocated from
 *    the stream NMEM;
 *  - when there is no parent operator, the parent's operator differs from
 *    this one, or this operator is neither AND nor OR, the array is combined
 *    immediately into a single result set: AND uses rsmulti_and_create(),
 *    AND-NOT uses rsbool_create_not(), proximity uses rsprox_create() after
 *    checking that the unit is a known code and equals Z_ProxUnit_word
 *    (otherwise YAZ_BIB1_UNSUPP_PROX_UNIT_CODE is set), rsmulti_or_create()
 *    is used in a branch whose case label is not visible in this listing, and
 *    one branch sets YAZ_BIB1_OPERATOR_UNSUPP.  The array is then replaced by
 *    a one-element array holding the combined set.
 *
 * Simple node:
 *  - an attributes-plus-term operand is evaluated with rpn_search_APT();
 *  - a result-set reference is resolved with resultSetRef();
 *    YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST is reported in a branch
 *    whose condition is not visible here;
 *  - another branch reports YAZ_BIB1_UNSUPP_SEARCH;
 *  - the resulting set is returned as a one-element array.
 *
 * A further branch, outside both node kinds, reports YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): this listing omits lines (braces, `switch`, `break`, returns,
 * some call arguments); the comments describe only the visible statements.
 */
2462 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2463 oid_value attributeSet,
2464 NMEM stream, NMEM rset_nmem,
2465 Z_SortKeySpecList *sort_sequence,
2466 int num_bases, char **basenames,
2467 RSET **result_sets, int *num_result_sets,
2468 Z_Operator *parent_op,
2469 struct rset_key_control *kc)
2471 *num_result_sets = 0;
2472 if (zs->which == Z_RPNStructure_complex)
2475 Z_Operator *zop = zs->u.complex->roperator;
2476 RSET *result_sets_l = 0;
2477 int num_result_sets_l = 0;
2478 RSET *result_sets_r = 0;
2479 int num_result_sets_r = 0;
/* Left operand. */
2481 res = rpn_search_structure(zh, zs->u.complex->s1,
2482 attributeSet, stream, rset_nmem,
2484 num_bases, basenames,
2485 &result_sets_l, &num_result_sets_l,
2487 if (res != ZEBRA_OK)
2490 for (i = 0; i<num_result_sets_l; i++)
2491 rset_delete(result_sets_l[i]);
/* Right operand. */
2494 res = rpn_search_structure(zh, zs->u.complex->s2,
2495 attributeSet, stream, rset_nmem,
2497 num_bases, basenames,
2498 &result_sets_r, &num_result_sets_r,
2500 if (res != ZEBRA_OK)
2503 for (i = 0; i<num_result_sets_l; i++)
2504 rset_delete(result_sets_l[i]);
2505 for (i = 0; i<num_result_sets_r; i++)
2506 rset_delete(result_sets_r[i]);
2510 /* make a new list of result for all children */
2511 *num_result_sets = num_result_sets_l + num_result_sets_r;
2512 *result_sets = nmem_malloc(stream, *num_result_sets *
2513 sizeof(**result_sets));
2514 memcpy(*result_sets, result_sets_l,
2515 num_result_sets_l * sizeof(**result_sets));
2516 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2517 num_result_sets_r * sizeof(**result_sets));
2519 if (!parent_op || parent_op->which != zop->which
2520 || (zop->which != Z_Operator_and &&
2521 zop->which != Z_Operator_or))
2523 /* parent node different from this one (or non-present) */
2524 /* we must combine result sets now */
2528 case Z_Operator_and:
2529 rset = rsmulti_and_create(rset_nmem, kc,
2531 *num_result_sets, *result_sets);
2534 rset = rsmulti_or_create(rset_nmem, kc,
2535 kc->scope, 0, /* termid */
2536 *num_result_sets, *result_sets);
2538 case Z_Operator_and_not:
2539 rset = rsbool_create_not(rset_nmem, kc,
2544 case Z_Operator_prox:
/* Only proximity with a known unit code equal to "word" is accepted. */
2545 if (zop->u.prox->which != Z_ProximityOperator_known)
2548 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2552 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2554 zebra_setError_zint(zh,
2555 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2556 *zop->u.prox->u.known);
2561 rset = rsprox_create(rset_nmem, kc,
2563 *num_result_sets, *result_sets,
2564 *zop->u.prox->ordered,
2565 (!zop->u.prox->exclusion ?
2566 0 : *zop->u.prox->exclusion),
2567 *zop->u.prox->relationType,
2568 *zop->u.prox->distance );
2572 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* Replace the child list by the single combined result set. */
2575 *num_result_sets = 1;
2576 *result_sets = nmem_malloc(stream, *num_result_sets *
2577 sizeof(**result_sets));
2578 (*result_sets)[0] = rset;
2581 else if (zs->which == Z_RPNStructure_simple)
2586 if (zs->u.simple->which == Z_Operand_APT)
2588 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2589 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2590 attributeSet, stream, sort_sequence,
2591 num_bases, basenames, rset_nmem, &rset,
2593 if (res != ZEBRA_OK)
2596 else if (zs->u.simple->which == Z_Operand_resultSetId)
2598 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2599 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2603 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2604 zs->u.simple->u.resultSetId);
2611 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* A simple operand always yields exactly one result set. */
2614 *num_result_sets = 1;
2615 *result_sets = nmem_malloc(stream, *num_result_sets *
2616 sizeof(**result_sets));
2617 (*result_sets)[0] = rset;
2621 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/*
 * Bookkeeping types for dictionary scanning.  Only fragments are visible in
 * this listing: scan_info_entry is the element type of the `list` array, and
 * scan_handle() fills the `term` and `isam_p` members of those entries.
 */
2627 struct scan_info_entry {
2633 struct scan_info_entry *list;
/*
 * scan_handle: callback that stores one scanned dictionary entry.
 *
 * client is a struct scan_info.  The entry name is compared against the scan
 * prefix with memcmp() (the action taken on a mismatch is not visible in this
 * listing).  The slot index is computed as after - pos + before; the part of
 * the name following the prefix is copied into list[idx].term (allocated with
 * odr_malloc), and the ISAM_P stored after the length byte of info is copied
 * into list[idx].isam_p.  The first byte of info must equal sizeof(ISAM_P)
 * (asserted).
 * NOTE(review): lines between the index computation and the store are
 * missing from this listing, so idx may be adjusted further before use.
 */
2639 static int scan_handle (char *name, const char *info, int pos, void *client)
2641 int len_prefix, idx;
2642 struct scan_info *scan_info = (struct scan_info *) client;
2644 len_prefix = strlen(scan_info->prefix);
2645 if (memcmp (name, scan_info->prefix, len_prefix))
2648 idx = scan_info->after - pos + scan_info->before;
2654 scan_info->list[idx].term = (char *)
2655 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2656 strcpy(scan_info->list[idx].term, name + len_prefix);
2657 assert (*info == sizeof(ISAM_P));
2658 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/*
 * zebra_term_untrans_iconv: convert a dictionary term back to display form
 * and return it as a string allocated from the stream NMEM.
 *
 * The term is first mapped with zebra_term_untrans() into term_src.  When
 * zh->iconv_from_utf8 is set, term_src is converted with yaz_iconv() into
 * term_dst (leaving room for one trailing byte), and the converted bytes are
 * copied into a freshly allocated buffer of len + 1 bytes; otherwise *dst is
 * a plain nmem_strdup() copy of term_src.
 * NOTE(review): the handling of a yaz_iconv() failure and the terminating
 * byte of *dst are on lines missing from this listing.
 */
2662 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2663 char **dst, const char *src)
2665 char term_src[IT_MAX_WORD];
2666 char term_dst[IT_MAX_WORD];
2668 zebra_term_untrans (zh, reg_type, term_src, src);
2670 if (zh->iconv_from_utf8 != 0)
2673 char *inbuf = term_src;
2674 size_t inleft = strlen(term_src);
2675 char *outbuf = term_dst;
2676 size_t outleft = sizeof(term_dst)-1;
2679 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2681 if (ret == (size_t)(-1))
/* Number of bytes produced by the conversion. */
2684 len = outbuf - term_dst;
2685 *dst = nmem_malloc(stream, len + 1);
2687 memcpy (*dst, term_dst, len);
2691 *dst = nmem_strdup(stream, term_src);
/*
 * count_set: read a result set from start to end and report its size
 * through *count.
 *
 * Opens r for reading and reads every key; key.mem[0] is compared with the
 * previously seen value (psysno) and remembered when it changes.  The final
 * log line reports kno as the number of keys and *count as the number of
 * records.
 * NOTE(review): the statements that increment kno and *count are missing
 * from this listing; presumably *count counts changes of key.mem[0] -
 * confirm against the full source.
 */
2694 static void count_set (RSET r, int *count)
2701 yaz_log(YLOG_DEBUG, "count_set");
2704 rfd = rset_open (r, RSETF_READ);
2705 while (rset_read (rfd, &key,0 /* never mind terms */))
2707 if (key.mem[0] != psysno)
2709 psysno = key.mem[0];
2715 yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count);
2718 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2719 oid_value attributeset,
2720 int num_bases, char **basenames,
2721 int *position, int *num_entries, ZebraScanEntry **list,
2722 int *is_partial, RSET limit_set, int return_zero)
2725 int pos = *position;
2726 int num = *num_entries;
2730 char termz[IT_MAX_WORD+20];
2733 const char *use_string = 0;
2734 struct scan_info *scan_info_array;
2735 ZebraScanEntry *glist;
2736 int ords[32], ord_no = 0;
2739 int bases_ok = 0; /* no of databases with OK attribute */
2740 int errCode = 0; /* err code (if any is not OK) */
2741 char *errString = 0; /* addinfo */
2744 char *search_type = NULL;
2745 char rank_type[128];
2748 NMEM rset_nmem = NULL;
2749 struct rset_key_control *kc = 0;
2754 if (attributeset == VAL_NONE)
2755 attributeset = VAL_BIB1;
2760 int termset_value_numeric;
2761 const char *termset_value_string;
2762 attr_init(&termset, zapt, 8);
2763 termset_value_numeric =
2764 attr_find_ex(&termset, NULL, &termset_value_string);
2765 if (termset_value_numeric != -1)
2768 const char *termset_name = 0;
2770 if (termset_value_numeric != -2)
2773 sprintf(resname, "%d", termset_value_numeric);
2774 termset_name = resname;
2777 termset_name = termset_value_string;
2779 limit_set = resultSetRef (zh, termset_name);
2783 yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2784 pos, num, attributeset);
2786 attr_init(&use, zapt, 1);
2787 use_value = attr_find_ex(&use, &attributeset, &use_string);
2789 if (zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type,
2790 rank_type, &complete_flag, &sort_flag))
2793 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2796 yaz_log(YLOG_DEBUG, "use_value = %d", use_value);
2798 if (use_value == -1)
2800 for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2802 data1_local_attribute *local_attr;
2806 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2808 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2809 basenames[base_no]);
2815 (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2818 /* we have a match for a raw string attribute */
2820 ords[ord_no++] = ord;
2821 attp.local_attributes = 0; /* no more attributes */
2827 if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2830 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2831 attributeset, use_value);
2834 errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
2836 zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2839 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2844 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
2850 for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2851 local_attr = local_attr->next)
2853 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
2854 attp.attset_ordinal,
2857 ords[ord_no++] = ord;
2860 if (!bases_ok && errCode)
2862 zebra_setError(zh, errCode, errString);
2871 /* prepare dictionary scanning */
2883 yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2884 "after=%d before+after=%d",
2885 pos, num, before, after, before+after);
2886 scan_info_array = (struct scan_info *)
2887 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2888 for (i = 0; i < ord_no; i++)
2890 int j, prefix_len = 0;
2891 int before_tmp = before, after_tmp = after;
2892 struct scan_info *scan_info = scan_info_array + i;
2893 struct rpn_char_map_info rcmi;
2895 rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2897 scan_info->before = before;
2898 scan_info->after = after;
2899 scan_info->odr = stream;
2901 scan_info->list = (struct scan_info_entry *)
2902 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2903 for (j = 0; j<before+after; j++)
2904 scan_info->list[j].term = NULL;
2906 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2907 termz[prefix_len++] = reg_id;
2908 termz[prefix_len] = 0;
2909 strcpy(scan_info->prefix, termz);
2911 if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL)
2914 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2915 scan_info, scan_handle);
2917 glist = (ZebraScanEntry *)
2918 odr_malloc(stream, (before+after)*sizeof(*glist));
2920 rset_nmem = nmem_create();
2921 kc = zebra_key_control_create(zh);
2923 /* consider terms after main term */
2924 for (i = 0; i < ord_no; i++)
2928 for (i = 0; i<after; i++)
2931 const char *mterm = NULL;
2934 int lo = i + pos-1; /* offset in result list */
2936 /* find: j0 is the first of the minimal values */
2937 for (j = 0; j < ord_no; j++)
2939 if (ptr[j] < before+after && ptr[j] >= 0 &&
2940 (tst = scan_info_array[j].list[ptr[j]].term) &&
2941 (!mterm || strcmp (tst, mterm) < 0))
2948 break; /* no value found, stop */
2950 /* get result set for first one , but only if it's within bounds */
2953 /* get result set for first term */
2954 zebra_term_untrans_iconv(zh, stream->mem, reg_id,
2955 &glist[lo].term, mterm);
2956 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2957 glist[lo].term, strlen(glist[lo].term),
2958 NULL, 0, zapt->term->which, rset_nmem,
2961 ptr[j0]++; /* move index for this set .. */
2962 /* get result set for remaining scan terms */
2963 for (j = j0+1; j<ord_no; j++)
2965 if (ptr[j] < before+after && ptr[j] >= 0 &&
2966 (tst = scan_info_array[j].list[ptr[j]].term) &&
2967 !strcmp (tst, mterm))
2976 zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2978 strlen(glist[lo].term), NULL, 0,
2979 zapt->term->which,rset_nmem,
2981 rset = rsmulti_or_create(rset_nmem, kc,
2982 kc->scope, 0 /* termid */,
2990 /* merge with limit_set if given */
2995 rsets[1] = rset_dup(limit_set);
2997 rset = rsmulti_and_create(rset_nmem, kc,
3002 count_set(rset, &glist[lo].occurrences);
3008 *num_entries -= (after-i);
3010 if (*num_entries < 0)
3013 nmem_destroy(rset_nmem);
3018 /* consider terms before main term */
3019 for (i = 0; i<ord_no; i++)
3022 for (i = 0; i<before; i++)
3025 const char *mterm = NULL;
3028 int lo = before-1-i; /* offset in result list */
3030 for (j = 0; j <ord_no; j++)
3032 if (ptr[j] < before && ptr[j] >= 0 &&
3033 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3034 (!mterm || strcmp (tst, mterm) > 0))
3043 zebra_term_untrans_iconv(zh, stream->mem, reg_id,
3044 &glist[lo].term, mterm);
3047 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
3048 glist[lo].term, strlen(glist[lo].term),
3049 NULL, 0, zapt->term->which, rset_nmem,
3054 for (j = j0+1; j<ord_no; j++)
3056 if (ptr[j] < before && ptr[j] >= 0 &&
3057 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3058 !strcmp (tst, mterm))
3063 rsets[1] = rset_trunc(
3065 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
3067 strlen(glist[lo].term), NULL, 0,
3068 zapt->term->which, rset_nmem,
3070 rset = rsmulti_or_create(rset_nmem, kc,
3071 kc->scope, 0 /* termid */, 2, rsets);
3080 rsets[1] = rset_dup(limit_set);
3082 rset = rsmulti_and_create(rset_nmem, kc,
3083 kc->scope, 2, rsets);
3085 count_set (rset, &glist[lo].occurrences);
3089 nmem_destroy(rset_nmem);
3096 if (*num_entries <= 0)
3103 *list = glist + i; /* list is set to first 'real' entry */
3105 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3106 *position, *num_entries);