1 /* $Id: zrpn.c,v 1.220 2006-06-22 23:06:06 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* Context handed to rpn_char_map_handler through dict_grep_cmap; this file
 * sets and reads its zm and reg_type members. */
40 struct rpn_char_map_info
/* Log-level state for this module; log_level_rpn is assigned from
 * yaz_log_module_level("rpn") in add_isam_p. */
46 static int log_level_set = 0;
47 static int log_level_rpn = 0;
/* Character-map handler given to dict_grep_cmap: vp is the
 * struct rpn_char_map_info, and the input at *from (len bytes) is mapped
 * with zebra_maps_input for that register type. */
49 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
51 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
52 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
/* diagnostic logging of the mapped output, one byte in hex per log line */
56 const char *outp = *out;
57 yaz_log(YLOG_LOG, "---");
60 yaz_log(YLOG_LOG, "%02X", *outp);
/* Fill map_info with the register's zebra maps and the register type, then
 * hand map_info and rpn_char_map_handler to dict_grep_cmap for reg->dict. */
68 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
69 struct rpn_char_map_info *map_info)
71 map_info->zm = reg->zebra_maps;
72 map_info->reg_type = reg_type;
73 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Map the internal term src back through zebra_maps_output for the given
 * register type; the checks below bound the produced length by
 * IT_MAX_WORD-1.
 * NOTE(review): dst is presumably the output buffer of at least IT_MAX_WORD
 * bytes - the copy statements are not visible here, confirm. */
90 void zebra_term_untrans(ZebraHandle zh, int reg_type,
91 char *dst, const char *src)
96 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
100 if (len < IT_MAX_WORD-1)
105 while (*cp && len < IT_MAX_WORD-1)
/* add_isam_p: record one dictionary hit in the grep_info p.
 * name - dictionary key; key_SU_decode reads the index ordinal from its
 *        start, name[len] is logged as a single character and the term
 *        text starts at name+len+1
 * info - length-prefixed entry: info[0] must equal sizeof(ISAM_P) and the
 *        ISAM_P value itself starts at info+1
 * The isam_p_buf and term_no arrays are grown together when full. */
111 static void add_isam_p(const char *name, const char *info,
116 log_level_rpn = yaz_log_module_level("rpn");
119 if (p->isam_p_indx == p->isam_p_size)
121 ISAM_P *new_isam_p_buf;
/* grow both arrays to 2*size+100 entries */
125 p->isam_p_size = 2*p->isam_p_size + 100;
126 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
130 memcpy(new_isam_p_buf, p->isam_p_buf,
131 p->isam_p_indx * sizeof(*p->isam_p_buf));
132 xfree(p->isam_p_buf);
134 p->isam_p_buf = new_isam_p_buf;
137 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* carry over the existing term numbers: the source has to be p->term_no,
 * not p->isam_p_buf (a different array with a different element type) */
140 memcpy(new_term_no, p->term_no,
141 p->isam_p_indx * sizeof(*p->term_no));
144 p->term_no = new_term_no;
/* info[0] is the stored length; it must match the ISAM_P size exactly */
147 assert(*info == sizeof(*p->isam_p_buf));
148 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
154 char term_tmp[IT_MAX_WORD];
156 const char *index_name;
157 int len = key_SU_decode (&ord, (const unsigned char *) name);
/* convert the term part of the key back to displayable form for logging
 * and for the termset entry */
159 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
160 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
161 zebraExplain_lookup_ord(p->zh->reg->zei,
162 ord, 0 /* index_type */, &db, &index_name);
163 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
165 resultSetAddTerm(p->zh, p->termset, name[len], db,
166 index_name, term_tmp);
/* Adapter with an untyped context pointer: forwards name and info to
 * add_isam_p, casting p to struct grep_info *. */
172 static int grep_handle(char *name, const char *info, void *p)
174 add_isam_p(name, info, (struct grep_info *) p);
/* term_pre: white-space skipping pre-pass over the term at *src.
 * ct1 and ct2, when non-NULL, are character sets tested with strchr against
 * the current character; the input is also mapped with zebra_maps_input and
 * the mapping compared with CHR_SPACE.
 * NOTE(review): callers test the result with !, so 0 presumably means that
 * nothing is left to parse - confirm. */
178 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
179 const char *ct1, const char *ct2, int first)
181 const char *s1, *s0 = *src;
184 /* skip white space */
187 if (ct1 && strchr(ct1, *s0))
189 if (ct2 && strchr(ct2, *s0))
192 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
/* a mapping other than space is distinguished from white space here */
193 if (**map != *CHR_SPACE)
/* esc_str: write a readable dump of in_buf (in_size bytes) into out_buf.
 * Each byte is appended in the form "%02X:%c "; once the output comes
 * within 20 bytes of out_size, ".." is appended. out_size must be greater
 * than 20 (asserted). */
202 static void esc_str(char *out_buf, size_t out_size,
203 const char *in_buf, int in_size)
209 assert(out_size > 20);
211 for (k = 0; k<in_size; k++)
/* mask to 0..255 so bytes with the high bit set are not negative */
213 int c = in_buf[k] & 0xff;
/* bytes outside 32..126 are singled out before printing */
215 if (c < 32 || c > 126)
219 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
220 if (strlen(out_buf) > out_size-20)
222 strcat(out_buf, "..");
/* Characters the term_* functions below test for (with strchr) while
 * building the dictionary regular expression. */
228 #define REGEX_CHARS " []()|.*+?!"
230 /* term_100: handle term, where trunc = none(no operators at all) */
/* The term is read from *src after term_pre. Mapped text is written to dst
 * (strcpy of map[0]) and the raw source bytes to dst_term (memcpy).
 * In the "complete subfield" branch, space mappings are remembered and
 * re-inserted before the next non-space character.
 * NOTE(review): callers test the result with !, so 0 presumably means that
 * no term was produced - confirm. */
231 static int term_100(ZebraMaps zebra_maps, int reg_type,
232 const char **src, char *dst, int space_split,
240 const char *space_start = 0;
241 const char *space_end = 0;
243 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
250 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
254 if (**map == *CHR_SPACE)
257 else /* complete subfield only. */
259 if (**map == *CHR_SPACE)
260 { /* save space mapping for later .. */
265 else if (space_start)
266 { /* reload last space */
267 while (space_start < space_end)
269 if (strchr(REGEX_CHARS, *space_start))
271 dst_term[j++] = *space_start;
272 dst[i++] = *space_start++;
/* saved space has been flushed; forget it */
275 space_start = space_end = 0;
278 /* add non-space char */
279 memcpy(dst_term+j, s1, s0 - s1);
285 if (strchr(REGEX_CHARS, *s1))
/* readable dump of the mapping for diagnostics */
293 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
295 strcpy(dst + i, map[0]);
305 /* term_101: handle term, where trunc = Process # */
/* "#" is the special-character set given to term_pre. Mapped text goes to
 * dst, raw source bytes to dst_term, which is NUL-terminated at the end.
 * With space_split set, a mapping equal to CHR_SPACE is tested for
 * separately from ordinary characters. */
306 static int term_101(ZebraMaps zebra_maps, int reg_type,
307 const char **src, char *dst, int space_split,
315 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
/* copy a source character verbatim into the displayable term */
324 dst_term[j++] = *s0++;
330 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
332 if (space_split && **map == *CHR_SPACE)
335 /* add non-space char */
336 memcpy(dst_term+j, s1, s0 - s1);
342 if (strchr(REGEX_CHARS, *s1))
/* readable dump of the mapping for diagnostics */
350 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
352 strcpy(dst + i, map[0]);
358 dst_term[j++] = '\0';
363 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* When errors is non-NULL and the term starts with "+N+" (N one decimal
 * digit) followed by at least one more character, N is stored in *errors.
 * Characters from the set "^\()[].*+?|-" are recognised in the source;
 * other input is mapped with zebra_maps_search, mapped text going to dst
 * and raw source bytes to dst_term. */
364 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
365 char *dst, int *errors, int space_split,
373 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
376 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
377 isdigit(((const unsigned char *)s0)[1]))
/* single digit between the two '+' characters */
379 *errors = s0[1] - '0';
386 if (strchr("^\\()[].*+?|-", *s0))
395 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
397 if (space_split && **map == *CHR_SPACE)
400 /* add non-space char */
401 memcpy(dst_term+j, s1, s0 - s1);
407 if (strchr(REGEX_CHARS, *s1))
/* readable dump of the mapping for diagnostics */
415 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
417 strcpy(dst + i, map[0]);
429 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Delegates to term_103 with errors == NULL, so no "+N+" error-count
 * prefix is parsed. */
430 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
431 char *dst, int space_split, char *dst_term)
433 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
438 /* term_104: handle term, where trunc = Process # and ! */
/* NOTE(review): the special-character set handed to term_pre is "?*#";
 * the '!' named in the comment above is not part of it - confirm which is
 * intended.
 * A run of decimal digits following a special character is accumulated
 * into limit. Raw source bytes go to dst_term (NUL-terminated at the end),
 * mapped text to dst. */
439 static int term_104(ZebraMaps zebra_maps, int reg_type,
440 const char **src, char *dst, int space_split,
448 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
455 dst_term[j++] = *s0++;
456 if (*s0 >= '0' && *s0 <= '9')
/* parse the decimal count, keeping the digits in the displayable term */
459 while (*s0 >= '0' && *s0 <= '9')
461 limit = limit * 10 + (*s0 - '0');
462 dst_term[j++] = *s0++;
482 dst_term[j++] = *s0++;
487 dst_term[j++] = *s0++;
493 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
495 if (space_split && **map == *CHR_SPACE)
498 /* add non-space char */
499 memcpy(dst_term+j, s1, s0 - s1);
505 if (strchr(REGEX_CHARS, *s1))
/* readable dump of the mapping for diagnostics */
513 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
515 strcpy(dst + i, map[0]);
521 dst_term[j++] = '\0';
526 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* "*!" is the special-character set given to term_pre. string_term calls
 * this with right_truncate = 1 for truncation 105 and 0 for 106. Raw source
 * bytes go to dst_term (NUL-terminated at the end), mapped text to dst. */
527 static int term_105(ZebraMaps zebra_maps, int reg_type,
528 const char **src, char *dst, int space_split,
529 char *dst_term, int right_truncate)
536 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
545 dst_term[j++] = *s0++;
550 dst_term[j++] = *s0++;
556 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
558 if (space_split && **map == *CHR_SPACE)
561 /* add non-space char */
562 memcpy(dst_term+j, s1, s0 - s1);
568 if (strchr(REGEX_CHARS, *s1))
/* readable dump of the mapping for diagnostics */
576 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
578 strcpy(dst + i, map[0]);
590 dst_term[j++] = '\0';
596 /* gen_regular_rel - generate regular expression from relation
597 * val: border value (inclusive)
598 * islt: 1 if <=; 0 if >=.
600 static void gen_regular_rel(char *dst, int val, int islt)
/* The pattern is assembled in dst with strcpy/strcat. It opens with one of
 * two sign-dependent prefixes, then the border value is formatted in
 * decimal and its digits are visited from the last one to the first. */
607 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
611 strcpy(dst, "(-[0-9]+|(");
619 strcpy(dst, "([0-9]+|-(");
631 sprintf(numstr, "%d", val);
/* w walks numstr backwards while pos counts the digits handled so far */
632 for (w = strlen(numstr); --w >= 0; pos++)
651 strcpy(dst + dst_p, numstr);
652 dst_p = strlen(dst) - pos - 1;
680 for (i = 0; i<pos; i++)
693 /* match everything less than 10^(pos-1) */
695 for (i = 1; i<pos; i++)
696 strcat(dst, "[0-9]?");
700 /* match everything greater than 10^pos */
701 for (i = 0; i <= pos; i++)
702 strcat(dst, "[0-9]");
703 strcat(dst, "[0-9]*");
/* Copy one logical character from src[*indx] to **term_p, advancing both
 * *term_p and *indx. A backslash is copied together with the character
 * that follows it, so escaped pairs are never split. */
708 void string_rel_add_char(char **term_p, const char *src, int *indx)
710 if (src[*indx] == '\\')
711 *(*term_p)++ = src[(*indx)++];
712 *(*term_p)++ = src[(*indx)++];
716 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
717 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
718 * >= abc ([b-].*|a[c-].*|ab[c-].*)
719 * ([^-a].*|a[^-b].*|ab[c-].*)
720 * < abc ([-0].*|a[-a].*|ab[-b].*)
721 * ([^a-].*|a[^b-].*|ab[^c-].*)
722 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
723 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* string_relation: append to term_dict the regular expression for a string
 * term under the relation attribute (type 2) of zapt.
 * The term is parsed by term_100 into term_component; the expression is
 * written starting at the current end of term_dict (term_tmp). The log
 * messages below name the handled relations: <, <=, >, >= and =.
 * An unsupported relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in
 * *error_code.
 * NOTE(review): callers test the result with !, so 0 presumably signals
 * "no term / error" - confirm. */
725 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
726 const char **term_sub, char *term_dict,
727 oid_value attributeSet,
728 int reg_type, int space_split, char *term_dst,
/* output position: the expression is appended after the existing prefix */
734 char *term_tmp = term_dict + strlen(term_dict);
735 char term_component[2*IT_MAX_WORD+20];
737 attr_init_APT(&relation, zapt, 2);
738 relation_value = attr_find(&relation, NULL);
741 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
742 switch (relation_value)
745 if (!term_100(zh->reg->zebra_maps, reg_type,
746 term_sub, term_component,
747 space_split, term_dst))
749 yaz_log(log_level_rpn, "Relation <");
752 for (i = 0; term_component[i]; )
759 string_rel_add_char(&term_tmp, term_component, &j);
764 string_rel_add_char(&term_tmp, term_component, &i);
/* guard on the size of the generated expression */
771 if ((term_tmp - term_dict) > IT_MAX_WORD)
778 if (!term_100(zh->reg->zebra_maps, reg_type,
779 term_sub, term_component,
780 space_split, term_dst))
782 yaz_log(log_level_rpn, "Relation <=");
785 for (i = 0; term_component[i]; )
790 string_rel_add_char(&term_tmp, term_component, &j);
794 string_rel_add_char(&term_tmp, term_component, &i);
803 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* finally copy the whole component once more */
806 for (i = 0; term_component[i]; )
807 string_rel_add_char(&term_tmp, term_component, &i);
812 if (!term_100 (zh->reg->zebra_maps, reg_type,
813 term_sub, term_component, space_split, term_dst))
815 yaz_log(log_level_rpn, "Relation >");
818 for (i = 0; term_component[i];)
823 string_rel_add_char(&term_tmp, term_component, &j);
828 string_rel_add_char(&term_tmp, term_component, &i);
836 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* finally copy the whole component once more */
839 for (i = 0; term_component[i];)
840 string_rel_add_char(&term_tmp, term_component, &i);
847 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
848 term_component, space_split, term_dst))
850 yaz_log(log_level_rpn, "Relation >=");
853 for (i = 0; term_component[i];)
860 string_rel_add_char(&term_tmp, term_component, &j);
/* the last character of the component is treated differently */
863 if (term_component[i+1])
867 string_rel_add_char(&term_tmp, term_component, &i);
871 string_rel_add_char(&term_tmp, term_component, &i);
878 if ((term_tmp - term_dict) > IT_MAX_WORD)
890 yaz_log(log_level_rpn, "Relation =");
891 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
892 term_component, space_split, term_dst))
/* equality: the parsed component wrapped in a group */
894 strcat(term_tmp, "(");
895 strcat(term_tmp, term_component);
896 strcat(term_tmp, ")");
899 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
/* Forward declaration: term_trunc below calls string_term before its
 * definition appears. */
905 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
906 const char **term_sub,
907 oid_value attributeSet, NMEM stream,
908 struct grep_info *grep_info,
909 int reg_type, int complete_flag,
910 int num_bases, char **basenames,
912 const char *xpath_use,
913 struct ord_list **ol);
/* term_limits_APT: read the hits-limit attribute (type 9) and the term
 * reference id attribute (type 10) from zapt.
 * hits_limit_value  - out: effective limit after the defaulting below
 * term_ref_id_str   - out: reference id as a string; a numeric id >= 0 is
 *                     formatted into a 20-byte buffer from nmem */
915 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
916 Z_AttributesPlusTerm *zapt,
917 zint *hits_limit_value,
918 const char **term_ref_id_str,
921 AttrType term_ref_id_attr;
922 AttrType hits_limit_attr;
925 attr_init_APT(&hits_limit_attr, zapt, 9);
926 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
928 attr_init_APT(&term_ref_id_attr, zapt, 10);
929 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
930 if (term_ref_id_int >= 0)
/* 20 bytes is ample for a decimal int plus terminator */
932 char *res = nmem_malloc(nmem, 20);
933 sprintf(res, "%d", term_ref_id_int);
934 *term_ref_id_str = res;
937 /* no limit given ? */
938 if (*hits_limit_value == -1)
940 if (*term_ref_id_str)
942 /* use global if term_ref is present */
943 *hits_limit_value = zh->approx_limit;
947 /* no counting if term_ref is not present */
948 *hits_limit_value = 0;
951 else if (*hits_limit_value == 0)
953 /* 0 is the same as global limit */
954 *hits_limit_value = zh->approx_limit;
956 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
957 *term_ref_id_str ? *term_ref_id_str : "none",
/* term_trunc: process one term of the query string.
 * Fetches the limits with term_limits_APT, resets the ISAM position count
 * in grep_info, runs string_term to collect positions for the term at
 * *term_sub and builds *rset from them with rset_trunc. */
962 static ZEBRA_RES term_trunc(ZebraHandle zh,
963 Z_AttributesPlusTerm *zapt,
964 const char **term_sub,
965 oid_value attributeSet, NMEM stream,
966 struct grep_info *grep_info,
967 int reg_type, int complete_flag,
968 int num_bases, char **basenames,
970 const char *rank_type,
971 const char *xpath_use,
974 struct rset_key_control *kc)
978 zint hits_limit_value;
979 const char *term_ref_id_str = 0;
982 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
/* start collecting positions afresh for this term */
984 grep_info->isam_p_indx = 0;
985 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
986 reg_type, complete_flag, num_bases, basenames,
987 term_dst, xpath_use, &ol);
990 if (!*term_sub) /* no more terms ? */
992 yaz_log(log_level_rpn, "term: %s", term_dst);
993 *rset = rset_trunc(zh, grep_info->isam_p_buf,
994 grep_info->isam_p_indx, term_dst,
995 strlen(term_dst), rank_type, 1 /* preserve pos */,
996 zapt->term->which, rset_nmem,
997 kc, kc->scope, ol, reg_type, hits_limit_value,
/* string_term: build the dictionary regular expression for one term and
 * grep the dictionary with it.
 * For every database in basenames the index ordinal is resolved with
 * zebra_apt_get_ord and appended to *ol; the encoded ordinal forms a
 * parenthesised prefix of term_dict (alternatives joined with '|', each
 * ordinal byte preceded by the byte 1). The term itself is then converted
 * according to the truncation attribute (type 5) and the result is passed
 * to dict_lookup_grep, which collects hits in grep_info.
 * term_sub  - in/out: position in the query term string
 * term_dst  - out: displayable form of the term
 * An unsupported truncation value sets
 * YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE. */
1004 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1005 const char **term_sub,
1006 oid_value attributeSet, NMEM stream,
1007 struct grep_info *grep_info,
1008 int reg_type, int complete_flag,
1009 int num_bases, char **basenames,
1011 const char *xpath_use,
1012 struct ord_list **ol)
1014 char term_dict[2*IT_MAX_WORD+4000];
1016 AttrType truncation;
1017 int truncation_value;
1019 struct rpn_char_map_info rcmi;
/* complete (phrase) registers are not split on white space */
1020 int space_split = complete_flag ? 0 : 1;
1022 int bases_ok = 0; /* no of databases with OK attribute */
1024 *ol = ord_list_create(stream);
1026 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1027 attr_init_APT(&truncation, zapt, 5);
1028 truncation_value = attr_find(&truncation, NULL);
1029 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1031 for (base_no = 0; base_no < num_bases; base_no++)
1034 int regex_range = 0;
1035 int max_pos, prefix_len = 0;
1042 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1044 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1045 basenames[base_no]);
1049 if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
1050 reg_type, xpath_use,
1051 attributeSet, &ord) != ZEBRA_OK)
1054 *ol = ord_list_append(stream, *ol, ord);
1057 term_dict[prefix_len++] = '|';
1059 term_dict[prefix_len++] = '(';
1061 ord_len = key_SU_encode (ord, ord_buf);
/* each ordinal byte is preceded by the byte 1 in the pattern */
1062 for (i = 0; i<ord_len; i++)
1064 term_dict[prefix_len++] = 1;
1065 term_dict[prefix_len++] = ord_buf[i];
1070 term_dict[prefix_len++] = ')';
1071 term_dict[prefix_len] = '\0';
1073 switch (truncation_value)
1075 case -1: /* not specified */
1076 case 100: /* do not truncate */
1077 if (!string_relation (zh, zapt, &termp, term_dict,
1079 reg_type, space_split, term_dst,
1084 zebra_setError(zh, relation_error, 0);
1091 case 1: /* right truncation */
1092 term_dict[j++] = '(';
1093 if (!term_100(zh->reg->zebra_maps, reg_type,
1094 &termp, term_dict + j, space_split, term_dst))
1099 strcat(term_dict, ".*)");
1101 case 2: /* left truncation */
1102 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1103 if (!term_100(zh->reg->zebra_maps, reg_type,
1104 &termp, term_dict + j, space_split, term_dst))
1109 strcat(term_dict, ")");
1111 case 3: /* left&right truncation */
1112 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1113 if (!term_100(zh->reg->zebra_maps, reg_type,
1114 &termp, term_dict + j, space_split, term_dst))
1119 strcat(term_dict, ".*)");
1121 case 101: /* process # in term */
1122 term_dict[j++] = '(';
1123 if (!term_101(zh->reg->zebra_maps, reg_type,
1124 &termp, term_dict + j, space_split, term_dst))
1129 strcat(term_dict, ")");
1131 case 102: /* Regexp-1 */
1132 term_dict[j++] = '(';
1133 if (!term_102(zh->reg->zebra_maps, reg_type,
1134 &termp, term_dict + j, space_split, term_dst))
1139 strcat(term_dict, ")");
1141 case 103: /* Regexp-2 */
1143 term_dict[j++] = '(';
/* term_103 may store an error count from a "+N+" prefix in regex_range,
 * which is later handed to dict_lookup_grep */
1144 if (!term_103(zh->reg->zebra_maps, reg_type,
1145 &termp, term_dict + j, &regex_range,
1146 space_split, term_dst))
1151 strcat(term_dict, ")");
1153 case 104: /* process # and ! in term */
1154 term_dict[j++] = '(';
1155 if (!term_104(zh->reg->zebra_maps, reg_type,
1156 &termp, term_dict + j, space_split, term_dst))
1161 strcat(term_dict, ")");
1163 case 105: /* process * and ! in term, with right truncation */
1164 term_dict[j++] = '(';
1165 if (!term_105(zh->reg->zebra_maps, reg_type,
1166 &termp, term_dict + j, space_split, term_dst, 1))
1171 strcat(term_dict, ")");
1173 case 106: /* process * and ! in term, no right truncation */
1174 term_dict[j++] = '(';
1175 if (!term_105(zh->reg->zebra_maps, reg_type,
1176 &termp, term_dict + j, space_split, term_dst, 0))
1181 strcat(term_dict, ")");
1184 zebra_setError_zint(zh,
1185 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* readable dump of the pattern (without the ordinal prefix) */
1192 const char *input = term_dict + prefix_len;
1193 esc_str(buf, sizeof(buf), input, strlen(input));
1195 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1196 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1197 grep_info, &max_pos,
1198 ord_len /* number of "exact" chars */,
1201 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1206 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1211 /* convert APT search term to UTF8 */
/* The result is written to termz. A general term is converted with
 * yaz_iconv (output limited to IT_MAX_WORD-1 bytes) when zh->iconv_to_utf8
 * is set; the plain copies below truncate to IT_MAX_WORD-1 bytes and
 * NUL-terminate. A term kind not handled here sets
 * YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM. */
1212 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1216 Z_Term *term = zapt->term;
1218 switch (term->which)
1220 case Z_Term_general:
1221 if (zh->iconv_to_utf8 != 0)
1223 char *inbuf = (char *) term->u.general->buf;
1224 size_t inleft = term->u.general->len;
1225 char *outbuf = termz;
/* leave room for the terminating NUL */
1226 size_t outleft = IT_MAX_WORD-1;
1229 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1231 if (ret == (size_t)(-1))
/* call with all-null arguments after a failed conversion */
1233 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1236 YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1244 sizez = term->u.general->len;
1245 if (sizez > IT_MAX_WORD-1)
1246 sizez = IT_MAX_WORD-1;
1247 memcpy (termz, term->u.general->buf, sizez);
1248 termz[sizez] = '\0';
1251 case Z_Term_characterString:
1252 sizez = strlen(term->u.characterString);
1253 if (sizez > IT_MAX_WORD-1)
1254 sizez = IT_MAX_WORD-1;
1255 memcpy (termz, term->u.characterString, sizez);
1256 termz[sizez] = '\0';
1259 zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1265 /* convert APT SCAN term to internal cmap */
/* The term is first converted to UTF-8 into termz0 (ZEBRA_FAIL is passed
 * on), then consumed piece by piece with zebra_maps_input. Mappings equal
 * to CHR_SPACE are handled through space_map, other mappings are copied
 * from *map. */
1266 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1267 char *termz, int reg_type)
1269 char termz0[IT_MAX_WORD];
1271 if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1272 return ZEBRA_FAIL; /* error */
1276 const char *cp = (const char *) termz0;
1277 const char *cp_end = cp + strlen(cp);
1280 const char *space_map = NULL;
/* zebra_maps_input advances cp; loop until the input is used up */
1283 while ((len = (cp_end - cp)) > 0)
1285 map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1286 if (**map == *CHR_SPACE)
1291 for (src = space_map; *src; src++)
1294 for (src = *map; *src; src++)
/* Release the term_no and isam_p_buf arrays held by grep_info. */
1303 static void grep_info_delete(struct grep_info *grep_info)
1306 xfree(grep_info->term_no);
1308 xfree(grep_info->isam_p_buf);
/* grep_info_prepare: reset grep_info and, when zapt carries a termset
 * attribute (type 8), create the result set that will receive the terms.
 * attr_find_ex result: -1 means no termset; -2 selects the string value as
 * the set name; any other value is formatted in decimal as the name.
 * If resultSetAdd fails, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME is set. */
1311 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1312 Z_AttributesPlusTerm *zapt,
1313 struct grep_info *grep_info,
1317 int termset_value_numeric;
1318 const char *termset_value_string;
1321 grep_info->term_no = 0;
1323 grep_info->isam_p_size = 0;
1324 grep_info->isam_p_buf = NULL;
1326 grep_info->reg_type = reg_type;
1327 grep_info->termset = 0;
1331 attr_init_APT(&termset, zapt, 8);
1332 termset_value_numeric =
1333 attr_find_ex(&termset, NULL, &termset_value_string);
1334 if (termset_value_numeric != -1)
1337 const char *termset_name = 0;
1338 if (termset_value_numeric != -2)
/* numeric termset: use the number as the set name */
1341 sprintf(resname, "%d", termset_value_numeric);
1342 termset_name = resname;
1345 termset_name = termset_value_string;
1346 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1347 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1348 if (!grep_info->termset)
1350 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1358 \brief Create result set(s) for list of terms
1359 \param zh Zebra Handle
1360 \param termz term as used in query but converted to UTF-8
1361 \param attributeSet default attribute set
1362 \param stream memory for result
1363 \param reg_type register type ('w', 'p',..)
1364 \param complete_flag whether it's phrases or not
1365 \param rank_type term flags for ranking
1366 \param xpath_use use attribute for X-Path (-1 for no X-path)
1367 \param num_bases number of databases
1368 \param basenames array of databases
1369 \param rset_nmem memory for result sets
1370 \param result_sets output result set for each term in list (output)
1371 \param num_result_sets number of output result sets
1372 \param kc rset key control to be used for created result sets
1374 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1375 Z_AttributesPlusTerm *zapt,
1377 oid_value attributeSet,
1379 int reg_type, int complete_flag,
1380 const char *rank_type,
1381 const char *xpath_use,
1382 int num_bases, char **basenames,
1384 RSET **result_sets, int *num_result_sets,
1385 struct rset_key_control *kc)
1387 char term_dst[IT_MAX_WORD+1];
1388 struct grep_info grep_info;
/* termp is advanced through termz by term_trunc, one term per call */
1389 const char *termp = termz;
1391 int empty_term = *termz ? 0 : 1;
1394 *num_result_sets = 0;
1396 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* result set array full: allocate a larger one from stream and copy the
 * existing entries over */
1402 if (alloc_sets == *num_result_sets)
1405 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1408 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1409 alloc_sets = alloc_sets + add;
1410 *result_sets = rnew;
1412 res = term_trunc(zh, zapt, &termp, attributeSet,
1414 reg_type, complete_flag,
1415 num_bases, basenames,
1416 term_dst, rank_type,
1417 xpath_use, rset_nmem,
1418 &(*result_sets)[*num_result_sets],
1420 if (res != ZEBRA_OK)
/* failure: delete the result sets created so far and release grep_info */
1423 for (i = 0; i < *num_result_sets; i++)
1424 rset_delete((*result_sets)[i]);
1425 grep_info_delete (&grep_info);
/* a null entry means term_trunc produced no result set for this round */
1428 if ((*result_sets)[*num_result_sets] == 0)
1430 (*num_result_sets)++;
1437 grep_info_delete(&grep_info);
/* always_term: collect ISAM positions from the "always matches" index.
 * For every database in basenames the ordinal of the
 * zinfo_index_category_alwaysmatches index is resolved and appended to
 * *ol; the encoded ordinal forms a parenthesised pattern in term_dict
 * (alternatives joined with '|', each ordinal byte preceded by the byte 1)
 * which is handed to dict_lookup_grep together with grep_info. */
1442 static ZEBRA_RES always_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1443 oid_value attributeSet, NMEM stream,
1444 struct grep_info *grep_info,
1445 int reg_type, int complete_flag,
1446 int num_bases, char **basenames,
1447 const char *xpath_use,
1448 struct ord_list **ol)
1450 char term_dict[2*IT_MAX_WORD+4000];
1452 struct rpn_char_map_info rcmi;
1454 int bases_ok = 0; /* no of databases with OK attribute */
1456 *ol = ord_list_create(stream);
1458 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1460 for (base_no = 0; base_no < num_bases; base_no++)
1463 int regex_range = 0;
1464 int max_pos, prefix_len = 0;
1468 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1470 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1471 basenames[base_no]);
1475 if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_alwaysmatches,
1476 reg_type, xpath_use,
1477 attributeSet, &ord) != ZEBRA_OK)
1480 *ol = ord_list_append(stream, *ol, ord);
1483 term_dict[prefix_len++] = '|';
1485 term_dict[prefix_len++] = '(';
1487 ord_len = key_SU_encode (ord, ord_buf);
/* each ordinal byte is preceded by the byte 1 in the pattern */
1488 for (i = 0; i<ord_len; i++)
1490 term_dict[prefix_len++] = 1;
1491 term_dict[prefix_len++] = ord_buf[i];
1494 term_dict[prefix_len++] = ')';
1495 term_dict[prefix_len] = '\0';
1499 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1500 grep_info, &max_pos,
1501 ord_len /* number of "exact" chars */,
/* rpn_search_APT_alwaysmatches: build *rset from the "always matches"
 * index. Limits come from term_limits_APT, positions are collected by
 * always_term, and on ZEBRA_OK the result set is created with rset_trunc
 * under the fixed term name "always". grep_info is released afterwards. */
1509 static ZEBRA_RES rpn_search_APT_alwaysmatches(ZebraHandle zh,
1510 Z_AttributesPlusTerm *zapt,
1511 const char *termz_org,
1512 oid_value attributeSet,
1514 int reg_type, int complete_flag,
1515 const char *rank_type,
1516 const char *xpath_use,
1517 int num_bases, char **basenames,
1520 struct rset_key_control *kc)
1522 const char *term_dst = "always";
1523 struct grep_info grep_info;
1524 zint hits_limit_value;
1525 const char *term_ref_id_str = 0;
1527 struct ord_list *ol;
1529 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1531 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1534 grep_info.isam_p_indx = 0;
1536 res = always_term(zh, zapt, attributeSet, stream, &grep_info,
1537 reg_type, complete_flag, num_bases, basenames,
1539 if (res == ZEBRA_OK)
1541 *rset = rset_trunc(zh, grep_info.isam_p_buf,
1542 grep_info.isam_p_indx, term_dst, strlen(term_dst),
1543 rank_type, 1 /* preserve pos */,
1544 zapt->term->which, rset_nmem,
1545 kc, kc->scope, ol, reg_type, hits_limit_value,
1550 grep_info_delete (&grep_info);
/* rpn_search_APT_phrase: one result set per term is produced by
 * term_list_trunc and combined into *rset: no sets gives a null result
 * set, a single set is used as is, several sets are joined with
 * rset_create_prox (ordered, no exclusion, relation 3, distance 1). */
1554 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1555 Z_AttributesPlusTerm *zapt,
1556 const char *termz_org,
1557 oid_value attributeSet,
1559 int reg_type, int complete_flag,
1560 const char *rank_type,
1561 const char *xpath_use,
1562 int num_bases, char **basenames,
1565 struct rset_key_control *kc)
1567 RSET *result_sets = 0;
1568 int num_result_sets = 0;
1570 term_list_trunc(zh, zapt, termz_org, attributeSet,
1571 stream, reg_type, complete_flag,
1572 rank_type, xpath_use,
1573 num_bases, basenames,
1575 &result_sets, &num_result_sets, kc);
1576 if (res != ZEBRA_OK)
1578 if (num_result_sets == 0)
1579 *rset = rset_create_null(rset_nmem, kc, 0);
1580 else if (num_result_sets == 1)
1581 *rset = result_sets[0];
1583 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1584 num_result_sets, result_sets,
1585 1 /* ordered */, 0 /* exclusion */,
1586 3 /* relation */, 1 /* distance */);
/* rpn_search_APT_or_list: one result set per term is produced by
 * term_list_trunc and combined into *rset: no sets gives a null result
 * set, a single set is used as is, several sets are joined with
 * rset_create_or. */
1592 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1593 Z_AttributesPlusTerm *zapt,
1594 const char *termz_org,
1595 oid_value attributeSet,
1597 int reg_type, int complete_flag,
1598 const char *rank_type,
1599 const char *xpath_use,
1600 int num_bases, char **basenames,
1603 struct rset_key_control *kc)
1605 RSET *result_sets = 0;
1606 int num_result_sets = 0;
1608 term_list_trunc(zh, zapt, termz_org, attributeSet,
1609 stream, reg_type, complete_flag,
1610 rank_type, xpath_use,
1611 num_bases, basenames,
1613 &result_sets, &num_result_sets, kc);
1614 if (res != ZEBRA_OK)
1616 if (num_result_sets == 0)
1617 *rset = rset_create_null(rset_nmem, kc, 0);
1618 else if (num_result_sets == 1)
1619 *rset = result_sets[0];
1621 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1622 num_result_sets, result_sets);
/* rpn_search_APT_and_list: one result set per term is produced by
 * term_list_trunc and combined into *rset: no sets gives a null result
 * set, a single set is used as is, several sets are joined with
 * rset_create_and. */
1628 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1629 Z_AttributesPlusTerm *zapt,
1630 const char *termz_org,
1631 oid_value attributeSet,
1633 int reg_type, int complete_flag,
1634 const char *rank_type,
1635 const char *xpath_use,
1636 int num_bases, char **basenames,
1639 struct rset_key_control *kc)
1641 RSET *result_sets = 0;
1642 int num_result_sets = 0;
1644 term_list_trunc(zh, zapt, termz_org, attributeSet,
1645 stream, reg_type, complete_flag,
1646 rank_type, xpath_use,
1647 num_bases, basenames,
1649 &result_sets, &num_result_sets,
1651 if (res != ZEBRA_OK)
1653 if (num_result_sets == 0)
1654 *rset = rset_create_null(rset_nmem, kc, 0);
1655 else if (num_result_sets == 1)
1656 *rset = result_sets[0];
1658 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1659 num_result_sets, result_sets);
/* numeric_relation: append to term_dict the regular expression for a
 * numeric term under the relation attribute (type 2) of zapt and grep the
 * dictionary with it.
 * The term is parsed by term_100 directly at the end of term_dict and
 * converted with atoi; the same area is then overwritten with the pattern.
 * Per the log messages below: "<" uses value-1 and "<=" uses value (both
 * with islt=1), ">=" uses value and ">" uses value+1 (both with islt=0),
 * and "=" yields "(0*value)". An unsupported relation stores
 * YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in *error_code. */
1665 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1666 const char **term_sub,
1668 oid_value attributeSet,
1669 struct grep_info *grep_info,
1679 char *term_tmp = term_dict + strlen(term_dict);
1682 attr_init_APT(&relation, zapt, 2);
1683 relation_value = attr_find(&relation, NULL);
1685 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1687 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1690 term_value = atoi (term_tmp);
1691 switch (relation_value)
1694 yaz_log(log_level_rpn, "Relation <");
/* strict bound expressed as the inclusive bound value-1 */
1695 gen_regular_rel(term_tmp, term_value-1, 1);
1698 yaz_log(log_level_rpn, "Relation <=");
1699 gen_regular_rel(term_tmp, term_value, 1);
1702 yaz_log(log_level_rpn, "Relation >=");
1703 gen_regular_rel(term_tmp, term_value, 0);
1706 yaz_log(log_level_rpn, "Relation >");
/* strict bound expressed as the inclusive bound value+1 */
1707 gen_regular_rel(term_tmp, term_value+1, 0);
1711 yaz_log(log_level_rpn, "Relation =");
/* allow any number of leading zeros before the value */
1712 sprintf(term_tmp, "(0*%d)", term_value);
1715 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1718 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1719 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1722 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1723 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
/* numeric_term: collect ISAM positions for one numeric term.
 * For every database in basenames the index ordinal is resolved with
 * zebra_apt_get_ord and appended to *ol; the encoded ordinal forms a
 * parenthesised prefix of term_dict (alternatives joined with '|', each
 * ordinal byte preceded by the byte 1). numeric_relation then appends the
 * numeric pattern and performs the dictionary lookup; a relation error is
 * reported through zebra_setError. */
1727 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1728 const char **term_sub,
1729 oid_value attributeSet, NMEM stream,
1730 struct grep_info *grep_info,
1731 int reg_type, int complete_flag,
1732 int num_bases, char **basenames,
1734 const char *xpath_use,
1735 struct ord_list **ol)
1737 char term_dict[2*IT_MAX_WORD+2];
1740 struct rpn_char_map_info rcmi;
1742 int bases_ok = 0; /* no of databases with OK attribute */
1744 *ol = ord_list_create(stream);
1746 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1748 for (base_no = 0; base_no < num_bases; base_no++)
1750 int max_pos, prefix_len = 0;
1751 int relation_error = 0;
1752 int ord, ord_len, i;
1757 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1759 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1760 basenames[base_no]);
1764 if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
1765 reg_type, xpath_use, attributeSet, &ord)
1769 *ol = ord_list_append(stream, *ol, ord);
1772 term_dict[prefix_len++] = '|';
1774 term_dict[prefix_len++] = '(';
1776 ord_len = key_SU_encode (ord, ord_buf);
/* each ordinal byte is preceded by the byte 1 in the pattern */
1777 for (i = 0; i < ord_len; i++)
1779 term_dict[prefix_len++] = 1;
1780 term_dict[prefix_len++] = ord_buf[i];
1782 term_dict[prefix_len++] = ')';
1783 term_dict[prefix_len] = '\0';
1785 if (!numeric_relation(zh, zapt, &termp, term_dict,
1786 attributeSet, grep_info, &max_pos, reg_type,
1787 term_dst, &relation_error))
1791 zebra_setError(zh, relation_error, 0);
1801 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* rpn_search_APT_numeric: search a numeric register.
 * The term string termz is processed one term at a time by numeric_term;
 * each term yields a result set built with rset_trunc (positions not
 * preserved). The per-term sets are combined into *rset: no sets gives a
 * null result set, a single set is used as is, several sets are joined
 * with rset_create_and. On failure the sets created so far are deleted. */
1806 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1807 Z_AttributesPlusTerm *zapt,
1809 oid_value attributeSet,
1811 int reg_type, int complete_flag,
1812 const char *rank_type,
1813 const char *xpath_use,
1814 int num_bases, char **basenames,
1817 struct rset_key_control *kc)
1819 char term_dst[IT_MAX_WORD+1];
/* termp is advanced through termz by numeric_term */
1820 const char *termp = termz;
1821 RSET *result_sets = 0;
1822 int num_result_sets = 0;
1824 struct grep_info grep_info;
1826 zint hits_limit_value;
1827 const char *term_ref_id_str = 0;
1829 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1831 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1832 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1836 struct ord_list *ol;
/* result set array full: allocate a larger one from stream and copy the
 * existing entries over */
1837 if (alloc_sets == num_result_sets)
1840 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1843 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1844 alloc_sets = alloc_sets + add;
1847 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1848 grep_info.isam_p_indx = 0;
1849 res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1850 reg_type, complete_flag, num_bases, basenames,
1851 term_dst, xpath_use, &ol);
1852 if (res == ZEBRA_FAIL || termp == 0)
1854 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1855 result_sets[num_result_sets] =
1856 rset_trunc(zh, grep_info.isam_p_buf,
1857 grep_info.isam_p_indx, term_dst,
1858 strlen(term_dst), rank_type,
1859 0 /* preserve position */,
1860 zapt->term->which, rset_nmem,
1861 kc, kc->scope, ol, reg_type,
1864 if (!result_sets[num_result_sets])
1868 grep_info_delete(&grep_info);
1872 for (i = 0; i<num_result_sets; i++)
1873 rset_delete(result_sets[i]);
1876 if (num_result_sets == 0)
1877 *rset = rset_create_null(rset_nmem, kc, 0);
/* must be "else if": with a plain "if" the zero-set case would continue
 * into the final branch and replace the null set just created with an AND
 * over an empty list (same chain as the phrase/or/and list functions) */
1878 else if (num_result_sets == 1)
1879 *rset = result_sets[0];
1881 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1882 num_result_sets, result_sets);
/*
 * rpn_search_APT_local: handle the "local" search type.
 *
 * Creates a temporary result set in *rset (its directory comes from the
 * "setTmpDir" resource), opens it for writing and writes a key into it.
 */
1888 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1889 Z_AttributesPlusTerm *zapt,
1891 oid_value attributeSet,
1893 const char *rank_type, NMEM rset_nmem,
1895 struct rset_key_control *kc)
1900 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1901 res_get (zh->res, "setTmpDir"),0 );
1902 rsfd = rset_open(*rset, RSETF_WRITE);
1910 rset_write (rsfd, &key);
/*
 * rpn_sort_spec: record a sort criterion carried by an APT.
 *
 * The general term of the APT is parsed as an integer index i.  A
 * Z_SortKeySpec is built from the APT's attribute list, the OID of
 * attributeSet and the relation attribute (type 7), and is stored in
 * sort_sequence->specs[i].  *rset receives a null result set.
 */
1915 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1916 oid_value attributeSet, NMEM stream,
1917 Z_SortKeySpecList *sort_sequence,
1918 const char *rank_type,
1921 struct rset_key_control *kc)
1924 int sort_relation_value;
1925 AttrType sort_relation_type;
/* attribute type 7 carries the sort relation */
1932 attr_init_APT(&sort_relation_type, zapt, 7);
1933 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first use: allocate 10 spec slots and clear them */
1935 if (!sort_sequence->specs)
1937 sort_sequence->num_specs = 10;
1938 sort_sequence->specs = (Z_SortKeySpec **)
1939 nmem_malloc(stream, sort_sequence->num_specs *
1940 sizeof(*sort_sequence->specs));
1941 for (i = 0; i<sort_sequence->num_specs; i++)
1942 sort_sequence->specs[i] = 0;
1944 if (zapt->term->which != Z_Term_general)
/* the term text is the slot index of this sort key */
1947 i = atoi_n ((char *) zapt->term->u.general->buf,
1948 zapt->term->u.general->len);
1949 if (i >= sort_sequence->num_specs)
1951 sprintf(termz, "%d", i);
/* translate attributeSet into an OID for the sort attributes */
1953 oe.proto = PROTO_Z3950;
1954 oe.oclass = CLASS_ATTSET;
1955 oe.value = attributeSet;
1956 if (!oid_ent_to_oid (&oe, oid))
/* build the sort key spec; all pieces are allocated from stream */
1959 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1960 sks->sortElement = (Z_SortElement *)
1961 nmem_malloc(stream, sizeof(*sks->sortElement));
1962 sks->sortElement->which = Z_SortElement_generic;
1963 sk = sks->sortElement->u.generic = (Z_SortKey *)
1964 nmem_malloc(stream, sizeof(*sk));
1965 sk->which = Z_SortKey_sortAttributes;
1966 sk->u.sortAttributes = (Z_SortAttributes *)
1967 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1969 sk->u.sortAttributes->id = oid;
1970 sk->u.sortAttributes->list = zapt->attributes;
/* relation value 1 selects ascending and 2 selects descending; the last
   assignment below also selects ascending */
1972 sks->sortRelation = (int *)
1973 nmem_malloc(stream, sizeof(*sks->sortRelation));
1974 if (sort_relation_value == 1)
1975 *sks->sortRelation = Z_SortKeySpec_ascending;
1976 else if (sort_relation_value == 2)
1977 *sks->sortRelation = Z_SortKeySpec_descending;
1979 *sks->sortRelation = Z_SortKeySpec_ascending;
1981 sks->caseSensitivity = (int *)
1982 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1983 *sks->caseSensitivity = 0;
1985 sks->which = Z_SortKeySpec_null;
1986 sks->u.null = odr_nullval ();
1987 sort_sequence->specs[i] = sks;
/* a sort specification yields no hits of its own */
1988 *rset = rset_create_null(rset_nmem, kc, 0);
/*
 * rpn_check_xpath: detect an X-Path expression in the use attribute.
 *
 * Looks up the string value of attribute type 1 of the APT.  When that
 * string starts with '/', it is parsed by zebra_parse_xpath_str() into at
 * most max steps in xpath[] and that function's result is returned.  A
 * missing string, or one not starting with '/', is caught by the guard
 * that precedes the parse call.
 */
1993 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1994 oid_value attributeSet,
1995 struct xpath_location_step *xpath, int max,
1998 oid_value curAttributeSet = attributeSet;
2000 const char *use_string = 0;
2002 attr_init_APT(&use, zapt, 1);
2003 attr_find_ex(&use, &curAttributeSet, &use_string);
2005 if (!use_string || *use_string != '/')
2008 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc: build a result set for one X-Path index pattern.
 *
 * The ord for xpath_use is looked up in the explain data.  A dictionary
 * pattern is assembled in term_dict as '|' '(' <encoded ord> ')' followed
 * by term, where each encoded ord byte is preceded by a byte of value 1.
 * The pattern is run through dict_lookup_grep() and the collected ISAM
 * positions are turned into a result set by rset_trunc().  Two early
 * exits return a null result set instead.
 */
2013 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2014 int reg_type, const char *term,
2015 const char *xpath_use,
2017 struct rset_key_control *kc)
2020 struct grep_info grep_info;
2021 char term_dict[2048];
2024 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2025 zinfo_index_category_index,
2028 int ord_len, i, r, max_pos;
2029 int term_type = Z_Term_characterString;
2030 const char *flags = "void";
2032 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2033 return rset_create_null(rset_nmem, kc, 0);
2036 return rset_create_null(rset_nmem, kc, 0);
2038 term_dict[prefix_len++] = '|';
2040 term_dict[prefix_len++] = '(';
2042 ord_len = key_SU_encode (ord, ord_buf);
2043 for (i = 0; i<ord_len; i++)
2045 term_dict[prefix_len++] = 1;
2046 term_dict[prefix_len++] = ord_buf[i];
2048 term_dict[prefix_len++] = ')';
/* NOTE(review): term is copied into the 2048-byte term_dict with no
   length check visible at this point - confirm callers bound it */
2049 strcpy(term_dict+prefix_len, term);
/* start with an empty ISAM position list */
2051 grep_info.isam_p_indx = 0;
2052 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2053 &grep_info, &max_pos, 0, grep_handle);
2054 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2055 grep_info.isam_p_indx);
2056 rset = rset_trunc(zh, grep_info.isam_p_buf,
2057 grep_info.isam_p_indx, term, strlen(term),
2058 flags, 1, term_type,rset_nmem,
2059 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2060 0 /* term_ref_id_str */);
2061 grep_info_delete(&grep_info);
/*
 * rpn_search_xpath: restrict a term result set to an X-Path location.
 *
 * For every database in basenames, the location steps in xpath[] are
 * processed from the last step downwards.  For each level a reversed
 * path pattern is built in a WRBUF (xpath_rev); start-tag and end-tag
 * sets are looked up with xpath_trunc(), plus an attribute set when the
 * step has a relation predicate.  These are combined with the current
 * rset by rset_create_between().  always_matches is 1 when the incoming
 * rset is null.
 */
2066 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2067 int num_bases, char **basenames,
2068 NMEM stream, const char *rank_type, RSET rset,
2069 int xpath_len, struct xpath_location_step *xpath,
2072 struct rset_key_control *kc)
2076 int always_matches = rset ? 0 : 1;
2084 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2085 for (i = 0; i<xpath_len; i++)
2087 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2099 a[@attr = value]/b[@other = othervalue]
2101 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2102 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2103 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2104 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2105 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2106 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* 0 is passed for both the map info and the handler of the dictionary
   character map */
2110 dict_grep_cmap (zh->reg->dict, 0, 0);
2112 for (base_no = 0; base_no < num_bases; base_no++)
2114 int level = xpath_len;
2117 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2119 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2120 basenames[base_no]);
/* walk the location steps from the last one down to step 0 */
2124 while (--level >= 0)
2126 WRBUF xpath_rev = wrbuf_alloc();
2128 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* append steps level..1 to xpath_rev, i.e. in reverse order */
2130 for (i = level; i >= 1; --i)
2132 const char *cp = xpath[i].part;
2138 wrbuf_puts(xpath_rev, "[^/]*");
2139 else if (*cp == ' ')
2140 wrbuf_puts(xpath_rev, "\001 ");
2142 wrbuf_putc(xpath_rev, *cp);
2144 /* wrbuf_putc does not null-terminate , but
2145 wrbuf_puts below ensures it does.. so xpath_rev
2146 is OK iff length is > 0 */
2148 wrbuf_puts(xpath_rev, "/");
2150 else if (i == 1) /* // case */
2151 wrbuf_puts(xpath_rev, ".*");
/* a relation predicate with a non-empty name: build "name[=value]" and
   look it up in the attribute-name index */
2153 if (xpath[level].predicate &&
2154 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2155 xpath[level].predicate->u.relation.name[0])
2157 WRBUF wbuf = wrbuf_alloc();
/* name+1 skips the first character of the relation name */
2158 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2159 if (xpath[level].predicate->u.relation.value)
2161 const char *cp = xpath[level].predicate->u.relation.value;
2162 wrbuf_putc(wbuf, '=');
/* backslash-escape value characters that appear in REGEX_CHARS */
2166 if (strchr(REGEX_CHARS, *cp))
2167 wrbuf_putc(wbuf, '\\');
2168 wrbuf_putc(wbuf, *cp);
2172 wrbuf_puts(wbuf, "");
2173 rset_attr = xpath_trunc(
2174 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2176 wrbuf_free(wbuf, 1);
2182 wrbuf_free(xpath_rev, 1);
2186 yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
2187 wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
/* only a non-empty path pattern leads to tag lookups */
2188 if (wrbuf_len(xpath_rev))
2190 rset_start_tag = xpath_trunc(zh, stream, '0',
2191 wrbuf_buf(xpath_rev),
2192 ZEBRA_XPATH_ELM_BEGIN,
2195 rset = rset_start_tag;
2198 rset_end_tag = xpath_trunc(zh, stream, '0',
2199 wrbuf_buf(xpath_rev),
2200 ZEBRA_XPATH_ELM_END,
/* keep what lies between the start and end tag sets; rset_attr is
   passed as the last argument */
2203 rset = rset_create_between(rset_nmem, kc, kc->scope,
2204 rset_start_tag, rset,
2205 rset_end_tag, rset_attr);
2208 wrbuf_free(xpath_rev, 1);
/* Capacity of the xpath[] step array in rpn_search_APT; also passed to
   rpn_check_xpath as the maximum number of steps to parse. */
2216 #define MAX_XPATH_STEPS 10
/*
 * rpn_search_APT: execute the search for a single attributes-plus-term.
 *
 * zebra_maps_attr() supplies the register id, search type, rank type and
 * the complete/sort flags for the APT.  The term is converted to UTF-8
 * in termz.  A sort request is handed to rpn_sort_spec().  Otherwise the
 * use attribute is checked for an X-Path expression, and the search is
 * dispatched on search_type: "phrase", "and-list", "or-list", "local",
 * "numeric" or "always".  Any other search type sets
 * YAZ_BIB1_UNSUPP_SEARCH.  The final step passes *rset through
 * rpn_search_xpath().
 */
2218 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2219 oid_value attributeSet, NMEM stream,
2220 Z_SortKeySpecList *sort_sequence,
2221 int num_bases, char **basenames,
2224 struct rset_key_control *kc)
2226 ZEBRA_RES res = ZEBRA_OK;
2228 char *search_type = NULL;
2229 char rank_type[128];
2232 char termz[IT_MAX_WORD+1];
2234 const char *xpath_use = 0;
2235 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2239 log_level_rpn = yaz_log_module_level("rpn");
/* reg_id is an output argument, so its address is passed */
2242 zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2243 rank_type, &complete_flag, &sort_flag);
2245 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2246 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2247 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2248 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2250 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
/* sort request: record the sort key instead of searching */
2254 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2255 rank_type, rset_nmem, rset, kc);
2256 /* consider if an X-Path query is used */
2257 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2258 xpath, MAX_XPATH_STEPS, stream);
2261 if (xpath[xpath_len-1].part[0] == '@')
2262 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2264 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2267 /* search using one of the various search type strategies
2268 termz is our UTF-8 search term
2269 attributeSet is top-level default attribute set
2270 stream is ODR for search
2271 reg_id is the register type
2272 complete_flag is 1 for complete subfield, 0 for incomplete
2273 xpath_use is use-attribute to be used for X-Path search, 0 for none
2275 if (!strcmp(search_type, "phrase"))
2277 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2278 reg_id, complete_flag, rank_type,
2280 num_bases, basenames, rset_nmem,
2283 else if (!strcmp(search_type, "and-list"))
2285 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2286 reg_id, complete_flag, rank_type,
2288 num_bases, basenames, rset_nmem,
2291 else if (!strcmp(search_type, "or-list"))
2293 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2294 reg_id, complete_flag, rank_type,
2296 num_bases, basenames, rset_nmem,
2299 else if (!strcmp(search_type, "local"))
2301 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2302 rank_type, rset_nmem, rset, kc);
2304 else if (!strcmp(search_type, "numeric"))
2306 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2307 reg_id, complete_flag, rank_type,
2309 num_bases, basenames, rset_nmem,
2312 else if (!strcmp(search_type, "always"))
2314 if (xpath_len >= 0) /* alwaysmatches and X-Path ? */
2316 *rset = 0; /* signal no "term" set */
2317 return rpn_search_xpath(zh, num_bases, basenames,
2318 stream, rank_type, *rset,
2319 xpath_len, xpath, rset_nmem, rset, kc);
2323 res = rpn_search_APT_alwaysmatches(zh, zapt, termz,
2324 attributeSet, stream,
2325 reg_id, complete_flag,
2328 num_bases, basenames, rset_nmem,
2334 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2337 if (res != ZEBRA_OK)
2341 return rpn_search_xpath(zh, num_bases, basenames,
2342 stream, rank_type, *rset,
2343 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top calls rpn_search_structure before
   its definition, and the function also calls itself recursively. */
2346 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2347 oid_value attributeSet,
2348 NMEM stream, NMEM rset_nmem,
2349 Z_SortKeySpecList *sort_sequence,
2350 int num_bases, char **basenames,
2351 RSET **result_sets, int *num_result_sets,
2352 Z_Operator *parent_op,
2353 struct rset_key_control *kc);
/*
 * rpn_search_top: evaluate a whole RPN query tree.
 *
 * Creates a key control with zebra_key_control_create() and evaluates zs
 * through rpn_search_structure() with no parent operator.  When that
 * call does not return ZEBRA_OK the returned result sets are deleted.
 * On success exactly one non-null result set is expected (asserted) and
 * it is stored in *result_set.
 */
2355 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2356 oid_value attributeSet,
2357 NMEM stream, NMEM rset_nmem,
2358 Z_SortKeySpecList *sort_sequence,
2359 int num_bases, char **basenames,
2362 RSET *result_sets = 0;
2363 int num_result_sets = 0;
2365 struct rset_key_control *kc = zebra_key_control_create(zh);
2367 res = rpn_search_structure(zh, zs, attributeSet,
2370 num_bases, basenames,
2371 &result_sets, &num_result_sets,
2372 0 /* no parent op */,
2374 if (res != ZEBRA_OK)
/* failure: delete whatever sets were produced */
2377 for (i = 0; i<num_result_sets; i++)
2378 rset_delete(result_sets[i]);
/* success: the tree must have collapsed into exactly one set */
2383 assert(num_result_sets == 1);
2384 assert(result_sets);
2385 assert(*result_sets);
2386 *result_set = *result_sets;
/*
 * rpn_search_structure: recursively evaluate one node of an RPN tree.
 *
 * Complex node: both operands (s1, s2) are evaluated recursively and
 * their result-set lists are concatenated into *result_sets.  When there
 * is no parent operator, when the parent's operator differs from this
 * node's, or when this node's operator is neither AND nor OR, the list is
 * combined at once into a single set (and / or / not / prox).  Otherwise
 * the list is left as it is.
 *
 * Simple node: an APT operand is searched with rpn_search_APT(); a
 * result-set-id operand is resolved with resultSetRef().  The outcome is
 * returned as a one-element list.
 *
 * Unsupported operands and structures set YAZ_BIB1_UNSUPP_SEARCH.
 */
2392 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2393 oid_value attributeSet,
2394 NMEM stream, NMEM rset_nmem,
2395 Z_SortKeySpecList *sort_sequence,
2396 int num_bases, char **basenames,
2397 RSET **result_sets, int *num_result_sets,
2398 Z_Operator *parent_op,
2399 struct rset_key_control *kc)
2401 *num_result_sets = 0;
2402 if (zs->which == Z_RPNStructure_complex)
2405 Z_Operator *zop = zs->u.complex->roperator;
2406 RSET *result_sets_l = 0;
2407 int num_result_sets_l = 0;
2408 RSET *result_sets_r = 0;
2409 int num_result_sets_r = 0;
/* left operand */
2411 res = rpn_search_structure(zh, zs->u.complex->s1,
2412 attributeSet, stream, rset_nmem,
2414 num_bases, basenames,
2415 &result_sets_l, &num_result_sets_l,
2417 if (res != ZEBRA_OK)
2420 for (i = 0; i<num_result_sets_l; i++)
2421 rset_delete(result_sets_l[i]);
/* right operand */
2424 res = rpn_search_structure(zh, zs->u.complex->s2,
2425 attributeSet, stream, rset_nmem,
2427 num_bases, basenames,
2428 &result_sets_r, &num_result_sets_r,
2430 if (res != ZEBRA_OK)
/* right side failed: delete the sets from both sides */
2433 for (i = 0; i<num_result_sets_l; i++)
2434 rset_delete(result_sets_l[i]);
2435 for (i = 0; i<num_result_sets_r; i++)
2436 rset_delete(result_sets_r[i]);
2440 /* make a new list of result for all children */
2441 *num_result_sets = num_result_sets_l + num_result_sets_r;
2442 *result_sets = nmem_malloc(stream, *num_result_sets *
2443 sizeof(**result_sets));
2444 memcpy(*result_sets, result_sets_l,
2445 num_result_sets_l * sizeof(**result_sets));
2446 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2447 num_result_sets_r * sizeof(**result_sets));
2449 if (!parent_op || parent_op->which != zop->which
2450 || (zop->which != Z_Operator_and &&
2451 zop->which != Z_Operator_or))
2453 /* parent node different from this one (or non-present) */
2454 /* we must combine result sets now */
2458 case Z_Operator_and:
2459 rset = rset_create_and(rset_nmem, kc,
2461 *num_result_sets, *result_sets);
2464 rset = rset_create_or(rset_nmem, kc,
2465 kc->scope, 0, /* termid */
2466 *num_result_sets, *result_sets);
2468 case Z_Operator_and_not:
2469 rset = rset_create_not(rset_nmem, kc,
2474 case Z_Operator_prox:
/* proximity: the unit must be a known one and must be "word"; other
   units set YAZ_BIB1_UNSUPP_PROX_UNIT_CODE */
2475 if (zop->u.prox->which != Z_ProximityOperator_known)
2478 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2482 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2484 zebra_setError_zint(zh,
2485 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2486 *zop->u.prox->u.known);
2491 rset = rset_create_prox(rset_nmem, kc,
2493 *num_result_sets, *result_sets,
2494 *zop->u.prox->ordered,
2495 (!zop->u.prox->exclusion ?
2496 0 : *zop->u.prox->exclusion),
2497 *zop->u.prox->relationType,
2498 *zop->u.prox->distance );
2502 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the child list by a one-element list holding the combined set */
2505 *num_result_sets = 1;
2506 *result_sets = nmem_malloc(stream, *num_result_sets *
2507 sizeof(**result_sets));
2508 (*result_sets)[0] = rset;
2511 else if (zs->which == Z_RPNStructure_simple)
2516 if (zs->u.simple->which == Z_Operand_APT)
2518 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2519 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2520 attributeSet, stream, sort_sequence,
2521 num_bases, basenames, rset_nmem, &rset,
2523 if (res != ZEBRA_OK)
2526 else if (zs->u.simple->which == Z_Operand_resultSetId)
2528 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2529 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2533 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2534 zs->u.simple->u.resultSetId);
2541 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand yields a one-element list */
2544 *num_result_sets = 1;
2545 *result_sets = nmem_malloc(stream, *num_result_sets *
2546 sizeof(**result_sets));
2547 (*result_sets)[0] = rset;
2551 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* One scanned dictionary term; scan_handle fills its term and isam_p
   members. */
2557 struct scan_info_entry {
/* entry slots of a struct scan_info, indexed by scan_handle */
2563 struct scan_info_entry *list;
/*
 * scan_handle: callback given to dict_scan() by rpn_scan.
 *
 * client is the struct scan_info of the current scan.  name is compared
 * against scan_info->prefix over the prefix length.  The slot index is
 * computed as after - pos + before.  The slot receives a copy of name
 * without the prefix (allocated from scan_info->odr) and the ISAM_P
 * copied from info+1; the first byte of info is asserted to equal
 * sizeof(ISAM_P).
 */
2569 static int scan_handle (char *name, const char *info, int pos, void *client)
2571 int len_prefix, idx;
2572 struct scan_info *scan_info = (struct scan_info *) client;
2574 len_prefix = strlen(scan_info->prefix);
2575 if (memcmp (name, scan_info->prefix, len_prefix))
2578 idx = scan_info->after - pos + scan_info->before;
2584 scan_info->list[idx].term = (char *)
2585 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2586 strcpy(scan_info->list[idx].term, name + len_prefix);
2587 assert (*info == sizeof(ISAM_P));
2588 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/*
 * zebra_term_untrans_iconv: produce the display form of an index term.
 *
 * src is first mapped back with zebra_term_untrans() into term_src.  When
 * zh->iconv_from_utf8 is set, term_src is converted with yaz_iconv() into
 * term_dst and the converted bytes are copied into memory allocated from
 * stream.  Otherwise *dst is an nmem_strdup() copy of term_src.
 */
2592 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2593 char **dst, const char *src)
2595 char term_src[IT_MAX_WORD];
2596 char term_dst[IT_MAX_WORD];
2598 zebra_term_untrans (zh, reg_type, term_src, src);
2600 if (zh->iconv_from_utf8 != 0)
2603 char *inbuf = term_src;
2604 size_t inleft = strlen(term_src);
2605 char *outbuf = term_dst;
/* one byte of term_dst is kept out of the conversion's reach */
2606 size_t outleft = sizeof(term_dst)-1;
2609 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2611 if (ret == (size_t)(-1))
/* number of bytes the conversion wrote into term_dst */
2614 len = outbuf - term_dst;
2615 *dst = nmem_malloc(stream, len + 1);
2617 memcpy (*dst, term_dst, len);
2621 *dst = nmem_strdup(stream, term_src);
/*
 * count_set: determine the hit count of a result set.
 *
 * Sets rset->hits_limit from zh->approx_limit, then reads the set from
 * start.  A change in key.mem[0] against the previous value (psysno) is
 * detected for each key, and rfd->counted_items is compared with the
 * limit.  *count is finally taken from rset->hits_count.
 */
2624 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2630 yaz_log(YLOG_DEBUG, "count_set");
2632 rset->hits_limit = zh->approx_limit;
2635 rfd = rset_open(rset, RSETF_READ);
2636 while (rset_read(rfd, &key,0 /* never mind terms */))
2638 if (key.mem[0] != psysno)
2640 psysno = key.mem[0];
2641 if (rfd->counted_items >= rset->hits_limit)
2646 *count = rset->hits_count;
/* Capacity of the ords[] and ptr[] arrays in rpn_scan; the per-database
   loop there stops once this many ords have been collected. */
2649 #define RPN_MAX_ORDS 32
/*
 * rpn_scan: scan the index terms around the term of an APT.
 *
 * On entry *position and *num_entries give the requested position and
 * number of entries.  One ord is resolved per database (at most
 * RPN_MAX_ORDS).  The dictionary is scanned once per ord with dict_scan()
 * and scan_handle as callback.  The per-ord lists are then merged, first
 * for the terms after the scan term and then for those before it.  For
 * each merged term a result set is built (rset_trunc, or-ed across ords,
 * and-ed with limit_set when given) and counted with count_set() to fill
 * glist[].occurrences.  *list is set to the first real entry of glist.
 */
2651 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2652 oid_value attributeset,
2653 int num_bases, char **basenames,
2654 int *position, int *num_entries, ZebraScanEntry **list,
2655 int *is_partial, RSET limit_set, int return_zero)
2658 int pos = *position;
2659 int num = *num_entries;
2663 char termz[IT_MAX_WORD+20];
2664 struct scan_info *scan_info_array;
2665 ZebraScanEntry *glist;
2666 int ords[RPN_MAX_ORDS], ord_no = 0;
2667 int ptr[RPN_MAX_ORDS];
2669 unsigned index_type;
2670 char *search_type = NULL;
2671 char rank_type[128];
2674 NMEM rset_nmem = NULL;
2675 struct rset_key_control *kc = 0;
/* no attribute set given: fall back to Bib-1 */
2680 if (attributeset == VAL_NONE)
2681 attributeset = VAL_BIB1;
/* attribute type 8 can name a result set; it is looked up with
   resultSetRef() and assigned to limit_set */
2686 int termset_value_numeric;
2687 const char *termset_value_string;
2688 attr_init_APT(&termset, zapt, 8);
2689 termset_value_numeric =
2690 attr_find_ex(&termset, NULL, &termset_value_string);
2691 if (termset_value_numeric != -1)
2694 const char *termset_name = 0;
/* any value other than -2 is formatted as a decimal set name; the string
   value is used further down */
2696 if (termset_value_numeric != -2)
2699 sprintf(resname, "%d", termset_value_numeric);
2700 termset_name = resname;
2703 termset_name = termset_value_string;
2705 limit_set = resultSetRef (zh, termset_name);
2709 yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2710 pos, num, attributeset);
2712 if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2713 rank_type, &complete_flag, &sort_flag))
2716 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
/* resolve one ord per database, up to RPN_MAX_ORDS */
2719 for (base_no = 0; base_no < num_bases && ord_no < RPN_MAX_ORDS; base_no++)
2723 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2725 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2726 basenames[base_no]);
2730 if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
2731 index_type, 0, attributeset, &ord)
2734 ords[ord_no++] = ord;
2741 /* prepare dictionary scanning */
2753 yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2754 "after=%d before+after=%d",
2755 pos, num, before, after, before+after);
2756 scan_info_array = (struct scan_info *)
2757 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
/* one dictionary scan per ord; each has its own list of before+after
   slots, all starting out empty */
2758 for (i = 0; i < ord_no; i++)
2760 int j, prefix_len = 0;
2761 int before_tmp = before, after_tmp = after;
2762 struct scan_info *scan_info = scan_info_array + i;
2763 struct rpn_char_map_info rcmi;
2765 rpn_char_map_prepare (zh->reg, index_type, &rcmi);
2767 scan_info->before = before;
2768 scan_info->after = after;
2769 scan_info->odr = stream;
2771 scan_info->list = (struct scan_info_entry *)
2772 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2773 for (j = 0; j<before+after; j++)
2774 scan_info->list[j].term = NULL;
/* the dictionary key starts with the encoded ord, which is also kept as
   scan_info->prefix
   (NOTE(review): the size of prefix is not visible here - confirm it can
   hold the encoded ord) */
2776 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2777 termz[prefix_len] = 0;
2778 strcpy(scan_info->prefix, termz);
/* the translated scan term is written right after the prefix */
2780 if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) ==
2784 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2785 scan_info, scan_handle);
2787 glist = (ZebraScanEntry *)
2788 odr_malloc(stream, (before+after)*sizeof(*glist));
2790 rset_nmem = nmem_create();
2791 kc = zebra_key_control_create(zh);
2793 /* consider terms after main term */
2794 for (i = 0; i < ord_no; i++)
2798 for (i = 0; i<after; i++)
2801 const char *mterm = NULL;
2804 int lo = i + pos-1; /* offset in result list */
2806 /* find: j0 is the first of the minimal values */
2807 for (j = 0; j < ord_no; j++)
2809 if (ptr[j] < before+after && ptr[j] >= 0 &&
2810 (tst = scan_info_array[j].list[ptr[j]].term) &&
2811 (!mterm || strcmp (tst, mterm) < 0))
2818 break; /* no value found, stop */
2820 /* get result set for first one , but only if it's within bounds */
2823 /* get result set for first term */
2824 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2825 &glist[lo].term, mterm);
2826 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2827 glist[lo].term, strlen(glist[lo].term),
2828 NULL, 0, zapt->term->which, rset_nmem,
2829 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2830 0 /* term_ref_id_str */);
2832 ptr[j0]++; /* move index for this set .. */
2833 /* get result set for remaining scan terms */
2834 for (j = j0+1; j<ord_no; j++)
2836 if (ptr[j] < before+after && ptr[j] >= 0 &&
2837 (tst = scan_info_array[j].list[ptr[j]].term) &&
2838 !strcmp (tst, mterm))
2847 zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2849 strlen(glist[lo].term), NULL, 0,
2850 zapt->term->which,rset_nmem,
2851 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2852 0 /* term_ref_id_str */ );
2853 rset = rset_create_or(rset_nmem, kc,
2854 kc->scope, 0 /* termid */,
2863 /* merge with limit_set if given */
2868 rsets[1] = rset_dup(limit_set);
2870 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2873 count_set(zh, rset, &count);
2874 glist[lo].occurrences = count;
/* reduce the reported entry count by after-i
   (NOTE(review): presumably the after-slots left unfilled - confirm) */
2880 *num_entries -= (after-i);
2882 if (*num_entries < 0)
2885 nmem_destroy(rset_nmem);
2890 /* consider terms before main term */
2891 for (i = 0; i<ord_no; i++)
2894 for (i = 0; i<before; i++)
2897 const char *mterm = NULL;
2900 int lo = before-1-i; /* offset in result list */
/* pick the greatest remaining term across ords; the before-part of each
   list is indexed backwards as before-1-ptr[j] */
2903 for (j = 0; j <ord_no; j++)
2905 if (ptr[j] < before && ptr[j] >= 0 &&
2906 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2907 (!mterm || strcmp (tst, mterm) > 0))
2916 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2917 &glist[lo].term, mterm);
2920 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2921 glist[lo].term, strlen(glist[lo].term),
2922 NULL, 0, zapt->term->which, rset_nmem,
2923 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2924 0 /* term_ref_id_str */);
/* or-in the same term found under the remaining ords */
2928 for (j = j0+1; j<ord_no; j++)
2930 if (ptr[j] < before && ptr[j] >= 0 &&
2931 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2932 !strcmp (tst, mterm))
2937 rsets[1] = rset_trunc(
2939 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2941 strlen(glist[lo].term), NULL, 0,
2942 zapt->term->which, rset_nmem,
2943 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2944 0 /* term_ref_id_str */);
2945 rset = rset_create_or(rset_nmem, kc,
2946 kc->scope, 0 /* termid */, 2, rsets);
2955 rsets[1] = rset_dup(limit_set);
2957 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2959 count_set(zh, rset, &count);
2960 glist[lo].occurrences = count;
2964 nmem_destroy(rset_nmem);
2971 if (*num_entries <= 0)
2978 *list = glist + i; /* list is set to first 'real' entry */
2980 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2981 *position, *num_entries);
2988 * indent-tabs-mode: nil
2990 * vim: shiftwidth=4 tabstop=8 expandtab