1 /* $Id: rpnsearch.c,v 1.3 2006-11-30 10:33:19 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* Flag paired with log_level_rpn; starts at 0.
 * NOTE(review): presumably set once the "rpn" level has been looked up -
 * the code that tests/sets it is not visible in this excerpt. */
40 static int log_level_set = 0;
/* Log level passed to yaz_log() for RPN tracing; assigned from
 * yaz_log_module_level("rpn") in add_isam_p. */
41 static int log_level_rpn = 0;
/* NOTE(review): not referenced in the visible lines; presumably switches
 * off term-set handling - confirm against grep_info_prepare. */
43 #define TERMSET_DISABLE 1
/* Character-mapping callback handed to dict_grep_cmap() by
 * rpn_char_map_prepare().  vp is a struct rpn_char_map_info; the input is
 * mapped with zebra_maps_input() using its zm and reg_type.
 * Only part of the body (and no return statement) is visible here. */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
/* Diagnostic logging: a "---" separator, then bytes of the first mapped
 * string printed as two-digit hex. */
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Fill map_info with the register's zebra_maps and the given register type,
 * then register rpn_char_map_handler (with map_info as its argument) on the
 * register's dictionary via dict_grep_cmap(). */
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65 struct rpn_char_map_info *map_info)
67 map_info->zm = reg->zebra_maps;
68 map_info->reg_type = reg_type;
69 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Record one dictionary match in the grep_info accumulator p.
 *
 * name: dictionary key of the match; key_SU_decode() splits it into an
 *       ordinal followed by the term bytes.
 * info: dictionary payload; info[0] must equal sizeof(ISAM_P) (asserted),
 *       the ISAM_P value itself follows at info+1.
 * p:    accumulator; isam_p_buf and the equally sized term_no array are
 *       grown on demand.
 *
 * Only part of the body is visible in this excerpt.
 */
86 static void add_isam_p(const char *name, const char *info,
91 log_level_rpn = yaz_log_module_level("rpn");
/* Buffer full: grow capacity to 2*size + 100 entries. */
94 if (p->isam_p_indx == p->isam_p_size)
96 ISAM_P *new_isam_p_buf;
100 p->isam_p_size = 2*p->isam_p_size + 100;
101 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
105 memcpy(new_isam_p_buf, p->isam_p_buf,
106 p->isam_p_indx * sizeof(*p->isam_p_buf));
107 xfree(p->isam_p_buf);
109 p->isam_p_buf = new_isam_p_buf;
112 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* Carry over the old term numbers.  The source must be p->term_no:
 * p->isam_p_buf already points at the new ISAM_P buffer at this point. */
115 memcpy(new_term_no, p->term_no,
116 p->isam_p_indx * sizeof(*p->term_no));
119 p->term_no = new_term_no;
/* Store the ISAM_P carried in the dictionary payload. */
122 assert(*info == sizeof(*p->isam_p_buf));
123 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* Decode the key for logging and for the term set: ordinal first, then
 * the term translated back with zebra_term_untrans(). */
128 char term_tmp[IT_MAX_WORD];
130 const char *index_name;
131 int len = key_SU_decode (&ord, (const unsigned char *) name);
133 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
134 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
135 zebraExplain_lookup_ord(p->zh->reg->zei,
136 ord, 0 /* index_type */, &db, &index_name);
137 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
139 resultSetAddTerm(p->zh, p->termset, name[len], db,
140 index_name, term_tmp);
/* Adapter taking an untyped context pointer: casts p to struct grep_info
 * and forwards the match to add_isam_p().
 * NOTE(review): presumably the callback given to dict_lookup_grep();
 * the return statement is not visible in this excerpt. */
145 static int grep_handle(char *name, const char *info, void *p)
147 add_isam_p(name, info, (struct grep_info *) p);
/* Skip leading white space in *src before a term is parsed.
 * ct1 / ct2: optional (may be NULL) character sets; the current input
 *   character is tested against each with strchr().  The action taken on a
 *   hit is not visible here - presumably the skipping stops.
 * first: forwarded to zebra_maps_input().
 * A character is treated as white space when zebra_maps_input() maps it to
 * CHR_SPACE.  Callers test the result for zero. */
151 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
152 const char *ct1, const char *ct2, int first)
154 const char *s1, *s0 = *src;
157 /* skip white space */
160 if (ct1 && strchr(ct1, *s0))
162 if (ct2 && strchr(ct2, *s0))
165 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
/* First character that does not map to a space. */
166 if (**map != *CHR_SPACE)
/* Format in_size bytes of in_buf into out_buf as a run of "HH:c " items
 * (two-digit hex value, then a character), for logging.
 * out_size must be larger than 20 (asserted).  Once the text built so far
 * is longer than out_size-20, ".." is appended.
 * NOTE(review): the initialisation of out_buf and what happens after ".."
 * (presumably the loop ends) are not visible in this excerpt. */
175 static void esc_str(char *out_buf, size_t out_size,
176 const char *in_buf, int in_size)
182 assert(out_size > 20);
184 for (k = 0; k<in_size; k++)
/* Mask to 0..255 so a byte with the high bit set is not seen as negative. */
186 int c = in_buf[k] & 0xff;
/* Outside printable ASCII; presumably pc is given a substitute here. */
188 if (c < 32 || c > 126)
192 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
193 if (strlen(out_buf) > out_size-20)
195 strcat(out_buf, "..");
/* Characters the term_* functions test for (with strchr) while copying
 * term text into the dictionary regular expression.
 * NOTE(review): presumably these get escaped; the escaping statement is not
 * visible in this excerpt. */
201 #define REGEX_CHARS " []()|.*+?!"
203 /* term_100: handle term, where trunc = none(no operators at all) */
/* src:         in/out pointer into the query term; leading white space is
 *              skipped through term_pre().
 * dst:         receives the mapped term for the dictionary lookup
 *              (written at index i).
 * dst_term:    receives the unmapped source bytes of the term (index j).
 * space_split: its negation is passed to term_pre() as `first`; the visible
 *              "complete subfield only" branch keeps embedded spaces.
 * Only part of the body is visible in this excerpt. */
204 static int term_100(ZebraMaps zebra_maps, int reg_type,
205 const char **src, char *dst, int space_split,
/* Bounds of a pending run of spaces that has not been emitted yet. */
213 const char *space_start = 0;
214 const char *space_end = 0;
216 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
223 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
227 if (**map == *CHR_SPACE)
230 else /* complete subfield only. */
232 if (**map == *CHR_SPACE)
233 { /* save space mapping for later .. */
238 else if (space_start)
239 { /* reload last space */
240 while (space_start < space_end)
242 if (strchr(REGEX_CHARS, *space_start))
244 dst_term[j++] = *space_start;
245 dst[i++] = *space_start++;
248 space_start = space_end = 0;
251 /* add non-space char */
252 memcpy(dst_term+j, s1, s0 - s1);
258 if (strchr(REGEX_CHARS, *s1))
/* Hex dump of the mapped string into tmpbuf (for logging). */
266 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
268 strcpy(dst + i, map[0]);
278 /* term_101: handle term, where trunc = Process # */
/* Same parameter roles as term_100: src is the in/out term pointer, dst
 * gets the mapped text, dst_term the unmapped copy (NUL-terminated at the
 * end).  '#' is the operator character handed to term_pre().
 * Only part of the body is visible in this excerpt. */
279 static int term_101(ZebraMaps zebra_maps, int reg_type,
280 const char **src, char *dst, int space_split,
288 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
/* Operator character: copied verbatim into dst_term. */
297 dst_term[j++] = *s0++;
303 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
/* When splitting on spaces, a character mapping to space is tested for
 * here (presumably it ends the term). */
305 if (space_split && **map == *CHR_SPACE)
308 /* add non-space char */
309 memcpy(dst_term+j, s1, s0 - s1);
315 if (strchr(REGEX_CHARS, *s1))
323 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
325 strcpy(dst + i, map[0]);
331 dst_term[j++] = '\0';
336 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* errors: optional output (may be NULL, as term_102 passes).  When the term
 *         starts with '+', a digit, '+' and at least one more character,
 *         the digit's value is stored in *errors.
 * Other parameters play the same roles as in term_100.
 * Only part of the body is visible in this excerpt. */
337 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
338 char *dst, int *errors, int space_split,
346 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
349 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
350 isdigit(((const unsigned char *)s0)[1]))
352 *errors = s0[1] - '0';
/* Regular-expression operator characters are recognised here. */
359 if (strchr("^\\()[].*+?|-", *s0))
368 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
370 if (space_split && **map == *CHR_SPACE)
373 /* add non-space char */
374 memcpy(dst_term+j, s1, s0 - s1);
380 if (strchr(REGEX_CHARS, *s1))
388 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
390 strcpy(dst + i, map[0]);
402 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Thin wrapper: delegates to term_103() with errors == NULL. */
403 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
404 char *dst, int space_split, char *dst_term)
406 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
411 /* term_104: handle term, where trunc = Process # and ! */
/* Same parameter roles as term_100.  The operator set handed to term_pre()
 * is "?*#"; an operator may be followed by a decimal count, accumulated in
 * `limit`.  Operator and digit characters are copied verbatim to dst_term.
 * Only part of the body is visible in this excerpt. */
412 static int term_104(ZebraMaps zebra_maps, int reg_type,
413 const char **src, char *dst, int space_split,
421 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
428 dst_term[j++] = *s0++;
/* Optional decimal number following the operator. */
429 if (*s0 >= '0' && *s0 <= '9')
432 while (*s0 >= '0' && *s0 <= '9')
434 limit = limit * 10 + (*s0 - '0');
435 dst_term[j++] = *s0++;
455 dst_term[j++] = *s0++;
460 dst_term[j++] = *s0++;
466 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
468 if (space_split && **map == *CHR_SPACE)
471 /* add non-space char */
472 memcpy(dst_term+j, s1, s0 - s1);
478 if (strchr(REGEX_CHARS, *s1))
486 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
488 strcpy(dst + i, map[0]);
494 dst_term[j++] = '\0';
499 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* Same parameter roles as term_100, plus:
 * right_truncate: string_term() passes 1 for truncation 105 and 0 for 106.
 *   NOTE(review): how the flag is applied is not visible in this excerpt.
 * The operator set handed to term_pre() is "*!"; operator characters are
 * copied verbatim to dst_term, which is NUL-terminated at the end. */
500 static int term_105(ZebraMaps zebra_maps, int reg_type,
501 const char **src, char *dst, int space_split,
502 char *dst_term, int right_truncate)
509 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
518 dst_term[j++] = *s0++;
523 dst_term[j++] = *s0++;
529 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
531 if (space_split && **map == *CHR_SPACE)
534 /* add non-space char */
535 memcpy(dst_term+j, s1, s0 - s1);
541 if (strchr(REGEX_CHARS, *s1))
549 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
551 strcpy(dst + i, map[0]);
563 dst_term[j++] = '\0';
569 /* gen_regular_rel - generate regular expression from relation
570 * val: border value (inclusive)
571 * islt: 1 if <=; 0 if >=.
/* Writes into dst a regular expression over decimal digit strings for the
 * bound val; islt selects the direction (1: <=, 0: >=).  The expression
 * starts with one of two prefixes that accept every number of the opposite
 * sign, then handles numbers of the same digit count and finally numbers
 * with fewer / more digits.
 * NOTE(review): the conditions choosing between the branches are not
 * visible in this excerpt; dst must be large enough - no size is passed. */
573 static void gen_regular_rel(char *dst, int val, int islt)
580 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
584 strcpy(dst, "(-[0-9]+|(");
592 strcpy(dst, "([0-9]+|-(");
/* Decimal text of val; the loop walks its digits from the last one back
 * while pos counts them. */
604 sprintf(numstr, "%d", val);
605 for (w = strlen(numstr); --w >= 0; pos++)
624 strcpy(dst + dst_p, numstr);
625 dst_p = strlen(dst) - pos - 1;
653 for (i = 0; i<pos; i++)
666 /* match everything less than 10^(pos-1) */
668 for (i = 1; i<pos; i++)
669 strcat(dst, "[0-9]?");
673 /* match everything greater than 10^pos */
674 for (i = 0; i <= pos; i++)
675 strcat(dst, "[0-9]");
676 strcat(dst, "[0-9]*");
/* Copy one (possibly backslash-escaped) character from src[*indx] to
 * *term_p, advancing both the read index and the write pointer.
 * When the character is a backslash it is copied together with the
 * character that follows it. */
681 void string_rel_add_char(char **term_p, const char *src, int *indx)
683 if (src[*indx] == '\\')
684 *(*term_p)++ = src[(*indx)++];
685 *(*term_p)++ = src[(*indx)++];
689 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
690 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
691 * >= abc ([b-].*|a[c-].*|ab[c-].*)
692 * ([^-a].*|a[^-b].*|ab[c-].*)
693 * < abc ([-0].*|a[-a].*|ab[-b].*)
694 * ([^a-].*|a[^b-].*|ab[^c-].*)
695 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
696 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Append to term_dict the regular expression for the relation attribute
 * (attribute type 2) of zapt applied to the next term in *term_sub.
 *
 * term_sub:   in/out pointer into the query term (consumed by term_100).
 * term_dict:  NUL-terminated expression built so far; output is appended.
 * term_dst:   receives the plain term text (filled by term_100).
 * error_code: set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for a relation
 *             value that is not handled.
 *
 * For the ordering relations the mapped term (term_component) is expanded,
 * character by character, into a set of alternatives.
 * NOTE(review): case labels, returns and several loop bodies are not
 * visible in this excerpt; branches are identified by their log messages. */
698 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
699 const char **term_sub, char *term_dict,
700 oid_value attributeSet,
701 int reg_type, int space_split, char *term_dst,
/* Output position: the end of what is already in term_dict. */
707 char *term_tmp = term_dict + strlen(term_dict);
708 char term_component[2*IT_MAX_WORD+20];
710 attr_init_APT(&relation, zapt, 2);
711 relation_value = attr_find(&relation, NULL);
714 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
715 switch (relation_value)
718 if (!term_100(zh->reg->zebra_maps, reg_type,
719 term_sub, term_component,
720 space_split, term_dst))
722 yaz_log(log_level_rpn, "Relation <");
725 for (i = 0; term_component[i]; )
732 string_rel_add_char(&term_tmp, term_component, &j);
739 *term_tmp++ = FIRST_IN_FIELD_CHAR;
741 string_rel_add_char(&term_tmp, term_component, &i);
/* Size check on the expression built so far. */
748 if ((term_tmp - term_dict) > IT_MAX_WORD)
753 yaz_log(YLOG_LOG, "term_dict=%s", term_dict);
756 if (!term_100(zh->reg->zebra_maps, reg_type,
757 term_sub, term_component,
758 space_split, term_dst))
760 yaz_log(log_level_rpn, "Relation <=");
763 for (i = 0; term_component[i]; )
768 string_rel_add_char(&term_tmp, term_component, &j);
774 *term_tmp++ = FIRST_IN_FIELD_CHAR;
776 string_rel_add_char(&term_tmp, term_component, &i);
785 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* Copy the whole component once more. */
788 for (i = 0; term_component[i]; )
789 string_rel_add_char(&term_tmp, term_component, &i);
794 if (!term_100 (zh->reg->zebra_maps, reg_type,
795 term_sub, term_component, space_split, term_dst))
797 yaz_log(log_level_rpn, "Relation >");
800 for (i = 0; term_component[i];)
805 string_rel_add_char(&term_tmp, term_component, &j);
810 string_rel_add_char(&term_tmp, term_component, &i);
818 if ((term_tmp - term_dict) > IT_MAX_WORD)
821 for (i = 0; term_component[i];)
822 string_rel_add_char(&term_tmp, term_component, &i);
829 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
830 term_component, space_split, term_dst))
832 yaz_log(log_level_rpn, "Relation >=");
835 for (i = 0; term_component[i];)
842 string_rel_add_char(&term_tmp, term_component, &j);
845 if (term_component[i+1])
849 string_rel_add_char(&term_tmp, term_component, &i);
853 string_rel_add_char(&term_tmp, term_component, &i);
860 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* Equality: the mapped term wrapped in parentheses. */
871 yaz_log(log_level_rpn, "Relation =");
872 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
873 term_component, space_split, term_dst))
875 strcat(term_tmp, "(");
876 strcat(term_tmp, term_component);
877 strcat(term_tmp, ")");
880 yaz_log(log_level_rpn, "Relation always matches");
881 /* skip to end of term (we don't care what it is) */
882 while (**term_sub != '\0')
886 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
/* Forward declaration: string_term() is defined further down but is called
 * by term_trunc() before that definition. */
892 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
893 const char **term_sub,
894 oid_value attributeSet, NMEM stream,
895 struct grep_info *grep_info,
896 int reg_type, int complete_flag,
897 int num_bases, char **basenames,
899 const char *xpath_use,
900 struct ord_list **ol);
/* Read the hit-limit (attribute type 11) and term reference id (attribute
 * type 10) from zapt.
 *
 * hits_limit_value: output.  -1 (not given) becomes zh->approx_limit when a
 *                   term reference id is present and 0 otherwise; an
 *                   explicit 0 becomes zh->approx_limit.
 * term_ref_id_str:  output; a numeric id (>= 0) is formatted as decimal
 *                   text into memory taken from nmem. */
902 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
903 Z_AttributesPlusTerm *zapt,
904 zint *hits_limit_value,
905 const char **term_ref_id_str,
908 AttrType term_ref_id_attr;
909 AttrType hits_limit_attr;
912 attr_init_APT(&hits_limit_attr, zapt, 11);
913 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
915 attr_init_APT(&term_ref_id_attr, zapt, 10);
916 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
917 if (term_ref_id_int >= 0)
/* 20 bytes are enough for the decimal text of an int. */
919 char *res = nmem_malloc(nmem, 20);
920 sprintf(res, "%d", term_ref_id_int);
921 *term_ref_id_str = res;
924 /* no limit given ? */
925 if (*hits_limit_value == -1)
927 if (*term_ref_id_str)
929 /* use global if term_ref is present */
930 *hits_limit_value = zh->approx_limit;
934 /* no counting if term_ref is not present */
935 *hits_limit_value = 0;
938 else if (*hits_limit_value == 0)
940 /* 0 is the same as global limit */
941 *hits_limit_value = zh->approx_limit;
943 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
944 *term_ref_id_str ? *term_ref_id_str : "none",
/* Build the result set for one term of the query.
 * Reads the hit limit / term reference via term_limits_APT(), resets the
 * grep_info hit counter, lets string_term() collect the matching dictionary
 * entries, and turns them into *rset with rset_trunc() (positions are
 * preserved).
 * term_sub is advanced by string_term(); a NULL *term_sub afterwards means
 * there are no more terms.
 * NOTE(review): the handling of `res` and the return statements are not
 * visible in this excerpt. */
949 static ZEBRA_RES term_trunc(ZebraHandle zh,
950 Z_AttributesPlusTerm *zapt,
951 const char **term_sub,
952 oid_value attributeSet, NMEM stream,
953 struct grep_info *grep_info,
954 int reg_type, int complete_flag,
955 int num_bases, char **basenames,
957 const char *rank_type,
958 const char *xpath_use,
961 struct rset_key_control *kc)
965 zint hits_limit_value;
966 const char *term_ref_id_str = 0;
969 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
/* Start with an empty hit list for this term. */
970 grep_info->isam_p_indx = 0;
971 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
972 reg_type, complete_flag, num_bases, basenames,
973 term_dst, xpath_use, &ol);
976 if (!*term_sub) /* no more terms ? */
978 yaz_log(log_level_rpn, "term: %s", term_dst);
979 *rset = rset_trunc(zh, grep_info->isam_p_buf,
980 grep_info->isam_p_indx, term_dst,
981 strlen(term_dst), rank_type, 1 /* preserve pos */,
982 zapt->term->which, rset_nmem,
983 kc, kc->scope, ol, reg_type, hits_limit_value,
/* Collect, for every database in basenames, the dictionary entries matching
 * the next term of the query.
 *
 * term_sub:  in/out pointer into the query term; each database starts again
 *            from *term_sub (see termp below).
 * grep_info: accumulator for the matching ISAM positions.
 * term_dst:  receives the plain term text.
 * ol:        output list of the index ordinals that were searched.
 *
 * Per database the dictionary expression is "(" + the encoded ordinal (each
 * byte preceded by the escape byte 1) + ")" followed by a sub-expression
 * chosen by the truncation attribute (type 5).  For Regexp-2 (103) the
 * address of regex_range is handed to term_103() as its `errors` output and
 * the value is then passed on to dict_lookup_grep().
 * NOTE(review): only part of the body is visible in this excerpt; the
 * initialisation of j is among the missing lines. */
990 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
991 const char **term_sub,
992 oid_value attributeSet, NMEM stream,
993 struct grep_info *grep_info,
994 int reg_type, int complete_flag,
995 int num_bases, char **basenames,
997 const char *xpath_use,
998 struct ord_list **ol)
1000 char term_dict[2*IT_MAX_WORD+4000];
1002 AttrType truncation;
1003 int truncation_value;
1005 struct rpn_char_map_info rcmi;
/* Complete-field registers are not split on spaces. */
1006 int space_split = complete_flag ? 0 : 1;
1008 int bases_ok = 0; /* no of databases with OK attribute */
1010 *ol = ord_list_create(stream);
1012 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1013 attr_init_APT(&truncation, zapt, 5);
1014 truncation_value = attr_find(&truncation, NULL);
1015 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1017 for (base_no = 0; base_no < num_bases; base_no++)
1020 int regex_range = 0;
1021 int max_pos, prefix_len = 0;
1026 termp = *term_sub; /* start of term for each database */
/* A non-zero result means the database could not be selected. */
1028 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1030 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1031 basenames[base_no]);
1035 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1036 attributeSet, &ord) != ZEBRA_OK)
1041 *ol = ord_list_append(stream, *ol, ord);
1042 ord_len = key_SU_encode (ord, ord_buf);
/* Prefix: the encoded ordinal, every byte escaped, inside parentheses. */
1044 term_dict[prefix_len++] = '(';
1045 for (i = 0; i<ord_len; i++)
1047 term_dict[prefix_len++] = 1; /* our internal regexp escape char */
1048 term_dict[prefix_len++] = ord_buf[i];
1050 term_dict[prefix_len++] = ')';
1051 term_dict[prefix_len] = '\0';
1053 switch (truncation_value)
1055 case -1: /* not specified */
1056 case 100: /* do not truncate */
1057 if (!string_relation(zh, zapt, &termp, term_dict,
1059 reg_type, space_split, term_dst,
1064 zebra_setError(zh, relation_error, 0);
1071 case 1: /* right truncation */
1072 term_dict[j++] = '(';
1073 if (!term_100(zh->reg->zebra_maps, reg_type,
1074 &termp, term_dict + j, space_split, term_dst))
1079 strcat(term_dict, ".*)");
1081 case 2: /* left truncation */
1082 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1083 if (!term_100(zh->reg->zebra_maps, reg_type,
1084 &termp, term_dict + j, space_split, term_dst))
1089 strcat(term_dict, ")");
1091 case 3: /* left&right truncation */
1092 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1093 if (!term_100(zh->reg->zebra_maps, reg_type,
1094 &termp, term_dict + j, space_split, term_dst))
1099 strcat(term_dict, ".*)");
1101 case 101: /* process # in term */
1102 term_dict[j++] = '(';
1103 if (!term_101(zh->reg->zebra_maps, reg_type,
1104 &termp, term_dict + j, space_split, term_dst))
1109 strcat(term_dict, ")");
1111 case 102: /* Regexp-1 */
1112 term_dict[j++] = '(';
1113 if (!term_102(zh->reg->zebra_maps, reg_type,
1114 &termp, term_dict + j, space_split, term_dst))
1119 strcat(term_dict, ")");
1121 case 103: /* Regexp-2 */
1123 term_dict[j++] = '(';
1124 if (!term_103(zh->reg->zebra_maps, reg_type,
1125 &termp, term_dict + j, &regex_range,
1126 space_split, term_dst))
1131 strcat(term_dict, ")");
1133 case 104: /* process # and ! in term */
1134 term_dict[j++] = '(';
1135 if (!term_104(zh->reg->zebra_maps, reg_type,
1136 &termp, term_dict + j, space_split, term_dst))
1141 strcat(term_dict, ")");
1143 case 105: /* process * and ! in term (right_truncate = 1) */
1144 term_dict[j++] = '(';
1145 if (!term_105(zh->reg->zebra_maps, reg_type,
1146 &termp, term_dict + j, space_split, term_dst, 1))
1151 strcat(term_dict, ")");
1153 case 106: /* process * and ! in term (right_truncate = 0) */
1154 term_dict[j++] = '(';
1155 if (!term_105(zh->reg->zebra_maps, reg_type,
1156 &termp, term_dict + j, space_split, term_dst, 0))
1161 strcat(term_dict, ")");
1164 zebra_setError_zint(zh,
1165 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* Hex dump of the expression after the ordinal prefix (for logging). */
1172 const char *input = term_dict + prefix_len;
1173 esc_str(buf, sizeof(buf), input, strlen(input));
1175 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1176 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1177 grep_info, &max_pos,
1178 ord_len /* number of "exact" chars */,
1181 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1186 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the arrays owned by grep_info (term_no and isam_p_buf) with
 * xfree().  The struct itself is not freed. */
1192 static void grep_info_delete(struct grep_info *grep_info)
1195 xfree(grep_info->term_no);
1197 xfree(grep_info->isam_p_buf);
/* Initialise grep_info to an empty state for register type reg_type and
 * evaluate the termset attribute (type 8) of zapt.
 * When a termset is requested, its name is either the decimal text of the
 * numeric attribute value or, for the value -2, the string value; the
 * set is created with resultSetAdd().
 * Errors raised here: YAZ_BIB1_UNSUPP_SEARCH ("termset") and
 * YAZ_BIB1_ILLEGAL_RESULT_SET_NAME when resultSetAdd() returns NULL.
 * NOTE(review): the condition selecting the "termset unsupported" branch
 * and the return statements are not visible in this excerpt. */
1200 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1201 Z_AttributesPlusTerm *zapt,
1202 struct grep_info *grep_info,
1206 int termset_value_numeric;
1207 const char *termset_value_string;
1210 grep_info->term_no = 0;
1212 grep_info->isam_p_size = 0;
1213 grep_info->isam_p_buf = NULL;
1215 grep_info->reg_type = reg_type;
1216 grep_info->termset = 0;
1219 attr_init_APT(&termset, zapt, 8);
1220 termset_value_numeric =
1221 attr_find_ex(&termset, NULL, &termset_value_string);
/* -1: attribute was not supplied. */
1222 if (termset_value_numeric != -1)
1225 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1229 const char *termset_name = 0;
/* Any value other than -2 is numeric and is formatted as text; for -2
 * the string value is used as the name. */
1230 if (termset_value_numeric != -2)
1233 sprintf(resname, "%d", termset_value_numeric);
1234 termset_name = resname;
1237 termset_name = termset_value_string;
1238 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1239 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1240 if (!grep_info->termset)
1242 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1251 \brief Create result set(s) for list of terms
1252 \param zh Zebra Handle
1253 \param zapt Attributes Plus Term (RPN leaf)
1254 \param termz term as used in query but converted to UTF-8
1255 \param attributeSet default attribute set
1256 \param stream memory for result
1257 \param reg_type register type ('w', 'p',..)
1258 \param complete_flag whether it's phrases or not
1259 \param rank_type term flags for ranking
1260 \param xpath_use use attribute for X-Path (-1 for no X-path)
1261 \param num_bases number of databases
1262 \param basenames array of databases
1263 \param rset_nmem memory for result sets
1264 \param result_sets output result set for each term in list (output)
1265 \param num_result_sets number of output result sets
1266 \param kc rset key control to be used for created result sets
/* Split termz into terms and create one result set per term with
 * term_trunc().  The *result_sets array lives in `stream` memory and is
 * re-allocated (old entries copied) whenever it is full.  When a term
 * fails, every result set created so far is deleted and grep_info is
 * released.  A NULL result set from term_trunc() is tested for before the
 * count is incremented (presumably it marks the end of the term list).
 * NOTE(review): loop header and return statements are not visible here. */
1268 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1269 Z_AttributesPlusTerm *zapt,
1271 oid_value attributeSet,
1273 int reg_type, int complete_flag,
1274 const char *rank_type,
1275 const char *xpath_use,
1276 int num_bases, char **basenames,
1278 RSET **result_sets, int *num_result_sets,
1279 struct rset_key_control *kc)
1281 char term_dst[IT_MAX_WORD+1];
1282 struct grep_info grep_info;
1283 const char *termp = termz;
1286 *num_result_sets = 0;
1288 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* Array full: allocate a larger one and copy the existing entries. */
1294 if (alloc_sets == *num_result_sets)
1297 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1300 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1301 alloc_sets = alloc_sets + add;
1302 *result_sets = rnew;
1304 res = term_trunc(zh, zapt, &termp, attributeSet,
1306 reg_type, complete_flag,
1307 num_bases, basenames,
1308 term_dst, rank_type,
1309 xpath_use, rset_nmem,
1310 &(*result_sets)[*num_result_sets],
1312 if (res != ZEBRA_OK)
/* Undo: drop the result sets built for earlier terms. */
1315 for (i = 0; i < *num_result_sets; i++)
1316 rset_delete((*result_sets)[i]);
1317 grep_info_delete (&grep_info);
1320 if ((*result_sets)[*num_result_sets] == 0)
1322 (*num_result_sets)++;
1327 grep_info_delete(&grep_info);
/* Handle the position attribute (type 3) of zapt.
 * For the supported first-in-field case a result set is built per database
 * from the dictionary entry "encoded ordinal + FIRST_IN_FIELD_STR", and the
 * per-database sets are OR-ed into *rset.
 * YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE is raised for an unsupported value,
 * when the register type has no first-in-field support, or when neither
 * isamb nor isamc is configured.
 * NOTE(review): case labels, returns and the release of f_set are not
 * visible in this excerpt. */
1331 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1332 Z_AttributesPlusTerm *zapt,
1333 oid_value attributeSet,
1335 int num_bases, char **basenames,
1338 struct rset_key_control *kc)
1346 attr_init_APT(&position, zapt, 3);
1347 position_value = attr_find(&position, NULL);
1348 switch(position_value)
1357 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1362 if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1364 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1369 if (!zh->reg->isamb && !zh->reg->isamc)
1371 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
/* One slot per database; only num_sets of them end up filled. */
1375 f_set = xmalloc(sizeof(RSET) * num_bases);
1376 for (base_no = 0; base_no < num_bases; base_no++)
1380 char term_dict[100];
1385 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1387 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1388 basenames[base_no]);
1392 if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1393 attributeSet, &ord) != ZEBRA_OK)
/* Dictionary key: encoded ordinal followed by the first-in-field marker. */
1396 ord_len = key_SU_encode (ord, ord_buf);
1397 memcpy(term_dict, ord_buf, ord_len);
1398 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1399 val = dict_lookup(zh->reg->dict, term_dict);
/* Payload layout: length byte, then the ISAM_P value. */
1402 assert(*val == sizeof(ISAM_P));
1403 memcpy(&isam_p, val+1, sizeof(isam_p));
1407 f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1408 zh->reg->isamb, isam_p, 0);
1409 else if (zh->reg->isamc)
1410 f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1411 zh->reg->isamc, isam_p, 0);
1415 *rset = rset_create_or(rset_nmem, kc, kc->scope,
1416 0 /* termid */, num_sets, f_set);
/* Phrase search: one result set per term (term_list_trunc), optionally
 * preceded by the position result set from rpn_search_APT_position(), all
 * combined by an ordered proximity operation with distance 1.
 * Zero sets yield a null result set; a single set is returned as is.
 * NOTE(review): the condition guarding the prepend and the update of
 * num_result_sets after it are not visible in this excerpt. */
1422 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1423 Z_AttributesPlusTerm *zapt,
1424 const char *termz_org,
1425 oid_value attributeSet,
1427 int reg_type, int complete_flag,
1428 const char *rank_type,
1429 const char *xpath_use,
1430 int num_bases, char **basenames,
1433 struct rset_key_control *kc)
1435 RSET *result_sets = 0;
1436 int num_result_sets = 0;
1438 term_list_trunc(zh, zapt, termz_org, attributeSet,
1439 stream, reg_type, complete_flag,
1440 rank_type, xpath_use,
1441 num_bases, basenames,
1443 &result_sets, &num_result_sets, kc);
1445 if (res != ZEBRA_OK)
1448 if (num_result_sets > 0)
1451 res = rpn_search_APT_position(zh, zapt, attributeSet,
1453 num_bases, basenames,
1454 rset_nmem, &first_set,
1456 if (res != ZEBRA_OK)
/* New array with the position set in front of the term sets. */
1460 RSET *nsets = nmem_malloc(stream,
1461 sizeof(RSET) * (num_result_sets+1));
1462 nsets[0] = first_set;
1463 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1464 result_sets = nsets;
1468 if (num_result_sets == 0)
1469 *rset = rset_create_null(rset_nmem, kc, 0);
1470 else if (num_result_sets == 1)
1471 *rset = result_sets[0];
1473 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1474 num_result_sets, result_sets,
1475 1 /* ordered */, 0 /* exclusion */,
1476 3 /* relation */, 1 /* distance */);
/* OR-list search: one result set per term (term_list_trunc); each may be
 * paired with the position result set through an ordered proximity
 * operation, and the per-term sets are then OR-ed together.
 * Zero sets yield a null result set; a single set is returned as is.
 * When the position lookup fails all term result sets are deleted.
 * NOTE(review): the condition guarding the pairing and the return
 * statements are not visible in this excerpt. */
1482 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1483 Z_AttributesPlusTerm *zapt,
1484 const char *termz_org,
1485 oid_value attributeSet,
1487 int reg_type, int complete_flag,
1488 const char *rank_type,
1489 const char *xpath_use,
1490 int num_bases, char **basenames,
1493 struct rset_key_control *kc)
1495 RSET *result_sets = 0;
1496 int num_result_sets = 0;
1499 term_list_trunc(zh, zapt, termz_org, attributeSet,
1500 stream, reg_type, complete_flag,
1501 rank_type, xpath_use,
1502 num_bases, basenames,
1504 &result_sets, &num_result_sets, kc);
1505 if (res != ZEBRA_OK)
1508 for (i = 0; i<num_result_sets; i++)
1511 res = rpn_search_APT_position(zh, zapt, attributeSet,
1513 num_bases, basenames,
1514 rset_nmem, &first_set,
1516 if (res != ZEBRA_OK)
1518 for (i = 0; i<num_result_sets; i++)
1519 rset_delete(result_sets[i]);
/* Replace the term set by "position set followed by term set". */
1527 tmp_set[0] = first_set;
1528 tmp_set[1] = result_sets[i];
1530 result_sets[i] = rset_create_prox(
1531 rset_nmem, kc, kc->scope,
1533 1 /* ordered */, 0 /* exclusion */,
1534 3 /* relation */, 1 /* distance */);
1537 if (num_result_sets == 0)
1538 *rset = rset_create_null(rset_nmem, kc, 0);
1539 else if (num_result_sets == 1)
1540 *rset = result_sets[0];
1542 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1543 num_result_sets, result_sets);
/* AND-list search: same structure as the OR-list variant, but the per-term
 * result sets are finally combined with rset_create_and().
 * Zero sets yield a null result set; a single set is returned as is.
 * When the position lookup fails all term result sets are deleted.
 * NOTE(review): the condition guarding the pairing and the return
 * statements are not visible in this excerpt. */
1549 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1550 Z_AttributesPlusTerm *zapt,
1551 const char *termz_org,
1552 oid_value attributeSet,
1554 int reg_type, int complete_flag,
1555 const char *rank_type,
1556 const char *xpath_use,
1557 int num_bases, char **basenames,
1560 struct rset_key_control *kc)
1562 RSET *result_sets = 0;
1563 int num_result_sets = 0;
1566 term_list_trunc(zh, zapt, termz_org, attributeSet,
1567 stream, reg_type, complete_flag,
1568 rank_type, xpath_use,
1569 num_bases, basenames,
1571 &result_sets, &num_result_sets,
1573 if (res != ZEBRA_OK)
1575 for (i = 0; i<num_result_sets; i++)
1578 res = rpn_search_APT_position(zh, zapt, attributeSet,
1580 num_bases, basenames,
1581 rset_nmem, &first_set,
1583 if (res != ZEBRA_OK)
1585 for (i = 0; i<num_result_sets; i++)
1586 rset_delete(result_sets[i]);
/* Replace the term set by "position set followed by term set". */
1594 tmp_set[0] = first_set;
1595 tmp_set[1] = result_sets[i];
1597 result_sets[i] = rset_create_prox(
1598 rset_nmem, kc, kc->scope,
1600 1 /* ordered */, 0 /* exclusion */,
1601 3 /* relation */, 1 /* distance */);
1606 if (num_result_sets == 0)
1607 *rset = rset_create_null(rset_nmem, kc, 0);
1608 else if (num_result_sets == 1)
1609 *rset = result_sets[0];
1611 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1612 num_result_sets, result_sets);
/* Numeric counterpart of string_relation(): append to term_dict a regular
 * expression for the relation attribute (type 2) applied to the integer
 * value of the next term, then run dict_lookup_grep() with it.
 *
 * The term is read with term_100() (space_split = 1) and converted with
 * atoi().  Bounds handed to gen_regular_rel():
 *   <   val-1, islt=1      <=  val, islt=1
 *   >=  val,   islt=0      >   val+1, islt=0
 * Equality uses "(0*N)", i.e. the number with optional leading zeros.
 * An unhandled relation sets *error_code to
 * YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.
 * NOTE(review): case labels and returns are not visible in this excerpt;
 * the warning text "rel = gt" appears to be shared by all relations. */
1618 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1619 const char **term_sub,
1621 oid_value attributeSet,
1622 struct grep_info *grep_info,
/* Output position: the end of what is already in term_dict. */
1632 char *term_tmp = term_dict + strlen(term_dict);
1635 attr_init_APT(&relation, zapt, 2);
1636 relation_value = attr_find(&relation, NULL);
1638 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1640 switch (relation_value)
1643 yaz_log(log_level_rpn, "Relation <");
1644 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1647 term_value = atoi (term_tmp);
1648 gen_regular_rel(term_tmp, term_value-1, 1);
1651 yaz_log(log_level_rpn, "Relation <=");
1652 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1655 term_value = atoi (term_tmp);
1656 gen_regular_rel(term_tmp, term_value, 1);
1659 yaz_log(log_level_rpn, "Relation >=");
1660 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1663 term_value = atoi (term_tmp);
1664 gen_regular_rel(term_tmp, term_value, 0);
1667 yaz_log(log_level_rpn, "Relation >");
1668 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1671 term_value = atoi (term_tmp);
1672 gen_regular_rel(term_tmp, term_value+1, 0);
1676 yaz_log(log_level_rpn, "Relation =");
1677 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1680 term_value = atoi (term_tmp);
1681 sprintf(term_tmp, "(0*%d)", term_value);
1684 /* term_tmp untouched.. */
1685 while (**term_sub != '\0')
1689 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1692 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1693 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1696 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1697 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
/* Numeric counterpart of string_term(): for every database in basenames,
 * build the ordinal prefix "(" + escaped encoded ordinal + ")" in term_dict
 * and let numeric_relation() append the numeric expression and perform the
 * dictionary lookup.  Searched ordinals are appended to *ol.
 * A relation error reported by numeric_relation() is passed to
 * zebra_setError().
 * NOTE(review): returns and the use of bases_ok are not visible here. */
1701 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1702 const char **term_sub,
1703 oid_value attributeSet, NMEM stream,
1704 struct grep_info *grep_info,
1705 int reg_type, int complete_flag,
1706 int num_bases, char **basenames,
1708 const char *xpath_use,
1709 struct ord_list **ol)
1711 char term_dict[2*IT_MAX_WORD+2];
1714 struct rpn_char_map_info rcmi;
1716 int bases_ok = 0; /* no of databases with OK attribute */
1718 *ol = ord_list_create(stream);
1720 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1722 for (base_no = 0; base_no < num_bases; base_no++)
1724 int max_pos, prefix_len = 0;
1725 int relation_error = 0;
1726 int ord, ord_len, i;
/* A non-zero result means the database could not be selected. */
1731 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1733 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1734 basenames[base_no]);
1738 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1739 attributeSet, &ord) != ZEBRA_OK)
1743 *ol = ord_list_append(stream, *ol, ord);
1745 ord_len = key_SU_encode (ord, ord_buf);
/* Prefix: every ordinal byte is preceded by the escape byte 1. */
1747 term_dict[prefix_len++] = '(';
1748 for (i = 0; i < ord_len; i++)
1750 term_dict[prefix_len++] = 1;
1751 term_dict[prefix_len++] = ord_buf[i];
1753 term_dict[prefix_len++] = ')';
1754 term_dict[prefix_len] = '\0';
1756 if (!numeric_relation(zh, zapt, &termp, term_dict,
1757 attributeSet, grep_info, &max_pos, reg_type,
1758 term_dst, &relation_error))
1762 zebra_setError(zh, relation_error, 0);
1772 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric search: for each term in termz, numeric_term() collects the
 * matching dictionary entries and rset_trunc() turns them into a result
 * set (positions are not preserved).  The per-term sets are AND-ed.
 * Zero sets yield a null result set; a single set is returned as is.
 * The result-set array lives in `stream` memory and is re-allocated (old
 * entries copied) whenever it is full.
 * NOTE(review): the loop header, the assignment of the re-allocated array
 * back to result_sets and the return statements are not visible here. */
1777 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1778 Z_AttributesPlusTerm *zapt,
1780 oid_value attributeSet,
1782 int reg_type, int complete_flag,
1783 const char *rank_type,
1784 const char *xpath_use,
1785 int num_bases, char **basenames,
1788 struct rset_key_control *kc)
1790 char term_dst[IT_MAX_WORD+1];
1791 const char *termp = termz;
1792 RSET *result_sets = 0;
1793 int num_result_sets = 0;
1795 struct grep_info grep_info;
1797 zint hits_limit_value;
1798 const char *term_ref_id_str = 0;
1800 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1802 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1803 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1807 struct ord_list *ol;
/* Array full: allocate a larger one and copy the existing entries. */
1808 if (alloc_sets == num_result_sets)
1811 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1814 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1815 alloc_sets = alloc_sets + add;
1818 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* Start with an empty hit list for this term. */
1819 grep_info.isam_p_indx = 0;
1820 res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1821 reg_type, complete_flag, num_bases, basenames,
1822 term_dst, xpath_use, &ol);
/* Failure, or termp reset to NULL by numeric_term(). */
1823 if (res == ZEBRA_FAIL || termp == 0)
1825 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1826 result_sets[num_result_sets] =
1827 rset_trunc(zh, grep_info.isam_p_buf,
1828 grep_info.isam_p_indx, term_dst,
1829 strlen(term_dst), rank_type,
1830 0 /* preserve position */,
1831 zapt->term->which, rset_nmem,
1832 kc, kc->scope, ol, reg_type,
1835 if (!result_sets[num_result_sets])
1841 grep_info_delete(&grep_info);
1843 if (res != ZEBRA_OK)
1845 if (num_result_sets == 0)
1846 *rset = rset_create_null(rset_nmem, kc, 0);
1847 else if (num_result_sets == 1)
1848 *rset = result_sets[0];
1850 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1851 num_result_sets, result_sets);
/* Build *rset as a temporary result set (directory taken from the
 * "setTmpDir" resource), open it for writing and write a single key.
 * NOTE(review): how `key` is filled in and the closing of rsfd are not
 * visible in this excerpt. */
1857 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1858 Z_AttributesPlusTerm *zapt,
1860 oid_value attributeSet,
1862 const char *rank_type, NMEM rset_nmem,
1864 struct rset_key_control *kc)
1869 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1870 res_get (zh->res, "setTmpDir"),0 );
1871 rsfd = rset_open(*rset, RSETF_WRITE);
1879 rset_write (rsfd, &key);
/* rpn_sort_spec: turns an attributes-plus-term node into an entry of
 * sort_sequence instead of a search.
 *
 * Visible flow: attribute type 7 is read into sort_relation_value; the
 * spec array is allocated on first use; the term text is parsed as the
 * slot number i; a Z_SortKeySpec is built (generic sort element using the
 * node's own attribute list and the OID for attributeSet) and stored in
 * sort_sequence->specs[i]; *rset is set to a null result set.
 *
 * All allocations shown come from the `stream` NMEM.
 *
 * NOTE(review): declarations, braces and the error/return statements are
 * on lines not visible in this extract.
 */
1884 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1885 oid_value attributeSet, NMEM stream,
1886 Z_SortKeySpecList *sort_sequence,
1887 const char *rank_type,
1890 struct rset_key_control *kc)
1893 int sort_relation_value;
1894 AttrType sort_relation_type;
1901 attr_init_APT(&sort_relation_type, zapt, 7);
1902 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first use: allocate a 10-slot spec array and clear every slot */
1904 if (!sort_sequence->specs)
1906 sort_sequence->num_specs = 10;
1907 sort_sequence->specs = (Z_SortKeySpec **)
1908 nmem_malloc(stream, sort_sequence->num_specs *
1909 sizeof(*sort_sequence->specs));
1910 for (i = 0; i<sort_sequence->num_specs; i++)
1911 sort_sequence->specs[i] = 0;
/* only a general term is accepted; its text is parsed as the slot number */
1913 if (zapt->term->which != Z_Term_general)
1916 i = atoi_n ((char *) zapt->term->u.general->buf,
1917 zapt->term->u.general->len);
/* NOTE(review): only the upper bound of i is tested in the visible lines;
   confirm that a negative value from atoi_n cannot reach specs[i] below */
1918 if (i >= sort_sequence->num_specs)
1920 sprintf(termz, "%d", i);
/* resolve attributeSet (Z39.50 protocol, attribute-set class) into oid */
1922 oe.proto = PROTO_Z3950;
1923 oe.oclass = CLASS_ATTSET;
1924 oe.value = attributeSet;
1925 if (!oid_ent_to_oid (&oe, oid))
/* build the sort key spec: generic element -> sortAttributes */
1928 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1929 sks->sortElement = (Z_SortElement *)
1930 nmem_malloc(stream, sizeof(*sks->sortElement));
1931 sks->sortElement->which = Z_SortElement_generic;
1932 sk = sks->sortElement->u.generic = (Z_SortKey *)
1933 nmem_malloc(stream, sizeof(*sk));
1934 sk->which = Z_SortKey_sortAttributes;
1935 sk->u.sortAttributes = (Z_SortAttributes *)
1936 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
/* the attribute list is shared with zapt, not copied */
1938 sk->u.sortAttributes->id = oid;
1939 sk->u.sortAttributes->list = zapt->attributes;
/* relation value 1 -> ascending, 2 -> descending; the last assignment is
   presumably the else branch (its `else` line is not visible), which
   makes ascending the default */
1941 sks->sortRelation = (int *)
1942 nmem_malloc(stream, sizeof(*sks->sortRelation));
1943 if (sort_relation_value == 1)
1944 *sks->sortRelation = Z_SortKeySpec_ascending;
1945 else if (sort_relation_value == 2)
1946 *sks->sortRelation = Z_SortKeySpec_descending;
1948 *sks->sortRelation = Z_SortKeySpec_ascending;
1950 sks->caseSensitivity = (int *)
1951 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1952 *sks->caseSensitivity = 0;
1954 sks->which = Z_SortKeySpec_null;
1955 sks->u.null = odr_nullval ();
1956 sort_sequence->specs[i] = sks;
/* the node contributes no hits itself: hand back a null result set */
1957 *rset = rset_create_null(rset_nmem, kc, 0);
/* rpn_check_xpath: decides whether the use attribute (type 1) of zapt is
 * an XPath expression and, if so, parses it.
 *
 * The string value of the use attribute is fetched with attr_find_ex. When
 * it is present and starts with '/', the result of zebra_parse_xpath_str
 * (called with the xpath array, its capacity max, and mem) is returned.
 *
 * NOTE(review): the value returned when the string is missing or does not
 * start with '/' is on a line not visible in this extract.
 */
1962 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1963 oid_value attributeSet,
1964 struct xpath_location_step *xpath, int max,
1967 oid_value curAttributeSet = attributeSet;
1969 const char *use_string = 0;
1971 attr_init_APT(&use, zapt, 1);
1972 attr_find_ex(&use, &curAttributeSet, &use_string);
/* no string-valued use attribute, or one that is not a path */
1974 if (!use_string || *use_string != '/')
1977 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* xpath_trunc: dictionary lookup helper for the XPath search code.
 *
 * Visible flow: an ord is looked up with zebraExplain_lookup_attr_str in
 * the zinfo_index_category_index category; grep_info is prepared with
 * register type '0' and no zapt; a pattern is assembled in term_dict as
 * '(' + encoded ord bytes + ')' followed by `term`; dict_lookup_grep runs
 * it with grep_handle collecting into grep_info; the collected entries are
 * turned into an RSET by rset_trunc using the fixed rank flags "void" and
 * term type Z_Term_characterString.
 *
 * A null result set is returned when grep_info_prepare fails, and on one
 * more early-exit path whose condition is not visible here.
 *
 * NOTE(review): several lines (remaining lookup arguments, declarations,
 * conditions, the final return) are not visible in this extract.
 */
1982 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1983 int reg_type, const char *term,
1984 const char *xpath_use,
1986 struct rset_key_control *kc)
1989 struct grep_info grep_info;
1990 char term_dict[2048];
1993 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1994 zinfo_index_category_index,
1997 int ord_len, i, r, max_pos;
1998 int term_type = Z_Term_characterString;
1999 const char *flags = "void";
2001 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2002 return rset_create_null(rset_nmem, kc, 0);
2005 return rset_create_null(rset_nmem, kc, 0);
/* pattern prefix; the '|' separator is presumably written only under a
   condition that is not visible here */
2007 term_dict[prefix_len++] = '|';
2009 term_dict[prefix_len++] = '(';
/* each byte of the encoded ord is preceded by a byte with value 1 */
2011 ord_len = key_SU_encode (ord, ord_buf);
2012 for (i = 0; i<ord_len; i++)
2014 term_dict[prefix_len++] = 1;
2015 term_dict[prefix_len++] = ord_buf[i];
2017 term_dict[prefix_len++] = ')';
/* NOTE(review): term is appended with strcpy into the 2048-byte term_dict;
   no length check is visible in these lines */
2018 strcpy(term_dict+prefix_len, term);
2020 grep_info.isam_p_indx = 0;
2021 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2022 &grep_info, &max_pos, 0, grep_handle);
2023 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2024 grep_info.isam_p_indx);
2025 rset = rset_trunc(zh, grep_info.isam_p_buf,
2026 grep_info.isam_p_indx, term, strlen(term),
2027 flags, 1, term_type,rset_nmem,
2028 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2029 0 /* term_ref_id_str */);
2030 grep_info_delete(&grep_info);
/* rpn_search_xpath: wraps a result set so that it only matches inside the
 * elements described by xpath[0 .. xpath_len-1].
 *
 * always_matches is set when the incoming rset is null. For every database
 * in basenames, and for level = xpath_len-1 down to 0, the visible code:
 *   - builds xpath_rev, the path parts from `level` down to 1 joined with
 *     '/', i.e. in reverse order, with some characters translated;
 *   - when the step has a relation predicate with a non-empty name, looks
 *     up "name" or "name=value" under ZEBRA_XPATH_ATTR_NAME (rset_attr);
 *   - when xpath_rev is non-empty, looks up the begin and end tag sets
 *     with xpath_trunc and combines them with the current rset through
 *     rset_create_between.
 *
 * NOTE(review): many lines (declarations, loop bodies' braces, conditions,
 * error returns, the final assignment to the output) are not visible in
 * this extract; the comments describe only the lines shown.
 */
2035 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2036 int num_bases, char **basenames,
2037 NMEM stream, const char *rank_type, RSET rset,
2038 int xpath_len, struct xpath_location_step *xpath,
2041 struct rset_key_control *kc)
2045 int always_matches = rset ? 0 : 1;
2053 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2054 for (i = 0; i<xpath_len; i++)
2056 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2068 a[@attr = value]/b[@other = othervalue]
2070 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2071 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2072 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2073 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2074 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2075 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* both the map info and the handler are passed as 0 here (compare
   rpn_char_map_prepare, which installs rpn_char_map_handler) */
2079 dict_grep_cmap (zh->reg->dict, 0, 0);
2081 for (base_no = 0; base_no < num_bases; base_no++)
2083 int level = xpath_len;
/* a non-zero result from zebraExplain_curDatabase is reported as
   "database unavailable" for this base name */
2086 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2088 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2089 basenames[base_no]);
/* walk the steps from the last one back to the first */
2093 while (--level >= 0)
2095 WRBUF xpath_rev = wrbuf_alloc();
2097 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* append the parts level, level-1, ..., 1 to xpath_rev */
2099 for (i = level; i >= 1; --i)
2101 const char *cp = xpath[i].part;
2107 wrbuf_puts(xpath_rev, "[^/]*");
2108 else if (*cp == ' ')
2109 wrbuf_puts(xpath_rev, "\001 ");
2111 wrbuf_putc(xpath_rev, *cp);
2113 /* wrbuf_putc does not null-terminate , but
2114 wrbuf_puts below ensures it does.. so xpath_rev
2115 is OK iff length is > 0 */
2117 wrbuf_puts(xpath_rev, "/");
2119 else if (i == 1) /* // case */
2120 wrbuf_puts(xpath_rev, ".*");
/* attribute predicate on this step: build "name" or "name=value" */
2122 if (xpath[level].predicate &&
2123 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2124 xpath[level].predicate->u.relation.name[0])
2126 WRBUF wbuf = wrbuf_alloc();
/* name+1 skips the first character of the relation name (presumably the
   leading '@' — confirm against the XPath parser) */
2127 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2128 if (xpath[level].predicate->u.relation.value)
2130 const char *cp = xpath[level].predicate->u.relation.value;
2131 wrbuf_putc(wbuf, '=');
/* characters listed in REGEX_CHARS are escaped with a backslash */
2135 if (strchr(REGEX_CHARS, *cp))
2136 wrbuf_putc(wbuf, '\\');
2137 wrbuf_putc(wbuf, *cp);
/* empty puts: per the note above, wrbuf_puts leaves the buffer
   null-terminated before wrbuf_buf is used as a C string */
2141 wrbuf_puts(wbuf, "");
2142 rset_attr = xpath_trunc(
2143 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2145 wrbuf_free(wbuf, 1);
/* NOTE(review): this free is followed by further uses of xpath_rev, so it
   presumably sits on an early-exit path whose condition is not visible */
2151 wrbuf_free(xpath_rev, 1);
2155 yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
2156 wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
2157 if (wrbuf_len(xpath_rev))
2159 rset_start_tag = xpath_trunc(zh, stream, '0',
2160 wrbuf_buf(xpath_rev),
2161 ZEBRA_XPATH_ELM_BEGIN,
/* presumably the always_matches case: the begin-tag set becomes the
   result (the guarding condition is not visible) */
2164 rset = rset_start_tag;
2167 rset_end_tag = xpath_trunc(zh, stream, '0',
2168 wrbuf_buf(xpath_rev),
2169 ZEBRA_XPATH_ELM_END,
/* keep only hits of rset lying between a begin tag and an end tag,
   optionally constrained by the attribute set */
2172 rset = rset_create_between(rset_nmem, kc, kc->scope,
2173 rset_start_tag, rset,
2174 rset_end_tag, rset_attr);
2177 wrbuf_free(xpath_rev, 1);
/* Capacity of the xpath[] step array that rpn_search_APT hands to
   rpn_check_xpath. */
2185 #define MAX_XPATH_STEPS 10
/* rpn_search_APT: evaluates one attributes-plus-term node.
 *
 * Visible flow: zebra_maps_attr fills in the register id, search type,
 * rank type, complete flag and sort flag for zapt; the term is converted
 * to UTF-8 in termz; a sort node is delegated to rpn_sort_spec; the use
 * attribute is checked for an XPath (rpn_check_xpath) and xpath_use is
 * chosen from the last step (attribute vs. element data); relation 103
 * (alwaysmatches) goes straight to rpn_search_xpath with a null set;
 * otherwise the search is dispatched on search_type ("phrase", "and-list",
 * "or-list", "local", "numeric", anything else is YAZ_BIB1_UNSUPP_SEARCH)
 * and the resulting set is finally passed through rpn_search_xpath.
 *
 * NOTE(review): several lines (some parameters and declarations, braces,
 * conditions such as the sort-flag and xpath_len tests, early returns) are
 * not visible in this extract; the comments describe only the lines shown.
 */
2187 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2188 oid_value attributeSet, NMEM stream,
2189 Z_SortKeySpecList *sort_sequence,
2190 int num_bases, char **basenames,
2193 struct rset_key_control *kc)
2195 ZEBRA_RES res = ZEBRA_OK;
2197 char *search_type = NULL;
2198 char rank_type[128];
2201 char termz[IT_MAX_WORD+1];
2203 const char *xpath_use = 0;
2204 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2208 log_level_rpn = yaz_log_module_level("rpn");
/* reg_id is an output argument and must be passed by address */
2211 zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2212 rank_type, &complete_flag, &sort_flag);
2214 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2215 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2216 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2217 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2219 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
/* sort node (presumably guarded by sort_flag on a line not visible) */
2223 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2224 rank_type, rset_nmem, rset, kc);
2225 /* consider if an X-Path query is used */
2226 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2227 xpath, MAX_XPATH_STEPS, stream);
2230 if (xpath[xpath_len-1].part[0] == '@')
2231 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2233 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* relation attribute (type 2) */
2240 attr_init_APT(&relation, zapt, 2);
2241 relation_value = attr_find(&relation, NULL);
2243 if (relation_value == 103) /* alwaysmatches */
2245 *rset = 0; /* signal no "term" set */
2246 return rpn_search_xpath(zh, num_bases, basenames,
2247 stream, rank_type, *rset,
2248 xpath_len, xpath, rset_nmem, rset, kc);
2253 /* search using one of the various search type strategies
2254 termz is our UTF-8 search term
2255 attributeSet is top-level default attribute set
2256 stream is ODR for search
2257 reg_id is the register type
2258 complete_flag is 1 for complete subfield, 0 for incomplete
2259 xpath_use is use-attribute to be used for X-Path search, 0 for none */
2261 if (!strcmp(search_type, "phrase"))
2263 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2264 reg_id, complete_flag, rank_type,
2266 num_bases, basenames, rset_nmem,
2269 else if (!strcmp(search_type, "and-list"))
2271 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2272 reg_id, complete_flag, rank_type,
2274 num_bases, basenames, rset_nmem,
2277 else if (!strcmp(search_type, "or-list"))
2279 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2280 reg_id, complete_flag, rank_type,
2282 num_bases, basenames, rset_nmem,
2285 else if (!strcmp(search_type, "local"))
2287 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2288 rank_type, rset_nmem, rset, kc);
2290 else if (!strcmp(search_type, "numeric"))
2292 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2293 reg_id, complete_flag, rank_type,
2295 num_bases, basenames, rset_nmem,
2300 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2303 if (res != ZEBRA_OK)
2307 return rpn_search_xpath(zh, num_bases, basenames,
2308 stream, rank_type, *rset,
2309 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top calls rpn_search_structure before
 * its definition appears later in this file. The definition omits
 * `static`, but because this earlier declaration is static the function
 * keeps internal linkage.
 */
2312 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2313 oid_value attributeSet,
2314 NMEM stream, NMEM rset_nmem,
2315 Z_SortKeySpecList *sort_sequence,
2316 int num_bases, char **basenames,
2317 RSET **result_sets, int *num_result_sets,
2318 Z_Operator *parent_op,
2319 struct rset_key_control *kc);
/* rpn_get_top_approx_limit: walks an RPN tree looking at attribute type 12
 * of every attributes-plus-term leaf.
 *
 * For a complex node the function recurses into s1 and then s2, skipping
 * the second call once the result is no longer ZEBRA_OK. For a simple APT
 * node the value of attribute 12 is fetched with attr_find into `l`.
 *
 * NOTE(review): the remaining parameters, the declaration of `l`, what is
 * done with it, and the return statement are on lines not visible in this
 * extract.
 */
2321 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2324 ZEBRA_RES res = ZEBRA_OK;
2325 if (zs->which == Z_RPNStructure_complex)
2327 if (res == ZEBRA_OK)
2328 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2330 if (res == ZEBRA_OK)
2331 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2334 else if (zs->which == Z_RPNStructure_simple)
2336 if (zs->u.simple->which == Z_Operand_APT)
2338 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2339 AttrType global_hits_limit_attr;
/* attribute type 12 carries the global hits limit */
2342 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2344 l = attr_find(&global_hits_limit_attr, NULL);
/* rpn_search_top: evaluates a whole RPN tree and yields one result set.
 *
 * A key control is created with zebra_key_control_create and the tree is
 * evaluated by rpn_search_structure with no parent operator. When that
 * fails, every result set it handed back is deleted. Otherwise exactly one
 * non-null set is expected (enforced with assert) and it is stored through
 * result_set.
 *
 * NOTE(review): the remaining parameters, the handling of kc after the
 * search, and the return statements are on lines not visible in this
 * extract.
 */
2352 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2353 oid_value attributeSet,
2354 NMEM stream, NMEM rset_nmem,
2355 Z_SortKeySpecList *sort_sequence,
2356 int num_bases, char **basenames,
2359 RSET *result_sets = 0;
2360 int num_result_sets = 0;
2362 struct rset_key_control *kc = zebra_key_control_create(zh);
2364 res = rpn_search_structure(zh, zs, attributeSet,
2367 num_bases, basenames,
2368 &result_sets, &num_result_sets,
2369 0 /* no parent op */,
2371 if (res != ZEBRA_OK)
/* failure: drop whatever sets were produced before the error */
2374 for (i = 0; i<num_result_sets; i++)
2375 rset_delete(result_sets[i]);
/* success: the top of the tree must have been reduced to a single set */
2380 assert(num_result_sets == 1);
2381 assert(result_sets);
2382 assert(*result_sets);
2383 *result_set = *result_sets;
/* rpn_search_structure: recursive evaluation of an RPN tree node.
 *
 * The result is a list of sets (*result_sets, *num_result_sets) allocated
 * from `stream`.
 *
 * Complex node: both operands are evaluated recursively (on failure the
 * sets gathered so far are deleted) and their lists are concatenated. The
 * list is combined into a single set right away unless the parent operator
 * is present, has the same kind as this node's operator, and that kind is
 * AND or OR; in that case the flat list is handed up so the parent can
 * combine all operands in one go. Combination uses rset_create_and,
 * rset_create_or, rset_create_not or rset_create_prox; proximity is only
 * accepted with a known unit equal to Z_ProxUnit_word.
 *
 * Simple node: an attributes-plus-term operand is evaluated by
 * rpn_search_APT, a result-set reference by resultSetRef; any other
 * operand, and any other node kind, is reported as YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): braces, the switch header, some arguments and the
 * error/return statements are on lines not visible in this extract.
 */
2389 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2390 oid_value attributeSet,
2391 NMEM stream, NMEM rset_nmem,
2392 Z_SortKeySpecList *sort_sequence,
2393 int num_bases, char **basenames,
2394 RSET **result_sets, int *num_result_sets,
2395 Z_Operator *parent_op,
2396 struct rset_key_control *kc)
2398 *num_result_sets = 0;
2399 if (zs->which == Z_RPNStructure_complex)
2402 Z_Operator *zop = zs->u.complex->roperator;
2403 RSET *result_sets_l = 0;
2404 int num_result_sets_l = 0;
2405 RSET *result_sets_r = 0;
2406 int num_result_sets_r = 0;
/* left operand */
2408 res = rpn_search_structure(zh, zs->u.complex->s1,
2409 attributeSet, stream, rset_nmem,
2411 num_bases, basenames,
2412 &result_sets_l, &num_result_sets_l,
2414 if (res != ZEBRA_OK)
2417 for (i = 0; i<num_result_sets_l; i++)
2418 rset_delete(result_sets_l[i]);
/* right operand; on failure the left operand's sets are deleted as well */
2421 res = rpn_search_structure(zh, zs->u.complex->s2,
2422 attributeSet, stream, rset_nmem,
2424 num_bases, basenames,
2425 &result_sets_r, &num_result_sets_r,
2427 if (res != ZEBRA_OK)
2430 for (i = 0; i<num_result_sets_l; i++)
2431 rset_delete(result_sets_l[i]);
2432 for (i = 0; i<num_result_sets_r; i++)
2433 rset_delete(result_sets_r[i]);
2437 /* make a new list of result for all children */
2438 *num_result_sets = num_result_sets_l + num_result_sets_r;
2439 *result_sets = nmem_malloc(stream, *num_result_sets *
2440 sizeof(**result_sets));
2441 memcpy(*result_sets, result_sets_l,
2442 num_result_sets_l * sizeof(**result_sets));
2443 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2444 num_result_sets_r * sizeof(**result_sets));
2446 if (!parent_op || parent_op->which != zop->which
2447 || (zop->which != Z_Operator_and &&
2448 zop->which != Z_Operator_or))
2450 /* parent node different from this one (or non-present) */
2451 /* we must combine result sets now */
2455 case Z_Operator_and:
2456 rset = rset_create_and(rset_nmem, kc,
2458 *num_result_sets, *result_sets);
2461 rset = rset_create_or(rset_nmem, kc,
2462 kc->scope, 0, /* termid */
2463 *num_result_sets, *result_sets);
2465 case Z_Operator_and_not:
2466 rset = rset_create_not(rset_nmem, kc,
2471 case Z_Operator_prox:
/* only a "known" proximity unit is supported ... */
2472 if (zop->u.prox->which != Z_ProximityOperator_known)
2475 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* ... and of the known units only the word unit */
2479 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2481 zebra_setError_zint(zh,
2482 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2483 *zop->u.prox->u.known);
/* a missing exclusion flag is passed on as 0 */
2488 rset = rset_create_prox(rset_nmem, kc,
2490 *num_result_sets, *result_sets,
2491 *zop->u.prox->ordered,
2492 (!zop->u.prox->exclusion ?
2493 0 : *zop->u.prox->exclusion),
2494 *zop->u.prox->relationType,
2495 *zop->u.prox->distance );
2499 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the list: one entry in a fresh array */
2502 *num_result_sets = 1;
2503 *result_sets = nmem_malloc(stream, *num_result_sets *
2504 sizeof(**result_sets));
2505 (*result_sets)[0] = rset;
2508 else if (zs->which == Z_RPNStructure_simple)
2513 if (zs->u.simple->which == Z_Operand_APT)
2515 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2516 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2517 attributeSet, stream, sort_sequence,
2518 num_bases, basenames, rset_nmem, &rset,
2520 if (res != ZEBRA_OK)
2523 else if (zs->u.simple->which == Z_Operand_resultSetId)
2525 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2526 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2530 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2531 zs->u.simple->u.resultSetId);
2538 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a leaf always yields a one-element list */
2541 *num_result_sets = 1;
2542 *result_sets = nmem_malloc(stream, *num_result_sets *
2543 sizeof(**result_sets));
2544 (*result_sets)[0] = rset;
2548 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2559 * indent-tabs-mode: nil
2561 * vim: shiftwidth=4 tabstop=8 expandtab