1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2009 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30 #include <yaz/diagbib1.h>
32 #include <zebra_xpath.h>
37 static int log_level_set = 0;
38 static int log_level_rpn = 0;
40 #define TERMSET_DISABLE 1
42 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
44 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
45 const char **out = zebra_maps_input(p->zm, from, len, 0);
49 const char *outp = *out;
50 yaz_log(YLOG_LOG, "---");
53 yaz_log(YLOG_LOG, "%02X", *outp);
61 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
62 struct rpn_char_map_info *map_info)
65 if (zebra_maps_is_icu(zm))
66 dict_grep_cmap(reg->dict, 0, 0);
68 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
82 const char *index_type;
86 static int add_isam_p(const char *name, const char *info,
91 log_level_rpn = yaz_log_module_level("rpn");
94 /* we may have to stop this madness.. NOTE: -1 so that if
95 truncmax == trunclimit we do *not* generate result sets */
96 if (p->isam_p_indx >= p->trunc_max - 1)
99 if (p->isam_p_indx == p->isam_p_size)
101 ISAM_P *new_isam_p_buf;
105 p->isam_p_size = 2*p->isam_p_size + 100;
106 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110 memcpy(new_isam_p_buf, p->isam_p_buf,
111 p->isam_p_indx * sizeof(*p->isam_p_buf));
112 xfree(p->isam_p_buf);
114 p->isam_p_buf = new_isam_p_buf;
117 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
120 memcpy(new_term_no, p->isam_p_buf,
121 p->isam_p_indx * sizeof(*p->term_no));
124 p->term_no = new_term_no;
127 assert(*info == sizeof(*p->isam_p_buf));
128 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
133 char term_tmp[IT_MAX_WORD];
135 const char *index_name;
136 int len = key_SU_decode(&ord, (const unsigned char *) name);
138 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
139 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
140 zebraExplain_lookup_ord(p->zh->reg->zei,
141 ord, 0 /* index_type */, &db, &index_name);
142 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
144 resultSetAddTerm(p->zh, p->termset, name[len], db,
145 index_name, term_tmp);
151 static int grep_handle(char *name, const char *info, void *p)
153 return add_isam_p(name, info, (struct grep_info *) p);
156 static int term_pre(zebra_map_t zm, const char **src,
157 const char *ct1, const char *ct2, int first)
159 const char *s1, *s0 = *src;
162 /* skip white space */
165 if (ct1 && strchr(ct1, *s0))
167 if (ct2 && strchr(ct2, *s0))
170 map = zebra_maps_input(zm, &s1, strlen(s1), first);
171 if (**map != *CHR_SPACE)
180 static void esc_str(char *out_buf, size_t out_size,
181 const char *in_buf, int in_size)
187 assert(out_size > 20);
189 for (k = 0; k<in_size; k++)
191 int c = in_buf[k] & 0xff;
193 if (c < 32 || c > 126)
197 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
198 if (strlen(out_buf) > out_size-20)
200 strcat(out_buf, "..");
206 #define REGEX_CHARS " []()|.*+?!\"$"
208 static void add_non_space(const char *start, const char *end,
211 const char **map, int q_map_match)
213 size_t sz = end - start;
215 wrbuf_write(display_term, start, sz);
220 if (strchr(REGEX_CHARS, *start))
221 wrbuf_putc(term_dict, '\\');
222 wrbuf_putc(term_dict, *start);
229 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231 wrbuf_puts(term_dict, map[0]);
236 static int term_100_icu(zebra_map_t zm,
237 const char **src, WRBUF term_dict, int space_split,
242 const char *res_buf = 0;
244 const char *display_buf;
246 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
247 &display_buf, &display_len))
249 *src += strlen(*src);
252 wrbuf_write(display_term, display_buf, display_len);
255 /* ICU sort keys seem to be of the form
256 basechars \x01 accents \x01 length
257 For now we'll just right truncate from basechars. This
258 may give false hits due to accents not being used.
261 while (--i >= 0 && res_buf[i] != '\x01')
265 while (--i >= 0 && res_buf[i] != '\x01')
269 { /* did not find base chars at all. Throw error */
272 res_len = i; /* reduce res_len */
274 for (i = 0; i < res_len; i++)
276 if (strchr(REGEX_CHARS "\\", res_buf[i]))
277 wrbuf_putc(term_dict, '\\');
279 wrbuf_putc(term_dict, 1);
281 wrbuf_putc(term_dict, res_buf[i]);
284 wrbuf_puts(term_dict, ".*");
288 /* term_100: handle term, where trunc = none(no operators at all) */
289 static int term_100(zebra_map_t zm,
290 const char **src, WRBUF term_dict, int space_split,
297 const char *space_start = 0;
298 const char *space_end = 0;
300 if (!term_pre(zm, src, NULL, NULL, !space_split))
307 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
310 if (**map == *CHR_SPACE)
313 else /* complete subfield only. */
315 if (**map == *CHR_SPACE)
316 { /* save space mapping for later .. */
321 else if (space_start)
322 { /* reload last space */
323 while (space_start < space_end)
325 if (strchr(REGEX_CHARS, *space_start))
326 wrbuf_putc(term_dict, '\\');
327 wrbuf_putc(display_term, *space_start);
328 wrbuf_putc(term_dict, *space_start);
333 space_start = space_end = 0;
338 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
344 /* term_101: handle term, where trunc = Process # */
345 static int term_101(zebra_map_t zm,
346 const char **src, WRBUF term_dict, int space_split,
353 if (!term_pre(zm, src, "#", "#", !space_split))
361 wrbuf_puts(term_dict, ".*");
362 wrbuf_putc(display_term, *s0);
369 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
370 if (space_split && **map == *CHR_SPACE)
374 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
381 /* term_103: handle term, where trunc = re-2 (regular expressions) */
382 static int term_103(zebra_map_t zm, const char **src,
383 WRBUF term_dict, int *errors, int space_split,
390 if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
393 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
394 isdigit(((const unsigned char *)s0)[1]))
396 *errors = s0[1] - '0';
403 if (strchr("^\\()[].*+?|-", *s0))
405 wrbuf_putc(display_term, *s0);
406 wrbuf_putc(term_dict, *s0);
414 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
415 if (space_split && **map == *CHR_SPACE)
419 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
427 /* term_102: handle term, where trunc = re-1 (regular expressions) */
428 static int term_102(zebra_map_t zm, const char **src,
429 WRBUF term_dict, int space_split, WRBUF display_term)
431 return term_103(zm, src, term_dict, NULL, space_split, display_term);
435 /* term_104: handle term, process ?, * and # */
436 static int term_104(zebra_map_t zm, const char **src,
437 WRBUF term_dict, int space_split, WRBUF display_term)
443 if (!term_pre(zm, src, "?*#", "?*#", !space_split))
451 wrbuf_putc(display_term, *s0);
453 if (*s0 >= '0' && *s0 <= '9')
456 while (*s0 >= '0' && *s0 <= '9')
458 limit = limit * 10 + (*s0 - '0');
459 wrbuf_putc(display_term, *s0);
466 wrbuf_puts(term_dict, ".?");
471 wrbuf_puts(term_dict, ".*");
477 wrbuf_puts(term_dict, ".*");
478 wrbuf_putc(display_term, *s0);
484 wrbuf_puts(term_dict, ".");
485 wrbuf_putc(display_term, *s0);
492 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
493 if (space_split && **map == *CHR_SPACE)
497 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
504 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
505 static int term_105(zebra_map_t zm, const char **src,
506 WRBUF term_dict, int space_split,
507 WRBUF display_term, int right_truncate)
513 if (!term_pre(zm, src, "*!", "*!", !space_split))
521 wrbuf_puts(term_dict, ".*");
522 wrbuf_putc(display_term, *s0);
528 wrbuf_putc(term_dict, '.');
529 wrbuf_putc(display_term, *s0);
536 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
537 if (space_split && **map == *CHR_SPACE)
541 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
545 wrbuf_puts(term_dict, ".*");
551 /* gen_regular_rel - generate regular expression from relation
552 * val: border value (inclusive)
553 * islt: 1 if <=; 0 if >=.
555 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
557 char dst_buf[20*5*20]; /* assuming enough for expansion */
564 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
568 strcpy(dst, "(-[0-9]+|(");
576 strcpy(dst, "([0-9]+|-(");
587 sprintf(numstr, "%d", val);
588 for (w = strlen(numstr); --w >= 0; pos++)
607 strcpy(dst + dst_p, numstr);
608 dst_p = strlen(dst) - pos - 1;
636 for (i = 0; i<pos; i++)
649 /* match everything less than 10^(pos-1) */
651 for (i = 1; i<pos; i++)
652 strcat(dst, "[0-9]?");
656 /* match everything greater than 10^pos */
657 for (i = 0; i <= pos; i++)
658 strcat(dst, "[0-9]");
659 strcat(dst, "[0-9]*");
662 wrbuf_puts(term_dict, dst);
665 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
667 const char *src = wrbuf_cstr(wsrc);
668 if (src[*indx] == '\\')
670 wrbuf_putc(term_p, src[*indx]);
673 wrbuf_putc(term_p, src[*indx]);
678 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
679 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
680 * >= abc ([b-].*|a[c-].*|ab[c-].*)
681 * ([^-a].*|a[^-b].*|ab[c-].*)
682 * < abc ([-0].*|a[-a].*|ab[-b].*)
683 * ([^a-].*|a[^b-].*|ab[^c-].*)
684 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
685 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
687 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
688 const char **term_sub, WRBUF term_dict,
689 const Odr_oid *attributeSet,
690 zebra_map_t zm, int space_split,
697 WRBUF term_component = wrbuf_alloc();
699 attr_init_APT(&relation, zapt, 2);
700 relation_value = attr_find(&relation, NULL);
703 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
704 switch (relation_value)
707 if (!term_100(zm, term_sub, term_component, space_split, display_term))
709 wrbuf_destroy(term_component);
712 yaz_log(log_level_rpn, "Relation <");
714 wrbuf_putc(term_dict, '(');
715 for (i = 0; i < wrbuf_len(term_component); )
720 wrbuf_putc(term_dict, '|');
722 string_rel_add_char(term_dict, term_component, &j);
724 wrbuf_putc(term_dict, '[');
726 wrbuf_putc(term_dict, '^');
728 wrbuf_putc(term_dict, 1);
729 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
731 string_rel_add_char(term_dict, term_component, &i);
732 wrbuf_putc(term_dict, '-');
734 wrbuf_putc(term_dict, ']');
735 wrbuf_putc(term_dict, '.');
736 wrbuf_putc(term_dict, '*');
738 wrbuf_putc(term_dict, ')');
741 if (!term_100(zm, term_sub, term_component, space_split, display_term))
743 wrbuf_destroy(term_component);
746 yaz_log(log_level_rpn, "Relation <=");
748 wrbuf_putc(term_dict, '(');
749 for (i = 0; i < wrbuf_len(term_component); )
754 string_rel_add_char(term_dict, term_component, &j);
755 wrbuf_putc(term_dict, '[');
757 wrbuf_putc(term_dict, '^');
759 wrbuf_putc(term_dict, 1);
760 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
762 string_rel_add_char(term_dict, term_component, &i);
763 wrbuf_putc(term_dict, '-');
765 wrbuf_putc(term_dict, ']');
766 wrbuf_putc(term_dict, '.');
767 wrbuf_putc(term_dict, '*');
769 wrbuf_putc(term_dict, '|');
771 for (i = 0; i < wrbuf_len(term_component); )
772 string_rel_add_char(term_dict, term_component, &i);
773 wrbuf_putc(term_dict, ')');
776 if (!term_100(zm, term_sub, term_component, space_split, display_term))
778 wrbuf_destroy(term_component);
781 yaz_log(log_level_rpn, "Relation >");
783 wrbuf_putc(term_dict, '(');
784 for (i = 0; i < wrbuf_len(term_component); )
789 string_rel_add_char(term_dict, term_component, &j);
790 wrbuf_putc(term_dict, '[');
792 wrbuf_putc(term_dict, '^');
793 wrbuf_putc(term_dict, '-');
794 string_rel_add_char(term_dict, term_component, &i);
796 wrbuf_putc(term_dict, ']');
797 wrbuf_putc(term_dict, '.');
798 wrbuf_putc(term_dict, '*');
800 wrbuf_putc(term_dict, '|');
802 for (i = 0; i < wrbuf_len(term_component); )
803 string_rel_add_char(term_dict, term_component, &i);
804 wrbuf_putc(term_dict, '.');
805 wrbuf_putc(term_dict, '+');
806 wrbuf_putc(term_dict, ')');
809 if (!term_100(zm, term_sub, term_component, space_split, display_term))
811 wrbuf_destroy(term_component);
814 yaz_log(log_level_rpn, "Relation >=");
816 wrbuf_putc(term_dict, '(');
817 for (i = 0; i < wrbuf_len(term_component); )
822 wrbuf_putc(term_dict, '|');
824 string_rel_add_char(term_dict, term_component, &j);
825 wrbuf_putc(term_dict, '[');
827 if (i < wrbuf_len(term_component)-1)
829 wrbuf_putc(term_dict, '^');
830 wrbuf_putc(term_dict, '-');
831 string_rel_add_char(term_dict, term_component, &i);
835 string_rel_add_char(term_dict, term_component, &i);
836 wrbuf_putc(term_dict, '-');
838 wrbuf_putc(term_dict, ']');
839 wrbuf_putc(term_dict, '.');
840 wrbuf_putc(term_dict, '*');
842 wrbuf_putc(term_dict, ')');
849 yaz_log(log_level_rpn, "Relation =");
850 if (!term_100(zm, term_sub, term_component, space_split, display_term))
852 wrbuf_destroy(term_component);
855 wrbuf_puts(term_dict, "(");
856 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
857 wrbuf_puts(term_dict, ")");
860 yaz_log(log_level_rpn, "Relation always matches");
861 /* skip to end of term (we don't care what it is) */
862 while (**term_sub != '\0')
866 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
867 wrbuf_destroy(term_component);
870 wrbuf_destroy(term_component);
874 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
875 const char **term_sub,
877 const Odr_oid *attributeSet, NMEM stream,
878 struct grep_info *grep_info,
879 const char *index_type, int complete_flag,
881 const char *xpath_use,
882 struct ord_list **ol,
885 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
886 Z_AttributesPlusTerm *zapt,
887 zint *hits_limit_value,
888 const char **term_ref_id_str,
891 AttrType term_ref_id_attr;
892 AttrType hits_limit_attr;
895 attr_init_APT(&hits_limit_attr, zapt, 11);
896 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
898 attr_init_APT(&term_ref_id_attr, zapt, 10);
899 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
900 if (term_ref_id_int >= 0)
902 char *res = nmem_malloc(nmem, 20);
903 sprintf(res, "%d", term_ref_id_int);
904 *term_ref_id_str = res;
907 /* no limit given ? */
908 if (*hits_limit_value == -1)
910 if (*term_ref_id_str)
912 /* use global if term_ref is present */
913 *hits_limit_value = zh->approx_limit;
917 /* no counting if term_ref is not present */
918 *hits_limit_value = 0;
921 else if (*hits_limit_value == 0)
923 /* 0 is the same as global limit */
924 *hits_limit_value = zh->approx_limit;
926 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
927 *term_ref_id_str ? *term_ref_id_str : "none",
932 /** \brief search for term (which may be truncated)
934 static ZEBRA_RES search_term(ZebraHandle zh,
935 Z_AttributesPlusTerm *zapt,
936 const char **term_sub,
937 const Odr_oid *attributeSet, NMEM stream,
938 struct grep_info *grep_info,
939 const char *index_type, int complete_flag,
940 const char *rank_type,
941 const char *xpath_use,
944 struct rset_key_control *kc,
949 zint hits_limit_value;
950 const char *term_ref_id_str = 0;
951 WRBUF term_dict = wrbuf_alloc();
952 WRBUF display_term = wrbuf_alloc();
954 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
956 grep_info->isam_p_indx = 0;
957 res = string_term(zh, zapt, term_sub, term_dict,
958 attributeSet, stream, grep_info,
959 index_type, complete_flag,
960 display_term, xpath_use, &ol, zm);
961 wrbuf_destroy(term_dict);
962 if (res == ZEBRA_OK && *term_sub)
964 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
965 *rset = rset_trunc(zh, grep_info->isam_p_buf,
966 grep_info->isam_p_indx, wrbuf_buf(display_term),
967 wrbuf_len(display_term), rank_type,
968 1 /* preserve pos */,
969 zapt->term->which, rset_nmem,
970 kc, kc->scope, ol, index_type, hits_limit_value,
975 wrbuf_destroy(display_term);
979 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
980 const char **term_sub,
982 const Odr_oid *attributeSet, NMEM stream,
983 struct grep_info *grep_info,
984 const char *index_type, int complete_flag,
986 const char *xpath_use,
987 struct ord_list **ol,
992 int truncation_value;
994 struct rpn_char_map_info rcmi;
996 int space_split = complete_flag ? 0 : 1;
999 int max_pos, prefix_len = 0;
1004 *ol = ord_list_create(stream);
1006 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1007 attr_init_APT(&truncation, zapt, 5);
1008 truncation_value = attr_find(&truncation, NULL);
1009 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1011 termp = *term_sub; /* start of term for each database */
1013 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1014 attributeSet, &ord) != ZEBRA_OK)
1020 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1022 *ol = ord_list_append(stream, *ol, ord);
1023 ord_len = key_SU_encode(ord, ord_buf);
1025 wrbuf_putc(term_dict, '(');
1027 for (i = 0; i<ord_len; i++)
1029 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1030 wrbuf_putc(term_dict, ord_buf[i]);
1032 wrbuf_putc(term_dict, ')');
1034 prefix_len = wrbuf_len(term_dict);
1036 if (zebra_maps_is_icu(zm))
1041 attr_init_APT(&relation, zapt, 2);
1042 relation_value = attr_find(&relation, NULL);
1043 if (relation_value == 103) /* always matches */
1044 termp += strlen(termp); /* move to end of term */
1045 else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1048 switch (truncation_value)
1050 case -1: /* not specified */
1051 case 100: /* do not truncate */
1052 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1058 case 1: /* right truncation */
1059 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1066 zebra_setError_zint(zh,
1067 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1074 zebra_setError_zint(zh,
1075 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1082 /* non-ICU case. using string.chr and friends */
1083 switch (truncation_value)
1085 case -1: /* not specified */
1086 case 100: /* do not truncate */
1087 if (!string_relation(zh, zapt, &termp, term_dict,
1089 zm, space_split, display_term,
1094 zebra_setError(zh, relation_error, 0);
1101 case 1: /* right truncation */
1102 wrbuf_putc(term_dict, '(');
1103 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1108 wrbuf_puts(term_dict, ".*)");
1110 case 2: /* left truncation */
1111 wrbuf_puts(term_dict, "(.*");
1112 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1117 wrbuf_putc(term_dict, ')');
1119 case 3: /* left&right truncation */
1120 wrbuf_puts(term_dict, "(.*");
1121 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1126 wrbuf_puts(term_dict, ".*)");
1128 case 101: /* process # in term */
1129 wrbuf_putc(term_dict, '(');
1130 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1135 wrbuf_puts(term_dict, ")");
1137 case 102: /* Regexp-1 */
1138 wrbuf_putc(term_dict, '(');
1139 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1144 wrbuf_putc(term_dict, ')');
1146 case 103: /* Regexp-2 */
1148 wrbuf_putc(term_dict, '(');
1149 if (!term_103(zm, &termp, term_dict, ®ex_range,
1150 space_split, display_term))
1155 wrbuf_putc(term_dict, ')');
1157 case 104: /* process # and ! in term */
1158 wrbuf_putc(term_dict, '(');
1159 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1164 wrbuf_putc(term_dict, ')');
1166 case 105: /* process * and ! in term */
1167 wrbuf_putc(term_dict, '(');
1168 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1173 wrbuf_putc(term_dict, ')');
1175 case 106: /* process * and ! in term */
1176 wrbuf_putc(term_dict, '(');
1177 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1182 wrbuf_putc(term_dict, ')');
1185 zebra_setError_zint(zh,
1186 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1194 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1195 esc_str(buf, sizeof(buf), input, strlen(input));
1198 WRBUF pr_wr = wrbuf_alloc();
1200 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1201 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1202 wrbuf_destroy(pr_wr);
1204 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1205 grep_info, &max_pos,
1206 ord_len /* number of "exact" chars */,
1209 zebra_set_partial_result(zh);
1211 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1213 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1219 static void grep_info_delete(struct grep_info *grep_info)
1222 xfree(grep_info->term_no);
1224 xfree(grep_info->isam_p_buf);
1227 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1228 Z_AttributesPlusTerm *zapt,
1229 struct grep_info *grep_info,
1230 const char *index_type)
1233 grep_info->term_no = 0;
1235 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1236 grep_info->isam_p_size = 0;
1237 grep_info->isam_p_buf = NULL;
1239 grep_info->index_type = index_type;
1240 grep_info->termset = 0;
1246 attr_init_APT(&truncmax, zapt, 13);
1247 truncmax_value = attr_find(&truncmax, NULL);
1248 if (truncmax_value != -1)
1249 grep_info->trunc_max = truncmax_value;
1254 int termset_value_numeric;
1255 const char *termset_value_string;
1257 attr_init_APT(&termset, zapt, 8);
1258 termset_value_numeric =
1259 attr_find_ex(&termset, NULL, &termset_value_string);
1260 if (termset_value_numeric != -1)
1263 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1267 const char *termset_name = 0;
1268 if (termset_value_numeric != -2)
1271 sprintf(resname, "%d", termset_value_numeric);
1272 termset_name = resname;
1275 termset_name = termset_value_string;
1276 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1277 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1278 if (!grep_info->termset)
1280 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1289 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1290 Z_AttributesPlusTerm *zapt,
1292 const Odr_oid *attributeSet,
1294 const char *index_type, int complete_flag,
1295 const char *rank_type,
1296 const char *xpath_use,
1298 RSET **result_sets, int *num_result_sets,
1299 struct rset_key_control *kc,
1302 struct grep_info grep_info;
1303 const char *termp = termz;
1306 *num_result_sets = 0;
1307 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1313 if (alloc_sets == *num_result_sets)
1316 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1319 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1320 alloc_sets = alloc_sets + add;
1321 *result_sets = rnew;
1323 res = search_term(zh, zapt, &termp, attributeSet,
1325 index_type, complete_flag,
1327 xpath_use, rset_nmem,
1328 &(*result_sets)[*num_result_sets],
1330 if (res != ZEBRA_OK)
1333 for (i = 0; i < *num_result_sets; i++)
1334 rset_delete((*result_sets)[i]);
1335 grep_info_delete(&grep_info);
1338 if ((*result_sets)[*num_result_sets] == 0)
1340 (*num_result_sets)++;
1345 grep_info_delete(&grep_info);
1350 \brief Create result set(s) for list of terms
1351 \param zh Zebra Handle
1352 \param zapt Attributes Plus Term (RPN leaf)
1353 \param termz term as used in query but converted to UTF-8
1354 \param attributeSet default attribute set
1355 \param stream memory for result
1356 \param index_type register type ("w", "p",..)
1357 \param complete_flag whether it's phrases or not
1358 \param rank_type term flags for ranking
1359 \param xpath_use use attribute for X-Path (-1 for no X-path)
1360 \param rset_nmem memory for result sets
1361 \param result_sets output result set for each term in list (output)
1362 \param num_result_sets number of output result sets
1363 \param kc rset key control to be used for created result sets
1365 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1366 Z_AttributesPlusTerm *zapt,
1368 const Odr_oid *attributeSet,
1370 const char *index_type, int complete_flag,
1371 const char *rank_type,
1372 const char *xpath_use,
1374 RSET **result_sets, int *num_result_sets,
1375 struct rset_key_control *kc)
1377 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1378 if (zebra_maps_is_icu(zm))
1379 zebra_map_tokenize_start(zm, termz, strlen(termz));
1380 return search_terms_chrmap(zh, zapt, termz, attributeSet,
1381 stream, index_type, complete_flag,
1382 rank_type, xpath_use,
1383 rset_nmem, result_sets, num_result_sets,
1388 /** \brief limit a search by position - returns result set
1390 static ZEBRA_RES search_position(ZebraHandle zh,
1391 Z_AttributesPlusTerm *zapt,
1392 const Odr_oid *attributeSet,
1393 const char *index_type,
1396 struct rset_key_control *kc)
1402 char term_dict[100];
1406 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1408 attr_init_APT(&position, zapt, 3);
1409 position_value = attr_find(&position, NULL);
1410 switch(position_value)
1419 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1425 if (!zebra_maps_is_first_in_field(zm))
1427 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1432 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1433 attributeSet, &ord) != ZEBRA_OK)
1437 ord_len = key_SU_encode(ord, ord_buf);
1438 memcpy(term_dict, ord_buf, ord_len);
1439 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1440 val = dict_lookup(zh->reg->dict, term_dict);
1443 assert(*val == sizeof(ISAM_P));
1444 memcpy(&isam_p, val+1, sizeof(isam_p));
1446 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1452 /** \brief returns result set for phrase search
1454 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1455 Z_AttributesPlusTerm *zapt,
1456 const char *termz_org,
1457 const Odr_oid *attributeSet,
1459 const char *index_type,
1461 const char *rank_type,
1462 const char *xpath_use,
1465 struct rset_key_control *kc)
1467 RSET *result_sets = 0;
1468 int num_result_sets = 0;
1470 search_terms_list(zh, zapt, termz_org, attributeSet,
1471 stream, index_type, complete_flag,
1472 rank_type, xpath_use,
1474 &result_sets, &num_result_sets, kc);
1476 if (res != ZEBRA_OK)
1479 if (num_result_sets > 0)
1482 res = search_position(zh, zapt, attributeSet,
1484 rset_nmem, &first_set,
1486 if (res != ZEBRA_OK)
1489 for (i = 0; i<num_result_sets; i++)
1490 rset_delete(result_sets[i]);
1495 RSET *nsets = nmem_malloc(stream,
1496 sizeof(RSET) * (num_result_sets+1));
1497 nsets[0] = first_set;
1498 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1499 result_sets = nsets;
1503 if (num_result_sets == 0)
1504 *rset = rset_create_null(rset_nmem, kc, 0);
1505 else if (num_result_sets == 1)
1506 *rset = result_sets[0];
1508 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1509 num_result_sets, result_sets,
1510 1 /* ordered */, 0 /* exclusion */,
1511 3 /* relation */, 1 /* distance */);
1517 /** \brief returns result set for or-list search
1519 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1520 Z_AttributesPlusTerm *zapt,
1521 const char *termz_org,
1522 const Odr_oid *attributeSet,
1524 const char *index_type,
1526 const char *rank_type,
1527 const char *xpath_use,
1530 struct rset_key_control *kc)
1532 RSET *result_sets = 0;
1533 int num_result_sets = 0;
1536 search_terms_list(zh, zapt, termz_org, attributeSet,
1537 stream, index_type, complete_flag,
1538 rank_type, xpath_use,
1540 &result_sets, &num_result_sets, kc);
1541 if (res != ZEBRA_OK)
1544 for (i = 0; i<num_result_sets; i++)
1547 res = search_position(zh, zapt, attributeSet,
1549 rset_nmem, &first_set,
1551 if (res != ZEBRA_OK)
1553 for (i = 0; i<num_result_sets; i++)
1554 rset_delete(result_sets[i]);
1562 tmp_set[0] = first_set;
1563 tmp_set[1] = result_sets[i];
1565 result_sets[i] = rset_create_prox(
1566 rset_nmem, kc, kc->scope,
1568 1 /* ordered */, 0 /* exclusion */,
1569 3 /* relation */, 1 /* distance */);
1572 if (num_result_sets == 0)
1573 *rset = rset_create_null(rset_nmem, kc, 0);
1574 else if (num_result_sets == 1)
1575 *rset = result_sets[0];
1577 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1578 num_result_sets, result_sets);
1584 /** \brief returns result set for and-list search
1586 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1587 Z_AttributesPlusTerm *zapt,
1588 const char *termz_org,
1589 const Odr_oid *attributeSet,
1591 const char *index_type,
1593 const char *rank_type,
1594 const char *xpath_use,
1597 struct rset_key_control *kc)
1599 RSET *result_sets = 0;
1600 int num_result_sets = 0;
1603 search_terms_list(zh, zapt, termz_org, attributeSet,
1604 stream, index_type, complete_flag,
1605 rank_type, xpath_use,
1607 &result_sets, &num_result_sets,
1609 if (res != ZEBRA_OK)
1611 for (i = 0; i<num_result_sets; i++)
1614 res = search_position(zh, zapt, attributeSet,
1616 rset_nmem, &first_set,
1618 if (res != ZEBRA_OK)
1620 for (i = 0; i<num_result_sets; i++)
1621 rset_delete(result_sets[i]);
1629 tmp_set[0] = first_set;
1630 tmp_set[1] = result_sets[i];
1632 result_sets[i] = rset_create_prox(
1633 rset_nmem, kc, kc->scope,
1635 1 /* ordered */, 0 /* exclusion */,
1636 3 /* relation */, 1 /* distance */);
1641 if (num_result_sets == 0)
1642 *rset = rset_create_null(rset_nmem, kc, 0);
1643 else if (num_result_sets == 1)
1644 *rset = result_sets[0];
1646 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1647 num_result_sets, result_sets);
1653 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1654 const char **term_sub,
1656 const Odr_oid *attributeSet,
1657 struct grep_info *grep_info,
1667 WRBUF term_num = wrbuf_alloc();
1670 attr_init_APT(&relation, zapt, 2);
1671 relation_value = attr_find(&relation, NULL);
1673 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1675 switch (relation_value)
1678 yaz_log(log_level_rpn, "Relation <");
1679 if (!term_100(zm, term_sub, term_num, 1, display_term))
1681 wrbuf_destroy(term_num);
1684 term_value = atoi(wrbuf_cstr(term_num));
1685 gen_regular_rel(term_dict, term_value-1, 1);
1688 yaz_log(log_level_rpn, "Relation <=");
1689 if (!term_100(zm, term_sub, term_num, 1, display_term))
1691 wrbuf_destroy(term_num);
1694 term_value = atoi(wrbuf_cstr(term_num));
1695 gen_regular_rel(term_dict, term_value, 1);
1698 yaz_log(log_level_rpn, "Relation >=");
1699 if (!term_100(zm, term_sub, term_num, 1, display_term))
1701 wrbuf_destroy(term_num);
1704 term_value = atoi(wrbuf_cstr(term_num));
1705 gen_regular_rel(term_dict, term_value, 0);
1708 yaz_log(log_level_rpn, "Relation >");
1709 if (!term_100(zm, term_sub, term_num, 1, display_term))
1711 wrbuf_destroy(term_num);
1714 term_value = atoi(wrbuf_cstr(term_num));
1715 gen_regular_rel(term_dict, term_value+1, 0);
1719 yaz_log(log_level_rpn, "Relation =");
1720 if (!term_100(zm, term_sub, term_num, 1, display_term))
1722 wrbuf_destroy(term_num);
1725 term_value = atoi(wrbuf_cstr(term_num));
1726 wrbuf_printf(term_dict, "(0*%d)", term_value);
1729 /* term_tmp untouched.. */
1730 while (**term_sub != '\0')
1734 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1735 wrbuf_destroy(term_num);
1738 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1739 0, grep_info, max_pos, 0, grep_handle);
1742 zebra_set_partial_result(zh);
1744 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1745 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1746 wrbuf_destroy(term_num);
/*
 * numeric_term: prepare the dictionary pattern for one numeric search term
 * and hand it to numeric_relation().
 *
 * Visible steps: fetch the zebra map for index_type, start a fresh ordinal
 * list in *ol, install the character map handler for that map, resolve the
 * index ordinal with zebra_apt_get_ord(), write the encoded ordinal in
 * parentheses to term_dict, then call numeric_relation().  An error code
 * left in relation_error is reported through zebra_setError().
 *
 * zh            Zebra handle (register, dictionary, error state)
 * zapt          attributes-plus-term being searched
 * term_sub      cursor into the term text (forwarded to numeric_relation
 *               through termp)
 * attributeSet  default attribute set for the lookup
 * stream        NMEM used for the ordinal list
 * grep_info     receives the positions found by the dictionary lookup
 * index_type    index type used to select the zebra map and the ordinal
 * xpath_use     forwarded to zebra_apt_get_ord()
 * ol            out: list holding the ordinal that was searched
 */
1750 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1751 const char **term_sub,
1753 const Odr_oid *attributeSet, NMEM stream,
1754 struct grep_info *grep_info,
1755 const char *index_type, int complete_flag,
1757 const char *xpath_use,
1758 struct ord_list **ol)
1761 struct rpn_char_map_info rcmi;
1763 int relation_error = 0;
1764 int ord, ord_len, i;
1766 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1768 *ol = ord_list_create(stream);
1770 rpn_char_map_prepare(zh->reg, zm, &rcmi);
/* Resolve the index ordinal; anything but ZEBRA_OK is treated specially. */
1774 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1775 attributeSet, &ord) != ZEBRA_OK)
/* Start the pattern from scratch and remember which ordinal is searched. */
1780 wrbuf_rewind(term_dict);
1782 *ol = ord_list_append(stream, *ol, ord);
1784 ord_len = key_SU_encode(ord, ord_buf);
/* Pattern prefix: the encoded ordinal bytes wrapped in parentheses.
   Each ordinal byte is preceded by a byte of value 1 - presumably the
   quoting byte of the dictionary pattern syntax; TODO confirm. */
1786 wrbuf_putc(term_dict, '(');
1787 for (i = 0; i < ord_len; i++)
1789 wrbuf_putc(term_dict, 1);
1790 wrbuf_putc(term_dict, ord_buf[i]);
1792 wrbuf_putc(term_dict, ')');
/* numeric_relation() appends the relation part of the pattern and runs
   the lookup; a zero return is the failure case. */
1794 if (!numeric_relation(zh, zapt, &termp, term_dict,
1795 attributeSet, grep_info, &max_pos, zm,
1796 display_term, &relation_error))
1800 zebra_setError(zh, relation_error, 0);
1807 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/*
 * rpn_search_APT_numeric: evaluate a numeric search over the term text in
 * termz.
 *
 * Each pass allocates a pattern buffer and a display buffer, calls
 * numeric_term() and turns the collected positions into one result set
 * with rset_trunc().  The result sets are gathered in an array that grows
 * on demand from the stream NMEM.  At the end the outcome stored in *rset
 * is a null set when nothing was collected, the single set when there is
 * exactly one, and otherwise an AND over all collected sets.
 */
1812 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1813 Z_AttributesPlusTerm *zapt,
1815 const Odr_oid *attributeSet,
1817 const char *index_type,
1819 const char *rank_type,
1820 const char *xpath_use,
1823 struct rset_key_control *kc)
1825 const char *termp = termz;
1826 RSET *result_sets = 0;
1827 int num_result_sets = 0;
1829 struct grep_info grep_info;
1831 zint hits_limit_value;
1832 const char *term_ref_id_str = 0;
/* Fetch the hit limit and term reference id that apply to this term. */
1834 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1837 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1838 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
/* Per-pass state: ordinal list plus two buffers that are both destroyed
   before the pass ends. */
1842 struct ord_list *ol;
1843 WRBUF term_dict = wrbuf_alloc();
1844 WRBUF display_term = wrbuf_alloc();
/* The array is full: allocate a larger one from the NMEM and copy the
   existing entries over.  The old array is not freed explicitly. */
1845 if (alloc_sets == num_result_sets)
1848 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1851 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1852 alloc_sets = alloc_sets + add;
1855 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* Reset the position count so this pass only sees its own hits. */
1856 grep_info.isam_p_indx = 0;
1857 res = numeric_term(zh, zapt, &termp, term_dict,
1858 attributeSet, stream, &grep_info,
1859 index_type, complete_flag,
1860 display_term, xpath_use, &ol);
1861 wrbuf_destroy(term_dict);
/* Failure, or termp cleared by the callee: release the display buffer
   on this path as well. */
1862 if (res == ZEBRA_FAIL || termp == 0)
1864 wrbuf_destroy(display_term);
1867 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
/* Turn the positions of this pass into a result set. */
1868 result_sets[num_result_sets] =
1869 rset_trunc(zh, grep_info.isam_p_buf,
1870 grep_info.isam_p_indx, wrbuf_buf(display_term),
1871 wrbuf_len(display_term), rank_type,
1872 0 /* preserve position */,
1873 zapt->term->which, rset_nmem,
1874 kc, kc->scope, ol, index_type,
1877 wrbuf_destroy(display_term);
1878 if (!result_sets[num_result_sets])
1884 grep_info_delete(&grep_info);
1886 if (res != ZEBRA_OK)
/* Fold the collected sets into the single set returned through rset. */
1888 if (num_result_sets == 0)
1889 *rset = rset_create_null(rset_nmem, kc, 0);
1890 else if (num_result_sets == 1)
1891 *rset = result_sets[0];
1893 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1894 num_result_sets, result_sets);
/*
 * rpn_search_APT_local: search by record system number.
 *
 * The term text is converted to a number with atozint() and used to fetch
 * the record from the register.  The result stored in *rset is either a
 * null set or a temporary set that has a single key written to it; the
 * temporary set is created under the directory named by the setTmpDir
 * resource.  Presumably the null set is the no-such-record case - TODO
 * confirm against the full source.
 */
1900 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1901 Z_AttributesPlusTerm *zapt,
1903 const Odr_oid *attributeSet,
1905 const char *rank_type, NMEM rset_nmem,
1907 struct rset_key_control *kc)
/* The whole term is the system number. */
1910 zint sysno = atozint(termz);
1914 rec = rec_get(zh->reg->records, sysno);
1922 *rset = rset_create_null(rset_nmem, kc, 0);
/* Build a temporary set and write the key for the record into it. */
1928 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1929 res_get(zh->res, "setTmpDir"), 0);
1930 rsfd = rset_open(*rset, RSETF_WRITE);
1935 rset_write(rsfd, &key);
/*
 * rpn_sort_spec: record a sort key that was embedded in the query.
 *
 * The general term is parsed as a number and used as the slot index in
 * sort_sequence->specs.  A Z_SortKeySpec is built from the stream NMEM
 * with the attribute list of zapt as sort attributes, and stored in that
 * slot.  The direction comes from attribute type 7: value 2 selects
 * descending, every other value ascending.  The search result itself is
 * a null set, stored in *rset.
 */
1941 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1942 const Odr_oid *attributeSet, NMEM stream,
1943 Z_SortKeySpecList *sort_sequence,
1944 const char *rank_type,
1947 struct rset_key_control *kc)
1950 int sort_relation_value;
1951 AttrType sort_relation_type;
/* Attribute type 7 carries the sort direction. */
1956 attr_init_APT(&sort_relation_type, zapt, 7);
1957 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* First sort key seen: create a table of 10 empty slots. */
1959 if (!sort_sequence->specs)
1961 sort_sequence->num_specs = 10;
1962 sort_sequence->specs = (Z_SortKeySpec **)
1963 nmem_malloc(stream, sort_sequence->num_specs *
1964 sizeof(*sort_sequence->specs));
1965 for (i = 0; i<sort_sequence->num_specs; i++)
1966 sort_sequence->specs[i] = 0;
/* Only general terms can be parsed as a slot number. */
1968 if (zapt->term->which != Z_Term_general)
1971 i = atoi_n((char *) zapt->term->u.general->buf,
1972 zapt->term->u.general->len);
/* NOTE(review): only the upper bound of i is checked in the visible code,
   and i is later used as an index into specs.  Confirm that atoi_n cannot
   yield a negative value here. */
1973 if (i >= sort_sequence->num_specs)
1975 sprintf(termz, "%d", i);
/* Build the spec: generic sort element keyed by sort attributes. */
1977 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1978 sks->sortElement = (Z_SortElement *)
1979 nmem_malloc(stream, sizeof(*sks->sortElement));
1980 sks->sortElement->which = Z_SortElement_generic;
1981 sk = sks->sortElement->u.generic = (Z_SortKey *)
1982 nmem_malloc(stream, sizeof(*sk));
1983 sk->which = Z_SortKey_sortAttributes;
1984 sk->u.sortAttributes = (Z_SortAttributes *)
1985 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
/* The attribute set id is duplicated; the attribute list is shared with
   zapt rather than copied. */
1987 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1988 sk->u.sortAttributes->list = zapt->attributes;
1990 sks->sortRelation = (int *)
1991 nmem_malloc(stream, sizeof(*sks->sortRelation));
/* 1 is ascending, 2 is descending, anything else falls back to
   ascending. */
1992 if (sort_relation_value == 1)
1993 *sks->sortRelation = Z_SortKeySpec_ascending;
1994 else if (sort_relation_value == 2)
1995 *sks->sortRelation = Z_SortKeySpec_descending;
1997 *sks->sortRelation = Z_SortKeySpec_ascending;
1999 sks->caseSensitivity = (int *)
2000 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2001 *sks->caseSensitivity = 0;
2003 sks->which = Z_SortKeySpec_null;
2004 sks->u.null = odr_nullval ();
2005 sort_sequence->specs[i] = sks;
/* A sort key contributes no hits of its own. */
2006 *rset = rset_create_null(rset_nmem, kc, 0);
/*
 * rpn_check_xpath: detect an XPath given as string-valued use attribute.
 *
 * Reads attribute type 1 of zapt as a string.  Only a string that starts
 * with a slash is parsed; in that case the return value is whatever
 * zebra_parse_xpath_str() returns after filling at most max entries of
 * xpath.  The value returned for the non-XPath case is outside the visible
 * lines.
 */
2011 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2012 const Odr_oid *attributeSet,
2013 struct xpath_location_step *xpath, int max,
2016 const Odr_oid *curAttributeSet = attributeSet;
2018 const char *use_string = 0;
/* Attribute type 1 is the use attribute; fetch its string form. */
2020 attr_init_APT(&use, zapt, 1);
2021 attr_find_ex(&use, &curAttributeSet, &use_string);
/* No string value, or one that does not start with a slash: not XPath. */
2023 if (!use_string || *use_string != '/')
2026 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc: look up a pattern in one of the XPath helper indexes and
 * return the matching positions as a result set.
 *
 * The ordinal of the index named by index_type and xpath_use is looked up
 * in the explain database.  The dictionary pattern is the encoded ordinal
 * in parentheses followed by term, which is used as a pattern as is.  The
 * positions found are combined by rset_trunc().  Two early exits return a
 * null set instead.
 */
2031 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2032 const char *index_type, const char *term,
2033 const char *xpath_use,
2035 struct rset_key_control *kc)
2037 struct grep_info grep_info;
2038 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2039 zinfo_index_category_index,
2040 index_type, xpath_use);
2041 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2042 return rset_create_null(rset_nmem, kc, 0);
/* NOTE(review): this second early return comes after grep_info_prepare()
   succeeded and there is no grep_info_delete() on this path.  Confirm
   that nothing was allocated by the prepare step. */
2045 return rset_create_null(rset_nmem, kc, 0);
2051 WRBUF term_dict = wrbuf_alloc();
2052 int ord_len = key_SU_encode(ord, ord_buf);
2053 int term_type = Z_Term_characterString;
2054 const char *flags = "void";
/* Pattern prefix: each encoded ordinal byte preceded by a byte of
   value 1, all wrapped in parentheses. */
2056 wrbuf_putc(term_dict, '(');
2057 for (i = 0; i<ord_len; i++)
2059 wrbuf_putc(term_dict, 1);
2060 wrbuf_putc(term_dict, ord_buf[i]);
2062 wrbuf_putc(term_dict, ')');
2063 wrbuf_puts(term_dict, term);
2065 grep_info.isam_p_indx = 0;
/* The return value r is only stored in the visible code. */
2066 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2067 &grep_info, &max_pos, 0, grep_handle);
2068 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2069 grep_info.isam_p_indx);
2070 rset = rset_trunc(zh, grep_info.isam_p_buf,
2071 grep_info.isam_p_indx, term, strlen(term),
2072 flags, 1, term_type, rset_nmem,
2073 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2074 0 /* term_ref_id_str */);
2075 grep_info_delete(&grep_info);
2076 wrbuf_destroy(term_dict);
/*
 * rpn_search_xpath: restrict a term result set to an XPath location.
 *
 * rset is the result of the term search, or null when the caller asked
 * for always-matches.  Working from the last location step towards the
 * first, the function builds the reversed element path for the step,
 * looks up the begin-tag and end-tag positions for that path with
 * xpath_trunc() and wraps the current set with rset_create_between().
 * A relation predicate on the step becomes an extra attribute set that is
 * passed to the same call.
 *
 * xpath_len  number of entries in xpath
 * xpath      parsed location steps; entry 0 is never copied into the
 *            reversed path
 */
2082 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2083 NMEM stream, const char *rank_type, RSET rset,
2084 int xpath_len, struct xpath_location_step *xpath,
2087 struct rset_key_control *kc)
/* A null incoming set means there is no term set to wrap. */
2090 int always_matches = rset ? 0 : 1;
2098 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2099 for (i = 0; i<xpath_len; i++)
2101 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
/* Illustration of how a path plus value maps to nested range searches. */
2113 a[@attr = value]/b[@other = othervalue]
2115 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2116 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2117 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2118 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2119 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2120 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* Remove any character map handler from the dictionary grep. */
2124 dict_grep_cmap(zh->reg->dict, 0, 0);
2127 int level = xpath_len;
/* Innermost step first. */
2130 while (--level >= 0)
2132 WRBUF xpath_rev = wrbuf_alloc();
2134 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* Build the path from this step back towards step 1, i.e. reversed. */
2136 for (i = level; i >= 1; --i)
2138 const char *cp = xpath[i].part;
/* Part characters are copied into the pattern: one case expands to a
   match-anything-but-slash pattern, a space gets a byte of value 1 in
   front of it, everything else is copied unchanged. */
2144 wrbuf_puts(xpath_rev, "[^/]*");
2145 else if (*cp == ' ')
2146 wrbuf_puts(xpath_rev, "\001 ");
2148 wrbuf_putc(xpath_rev, *cp);
2150 /* wrbuf_putc does not null-terminate , but
2151 wrbuf_puts below ensures it does.. so xpath_rev
2152 is OK iff length is > 0 */
2154 wrbuf_puts(xpath_rev, "/");
2156 else if (i == 1) /* // case */
2157 wrbuf_puts(xpath_rev, ".*");
/* Relation predicate with a non-empty name: search the attribute index. */
2159 if (xpath[level].predicate &&
2160 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2161 xpath[level].predicate->u.relation.name[0])
2163 WRBUF wbuf = wrbuf_alloc();
/* The first character of the name is skipped - presumably the at-sign
   that marks an attribute; TODO confirm. */
2164 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2165 if (xpath[level].predicate->u.relation.value)
2167 const char *cp = xpath[level].predicate->u.relation.value;
2168 wrbuf_putc(wbuf, '=');
/* Value characters listed in REGEX_CHARS get a backslash in front. */
2172 if (strchr(REGEX_CHARS, *cp))
2173 wrbuf_putc(wbuf, '\\');
2174 wrbuf_putc(wbuf, *cp);
2178 rset_attr = xpath_trunc(
2179 zh, stream, "0", wrbuf_cstr(wbuf),
2180 ZEBRA_XPATH_ATTR_NAME,
2182 wrbuf_destroy(wbuf);
/* The buffer is released here on one path although it is used again
   below, so this path presumably skips the rest of the pass - TODO
   confirm against the full source. */
2188 wrbuf_destroy(xpath_rev);
2192 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2193 wrbuf_cstr(xpath_rev));
/* Only a non-empty path produces tag lookups. */
2194 if (wrbuf_len(xpath_rev))
2196 rset_start_tag = xpath_trunc(zh, stream, "0",
2197 wrbuf_cstr(xpath_rev),
2198 ZEBRA_XPATH_ELM_BEGIN,
2201 rset = rset_start_tag;
2204 rset_end_tag = xpath_trunc(zh, stream, "0",
2205 wrbuf_cstr(xpath_rev),
2206 ZEBRA_XPATH_ELM_END,
/* Keep what lies between begin and end tag, filtered by rset_attr. */
2209 rset = rset_create_between(rset_nmem, kc, kc->scope,
2210 rset_start_tag, rset,
2211 rset_end_tag, rset_attr);
2214 wrbuf_destroy(xpath_rev);
/* Capacity of the location step array that rpn_search_database() hands to
   rpn_check_xpath(). */
2222 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT() calls rpn_search_database() before
   its definition appears. */
2224 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2225 Z_AttributesPlusTerm *zapt,
2226 const Odr_oid *attributeSet, NMEM stream,
2227 Z_SortKeySpecList *sort_sequence,
2230 struct rset_key_control *kc);
/*
 * rpn_search_APT: run one attributes-plus-term search against every
 * database in basenames.
 *
 * One result set per database is produced by rpn_search_database() into
 * an array taken from the stream NMEM.  A database that cannot be
 * selected is reported as YAZ_BIB1_DATABASE_UNAVAILABLE.  When a search
 * fails, result sets that were already created are deleted.  With no
 * databases the outcome in *rset is a null set; the final visible branch
 * combines the per-database sets with an OR.
 */
2232 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2233 const Odr_oid *attributeSet, NMEM stream,
2234 Z_SortKeySpecList *sort_sequence,
2235 int num_bases, const char **basenames,
2238 struct rset_key_control *kc)
2240 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2241 ZEBRA_RES res = ZEBRA_OK;
2243 for (i = 0; i < num_bases; i++)
/* A non-zero return means the database could not be made current. */
2246 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2248 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
/* The result for database i lands in slot i of the array. */
2253 res = rpn_search_database(zh, zapt, attributeSet, stream,
2255 rset_nmem, rsets+i, kc);
2256 if (res != ZEBRA_OK)
2259 if (res != ZEBRA_OK)
2260 { /* must clean up the already created sets */
2262 rset_delete(rsets[i]);
2269 else if (num_bases == 0)
2270 *rset = rset_create_null(rset_nmem, kc, 0);
2272 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/*
 * rpn_search_database: run one attributes-plus-term search against the
 * currently selected database.
 *
 * zebra_maps_attr() derives index type, search type, rank type and the
 * complete and sort flags from the attributes.  The term is converted to
 * UTF-8 in termz.  One path hands the request to rpn_sort_spec().
 * Otherwise the use attribute is checked for an XPath; when the last step
 * starts with an at-sign the attribute cdata index is used, else the
 * element cdata index.  Relation value 103 (always matches) goes straight
 * to rpn_search_xpath() with a null term set.
 *
 * The search itself is dispatched on the search type string: phrase,
 * and-list, or-list, local and numeric each have their own routine, and
 * any other value is reported as YAZ_BIB1_UNSUPP_SEARCH.  The final
 * visible statement passes the term result through rpn_search_xpath().
 */
2278 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2279 Z_AttributesPlusTerm *zapt,
2280 const Odr_oid *attributeSet, NMEM stream,
2281 Z_SortKeySpecList *sort_sequence,
2284 struct rset_key_control *kc)
2286 ZEBRA_RES res = ZEBRA_OK;
2287 const char *index_type;
2288 char *search_type = NULL;
2289 char rank_type[128];
2292 char termz[IT_MAX_WORD+1];
2294 const char *xpath_use = 0;
2295 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2299 log_level_rpn = yaz_log_module_level("rpn");
/* Derive the search parameters from the attribute list. */
2302 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2303 rank_type, &complete_flag, &sort_flag);
2305 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2306 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2307 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2308 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2310 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
/* Sort keys embedded in the query are handled separately - presumably
   selected by sort_flag; TODO confirm. */
2314 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2315 rank_type, rset_nmem, rset, kc);
2316 /* consider if an X-Path query is used */
2317 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2318 xpath, MAX_XPATH_STEPS, stream);
2321 if (xpath[xpath_len-1].part[0] == '@')
2322 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2324 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* Attribute type 2 is the relation attribute. */
2331 attr_init_APT(&relation, zapt, 2);
2332 relation_value = attr_find(&relation, NULL);
2334 if (relation_value == 103) /* alwaysmatches */
2336 *rset = 0; /* signal no "term" set */
2337 return rpn_search_xpath(zh, stream, rank_type, *rset,
2338 xpath_len, xpath, rset_nmem, rset, kc);
2343 /* search using one of the various search type strategies
2344 termz is our UTF-8 search term
2345 attributeSet is top-level default attribute set
2346 stream is ODR for search
2347 reg_id is the register type
2348 complete_flag is 1 for complete subfield, 0 for incomplete
2349 xpath_use is use-attribute to be used for X-Path search, 0 for none
2351 if (!strcmp(search_type, "phrase"))
2353 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2354 index_type, complete_flag, rank_type,
2359 else if (!strcmp(search_type, "and-list"))
2361 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2362 index_type, complete_flag, rank_type,
2367 else if (!strcmp(search_type, "or-list"))
2369 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2370 index_type, complete_flag, rank_type,
2375 else if (!strcmp(search_type, "local"))
2377 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2378 rank_type, rset_nmem, rset, kc);
2380 else if (!strcmp(search_type, "numeric"))
2382 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2383 index_type, complete_flag, rank_type,
2390 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2393 if (res != ZEBRA_OK)
2397 return rpn_search_xpath(zh, stream, rank_type, *rset,
2398 xpath_len, xpath, rset_nmem, rset, kc);
// Forward declaration of rpn_search_structure, which is called by
// rpn_search_top and by itself before its definition appears.  The
// definition further down omits the static keyword and takes its internal
// linkage from this declaration.
2401 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2402 const Odr_oid *attributeSet,
2403 NMEM stream, NMEM rset_nmem,
2404 Z_SortKeySpecList *sort_sequence,
2405 int num_bases, const char **basenames,
2406 RSET **result_sets, int *num_result_sets,
2407 Z_Operator *parent_op,
2408 struct rset_key_control *kc);
// rpn_get_top_approx_limit: walk an RPN query tree looking at attribute
// type 12 of every attributes-plus-term operand.
//
// Complex nodes are visited left operand first, and the right operand is
// only visited while the result is still ZEBRA_OK.  For a simple node that
// is an attributes-plus-term operand, attribute type 12 is read; the local
// name global_hits_limit_attr indicates it carries a hit limit.  What is
// done with the value read is outside the visible lines.
2410 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2413 ZEBRA_RES res = ZEBRA_OK;
2414 if (zs->which == Z_RPNStructure_complex)
// Recurse into both operands, stopping at the first failure.
2416 if (res == ZEBRA_OK)
2417 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2419 if (res == ZEBRA_OK)
2420 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2423 else if (zs->which == Z_RPNStructure_simple)
// Only attributes-plus-term operands are inspected.
2425 if (zs->u.simple->which == Z_Operand_APT)
2427 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2428 AttrType global_hits_limit_attr;
2431 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2433 l = attr_find(&global_hits_limit_attr, NULL);
// rpn_search_top: entry point for evaluating a whole RPN query tree.
//
// Creates the key control block, evaluates the tree with
// rpn_search_structure using no parent operator, and on failure deletes
// every result set that was produced.  On success exactly one non-null
// result set is expected, which the asserts enforce, and it is handed back
// through result_set.
2441 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2442 const Odr_oid *attributeSet,
2443 NMEM stream, NMEM rset_nmem,
2444 Z_SortKeySpecList *sort_sequence,
2445 int num_bases, const char **basenames,
2448 RSET *result_sets = 0;
2449 int num_result_sets = 0;
2451 struct rset_key_control *kc = zebra_key_control_create(zh);
2453 res = rpn_search_structure(zh, zs, attributeSet,
2456 num_bases, basenames,
2457 &result_sets, &num_result_sets,
2458 0 /* no parent op */,
// Failure: release whatever result sets were produced.
2460 if (res != ZEBRA_OK)
2463 for (i = 0; i<num_result_sets; i++)
2464 rset_delete(result_sets[i]);
// With no parent operator the callee must have combined everything into
// a single set.
2469 assert(num_result_sets == 1);
2470 assert(result_sets);
2471 assert(*result_sets);
2472 *result_set = *result_sets;
/*
 * rpn_search_structure: evaluate one node of an RPN query tree.
 *
 * Complex node: both operands are evaluated recursively with this node's
 * operator as their parent operator, and their result set lists are
 * concatenated.  The list is left uncombined only when the parent has the
 * same operator and that operator is AND or OR, so that a chain of equal
 * operators ends up as one multi-way set.  In every other case the list is
 * combined here according to the operator (and, or, and-not, proximity)
 * and replaced by a one-element list.  Proximity is only accepted with a
 * known unit code equal to word.
 *
 * Simple node: an attributes-plus-term operand is searched with
 * rpn_search_APT(), a result set id is resolved with resultSetRef().
 * The outcome is returned as a one-element list.
 *
 * result_sets      out: array of result sets, taken from the stream NMEM
 * num_result_sets  out: number of entries in that array, reset to 0 first
 * parent_op        operator of the enclosing complex node, or null at top
 */
2478 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2479 const Odr_oid *attributeSet,
2480 NMEM stream, NMEM rset_nmem,
2481 Z_SortKeySpecList *sort_sequence,
2482 int num_bases, const char **basenames,
2483 RSET **result_sets, int *num_result_sets,
2484 Z_Operator *parent_op,
2485 struct rset_key_control *kc)
2487 *num_result_sets = 0;
2488 if (zs->which == Z_RPNStructure_complex)
2491 Z_Operator *zop = zs->u.complex->roperator;
2492 RSET *result_sets_l = 0;
2493 int num_result_sets_l = 0;
2494 RSET *result_sets_r = 0;
2495 int num_result_sets_r = 0;
/* Left operand. */
2497 res = rpn_search_structure(zh, zs->u.complex->s1,
2498 attributeSet, stream, rset_nmem,
2500 num_bases, basenames,
2501 &result_sets_l, &num_result_sets_l,
/* Left side failed: drop its result sets. */
2503 if (res != ZEBRA_OK)
2506 for (i = 0; i<num_result_sets_l; i++)
2507 rset_delete(result_sets_l[i]);
/* Right operand. */
2510 res = rpn_search_structure(zh, zs->u.complex->s2,
2511 attributeSet, stream, rset_nmem,
2513 num_bases, basenames,
2514 &result_sets_r, &num_result_sets_r,
/* Right side failed: drop the result sets of both sides. */
2516 if (res != ZEBRA_OK)
2519 for (i = 0; i<num_result_sets_l; i++)
2520 rset_delete(result_sets_l[i]);
2521 for (i = 0; i<num_result_sets_r; i++)
2522 rset_delete(result_sets_r[i]);
2526 /* make a new list of result for all children */
2527 *num_result_sets = num_result_sets_l + num_result_sets_r;
2528 *result_sets = nmem_malloc(stream, *num_result_sets *
2529 sizeof(**result_sets));
2530 memcpy(*result_sets, result_sets_l,
2531 num_result_sets_l * sizeof(**result_sets));
2532 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2533 num_result_sets_r * sizeof(**result_sets));
/* Combine now unless the parent repeats the same AND or OR operator. */
2535 if (!parent_op || parent_op->which != zop->which
2536 || (zop->which != Z_Operator_and &&
2537 zop->which != Z_Operator_or))
2539 /* parent node different from this one (or non-present) */
2540 /* we must combine result sets now */
2544 case Z_Operator_and:
2545 rset = rset_create_and(rset_nmem, kc,
2547 *num_result_sets, *result_sets);
2550 rset = rset_create_or(rset_nmem, kc,
2551 kc->scope, 0, /* termid */
2552 *num_result_sets, *result_sets);
2554 case Z_Operator_and_not:
2555 rset = rset_create_not(rset_nmem, kc,
2560 case Z_Operator_prox:
/* Only known proximity unit codes are supported. */
2561 if (zop->u.prox->which != Z_ProximityOperator_known)
2564 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* Of the known units only word is supported. */
2568 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2570 zebra_setError_zint(zh,
2571 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2572 *zop->u.prox->u.known);
/* A missing exclusion flag is passed on as 0. */
2577 rset = rset_create_prox(rset_nmem, kc,
2579 *num_result_sets, *result_sets,
2580 *zop->u.prox->ordered,
2581 (!zop->u.prox->exclusion ?
2582 0 : *zop->u.prox->exclusion),
2583 *zop->u.prox->relationType,
2584 *zop->u.prox->distance );
2588 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* Replace the child list with the single combined set. */
2591 *num_result_sets = 1;
2592 *result_sets = nmem_malloc(stream, *num_result_sets *
2593 sizeof(**result_sets));
2594 (*result_sets)[0] = rset;
2597 else if (zs->which == Z_RPNStructure_simple)
2602 if (zs->u.simple->which == Z_Operand_APT)
2604 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2605 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2606 attributeSet, stream, sort_sequence,
2607 num_bases, basenames, rset_nmem, &rset,
2609 if (res != ZEBRA_OK)
2612 else if (zs->u.simple->which == Z_Operand_resultSetId)
2614 yaz_log(YLOG_DEBUG, "rpn_search_ref");
/* Reference to a named result set; the error below names that id. */
2615 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2619 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2620 zs->u.simple->u.resultSetId);
2627 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* A simple operand always yields a one-element list. */
2630 *num_result_sets = 1;
2631 *result_sets = nmem_malloc(stream, *num_result_sets *
2632 sizeof(**result_sets));
2633 (*result_sets)[0] = rset;
/* Neither complex nor simple. */
2637 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2648 * indent-tabs-mode: nil
2650 * vim: shiftwidth=4 tabstop=8 expandtab