1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2010 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30 #include <yaz/diagbib1.h>
32 #include <zebra_xpath.h>
37 static int log_level_set = 0;
38 static int log_level_rpn = 0;
40 #define TERMSET_DISABLE 1
/* rpn_char_map_handler: character-map callback installed by
 * rpn_char_map_prepare(). vp is cast to struct rpn_char_map_info and the
 * input is mapped through zebra_maps_input() using p->zm (last argument 0).
 * The yaz_log() calls dump a marker line and bytes of the first mapped
 * string in hex.
 * NOTE(review): this listing omits the lines around the logging (it is
 * probably a debug-only section) and the return statement -- confirm
 * against the complete file. */
42 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
44 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
45 const char **out = zebra_maps_input(p->zm, from, len, 0);
49 const char *outp = *out;
50 yaz_log(YLOG_LOG, "---");
53 yaz_log(YLOG_LOG, "%02X", *outp);
/* rpn_char_map_prepare: configure the character-map callback used by
 * dictionary grep on reg->dict.
 * For ICU-based maps (zebra_maps_is_icu) dict_grep_cmap() is called with a
 * null info pointer and a null handler; for all other maps map_info and
 * rpn_char_map_handler are registered.
 * NOTE(review): the statement that fills in map_info (presumably
 * map_info->zm = zm) is not visible in this listing. */
61 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
62 struct rpn_char_map_info *map_info)
65 if (zebra_maps_is_icu(zm))
66 dict_grep_cmap(reg->dict, 0, 0);
68 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
82 const char *index_type;
/* add_isam_p: record one dictionary hit in the grep_info accumulator.
 *   name - dictionary key; it starts with an encoded index ordinal
 *          (decoded with key_SU_decode), followed by the term bytes.
 *   info - length-prefixed value: the first byte must equal
 *          sizeof(ISAM_P) and the ISAM position is copied from info+1.
 * The position is appended to p->isam_p_buf, which is grown to
 * 2*size + 100 entries when full. The tail of the function decodes the
 * term for logging and adds it to p->termset with resultSetAddTerm().
 * NOTE(review): the third parameter, the return statements and any
 * preprocessor conditionals are missing from this listing. */
86 static int add_isam_p(const char *name, const char *info,
91 log_level_rpn = yaz_log_module_level("rpn");
94 /* we may have to stop this madness.. NOTE: -1 so that if
95 truncmax == trunxlimit we do *not* generate result sets */
96 if (p->isam_p_indx >= p->trunc_max - 1)
/* buffer full: allocate a larger one and copy the used entries over */
99 if (p->isam_p_indx == p->isam_p_size)
101 ISAM_P *new_isam_p_buf;
105 p->isam_p_size = 2*p->isam_p_size + 100;
106 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110 memcpy(new_isam_p_buf, p->isam_p_buf,
111 p->isam_p_indx * sizeof(*p->isam_p_buf));
112 xfree(p->isam_p_buf);
114 p->isam_p_buf = new_isam_p_buf;
117 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the destination is the new term_no array and the length
 * uses sizeof(*p->term_no), but the source is p->isam_p_buf. This looks
 * like it should copy from p->term_no -- confirm before relying on it. */
120 memcpy(new_term_no, p->isam_p_buf,
121 p->isam_p_indx * sizeof(*p->term_no));
124 p->term_no = new_term_no;
/* info[0] is the payload length; the payload itself starts at info+1 */
127 assert(*info == sizeof(*p->isam_p_buf));
128 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
133 char term_tmp[IT_MAX_WORD];
135 const char *index_name;
136 int len = key_SU_decode(&ord, (const unsigned char *) name);
138 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
139 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
140 zebraExplain_lookup_ord(p->zh->reg->zei,
141 ord, 0 /* index_type */, &db, &index_name);
142 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
144 resultSetAddTerm(p->zh, p->termset, name[len], db,
145 index_name, term_tmp);
/* grep_handle: callback handed to dict_lookup_grep(). It forwards each
 * match to add_isam_p(), casting the opaque pointer p to
 * struct grep_info, and returns add_isam_p()'s result. */
151 static int grep_handle(char *name, const char *info, void *p)
153 return add_isam_p(name, info, (struct grep_info *) p);
/* term_pre: prepare *src for term parsing; per the comment in the body it
 * skips white space.
 *   ct1   - optional set of characters tested with strchr() at each
 *           position (may be NULL).
 *   first - passed through to zebra_maps_input().
 * A position is treated as white space when its mapping equals CHR_SPACE.
 * Callers treat a zero return as "no term available".
 * NOTE(review): the loop, the update of *src and the return value are not
 * visible in this listing. */
156 static int term_pre(zebra_map_t zm, const char **src,
157 const char *ct1, int first)
159 const char *s1, *s0 = *src;
162 /* skip white space */
165 if (ct1 && strchr(ct1, *s0))
168 map = zebra_maps_input(zm, &s1, strlen(s1), first);
169 if (**map != *CHR_SPACE)
/* esc_str: format in_size bytes of in_buf into out_buf as a sequence of
 * "HH:c " groups (two hex digits, a colon, a character, a space), for use
 * in log output.
 * Bytes below 32 or above 126 get special treatment (presumably pc is set
 * to a placeholder; the assignment is not visible here).
 * When the output gets within 20 bytes of out_size, ".." is appended;
 * the assert requires out_size > 20 so that out_size-20 cannot wrap.
 * NOTE(review): sprintf/strcat are unbounded; safety relies entirely on
 * the strlen(out_buf) > out_size-20 check. */
178 static void esc_str(char *out_buf, size_t out_size,
179 const char *in_buf, int in_size)
185 assert(out_size > 20);
187 for (k = 0; k<in_size; k++)
189 int c = in_buf[k] & 0xff;
191 if (c < 32 || c > 126)
195 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
196 if (strlen(out_buf) > out_size-20)
198 strcat(out_buf, "..");
204 #define REGEX_CHARS " ^[]()|.*+?!\"$"
/* add_non_space: emit one non-space chunk of a term.
 * The source span [start, end) is always appended verbatim to
 * display_term. For term_dict one of two forms is written:
 *   - the raw source characters, each preceded by a backslash when it is
 *     a member of REGEX_CHARS, or
 *   - the mapped string map[0] (also passed to esc_str into tmpbuf).
 * NOTE(review): the condition selecting between the two forms (presumably
 * q_map_match) and the loop over the span are not visible here. */
206 static void add_non_space(const char *start, const char *end,
209 const char **map, int q_map_match)
211 size_t sz = end - start;
213 wrbuf_write(display_term, start, sz);
218 if (strchr(REGEX_CHARS, *start))
219 wrbuf_putc(term_dict, '\\');
220 wrbuf_putc(term_dict, *start);
227 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
229 wrbuf_puts(term_dict, map[0]);
/* term_100_icu: fetch the next token from the ICU tokenizer
 * (zebra_map_tokenize_next) and append it to term_dict as a regexp.
 * When no token is left, *src is advanced to the end of the string.
 * The display form of the token is appended to display_term.
 * The two backward scans for the 0x01 byte locate the end of the base
 * characters in the sort key (see the comment in the body) and res_len is
 * reduced to that length. Bytes that are regexp-special (REGEX_CHARS or
 * backslash) are preceded by a backslash; byte value 1 is also written in
 * front of some bytes, and ".*" is appended.
 * NOTE(review): the conditions guarding the scans, the byte-1 escape and
 * the trailing wildcard (presumably the right-truncate flag), as well as
 * the remaining parameters and return values, are missing from this
 * listing. */
234 static int term_100_icu(zebra_map_t zm,
235 const char **src, WRBUF term_dict, int space_split,
240 const char *res_buf = 0;
242 const char *display_buf;
244 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
245 &display_buf, &display_len))
247 *src += strlen(*src);
250 wrbuf_write(display_term, display_buf, display_len);
253 /* ICU sort keys seem to be of the form
254 basechars \x01 accents \x01 length
255 For now we'll just right truncate from basechars . This
256 may give false hits due to accents not being used.
259 while (--i >= 0 && res_buf[i] != '\x01')
263 while (--i >= 0 && res_buf[i] != '\x01')
267 { /* did not find base chars at all. Throw error */
270 res_len = i; /* reduce res_len */
272 for (i = 0; i < res_len; i++)
274 if (strchr(REGEX_CHARS "\\", res_buf[i]))
275 wrbuf_putc(term_dict, '\\');
277 wrbuf_putc(term_dict, 1);
279 wrbuf_putc(term_dict, res_buf[i]);
282 wrbuf_puts(term_dict, ".*");
286 /* term_100: handle term, where trunc = none(no operators at all) */
/* Reads one term from *src without interpreting any truncation operators.
 * term_pre() is called first; a zero result means there is no term.
 * Each chunk is mapped with zebra_maps_search():
 *   - a chunk mapping to CHR_SPACE ends the term in the first branch
 *     (presumably the space_split case);
 *   - in the "complete subfield" branch, space chunks are only remembered
 *     in space_start/space_end and written out later, regexp-escaped, once
 *     further non-space text follows (presumably so that trailing spaces
 *     are not emitted);
 *   - non-space chunks are emitted through add_non_space().
 * NOTE(review): loop control, the update of *src and the return value are
 * not visible in this listing. */
287 static int term_100(zebra_map_t zm,
288 const char **src, WRBUF term_dict, int space_split,
295 const char *space_start = 0;
296 const char *space_end = 0;
298 if (!term_pre(zm, src, 0, !space_split))
305 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
308 if (**map == *CHR_SPACE)
311 else /* complete subfield only. */
313 if (**map == *CHR_SPACE)
314 { /* save space mapping for later .. */
319 else if (space_start)
320 { /* reload last space */
321 while (space_start < space_end)
323 if (strchr(REGEX_CHARS, *space_start))
324 wrbuf_putc(term_dict, '\\');
325 wrbuf_putc(display_term, *space_start);
326 wrbuf_putc(term_dict, *space_start);
331 space_start = space_end = 0;
336 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
342 /* term_101: handle term, where trunc = Process # */
/* Reads one term from *src; term_pre() is given "#" as its extra
 * character set. The special character (presumably '#') is written to
 * term_dict as ".*" and echoed to display_term. All other text is mapped
 * with zebra_maps_search() and emitted through add_non_space(). With
 * space_split set, a chunk that maps to CHR_SPACE is tested for
 * (presumably to end the term).
 * NOTE(review): the remaining parameter, the loop and the return value are
 * not visible in this listing. */
343 static int term_101(zebra_map_t zm,
344 const char **src, WRBUF term_dict, int space_split,
351 if (!term_pre(zm, src, "#", !space_split))
359 wrbuf_puts(term_dict, ".*");
360 wrbuf_putc(display_term, *s0);
367 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
368 if (space_split && **map == *CHR_SPACE)
372 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
379 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* Reads one regular-expression term from *src.
 *   errors - optional output (term_102 passes NULL). When non-NULL and
 *            the term begins with '+', a digit, '+', and at least one more
 *            character, *errors is set to the value of that digit.
 * Characters from the set "^\()[].*+?|-" are copied unchanged to both
 * term_dict and display_term, so they keep their regexp meaning. Other
 * text is mapped with zebra_maps_search() and emitted through
 * add_non_space().
 * NOTE(review): the last parameter, the loop and the return value are not
 * visible in this listing. */
380 static int term_103(zebra_map_t zm, const char **src,
381 WRBUF term_dict, int *errors, int space_split,
388 if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
391 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
392 isdigit(((const unsigned char *)s0)[1]))
394 *errors = s0[1] - '0';
401 if (strchr("^\\()[].*+?|-", *s0))
403 wrbuf_putc(display_term, *s0);
404 wrbuf_putc(term_dict, *s0);
412 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
413 if (space_split && **map == *CHR_SPACE)
417 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
425 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Thin wrapper: identical to term_103() but with errors == NULL, so no
 * "+<digit>+" error-count prefix is parsed. */
426 static int term_102(zebra_map_t zm, const char **src,
427 WRBUF term_dict, int space_split, WRBUF display_term)
429 return term_103(zm, src, term_dict, NULL, space_split, display_term);
433 /* term_104: handle term, process ?, * and # */
/* term_pre() is given "?*#" as its extra character set. Visible
 * translations into term_dict:
 *   - a special character optionally followed by decimal digits: the
 *     digits are accumulated into limit and echoed to display_term, and
 *     ".?" is written (presumably once per unit of limit); without digits
 *     ".*" is written;
 *   - a second special character is written as ".*";
 *   - a third special character is written as ".".
 * NOTE(review): which of '?', '*' and '#' selects each branch is not
 * visible in this listing (presumably in that order) -- confirm.
 * Other text is mapped with zebra_maps_search() and emitted through
 * add_non_space(). */
434 static int term_104(zebra_map_t zm, const char **src,
435 WRBUF term_dict, int space_split, WRBUF display_term)
441 if (!term_pre(zm, src, "?*#", !space_split))
449 wrbuf_putc(display_term, *s0);
451 if (*s0 >= '0' && *s0 <= '9')
454 while (*s0 >= '0' && *s0 <= '9')
456 limit = limit * 10 + (*s0 - '0');
457 wrbuf_putc(display_term, *s0);
464 wrbuf_puts(term_dict, ".?");
469 wrbuf_puts(term_dict, ".*");
475 wrbuf_puts(term_dict, ".*");
476 wrbuf_putc(display_term, *s0);
482 wrbuf_puts(term_dict, ".");
483 wrbuf_putc(display_term, *s0);
490 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
491 if (space_split && **map == *CHR_SPACE)
495 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
502 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* Shared implementation for truncation 105 and 106; string_term() passes
 * right_truncate = 1 for 105 and 0 for 106.
 * term_pre() is given "\*!" as its extra character set. Visible
 * translations into term_dict: one special character becomes ".*", one
 * becomes ".", and a backslash becomes an escaped backslash. Each is
 * echoed to display_term. Other text is mapped with zebra_maps_search()
 * and emitted through add_non_space(). A final ".*" is appended
 * (presumably only when right_truncate is set).
 * NOTE(review): the tests for '*' and '!' and the right_truncate condition
 * are not visible in this listing. */
503 static int term_105(zebra_map_t zm, const char **src,
504 WRBUF term_dict, int space_split,
505 WRBUF display_term, int right_truncate)
511 if (!term_pre(zm, src, "\\*!", !space_split))
519 wrbuf_puts(term_dict, ".*");
520 wrbuf_putc(display_term, *s0);
526 wrbuf_putc(term_dict, '.');
527 wrbuf_putc(display_term, *s0);
530 else if (*s0 == '\\')
533 wrbuf_puts(term_dict, "\\\\");
534 wrbuf_putc(display_term, *s0);
541 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
542 if (space_split && **map == *CHR_SPACE)
546 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
550 wrbuf_puts(term_dict, ".*");
556 /* gen_regular_rel - generate regular expression from relation
557 * val: border value (inclusive)
558 * islt: 1 if <=; 0 if >=.
/* Builds a regular expression matching the decimal integers on one side
 * of val and appends it to term_dict.
 * The expression is assembled in the local buffer dst_buf (20*5*20 bytes)
 * with strcpy/strcat/sprintf. Two opening forms are visible: one that
 * accepts every negative number plus a group, and one that accepts every
 * non-negative number plus a negated group. The digits of val are
 * produced with sprintf("%d") and walked from the last digit backwards
 * while pos counts upward.
 * NOTE(review): no bounds check on dst_buf is visible; its size is only
 * "assumed enough". Most of the digit-expansion logic is missing from this
 * listing, so it is not described here. */
560 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
562 char dst_buf[20*5*20]; /* assuming enough for expansion */
569 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
573 strcpy(dst, "(-[0-9]+|(");
581 strcpy(dst, "([0-9]+|-(");
592 sprintf(numstr, "%d", val);
593 for (w = strlen(numstr); --w >= 0; pos++)
612 strcpy(dst + dst_p, numstr);
613 dst_p = strlen(dst) - pos - 1;
641 for (i = 0; i<pos; i++)
654 /* match everything less than 10^(pos-1) */
656 for (i = 1; i<pos; i++)
657 strcat(dst, "[0-9]?");
661 /* match everything greater than 10^pos */
662 for (i = 0; i <= pos; i++)
663 strcat(dst, "[0-9]");
664 strcat(dst, "[0-9]*");
667 wrbuf_puts(term_dict, dst);
/* string_rel_add_char: copy one logical character of wsrc, starting at
 * index *indx, to term_p. When the character is a backslash, it is written
 * and then a second wrbuf_putc follows, so an escaped pair is copied as a
 * unit.
 * NOTE(review): the increments of *indx are not visible in this listing;
 * callers loop until the index reaches wrbuf_len(), so it must advance. */
670 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
672 const char *src = wrbuf_cstr(wsrc);
673 if (src[*indx] == '\\')
675 wrbuf_putc(term_p, src[*indx]);
678 wrbuf_putc(term_p, src[*indx]);
683 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
684 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
685 * >= abc ([b-].*|a[c-].*|ab[c-].*)
686 * ([^-a].*|a[^-b].*|ab[c-].*)
687 * < abc ([-0].*|a[-a].*|ab[-b].*)
688 * ([^a-].*|a[^b-].*|ab[^c-].*)
689 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
690 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* string_relation: translate the relation attribute (type 2) of zapt into
 * a regular expression appended to term_dict.
 * The term text is first read with term_100() into the temporary
 * term_component. Depending on the relation:
 *   <, <=, >, >=  - the term is expanded into a parenthesised alternation
 *                   of "prefix + character class + .*" branches, built one
 *                   character at a time with string_rel_add_char() (see
 *                   the table in the comment preceding this function);
 *                   <= additionally accepts the term itself and > accepts
 *                   the term followed by ".+";
 *   =             - the term is wrapped in parentheses;
 *   always matches - the rest of *term_sub is skipped;
 *   otherwise     - *error_code is set to
 *                   YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.
 * term_component is destroyed on every exit path visible here.
 * NOTE(review): case labels, inner loop bounds and return values are
 * missing from this listing. */
692 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
693 const char **term_sub, WRBUF term_dict,
694 const Odr_oid *attributeSet,
695 zebra_map_t zm, int space_split,
702 WRBUF term_component = wrbuf_alloc();
704 attr_init_APT(&relation, zapt, 2);
705 relation_value = attr_find(&relation, NULL);
708 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
709 switch (relation_value)
712 if (!term_100(zm, term_sub, term_component, space_split, display_term))
714 wrbuf_destroy(term_component);
717 yaz_log(log_level_rpn, "Relation <");
719 wrbuf_putc(term_dict, '(');
720 for (i = 0; i < wrbuf_len(term_component); )
725 wrbuf_putc(term_dict, '|');
727 string_rel_add_char(term_dict, term_component, &j);
729 wrbuf_putc(term_dict, '[');
731 wrbuf_putc(term_dict, '^');
733 wrbuf_putc(term_dict, 1);
734 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
736 string_rel_add_char(term_dict, term_component, &i);
737 wrbuf_putc(term_dict, '-');
739 wrbuf_putc(term_dict, ']');
740 wrbuf_putc(term_dict, '.');
741 wrbuf_putc(term_dict, '*');
743 wrbuf_putc(term_dict, ')');
746 if (!term_100(zm, term_sub, term_component, space_split, display_term))
748 wrbuf_destroy(term_component);
751 yaz_log(log_level_rpn, "Relation <=");
753 wrbuf_putc(term_dict, '(');
754 for (i = 0; i < wrbuf_len(term_component); )
759 string_rel_add_char(term_dict, term_component, &j);
760 wrbuf_putc(term_dict, '[');
762 wrbuf_putc(term_dict, '^');
764 wrbuf_putc(term_dict, 1);
765 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
767 string_rel_add_char(term_dict, term_component, &i);
768 wrbuf_putc(term_dict, '-');
770 wrbuf_putc(term_dict, ']');
771 wrbuf_putc(term_dict, '.');
772 wrbuf_putc(term_dict, '*');
774 wrbuf_putc(term_dict, '|');
/* final alternative for <=: the term itself */
776 for (i = 0; i < wrbuf_len(term_component); )
777 string_rel_add_char(term_dict, term_component, &i);
778 wrbuf_putc(term_dict, ')');
781 if (!term_100(zm, term_sub, term_component, space_split, display_term))
783 wrbuf_destroy(term_component);
786 yaz_log(log_level_rpn, "Relation >");
788 wrbuf_putc(term_dict, '(');
789 for (i = 0; i < wrbuf_len(term_component); )
794 string_rel_add_char(term_dict, term_component, &j);
795 wrbuf_putc(term_dict, '[');
797 wrbuf_putc(term_dict, '^');
798 wrbuf_putc(term_dict, '-');
799 string_rel_add_char(term_dict, term_component, &i);
801 wrbuf_putc(term_dict, ']');
802 wrbuf_putc(term_dict, '.');
803 wrbuf_putc(term_dict, '*');
805 wrbuf_putc(term_dict, '|');
/* final alternative for >: the term followed by at least one character */
807 for (i = 0; i < wrbuf_len(term_component); )
808 string_rel_add_char(term_dict, term_component, &i);
809 wrbuf_putc(term_dict, '.');
810 wrbuf_putc(term_dict, '+');
811 wrbuf_putc(term_dict, ')');
814 if (!term_100(zm, term_sub, term_component, space_split, display_term))
816 wrbuf_destroy(term_component);
819 yaz_log(log_level_rpn, "Relation >=");
821 wrbuf_putc(term_dict, '(');
822 for (i = 0; i < wrbuf_len(term_component); )
827 wrbuf_putc(term_dict, '|');
829 string_rel_add_char(term_dict, term_component, &j);
830 wrbuf_putc(term_dict, '[');
/* the last character uses an inclusive range, earlier ones a negated one */
832 if (i < wrbuf_len(term_component)-1)
834 wrbuf_putc(term_dict, '^');
835 wrbuf_putc(term_dict, '-');
836 string_rel_add_char(term_dict, term_component, &i);
840 string_rel_add_char(term_dict, term_component, &i);
841 wrbuf_putc(term_dict, '-');
843 wrbuf_putc(term_dict, ']');
844 wrbuf_putc(term_dict, '.');
845 wrbuf_putc(term_dict, '*');
847 wrbuf_putc(term_dict, ')');
854 yaz_log(log_level_rpn, "Relation =");
855 if (!term_100(zm, term_sub, term_component, space_split, display_term))
857 wrbuf_destroy(term_component);
860 wrbuf_puts(term_dict, "(");
861 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
862 wrbuf_puts(term_dict, ")");
865 yaz_log(log_level_rpn, "Relation always matches");
866 /* skip to end of term (we don't care what it is) */
867 while (**term_sub != '\0')
871 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
872 wrbuf_destroy(term_component);
875 wrbuf_destroy(term_component);
/* Forward declaration of string_term(); it is called by search_term()
 * before its definition further down in this file. */
879 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
880 const char **term_sub,
882 const Odr_oid *attributeSet, NMEM stream,
883 struct grep_info *grep_info,
884 const char *index_type, int complete_flag,
886 const char *xpath_use,
887 struct ord_list **ol,
/* zebra_term_limits_APT: read the per-term limit attributes from zapt.
 *   attribute type 11 - hits limit; *hits_limit_value is overwritten only
 *                       when the attribute is present (value != -1), so
 *                       the caller must initialise it beforehand.
 *   attribute type 10 - term reference id; attr_find_ex() may return it as
 *                       a string through term_ref_id_str, and a numeric id
 *                       >= 0 is formatted with "%d" into a 20-byte buffer
 *                       allocated from nmem.
 * NOTE(review): the final parameter (used as nmem) and the return
 * statement are not visible in this listing. */
890 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
891 Z_AttributesPlusTerm *zapt,
892 zint *hits_limit_value,
893 const char **term_ref_id_str,
896 AttrType term_ref_id_attr;
897 AttrType hits_limit_attr;
899 zint hits_limit_from_attr;
901 attr_init_APT(&hits_limit_attr, zapt, 11);
902 hits_limit_from_attr = attr_find(&hits_limit_attr, NULL);
904 attr_init_APT(&term_ref_id_attr, zapt, 10);
905 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
906 if (term_ref_id_int >= 0)
908 char *res = nmem_malloc(nmem, 20);
909 sprintf(res, "%d", term_ref_id_int);
910 *term_ref_id_str = res;
912 if (hits_limit_from_attr != -1)
913 *hits_limit_value = hits_limit_from_attr;
915 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
916 *term_ref_id_str ? *term_ref_id_str : "none",
921 /** \brief search for term (which may be truncated)
/* Steps visible in this function:
 *   1. hits_limit_value starts as the caller's hits_limit and may be
 *      overridden by zebra_term_limits_APT().
 *   2. grep_info->isam_p_indx is reset and string_term() fills grep_info
 *      with matching ISAM positions, writing the display form of the term
 *      to display_term.
 *   3. term_dict is only needed by string_term() and is destroyed right
 *      after the call.
 *   4. On ZEBRA_OK with *term_sub non-null, rset_trunc() builds *rset from
 *      the collected positions; display_term supplies the term name.
 *   5. display_term is destroyed before returning.
 * NOTE(review): some parameters and the return statement are missing from
 * this listing. */
923 static ZEBRA_RES search_term(ZebraHandle zh,
924 Z_AttributesPlusTerm *zapt,
925 const char **term_sub,
926 const Odr_oid *attributeSet,
927 zint hits_limit, NMEM stream,
928 struct grep_info *grep_info,
929 const char *index_type, int complete_flag,
930 const char *rank_type,
931 const char *xpath_use,
934 struct rset_key_control *kc,
939 zint hits_limit_value = hits_limit;
940 const char *term_ref_id_str = 0;
941 WRBUF term_dict = wrbuf_alloc();
942 WRBUF display_term = wrbuf_alloc();
944 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
946 grep_info->isam_p_indx = 0;
947 res = string_term(zh, zapt, term_sub, term_dict,
948 attributeSet, stream, grep_info,
949 index_type, complete_flag,
950 display_term, xpath_use, &ol, zm);
951 wrbuf_destroy(term_dict);
952 if (res == ZEBRA_OK && *term_sub)
954 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
955 *rset = rset_trunc(zh, grep_info->isam_p_buf,
956 grep_info->isam_p_indx, wrbuf_buf(display_term),
957 wrbuf_len(display_term), rank_type,
958 1 /* preserve pos */,
959 zapt->term->which, rset_nmem,
960 kc, kc->scope, ol, index_type, hits_limit_value,
965 wrbuf_destroy(display_term);
/* string_term: build the dictionary regular expression for one term and
 * run dict_lookup_grep() with it, collecting hits in grep_info.
 *
 * The expression starts with the index ordinal (zebra_apt_get_ord +
 * key_SU_encode) wrapped in parentheses, each byte preceded by the
 * internal escape byte 1. prefix_len records the length of that prefix
 * and ord_len is passed to dict_lookup_grep() as the number of "exact"
 * leading characters.
 *
 * ICU maps: relation 103 skips the term; relations 3, 102 and unspecified
 * accept truncation unspecified/100 (no truncation) and 1 (right
 * truncation) via term_100_icu(); other truncation or relation values
 * raise YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE or
 * YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.
 *
 * Non-ICU maps dispatch on the truncation attribute (type 5):
 *   -1/100 string_relation()    1/2/3 term_100() with ".*" on the right,
 *   left or both sides          101 term_101()   102 term_102()
 *   103 term_103() (may set regex_range)         104 term_104()
 *   105/106 term_105() with right_truncate 1/0
 * Each term is wrapped in parentheses; unknown values raise
 * YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE.
 *
 * NOTE(review): returns, break statements and the handling of the
 * dict_lookup_grep() result codes are only partly visible here. */
969 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
970 const char **term_sub,
972 const Odr_oid *attributeSet, NMEM stream,
973 struct grep_info *grep_info,
974 const char *index_type, int complete_flag,
976 const char *xpath_use,
977 struct ord_list **ol,
982 int truncation_value;
984 struct rpn_char_map_info rcmi;
986 int space_split = complete_flag ? 0 : 1;
989 int max_pos, prefix_len = 0;
994 *ol = ord_list_create(stream);
996 rpn_char_map_prepare(zh->reg, zm, &rcmi);
997 attr_init_APT(&truncation, zapt, 5);
998 truncation_value = attr_find(&truncation, NULL);
999 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1001 termp = *term_sub; /* start of term for each database */
1003 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1004 attributeSet, &ord) != ZEBRA_OK)
1010 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1012 *ol = ord_list_append(stream, *ol, ord);
1013 ord_len = key_SU_encode(ord, ord_buf);
1015 wrbuf_putc(term_dict, '(');
1017 for (i = 0; i<ord_len; i++)
1019 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1020 wrbuf_putc(term_dict, ord_buf[i]);
1022 wrbuf_putc(term_dict, ')');
1024 prefix_len = wrbuf_len(term_dict);
1026 if (zebra_maps_is_icu(zm))
1031 attr_init_APT(&relation, zapt, 2);
1032 relation_value = attr_find(&relation, NULL);
1033 if (relation_value == 103) /* always matches */
1034 termp += strlen(termp); /* move to end of term */
1035 else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1038 switch (truncation_value)
1040 case -1: /* not specified */
1041 case 100: /* do not truncate */
1042 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1048 case 1: /* right truncation */
1049 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1056 zebra_setError_zint(zh,
1057 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1064 zebra_setError_zint(zh,
1065 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1072 /* non-ICU case. using string.chr and friends */
1073 switch (truncation_value)
1075 case -1: /* not specified */
1076 case 100: /* do not truncate */
1077 if (!string_relation(zh, zapt, &termp, term_dict,
1079 zm, space_split, display_term,
1084 zebra_setError(zh, relation_error, 0);
1091 case 1: /* right truncation */
1092 wrbuf_putc(term_dict, '(');
1093 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1098 wrbuf_puts(term_dict, ".*)");
1100 case 2: /* left truncation */
1101 wrbuf_puts(term_dict, "(.*");
1102 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1107 wrbuf_putc(term_dict, ')');
1109 case 3: /* left&right truncation */
1110 wrbuf_puts(term_dict, "(.*");
1111 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1116 wrbuf_puts(term_dict, ".*)");
1118 case 101: /* process # in term */
1119 wrbuf_putc(term_dict, '(');
1120 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1125 wrbuf_puts(term_dict, ")");
1127 case 102: /* Regexp-1 */
1128 wrbuf_putc(term_dict, '(');
1129 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1134 wrbuf_putc(term_dict, ')');
1136 case 103: /* Regexp-2 */
1138 wrbuf_putc(term_dict, '(');
/* NOTE(review): the fourth argument on the next line appears mangled in
 * this listing; term_103() takes an int pointer there and regex_range is
 * later passed to dict_lookup_grep(), so it is presumably the address of
 * regex_range -- restore before compiling. */
1139 if (!term_103(zm, &termp, term_dict, ®ex_range,
1140 space_split, display_term))
1145 wrbuf_putc(term_dict, ')');
1147 case 104: /* process # and ! in term */
1148 wrbuf_putc(term_dict, '(');
1149 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1154 wrbuf_putc(term_dict, ')');
1156 case 105: /* process * and ! in term */
1157 wrbuf_putc(term_dict, '(');
1158 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1163 wrbuf_putc(term_dict, ')');
1165 case 106: /* process * and ! in term */
1166 wrbuf_putc(term_dict, '(');
1167 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1172 wrbuf_putc(term_dict, ')');
1175 zebra_setError_zint(zh,
1176 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* diagnostics: dump the expression after the ordinal prefix */
1184 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1185 esc_str(buf, sizeof(buf), input, strlen(input));
1188 WRBUF pr_wr = wrbuf_alloc();
1190 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1191 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1192 wrbuf_destroy(pr_wr);
1194 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1195 grep_info, &max_pos,
1196 ord_len /* number of "exact" chars */,
1199 zebra_set_partial_result(zh);
1201 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1203 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* grep_info_delete: release the heap buffers owned by grep_info, namely
 * term_no and isam_p_buf. The struct itself is not freed here.
 * NOTE(review): any preprocessor guard around the term_no line is not
 * visible in this listing. */
1209 static void grep_info_delete(struct grep_info *grep_info)
1212 xfree(grep_info->term_no);
1214 xfree(grep_info->isam_p_buf);
/* grep_info_prepare: initialise grep_info for a new term search.
 *   - buffers start empty (term_no = 0, isam_p_buf = NULL, size 0);
 *   - trunc_max comes from the "truncmax" resource (default "10000",
 *     parsed with atoi) and is overridden by attribute type 13 when that
 *     attribute is present;
 *   - attribute type 8 names a term set: a numeric value is formatted
 *     with "%d", otherwise the string value is used, and the set is
 *     created with resultSetAdd(). Failure to create it raises
 *     YAZ_BIB1_ILLEGAL_RESULT_SET_NAME.
 * NOTE(review): both an "unsupported termset" error and the termset
 * creation code appear after the same test; the lines that select between
 * them (cf. the TERMSET_DISABLE macro in this file) and the return
 * statements are not visible in this listing. */
1217 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1218 Z_AttributesPlusTerm *zapt,
1219 struct grep_info *grep_info,
1220 const char *index_type)
1223 grep_info->term_no = 0;
1225 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1226 grep_info->isam_p_size = 0;
1227 grep_info->isam_p_buf = NULL;
1229 grep_info->index_type = index_type;
1230 grep_info->termset = 0;
1236 attr_init_APT(&truncmax, zapt, 13);
1237 truncmax_value = attr_find(&truncmax, NULL);
1238 if (truncmax_value != -1)
1239 grep_info->trunc_max = truncmax_value;
1244 int termset_value_numeric;
1245 const char *termset_value_string;
1247 attr_init_APT(&termset, zapt, 8);
1248 termset_value_numeric =
1249 attr_find_ex(&termset, NULL, &termset_value_string);
1250 if (termset_value_numeric != -1)
1253 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1257 const char *termset_name = 0;
1258 if (termset_value_numeric != -2)
1261 sprintf(resname, "%d", termset_value_numeric);
1262 termset_name = resname;
1265 termset_name = termset_value_string;
1266 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1267 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1268 if (!grep_info->termset)
1270 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
/* search_terms_chrmap: split termz into terms and create one result set
 * per term.
 * After grep_info_prepare(), search_term() is called repeatedly with termp
 * as a cursor into termz. Each result is stored at
 * (*result_sets)[*num_result_sets]; the array is grown by allocating a
 * larger block from stream and copying the old entries whenever
 * alloc_sets == *num_result_sets. A null result set is tested for
 * (presumably ending the loop) before the count is incremented.
 * On a search_term() failure all result sets created so far are deleted
 * and grep_info is released. grep_info is also released on the normal
 * exit path.
 * NOTE(review): several parameters, the loop statement and the return
 * statements are missing from this listing. */
1279 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1280 Z_AttributesPlusTerm *zapt,
1282 const Odr_oid *attributeSet,
1285 const char *index_type, int complete_flag,
1286 const char *rank_type,
1287 const char *xpath_use,
1289 RSET **result_sets, int *num_result_sets,
1290 struct rset_key_control *kc,
1293 struct grep_info grep_info;
1294 const char *termp = termz;
1297 *num_result_sets = 0;
1298 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1304 if (alloc_sets == *num_result_sets)
1307 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1310 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1311 alloc_sets = alloc_sets + add;
1312 *result_sets = rnew;
1314 res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1316 index_type, complete_flag,
1318 xpath_use, rset_nmem,
1319 &(*result_sets)[*num_result_sets],
1321 if (res != ZEBRA_OK)
1324 for (i = 0; i < *num_result_sets; i++)
1325 rset_delete((*result_sets)[i]);
1326 grep_info_delete(&grep_info);
1329 if ((*result_sets)[*num_result_sets] == 0)
1331 (*num_result_sets)++;
1336 grep_info_delete(&grep_info);
1341 \brief Create result set(s) for list of terms
1342 \param zh Zebra Handle
1343 \param zapt Attributes Plus Term (RPN leaf)
1344 \param termz term as used in query but converted to UTF-8
1345 \param attributeSet default attribute set
1346 \param stream memory for result
1347 \param index_type register type ("w", "p",..)
1348 \param complete_flag whether it's phrases or not
1349 \param rank_type term flags for ranking
1350 \param xpath_use use attribute for X-Path (-1 for no X-path)
1351 \param rset_nmem memory for result sets
1352 \param result_sets output result set for each term in list (output)
1353 \param num_result_sets number of output result sets
1354 \param kc rset key control to be used for created result sets
/* Looks up (or creates) the zebra map for index_type, starts the ICU
 * tokenizer on termz when the map is ICU-based, and then delegates all
 * remaining work to search_terms_chrmap(). */
1356 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1357 Z_AttributesPlusTerm *zapt,
1359 const Odr_oid *attributeSet,
1362 const char *index_type, int complete_flag,
1363 const char *rank_type,
1364 const char *xpath_use,
1366 RSET **result_sets, int *num_result_sets,
1367 struct rset_key_control *kc)
1369 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1370 if (zebra_maps_is_icu(zm))
1371 zebra_map_tokenize_start(zm, termz, strlen(termz));
1372 return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1373 stream, index_type, complete_flag,
1374 rank_type, xpath_use,
1375 rset_nmem, result_sets, num_result_sets,
1380 /** \brief limit a search by position - returns result set
/* Reads the position attribute (type 3) of zapt and, for the supported
 * value, creates a result set that restricts hits to the first position
 * in a field.
 * Unsupported position values, and maps for which
 * zebra_maps_is_first_in_field() is false, raise
 * YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE.
 * The lookup key is the encoded index ordinal followed by
 * FIRST_IN_FIELD_STR, built in the 100-byte stack buffer term_dict and
 * looked up with dict_lookup(). The value is length-prefixed: the first
 * byte must equal sizeof(ISAM_P) and the position follows it.
 * NOTE(review): the case labels, the check that val is non-null and the
 * return statements are not visible in this listing. */
1382 static ZEBRA_RES search_position(ZebraHandle zh,
1383 Z_AttributesPlusTerm *zapt,
1384 const Odr_oid *attributeSet,
1385 const char *index_type,
1388 struct rset_key_control *kc)
1394 char term_dict[100];
1398 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1400 attr_init_APT(&position, zapt, 3);
1401 position_value = attr_find(&position, NULL);
1402 switch(position_value)
1411 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1417 if (!zebra_maps_is_first_in_field(zm))
1419 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1424 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1425 attributeSet, &ord) != ZEBRA_OK)
1429 ord_len = key_SU_encode(ord, ord_buf);
1430 memcpy(term_dict, ord_buf, ord_len);
1431 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1432 val = dict_lookup(zh->reg->dict, term_dict);
1435 assert(*val == sizeof(ISAM_P));
1436 memcpy(&isam_p, val+1, sizeof(isam_p));
1438 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1444 /** \brief returns result set for phrase search
/* Phrase search: one result set per term, combined by ordered proximity.
 * search_terms_list() produces the per-term result sets. When there is at
 * least one, search_position() is consulted; on failure all per-term sets
 * are deleted. A position set (first_set) is prepended by allocating a new
 * array of num_result_sets+1 entries from stream.
 * Final combination:
 *   0 sets  - null result set
 *   1 set   - that set is returned as is
 *   more    - rset_create_prox() with ordered = 1, exclusion = 0,
 *             relation = 3, distance = 1
 * NOTE(review): the test for first_set being non-null, the matching
 * increment of num_result_sets and the return statements are not visible
 * in this listing. */
1446 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1447 Z_AttributesPlusTerm *zapt,
1448 const char *termz_org,
1449 const Odr_oid *attributeSet,
1452 const char *index_type,
1454 const char *rank_type,
1455 const char *xpath_use,
1458 struct rset_key_control *kc)
1460 RSET *result_sets = 0;
1461 int num_result_sets = 0;
1463 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1464 stream, index_type, complete_flag,
1465 rank_type, xpath_use,
1467 &result_sets, &num_result_sets, kc);
1469 if (res != ZEBRA_OK)
1472 if (num_result_sets > 0)
1475 res = search_position(zh, zapt, attributeSet,
1477 rset_nmem, &first_set,
1479 if (res != ZEBRA_OK)
1482 for (i = 0; i<num_result_sets; i++)
1483 rset_delete(result_sets[i]);
1488 RSET *nsets = nmem_malloc(stream,
1489 sizeof(RSET) * (num_result_sets+1));
1490 nsets[0] = first_set;
1491 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1492 result_sets = nsets;
1496 if (num_result_sets == 0)
1497 *rset = rset_create_null(rset_nmem, kc, 0);
1498 else if (num_result_sets == 1)
1499 *rset = result_sets[0];
1501 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1502 num_result_sets, result_sets,
1503 1 /* ordered */, 0 /* exclusion */,
1504 3 /* relation */, 1 /* distance */);
1510 /** \brief returns result set for or-list search
/* Or-list search: one result set per term, combined with OR.
 * search_terms_list() produces the per-term result sets. For each of them
 * search_position() is consulted; on failure all per-term sets are
 * deleted. A position set (first_set) is paired with the term's set in
 * tmp_set and the entry is replaced by an ordered proximity set
 * (distance 1, relation 3) over the pair.
 * Final combination: 0 sets gives a null result set, 1 set is returned as
 * is, more are merged with rset_create_or().
 * NOTE(review): the test for first_set being non-null and the return
 * statements are not visible in this listing. */
1512 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1513 Z_AttributesPlusTerm *zapt,
1514 const char *termz_org,
1515 const Odr_oid *attributeSet,
1518 const char *index_type,
1520 const char *rank_type,
1521 const char *xpath_use,
1524 struct rset_key_control *kc)
1526 RSET *result_sets = 0;
1527 int num_result_sets = 0;
1530 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1531 stream, index_type, complete_flag,
1532 rank_type, xpath_use,
1534 &result_sets, &num_result_sets, kc);
1535 if (res != ZEBRA_OK)
1538 for (i = 0; i<num_result_sets; i++)
1541 res = search_position(zh, zapt, attributeSet,
1543 rset_nmem, &first_set,
1545 if (res != ZEBRA_OK)
1547 for (i = 0; i<num_result_sets; i++)
1548 rset_delete(result_sets[i]);
1556 tmp_set[0] = first_set;
1557 tmp_set[1] = result_sets[i];
1559 result_sets[i] = rset_create_prox(
1560 rset_nmem, kc, kc->scope,
1562 1 /* ordered */, 0 /* exclusion */,
1563 3 /* relation */, 1 /* distance */);
1566 if (num_result_sets == 0)
1567 *rset = rset_create_null(rset_nmem, kc, 0);
1568 else if (num_result_sets == 1)
1569 *rset = result_sets[0];
1571 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1572 num_result_sets, result_sets);
1578 /** \brief returns result set for and-list search
/* And-list search: one result set per term, combined with AND.
 * The structure mirrors rpn_search_APT_or_list(): search_terms_list()
 * produces the per-term result sets, search_position() is consulted for
 * each of them (all sets are deleted on failure), and a position set is
 * paired with the term's set through an ordered proximity set
 * (distance 1, relation 3).
 * Final combination: 0 sets gives a null result set, 1 set is returned as
 * is, more are intersected with rset_create_and().
 * NOTE(review): the test for first_set being non-null and the return
 * statements are not visible in this listing. */
1580 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1581 Z_AttributesPlusTerm *zapt,
1582 const char *termz_org,
1583 const Odr_oid *attributeSet,
1586 const char *index_type,
1588 const char *rank_type,
1589 const char *xpath_use,
1592 struct rset_key_control *kc)
1594 RSET *result_sets = 0;
1595 int num_result_sets = 0;
1598 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1599 stream, index_type, complete_flag,
1600 rank_type, xpath_use,
1602 &result_sets, &num_result_sets,
1604 if (res != ZEBRA_OK)
1606 for (i = 0; i<num_result_sets; i++)
1609 res = search_position(zh, zapt, attributeSet,
1611 rset_nmem, &first_set,
1613 if (res != ZEBRA_OK)
1615 for (i = 0; i<num_result_sets; i++)
1616 rset_delete(result_sets[i]);
1624 tmp_set[0] = first_set;
1625 tmp_set[1] = result_sets[i];
1627 result_sets[i] = rset_create_prox(
1628 rset_nmem, kc, kc->scope,
1630 1 /* ordered */, 0 /* exclusion */,
1631 3 /* relation */, 1 /* distance */);
1636 if (num_result_sets == 0)
1637 *rset = rset_create_null(rset_nmem, kc, 0);
1638 else if (num_result_sets == 1)
1639 *rset = result_sets[0];
1641 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1642 num_result_sets, result_sets);
/* numeric_relation: translate the relation attribute (type 2) of zapt for
 * a numeric term into a regular expression appended to term_dict, then run
 * dict_lookup_grep() with it.
 * The term is read with term_100() (space_split fixed at 1) into term_num
 * and converted with atoi(). Per relation:
 *   <   gen_regular_rel(term_value-1, islt = 1)
 *   <=  gen_regular_rel(term_value,   islt = 1)
 *   >=  gen_regular_rel(term_value,   islt = 0)
 *   >   gen_regular_rel(term_value+1, islt = 0)
 *   =   the literal pattern "(0*<value>)", i.e. leading zeros allowed
 * One further case skips the rest of the term, and unknown relations set
 * *error_code to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.
 * term_num is destroyed on every exit path visible here.
 * NOTE(review): atoi() yields 0 for non-numeric input and reports no
 * error, and term_value-1 / term_value+1 are computed without range
 * checks. Case labels and return values are missing from this listing. */
1648 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1649 const char **term_sub,
1651 const Odr_oid *attributeSet,
1652 struct grep_info *grep_info,
1662 WRBUF term_num = wrbuf_alloc();
1665 attr_init_APT(&relation, zapt, 2);
1666 relation_value = attr_find(&relation, NULL);
1668 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1670 switch (relation_value)
1673 yaz_log(log_level_rpn, "Relation <");
1674 if (!term_100(zm, term_sub, term_num, 1, display_term))
1676 wrbuf_destroy(term_num);
1679 term_value = atoi(wrbuf_cstr(term_num));
1680 gen_regular_rel(term_dict, term_value-1, 1);
1683 yaz_log(log_level_rpn, "Relation <=");
1684 if (!term_100(zm, term_sub, term_num, 1, display_term))
1686 wrbuf_destroy(term_num);
1689 term_value = atoi(wrbuf_cstr(term_num));
1690 gen_regular_rel(term_dict, term_value, 1);
1693 yaz_log(log_level_rpn, "Relation >=");
1694 if (!term_100(zm, term_sub, term_num, 1, display_term))
1696 wrbuf_destroy(term_num);
1699 term_value = atoi(wrbuf_cstr(term_num));
1700 gen_regular_rel(term_dict, term_value, 0);
1703 yaz_log(log_level_rpn, "Relation >");
1704 if (!term_100(zm, term_sub, term_num, 1, display_term))
1706 wrbuf_destroy(term_num);
1709 term_value = atoi(wrbuf_cstr(term_num));
1710 gen_regular_rel(term_dict, term_value+1, 0);
1714 yaz_log(log_level_rpn, "Relation =");
1715 if (!term_100(zm, term_sub, term_num, 1, display_term))
1717 wrbuf_destroy(term_num);
1720 term_value = atoi(wrbuf_cstr(term_num));
1721 wrbuf_printf(term_dict, "(0*%d)", term_value);
1724 /* term_tmp untouched.. */
1725 while (**term_sub != '\0')
1729 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1730 wrbuf_destroy(term_num);
1733 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1734 0, grep_info, max_pos, 0, grep_handle);
1737 zebra_set_partial_result(zh);
1739 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1740 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1741 wrbuf_destroy(term_num);
/* numeric_term: build the dictionary expression for one numeric term and
 * look it up.
 * As in string_term(), the expression starts with the index ordinal
 * (zebra_apt_get_ord + key_SU_encode) wrapped in parentheses, each byte
 * preceded by the escape byte 1; the ordinal is also appended to *ol.
 * numeric_relation() then appends the numeric pattern and performs the
 * dictionary grep. When it returns zero, relation_error is reported
 * through zebra_setError().
 * NOTE(review): the test on relation_error, the update of *term_sub and
 * the return statements are not visible in this listing. */
1745 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1746 const char **term_sub,
1748 const Odr_oid *attributeSet, NMEM stream,
1749 struct grep_info *grep_info,
1750 const char *index_type, int complete_flag,
1752 const char *xpath_use,
1753 struct ord_list **ol)
1756 struct rpn_char_map_info rcmi;
1758 int relation_error = 0;
1759 int ord, ord_len, i;
1761 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1763 *ol = ord_list_create(stream);
1765 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1769 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1770 attributeSet, &ord) != ZEBRA_OK)
1775 wrbuf_rewind(term_dict);
1777 *ol = ord_list_append(stream, *ol, ord);
1779 ord_len = key_SU_encode(ord, ord_buf);
1781 wrbuf_putc(term_dict, '(');
1782 for (i = 0; i < ord_len; i++)
1784 wrbuf_putc(term_dict, 1);
1785 wrbuf_putc(term_dict, ord_buf[i]);
1787 wrbuf_putc(term_dict, ')');
1789 if (!numeric_relation(zh, zapt, &termp, term_dict,
1790 attributeSet, grep_info, &max_pos, zm,
1791 display_term, &relation_error))
1795 zebra_setError(zh, relation_error, 0);
1802 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Search handler for numeric terms.
 *
 * numeric_term() is run on the remaining term text (termp); the ISAM
 * positions it leaves in grep_info are turned into a result set with
 * rset_trunc() and stored in result_sets[num_result_sets].  The result_sets
 * array is allocated from the stream NMEM and re-allocated (with the old
 * entries copied) whenever it is full.
 *
 * The set returned through *rset is:
 *   - a null result set when no sets were produced,
 *   - the single set when exactly one was produced,
 *   - otherwise an AND over all produced sets (rset_create_and). */
1807 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1808 Z_AttributesPlusTerm *zapt,
1810 const Odr_oid *attributeSet,
1812 const char *index_type,
1814 const char *rank_type,
1815 const char *xpath_use,
1818 struct rset_key_control *kc)
1820 const char *termp = termz;
1821 RSET *result_sets = 0;
1822 int num_result_sets = 0;
1824 struct grep_info grep_info;
1826 zint hits_limit_value;
1827 const char *term_ref_id_str = 0;
1829 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1832 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1833 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1837 struct ord_list *ol;
1838 WRBUF term_dict = wrbuf_alloc();
1839 WRBUF display_term = wrbuf_alloc();
/* the array is full: allocate a larger one and carry the existing sets over */
1840 if (alloc_sets == num_result_sets)
1843 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1846 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1847 alloc_sets = alloc_sets + add;
1850 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* reset the position count before collecting matches for this term */
1851 grep_info.isam_p_indx = 0;
1852 res = numeric_term(zh, zapt, &termp, term_dict,
1853 attributeSet, stream, &grep_info,
1854 index_type, complete_flag,
1855 display_term, xpath_use, &ol);
1856 wrbuf_destroy(term_dict);
1857 if (res == ZEBRA_FAIL || termp == 0)
1859 wrbuf_destroy(display_term);
1862 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1863 result_sets[num_result_sets] =
1864 rset_trunc(zh, grep_info.isam_p_buf,
1865 grep_info.isam_p_indx, wrbuf_buf(display_term),
1866 wrbuf_len(display_term), rank_type,
1867 0 /* preserve position */,
1868 zapt->term->which, rset_nmem,
1869 kc, kc->scope, ol, index_type,
1872 wrbuf_destroy(display_term);
1873 if (!result_sets[num_result_sets])
1879 grep_info_delete(&grep_info);
1881 if (res != ZEBRA_OK)
/* combine the per-term sets into the single set handed back to the caller */
1883 if (num_result_sets == 0)
1884 *rset = rset_create_null(rset_nmem, kc, 0);
1885 else if (num_result_sets == 1)
1886 *rset = result_sets[0];
1888 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1889 num_result_sets, result_sets);
/* Search handler for the "local" search type.
 *
 * The term is parsed as a system number with atozint() and the record is
 * fetched with rec_get().  *rset becomes either a null result set or a
 * temporary result set (created in the directory named by the "setTmpDir"
 * resource) that is opened for writing and receives one key.
 *
 * NOTE(review): presumably the null set is used when rec_get() finds no
 * record - confirm against the branch condition. */
1895 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1896 Z_AttributesPlusTerm *zapt,
1898 const Odr_oid *attributeSet,
1900 const char *rank_type, NMEM rset_nmem,
1902 struct rset_key_control *kc)
1905 zint sysno = atozint(termz);
1909 rec = rec_get(zh->reg->records, sysno);
1917 *rset = rset_create_null(rset_nmem, kc, 0);
1923 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1924 res_get(zh->res, "setTmpDir"), 0);
1925 rsfd = rset_open(*rset, RSETF_WRITE);
1930 rset_write(rsfd, &key);
/* Turn an attributes-plus-term operand into an entry of sort_sequence.
 *
 * The term (which must be a general term) is parsed as an integer with
 * atoi_n() and used as the index into sort_sequence->specs.  The specs array
 * is created on first use with 10 slots, all set to 0.  The new
 * Z_SortKeySpec is allocated from stream and uses:
 *   - a generic sort element with sort attributes taken from zapt->attributes
 *     and a copy of attributeSet as the attribute set id,
 *   - the sort relation from attribute type 7: value 2 gives descending,
 *     value 1 and any other value give ascending,
 *   - case sensitivity 0 and a "null" missing-value choice.
 * The operand itself contributes no hits: *rset is a null result set.
 *
 * NOTE(review): only the check "i >= num_specs" is visible; confirm that a
 * negative value from atoi_n() cannot be used as an index into specs. */
1936 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1937 const Odr_oid *attributeSet, NMEM stream,
1938 Z_SortKeySpecList *sort_sequence,
1939 const char *rank_type,
1942 struct rset_key_control *kc)
1945 int sort_relation_value;
1946 AttrType sort_relation_type;
1951 attr_init_APT(&sort_relation_type, zapt, 7);
1952 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort operand seen: create the spec table with every slot empty */
1954 if (!sort_sequence->specs)
1956 sort_sequence->num_specs = 10;
1957 sort_sequence->specs = (Z_SortKeySpec **)
1958 nmem_malloc(stream, sort_sequence->num_specs *
1959 sizeof(*sort_sequence->specs));
1960 for (i = 0; i<sort_sequence->num_specs; i++)
1961 sort_sequence->specs[i] = 0;
1963 if (zapt->term->which != Z_Term_general)
/* the term text is the position of this key within the sort sequence */
1966 i = atoi_n((char *) zapt->term->u.general->buf,
1967 zapt->term->u.general->len);
1968 if (i >= sort_sequence->num_specs)
1970 sprintf(termz, "%d", i);
1972 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1973 sks->sortElement = (Z_SortElement *)
1974 nmem_malloc(stream, sizeof(*sks->sortElement));
1975 sks->sortElement->which = Z_SortElement_generic;
1976 sk = sks->sortElement->u.generic = (Z_SortKey *)
1977 nmem_malloc(stream, sizeof(*sk));
1978 sk->which = Z_SortKey_sortAttributes;
1979 sk->u.sortAttributes = (Z_SortAttributes *)
1980 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1982 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1983 sk->u.sortAttributes->list = zapt->attributes;
1985 sks->sortRelation = (Odr_int *)
1986 nmem_malloc(stream, sizeof(*sks->sortRelation));
1987 if (sort_relation_value == 1)
1988 *sks->sortRelation = Z_SortKeySpec_ascending;
1989 else if (sort_relation_value == 2)
1990 *sks->sortRelation = Z_SortKeySpec_descending;
1992 *sks->sortRelation = Z_SortKeySpec_ascending;
1994 sks->caseSensitivity = (Odr_int *)
1995 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1996 *sks->caseSensitivity = 0;
1998 sks->which = Z_SortKeySpec_null;
1999 sks->u.null = odr_nullval ();
2000 sort_sequence->specs[i] = sks;
2001 *rset = rset_create_null(rset_nmem, kc, 0);
/* Decide whether the use attribute (type 1) of zapt is an X-Path expression.
 *
 * The attribute is looked up as a string.  Only a string that starts with '/'
 * is handed to zebra_parse_xpath_str(), which fills at most max entries of
 * xpath; that function's return value is returned to the caller.
 *
 * NOTE(review): the value returned when the use attribute is missing or does
 * not start with '/' is presumably a "not an X-Path" indicator - confirm. */
2006 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2007 const Odr_oid *attributeSet,
2008 struct xpath_location_step *xpath, int max,
2011 const Odr_oid *curAttributeSet = attributeSet;
2013 const char *use_string = 0;
2015 attr_init_APT(&use, zapt, 1);
2016 attr_find_ex(&use, &curAttributeSet, &use_string);
2018 if (!use_string || *use_string != '/')
2021 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Build a result set for one X-Path helper index lookup.
 *
 * The ordinal for (index_type, xpath_use) is looked up in the index category
 * of the explain database.  The lookup expression is the key_SU_encode() form
 * of the ordinal - a parenthesised group where each byte is preceded by a
 * byte of value 1 - followed by term, which is passed to dict_lookup_grep()
 * unchanged.  The matching positions are converted with rset_trunc() using
 * the flags string "void" and term type Z_Term_characterString.
 *
 * A null result set is returned when grep_info_prepare() fails.
 * NOTE(review): the second null-set return is presumably taken when the
 * ordinal lookup fails - confirm against its condition. */
2026 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2027 const char *index_type, const char *term,
2028 const char *xpath_use,
2030 struct rset_key_control *kc)
2032 struct grep_info grep_info;
2033 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2034 zinfo_index_category_index,
2035 index_type, xpath_use);
2036 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2037 return rset_create_null(rset_nmem, kc, 0);
2040 return rset_create_null(rset_nmem, kc, 0);
2046 WRBUF term_dict = wrbuf_alloc();
2047 int ord_len = key_SU_encode(ord, ord_buf);
2048 int term_type = Z_Term_characterString;
2049 const char *flags = "void";
/* emit "(" 1 b0 1 b1 ... ")" for the encoded ordinal, then the caller's term */
2051 wrbuf_putc(term_dict, '(');
2052 for (i = 0; i<ord_len; i++)
2054 wrbuf_putc(term_dict, 1);
2055 wrbuf_putc(term_dict, ord_buf[i]);
2057 wrbuf_putc(term_dict, ')');
2058 wrbuf_puts(term_dict, term);
2060 grep_info.isam_p_indx = 0;
2061 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2062 &grep_info, &max_pos, 0, grep_handle);
2063 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2064 grep_info.isam_p_indx);
2065 rset = rset_trunc(zh, grep_info.isam_p_buf,
2066 grep_info.isam_p_indx, term, strlen(term),
2067 flags, 1, term_type, rset_nmem,
2068 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2069 0 /* term_ref_id_str */);
/* the position buffer and the expression buffer are no longer needed */
2070 grep_info_delete(&grep_info);
2071 wrbuf_destroy(term_dict);
/* Restrict a term result set to the element/attribute path given by xpath.
 *
 * rset is the result set for the term itself; when it is 0 the search is
 * treated as "always matches" (always_matches = 1).  The location steps are
 * walked from the last one down to index 0.  For each level:
 *   - xpath_rev is built from the step names xpath[level] .. xpath[1], in
 *     that order, each followed by "/"; some step text is replaced by the
 *     regular expressions "[^/]*" and ".*", and a space in a name is written
 *     as "\001 ";
 *   - a relation predicate with a non-empty name yields rset_attr, a lookup
 *     in the ZEBRA_XPATH_ATTR_NAME index for "name" or "name=value" (name is
 *     taken from its second character on);
 *   - when xpath_rev is non-empty, begin-tag and end-tag sets are looked up
 *     (ZEBRA_XPATH_ELM_BEGIN / ZEBRA_XPATH_ELM_END) and rset is wrapped with
 *     rset_create_between(start, rset, end, attr).
 * All helper lookups use index type "0" through xpath_trunc().
 *
 * NOTE(review): "rset = rset_start_tag" is presumably the always-matches
 * case, where no term set exists to be wrapped - confirm. */
2077 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2078 NMEM stream, const char *rank_type, RSET rset,
2079 int xpath_len, struct xpath_location_step *xpath,
2082 struct rset_key_control *kc)
2085 int always_matches = rset ? 0 : 1;
2093 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2094 for (i = 0; i<xpath_len; i++)
2096 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2108 a[@attr = value]/b[@other = othervalue]
2110 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2111 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2112 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2113 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2114 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2115 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2119 dict_grep_cmap(zh->reg->dict, 0, 0);
2122 int level = xpath_len;
2125 while (--level >= 0)
2127 WRBUF xpath_rev = wrbuf_alloc();
2129 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2131 for (i = level; i >= 1; --i)
2133 const char *cp = xpath[i].part;
2139 wrbuf_puts(xpath_rev, "[^/]*");
2140 else if (*cp == ' ')
2141 wrbuf_puts(xpath_rev, "\001 ");
2143 wrbuf_putc(xpath_rev, *cp);
2145 /* wrbuf_putc does not null-terminate , but
2146 wrbuf_puts below ensures it does.. so xpath_rev
2147 is OK iff length is > 0 */
2149 wrbuf_puts(xpath_rev, "/");
2151 else if (i == 1) /* // case */
2152 wrbuf_puts(xpath_rev, ".*");
2154 if (xpath[level].predicate &&
2155 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2156 xpath[level].predicate->u.relation.name[0])
2158 WRBUF wbuf = wrbuf_alloc();
2159 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2160 if (xpath[level].predicate->u.relation.value)
2162 const char *cp = xpath[level].predicate->u.relation.value;
2163 wrbuf_putc(wbuf, '=');
/* the value goes into a regular expression, so escape its metacharacters */
2167 if (strchr(REGEX_CHARS, *cp))
2168 wrbuf_putc(wbuf, '\\');
2169 wrbuf_putc(wbuf, *cp);
2173 rset_attr = xpath_trunc(
2174 zh, stream, "0", wrbuf_cstr(wbuf),
2175 ZEBRA_XPATH_ATTR_NAME,
2177 wrbuf_destroy(wbuf);
2183 wrbuf_destroy(xpath_rev);
2187 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2188 wrbuf_cstr(xpath_rev));
2189 if (wrbuf_len(xpath_rev))
2191 rset_start_tag = xpath_trunc(zh, stream, "0",
2192 wrbuf_cstr(xpath_rev),
2193 ZEBRA_XPATH_ELM_BEGIN,
2196 rset = rset_start_tag;
2199 rset_end_tag = xpath_trunc(zh, stream, "0",
2200 wrbuf_cstr(xpath_rev),
2201 ZEBRA_XPATH_ELM_END,
2204 rset = rset_create_between(rset_nmem, kc, kc->scope,
2205 rset_start_tag, rset,
2206 rset_end_tag, rset_attr);
2209 wrbuf_destroy(xpath_rev);
/* Size of the xpath_location_step array used by rpn_search_database(); it is
   also passed to rpn_check_xpath() as the maximum number of steps. */
2217 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT() calls this once per database and the
   definition follows it. */
2219 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2220 Z_AttributesPlusTerm *zapt,
2221 const Odr_oid *attributeSet,
2222 zint hits_limit, NMEM stream,
2223 Z_SortKeySpecList *sort_sequence,
2226 struct rset_key_control *kc);
/* Search one attributes-plus-term operand in every database in basenames.
 *
 * An array of num_bases result sets is allocated from stream.  For each
 * database the explain database is switched with zebraExplain_curDatabase()
 * (a failure is reported as YAZ_BIB1_DATABASE_UNAVAILABLE) and
 * rpn_search_database() stores its result in rsets[i].
 *
 * When any database fails, the sets created so far are deleted.  Otherwise
 * *rset is a null result set when num_bases is 0, or an OR over the
 * per-database sets (rset_create_or).
 *
 * NOTE(review): a further branch precedes "else if (num_bases == 0)";
 * presumably it returns the single set when there is one database. */
2228 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2229 const Odr_oid *attributeSet,
2230 zint hits_limit, NMEM stream,
2231 Z_SortKeySpecList *sort_sequence,
2232 int num_bases, const char **basenames,
2235 struct rset_key_control *kc)
2237 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2238 ZEBRA_RES res = ZEBRA_OK;
2240 for (i = 0; i < num_bases; i++)
2243 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2245 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2250 res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2252 rset_nmem, rsets+i, kc);
2253 if (res != ZEBRA_OK)
2256 if (res != ZEBRA_OK)
2257 { /* must clean up the already created sets */
2259 rset_delete(rsets[i]);
2266 else if (num_bases == 0)
2267 *rset = rset_create_null(rset_nmem, kc, 0);
2269 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* Search one attributes-plus-term operand in the current database.
 *
 * Steps, in order:
 *   1. zebra_maps_attr() derives index_type, search_type, rank_type,
 *      complete_flag and sort_flag from the attributes.
 *   2. The term is converted to UTF-8 into termz (zapt_term_to_utf8).
 *   3. A sort operand is delegated to rpn_sort_spec().
 *   4. rpn_check_xpath() decides whether the use attribute is an X-Path.
 *      If so, xpath_use is ZEBRA_XPATH_ATTR_CDATA when the last step starts
 *      with '@' and ZEBRA_XPATH_CDATA otherwise; relation value 103
 *      (alwaysmatches) skips the term search, clears *rset and goes straight
 *      to rpn_search_xpath().
 *   5. search_type selects the strategy: "phrase", "and-list", "or-list",
 *      "local" or "numeric"; any other value is reported as
 *      YAZ_BIB1_UNSUPP_SEARCH.
 *   6. The resulting set is passed through rpn_search_xpath(). */
2275 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2276 Z_AttributesPlusTerm *zapt,
2277 const Odr_oid *attributeSet,
2278 zint hits_limit, NMEM stream,
2279 Z_SortKeySpecList *sort_sequence,
2282 struct rset_key_control *kc)
2284 ZEBRA_RES res = ZEBRA_OK;
2285 const char *index_type;
2286 char *search_type = NULL;
2287 char rank_type[128];
2290 char termz[IT_MAX_WORD+1];
2292 const char *xpath_use = 0;
2293 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2297 log_level_rpn = yaz_log_module_level("rpn");
2300 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2301 rank_type, &complete_flag, &sort_flag);
2303 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2304 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2305 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2306 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2308 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2312 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2313 rank_type, rset_nmem, rset, kc);
2314 /* consider if an X-Path query is used */
2315 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2316 xpath, MAX_XPATH_STEPS, stream);
2319 if (xpath[xpath_len-1].part[0] == '@')
2320 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2322 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2329 attr_init_APT(&relation, zapt, 2);
2330 relation_value = attr_find(&relation, NULL);
2332 if (relation_value == 103) /* alwaysmatches */
2334 *rset = 0; /* signal no "term" set */
2335 return rpn_search_xpath(zh, stream, rank_type, *rset,
2336 xpath_len, xpath, rset_nmem, rset, kc);
2341 /* search using one of the various search type strategies
2342 termz is our UTF-8 search term
2343 attributeSet is top-level default attribute set
2344 stream is ODR for search
2345 reg_id is the register type
2346 complete_flag is 1 for complete subfield, 0 for incomplete
2347 xpath_use is use-attribute to be used for X-Path search, 0 for none
2349 if (!strcmp(search_type, "phrase"))
2351 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2353 index_type, complete_flag, rank_type,
2358 else if (!strcmp(search_type, "and-list"))
2360 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2362 index_type, complete_flag, rank_type,
2367 else if (!strcmp(search_type, "or-list"))
2369 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2371 index_type, complete_flag, rank_type,
2376 else if (!strcmp(search_type, "local"))
2378 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2379 rank_type, rset_nmem, rset, kc);
2381 else if (!strcmp(search_type, "numeric"))
2383 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2384 index_type, complete_flag, rank_type,
2391 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2394 if (res != ZEBRA_OK)
2398 return rpn_search_xpath(zh, stream, rank_type, *rset,
2399 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration of the recursive RPN tree evaluator; it is called by
   rpn_search_top() before its definition appears. */
2402 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2403 const Odr_oid *attributeSet,
2405 NMEM stream, NMEM rset_nmem,
2406 Z_SortKeySpecList *sort_sequence,
2407 int num_bases, const char **basenames,
2408 RSET **result_sets, int *num_result_sets,
2409 Z_Operator *parent_op,
2410 struct rset_key_control *kc);
/* Walk an RPN tree looking at attribute type 12 on its term operands.
 *
 * A complex node recurses into s1 and then, only if that returned ZEBRA_OK,
 * into s2.  A simple node that is an attributes-plus-term operand has its
 * type 12 attribute read with attr_find().
 *
 * NOTE(review): what is done with the value found (l) is presumably to
 * report it to the caller as the approximation limit - confirm. */
2412 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2415 ZEBRA_RES res = ZEBRA_OK;
2416 if (zs->which == Z_RPNStructure_complex)
2418 if (res == ZEBRA_OK)
2419 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2421 if (res == ZEBRA_OK)
2422 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2425 else if (zs->which == Z_RPNStructure_simple)
2427 if (zs->u.simple->which == Z_Operand_APT)
2429 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2430 AttrType global_hits_limit_attr;
2433 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2435 l = attr_find(&global_hits_limit_attr, NULL);
/* Entry point for evaluating a complete RPN query tree.
 *
 * A key control object is created with zebra_key_control_create() and the
 * tree is evaluated by rpn_search_structure() with no parent operator.  On
 * failure every result set that was produced is deleted.  On success exactly
 * one non-null result set is expected (enforced with assert) and it is
 * returned through *result_set. */
2443 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2444 const Odr_oid *attributeSet,
2446 NMEM stream, NMEM rset_nmem,
2447 Z_SortKeySpecList *sort_sequence,
2448 int num_bases, const char **basenames,
2451 RSET *result_sets = 0;
2452 int num_result_sets = 0;
2454 struct rset_key_control *kc = zebra_key_control_create(zh);
2456 res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2459 num_bases, basenames,
2460 &result_sets, &num_result_sets,
2461 0 /* no parent op */,
2463 if (res != ZEBRA_OK)
2466 for (i = 0; i<num_result_sets; i++)
2467 rset_delete(result_sets[i]);
2472 assert(num_result_sets == 1);
2473 assert(result_sets);
2474 assert(*result_sets);
2475 *result_set = *result_sets;
/* Recursively evaluate an RPN structure into a list of result sets.
 *
 * Complex node (operator with two operands):
 *   - the left and right operands are evaluated recursively; if either
 *     fails, all result sets produced so far for this node are deleted;
 *   - the two child lists are concatenated into *result_sets;
 *   - the list is combined into one set right away unless the parent has the
 *     same operator and that operator is AND or OR, in which case the
 *     uncombined list is handed up to the parent;
 *   - combining uses rset_create_and / rset_create_or / rset_create_not /
 *     rset_create_prox depending on the operator.  Proximity is accepted
 *     only with a known unit equal to Z_ProxUnit_word, otherwise
 *     YAZ_BIB1_UNSUPP_PROX_UNIT_CODE is reported; an unrecognised operator
 *     reports YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node:
 *   - an attributes-plus-term operand is searched with rpn_search_APT();
 *   - a result-set reference is resolved with resultSetRef(), reporting
 *     YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST on failure;
 *   - any other operand reports YAZ_BIB1_UNSUPP_SEARCH.
 *   The single resulting set is returned as a one-element list.
 *
 * Any other structure kind reports YAZ_BIB1_UNSUPP_SEARCH.
 * All list storage is allocated from the stream NMEM. */
2481 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2482 const Odr_oid *attributeSet, zint hits_limit,
2483 NMEM stream, NMEM rset_nmem,
2484 Z_SortKeySpecList *sort_sequence,
2485 int num_bases, const char **basenames,
2486 RSET **result_sets, int *num_result_sets,
2487 Z_Operator *parent_op,
2488 struct rset_key_control *kc)
2490 *num_result_sets = 0;
2491 if (zs->which == Z_RPNStructure_complex)
2494 Z_Operator *zop = zs->u.complex->roperator;
2495 RSET *result_sets_l = 0;
2496 int num_result_sets_l = 0;
2497 RSET *result_sets_r = 0;
2498 int num_result_sets_r = 0;
2500 res = rpn_search_structure(zh, zs->u.complex->s1,
2501 attributeSet, hits_limit, stream, rset_nmem,
2503 num_bases, basenames,
2504 &result_sets_l, &num_result_sets_l,
2506 if (res != ZEBRA_OK)
2509 for (i = 0; i<num_result_sets_l; i++)
2510 rset_delete(result_sets_l[i]);
2513 res = rpn_search_structure(zh, zs->u.complex->s2,
2514 attributeSet, hits_limit, stream, rset_nmem,
2516 num_bases, basenames,
2517 &result_sets_r, &num_result_sets_r,
2519 if (res != ZEBRA_OK)
2522 for (i = 0; i<num_result_sets_l; i++)
2523 rset_delete(result_sets_l[i]);
2524 for (i = 0; i<num_result_sets_r; i++)
2525 rset_delete(result_sets_r[i]);
2529 /* make a new list of result for all children */
2530 *num_result_sets = num_result_sets_l + num_result_sets_r;
2531 *result_sets = nmem_malloc(stream, *num_result_sets *
2532 sizeof(**result_sets));
2533 memcpy(*result_sets, result_sets_l,
2534 num_result_sets_l * sizeof(**result_sets));
2535 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2536 num_result_sets_r * sizeof(**result_sets));
2538 if (!parent_op || parent_op->which != zop->which
2539 || (zop->which != Z_Operator_and &&
2540 zop->which != Z_Operator_or))
2542 /* parent node different from this one (or non-present) */
2543 /* we must combine result sets now */
2547 case Z_Operator_and:
2548 rset = rset_create_and(rset_nmem, kc,
2550 *num_result_sets, *result_sets);
2553 rset = rset_create_or(rset_nmem, kc,
2554 kc->scope, 0, /* termid */
2555 *num_result_sets, *result_sets);
2557 case Z_Operator_and_not:
2558 rset = rset_create_not(rset_nmem, kc,
2563 case Z_Operator_prox:
2564 if (zop->u.prox->which != Z_ProximityOperator_known)
2567 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2571 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2573 zebra_setError_zint(zh,
2574 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2575 *zop->u.prox->u.known);
2580 rset = rset_create_prox(rset_nmem, kc,
2582 *num_result_sets, *result_sets,
2583 *zop->u.prox->ordered,
2584 (!zop->u.prox->exclusion ?
2585 0 : *zop->u.prox->exclusion),
2586 *zop->u.prox->relationType,
2587 *zop->u.prox->distance );
2591 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the child list as this node's only result */
2594 *num_result_sets = 1;
2595 *result_sets = nmem_malloc(stream, *num_result_sets *
2596 sizeof(**result_sets));
2597 (*result_sets)[0] = rset;
2600 else if (zs->which == Z_RPNStructure_simple)
2605 if (zs->u.simple->which == Z_Operand_APT)
2607 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2608 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2609 attributeSet, hits_limit,
2610 stream, sort_sequence,
2611 num_bases, basenames, rset_nmem, &rset,
2613 if (res != ZEBRA_OK)
2616 else if (zs->u.simple->which == Z_Operand_resultSetId)
2618 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2619 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2623 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2624 zs->u.simple->u.resultSetId);
2631 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2634 *num_result_sets = 1;
2635 *result_sets = nmem_malloc(stream, *num_result_sets *
2636 sizeof(**result_sets));
2637 (*result_sets)[0] = rset;
2641 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2652 * c-file-style: "Stroustrup"
2653 * indent-tabs-mode: nil
2655 * vim: shiftwidth=4 tabstop=8 expandtab