1 /* This file is part of the Zebra server.
2 Copyright (C) 2004-2013 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* Logging state: log_level_rpn receives the yaz log level of module "rpn"
   (assigned in add_isam_p()).  log_level_set is presumably the matching
   "already initialised" flag -- TODO confirm, its test is not visible here. */
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
/* NOTE(review): looks like a compile-time switch for termset support;
   confirm against the preprocessor conditionals that use it. */
43 #define TERMSET_DISABLE 1
/* Character-map callback installed on the dictionary by
   rpn_char_map_prepare().  vp is the struct rpn_char_map_info given at
   registration; the input at *from (len bytes) is mapped with
   zebra_maps_input() using p->zm.  The yaz_log() calls dump mapped bytes
   in hex.  NOTE(review): they look like debug-only output; the lines that
   enable them and the return statement are not visible in this listing. */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Set up the character-map hook used by dictionary grep on reg->dict.
   For an ICU map the hook is cleared (dict_grep_cmap(dict, 0, 0));
   the other call installs rpn_char_map_handler() with map_info as its
   context (presumably the else branch -- that line is not visible here). */
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 if (zebra_maps_is_icu(zm))
69 dict_grep_cmap(reg->dict, 0, 0);
71 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
85 const char *index_type;
/* Record one dictionary match in the grep_info collector.
 *
 * name: dictionary key; it starts with an encoded index ordinal (decoded
 *       with key_SU_decode()) followed by the term itself.
 * info: one length byte, asserted to equal sizeof(ISAM_P), followed by
 *       the ISAM_P value that is appended to p->isam_p_buf.
 * p:    collector state.  Collection stops once isam_p_indx reaches
 *       trunc_max - 1.  When the buffer is full it is regrown to
 *       2 * size + 100 entries and the old contents are copied over.
 *
 * The matched term is also logged at the "rpn" level and passed to
 * resultSetAddTerm() for p->termset.
 */
89 static int add_isam_p(const char *name, const char *info,
94 log_level_rpn = yaz_log_module_level("rpn");
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
102 if (p->isam_p_indx == p->isam_p_size)
104 ISAM_P *new_isam_p_buf;
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
117 p->isam_p_buf = new_isam_p_buf;
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* carry over the old term numbers: the source must be term_no itself,
   not the ISAM_P buffer, which holds a different element type */
123 memcpy(new_term_no, p->term_no,
124 p->isam_p_indx * sizeof(*p->term_no));
127 p->term_no = new_term_no;
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
136 char term_tmp[IT_MAX_WORD];
138 const char *index_name;
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
/* Callback handed to dict_lookup_grep(): casts the opaque pointer p to
   struct grep_info and forwards the match to add_isam_p(). */
154 static int grep_handle(char *name, const char *info, void *p)
156 return add_isam_p(name, info, (struct grep_info *) p);
/* Skip leading white space in *src before a term is parsed.
   Scanning stops at a character listed in ct1 (when ct1 is non-NULL) or
   at the first input whose mapping from zebra_maps_input() is not
   CHR_SPACE.  Callers test the result with !term_pre(...).
   NOTE(review): the return statements and the update of *src are not
   visible in this listing. */
159 static int term_pre(zebra_map_t zm, const char **src,
160 const char *ct1, int first)
162 const char *s1, *s0 = *src;
165 /* skip white space */
168 if (ct1 && strchr(ct1, *s0))
171 map = zebra_maps_input(zm, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
/* Format in_buf[0 .. in_size) into out_buf for logging, one "HH:c " item
   per byte (hex value, then a character for display).  Bytes outside
   32..126 are treated specially for the display character (the
   substitute is not visible in this listing).  Once the output comes
   within 20 bytes of out_size, ".." is appended; this is why out_size
   must exceed 20. */
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
192 int c = in_buf[k] & 0xff;
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
/* Characters that add_non_space() and term_100() prefix with a backslash
   when raw input is copied into a dictionary regular expression. */
207 #define REGEX_CHARS " ^[]()|.*+?!\"$\\"
/* Append one non-space token to the term buffers.
   The raw bytes [start, end) are written to display_term.  For term_dict
   the visible code has two forms: the raw bytes with every REGEX_CHARS
   character backslash-escaped, or the mapped string map[0].
   NOTE(review): the condition choosing between the two (presumably
   q_map_match) is not visible in this listing -- confirm. */
209 static void add_non_space(const char *start, const char *end,
212 const char **map, int q_map_match)
214 size_t sz = end - start;
216 wrbuf_write(display_term, start, sz);
221 if (strchr(REGEX_CHARS, *start))
222 wrbuf_putc(term_dict, '\\');
223 wrbuf_putc(term_dict, *start);
230 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
232 wrbuf_puts(term_dict, map[0]);
/* ICU variant for regular-expression terms.
   The term at *src is scanned for runs of ordinary characters; a run
   ends at a space (when space_split is set), at any REGEX_CHARS
   character or at '-'.  Each run is tokenized with
   zebra_map_tokenize_start()/zebra_map_tokenize_next(); the resulting
   key is cut back past its last two \x01 separators and appended to
   term_dict with regex characters escaped, and the display form goes to
   display_term.  A reserved character itself is copied unchanged to both
   buffers.  The expression is finished with "\x01\x01.*". */
237 static int term_102_icu(zebra_map_t zm,
238 const char **src, WRBUF term_dict, int space_split,
242 const char *s0 = *src, *s1;
248 if (*s1 == ' ' && space_split)
250 else if (*s1 && !strchr(REGEX_CHARS "-", *s1))
254 /* EOF or regex reserved char */
257 const char *res_buf = 0;
259 const char *display_buf;
262 zebra_map_tokenize_start(zm, s0, s1 - s0);
264 if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
265 &display_buf, &display_len))
268 while (i > 0 && res_buf[--i] != '\x01')
270 while (i > 0 && res_buf[--i] != '\x01')
272 res_len = i; /* reduce res_len */
273 for (i = 0; i < res_len; i++)
275 if (strchr(REGEX_CHARS "\\", res_buf[i]))
276 wrbuf_putc(term_dict, '\\');
278 wrbuf_putc(term_dict, '\x01');
280 wrbuf_putc(term_dict, res_buf[i]);
282 wrbuf_write(display_term, display_buf, display_len);
290 wrbuf_putc(term_dict, *s1);
291 wrbuf_putc(display_term, *s1);
298 wrbuf_puts(term_dict, "\x01\x01.*");
/* ICU variant of term_100().
   Takes the next term from *src (up to the next space, or the whole
   remaining string), tokenizes it with zebra_map_tokenize_start() and
   zebra_map_tokenize_next(), writes the display form to display_term and
   appends the sort key to term_dict.  The key is first cut back past its
   last two \x01 separators and regex characters in it are escaped.
   ".*" may be written before and/or after the key, and the expression
   ends with "\x01\x01.*".
   NOTE(review): callers in string_term() pass a further mode argument
   0..3 that presumably selects where ".*" goes; its declaration and the
   tests on it are not visible in this listing -- confirm. */
303 static int term_100_icu(zebra_map_t zm,
304 const char **src, WRBUF term_dict, int space_split,
309 const char *res_buf = 0;
311 const char *display_buf;
313 const char *s0 = *src, *s1;
324 while (*s1 && *s1 != ' ')
328 s1 = s0 + strlen(s0);
332 zebra_map_tokenize_start(zm, s0, s1 - s0);
334 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
335 &display_buf, &display_len))
339 wrbuf_write(display_term, display_buf, display_len);
342 /* ICU sort keys seem to be of the form
343 basechars \x01 accents \x01 length
344 For now we'll just right truncate from basechars . This
345 may give false hits due to accents not being used.
348 while (i > 0 && res_buf[--i] != '\x01')
350 while (i > 0 && res_buf[--i] != '\x01')
353 { /* did not find base chars at all. Throw error */
356 res_len = i; /* reduce res_len */
359 wrbuf_puts(term_dict, ".*");
360 for (i = 0; i < res_len; i++)
362 if (strchr(REGEX_CHARS "\\", res_buf[i]))
363 wrbuf_putc(term_dict, '\\');
365 wrbuf_putc(term_dict, '\x01');
367 wrbuf_putc(term_dict, res_buf[i]);
370 wrbuf_puts(term_dict, ".*");
372 wrbuf_puts(term_dict, "\x01\x01.*");
/* Parse one literal term from *src into term_dict (dictionary regexp
   form) and display_term (text shown to the user).  Leading white space
   is skipped with term_pre(); input is mapped with zebra_maps_search()
   and non-space tokens are appended by add_non_space().  In the
   "complete subfield" path a run of mapped spaces is remembered in
   space_start..space_end and written out, regex-escaped, only when a
   further token follows it. */
376 /* term_100: handle term, where trunc = none(no operators at all) */
377 static int term_100(zebra_map_t zm,
378 const char **src, WRBUF term_dict, int space_split,
385 const char *space_start = 0;
386 const char *space_end = 0;
388 if (!term_pre(zm, src, 0, !space_split))
395 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
398 if (**map == *CHR_SPACE)
401 else /* complete subfield only. */
403 if (**map == *CHR_SPACE)
404 { /* save space mapping for later .. */
409 else if (space_start)
410 { /* reload last space */
411 while (space_start < space_end)
413 if (strchr(REGEX_CHARS, *space_start))
414 wrbuf_putc(term_dict, '\\');
415 wrbuf_putc(display_term, *space_start);
416 wrbuf_putc(term_dict, *space_start);
421 space_start = space_end = 0;
426 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* Parse one term in which '#' is a truncation operator.  The visible
   code writes ".*" to term_dict and the original character to
   display_term for the operator (the test selecting it is not visible
   here); all other input is mapped with zebra_maps_search() and appended
   by add_non_space().  With space_split set, a mapped space is tested
   for, presumably to end the term. */
432 /* term_101: handle term, where trunc = Process # */
433 static int term_101(zebra_map_t zm,
434 const char **src, WRBUF term_dict, int space_split,
441 if (!term_pre(zm, src, "#", !space_split))
449 wrbuf_puts(term_dict, ".*");
450 wrbuf_putc(display_term, *s0);
457 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
458 if (space_split && **map == *CHR_SPACE)
462 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* Parse one term given as a regular expression.  When errors is
   non-NULL and the term starts with "+N+" (N a single digit) followed by
   more input, N is stored in *errors.  Regex operator characters are
   copied unchanged to both term_dict and display_term; other input is
   mapped with zebra_maps_search() and appended by add_non_space(). */
469 /* term_103: handle term, where trunc = re-2 (regular expressions) */
470 static int term_103(zebra_map_t zm, const char **src,
471 WRBUF term_dict, int *errors, int space_split,
478 if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
481 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
482 isdigit(((const unsigned char *)s0)[1]))
484 *errors = s0[1] - '0';
491 if (strchr("^\\()[].*+?|-", *s0))
493 wrbuf_putc(display_term, *s0);
494 wrbuf_putc(term_dict, *s0);
502 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
503 if (space_split && **map == *CHR_SPACE)
507 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
515 /* term_102: handle term, where trunc = re-1 (regular expressions).
   Same processing as term_103() but called with a NULL errors pointer,
   so no "+N+" error-count prefix is interpreted. */
516 static int term_102(zebra_map_t zm, const char **src,
517 WRBUF term_dict, int space_split, WRBUF display_term)
519 return term_103(zm, src, term_dict, NULL, space_split, display_term);
/* Parse one term with the operators '?', '*' and '#'.  Digits that
   follow an operator are accumulated into a decimal limit and echoed to
   display_term.  The visible code emits ".?", ".*" and "." into
   term_dict for the operators; NOTE(review): the tests that pair each
   output with its operator, and the loop using limit, are not visible in
   this listing.  Other input is mapped with zebra_maps_search() and
   appended by add_non_space(). */
523 /* term_104: handle term, process ?n * # */
524 static int term_104(zebra_map_t zm, const char **src,
525 WRBUF term_dict, int space_split, WRBUF display_term)
531 if (!term_pre(zm, src, "?*#", !space_split))
539 wrbuf_putc(display_term, *s0);
541 if (*s0 >= '0' && *s0 <= '9')
544 while (*s0 >= '0' && *s0 <= '9')
546 limit = limit * 10 + (*s0 - '0');
547 wrbuf_putc(display_term, *s0);
554 wrbuf_puts(term_dict, ".?");
559 wrbuf_puts(term_dict, ".*");
565 wrbuf_puts(term_dict, ".*");
566 wrbuf_putc(display_term, *s0);
572 wrbuf_puts(term_dict, ".");
573 wrbuf_putc(display_term, *s0);
580 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
581 if (space_split && **map == *CHR_SPACE)
585 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* Parse one term with the operators '*' and '!' and backslash as a
   special character.  The visible code emits ".*", "." and an escaped
   backslash into term_dict while echoing the input character to
   display_term; other input is mapped with zebra_maps_search() and
   appended by add_non_space().  A final ".*" is appended after the loop,
   presumably only when right_truncate is set (the test is not visible in
   this listing). */
592 /* term_105/106: handle term, process * ! and possibly right_truncate */
593 static int term_105(zebra_map_t zm, const char **src,
594 WRBUF term_dict, int space_split,
595 WRBUF display_term, int right_truncate)
601 if (!term_pre(zm, src, "\\*!", !space_split))
609 wrbuf_puts(term_dict, ".*");
610 wrbuf_putc(display_term, *s0);
616 wrbuf_putc(term_dict, '.');
617 wrbuf_putc(display_term, *s0);
620 else if (*s0 == '\\')
623 wrbuf_puts(term_dict, "\\\\");
624 wrbuf_putc(display_term, *s0);
631 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
632 if (space_split && **map == *CHR_SPACE)
636 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
640 wrbuf_puts(term_dict, ".*");
/* Builds a regular expression matching decimal integers on one side of
   val and appends it to term_dict.  The expression is assembled in the
   fixed-size local buffer dst_buf with strcpy/strcat and written out in
   one wrbuf_puts() call at the end.  Both branches start with an
   alternative for numbers of the opposite sign ("-[0-9]+" or "[0-9]+"). */
646 /* gen_regular_rel - generate regular expression from relation
647 * val: border value (inclusive)
648 * islt: 1 if <=; 0 if >=.
650 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
652 char dst_buf[20*5*20]; /* assuming enough for expansion */
659 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
663 strcpy(dst, "(-[0-9]+|(");
671 strcpy(dst, "([0-9]+|-(");
682 sprintf(numstr, "%d", val);
683 for (w = strlen(numstr); --w >= 0; pos++)
702 strcpy(dst + dst_p, numstr);
703 dst_p = strlen(dst) - pos - 1;
731 for (i = 0; i<pos; i++)
744 /* match everything less than 10^(pos-1) */
746 for (i = 1; i<pos; i++)
747 strcat(dst, "[0-9]?");
751 /* match everything greater than 10^pos */
752 for (i = 0; i <= pos; i++)
753 strcat(dst, "[0-9]");
754 strcat(dst, "[0-9]*");
757 wrbuf_puts(term_dict, dst);
/* Copy one logical character of wsrc, starting at offset *indx, to
   term_p.  A backslash is copied as well as the character after it, so
   an escape pair stays together.  NOTE(review): *indx is presumably
   advanced past what was copied; the increments are not visible in this
   listing. */
760 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
762 const char *src = wrbuf_cstr(wsrc);
763 if (src[*indx] == '\\')
765 wrbuf_putc(term_p, src[*indx]);
768 wrbuf_putc(term_p, src[*indx]);
773 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
774 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
775 * >= abc ([b-].*|a[c-].*|ab[c-].*)
776 * ([^-a].*|a[^-b].*|ab[c-].*)
777 * < abc ([-0].*|a[-a].*|ab[-b].*)
778 * ([^a-].*|a[^b-].*|ab[^c-].*)
779 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
780 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Translate a string comparison into a dictionary regular expression.
   The relation attribute (type 2) of zapt selects the comparison.  The
   term is first parsed with term_100() into the temporary buffer
   term_component; the function then writes a parenthesised alternation
   to term_dict with one alternative per prefix of the term: the prefix,
   a bracket expression for the next character, then ".*".  The <= and >
   forms add a last alternative holding the whole term (followed by ".+"
   for >).  Equality writes the term in parentheses, and the
   "always matches" relation just skips to the end of the term.  An
   unsupported relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in
   *error_code.  term_component is destroyed on every visible exit path. */
782 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
783 const char **term_sub, WRBUF term_dict,
784 const Odr_oid *attributeSet,
785 zebra_map_t zm, int space_split,
792 WRBUF term_component = wrbuf_alloc();
794 attr_init_APT(&relation, zapt, 2);
795 relation_value = attr_find(&relation, NULL);
798 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
799 switch (relation_value)
802 if (!term_100(zm, term_sub, term_component, space_split, display_term))
804 wrbuf_destroy(term_component);
807 yaz_log(log_level_rpn, "Relation <");
809 wrbuf_putc(term_dict, '(');
810 for (i = 0; i < wrbuf_len(term_component); )
815 wrbuf_putc(term_dict, '|');
817 string_rel_add_char(term_dict, term_component, &j);
819 wrbuf_putc(term_dict, '[');
821 wrbuf_putc(term_dict, '^');
823 wrbuf_putc(term_dict, 1);
824 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
826 string_rel_add_char(term_dict, term_component, &i);
827 wrbuf_putc(term_dict, '-');
829 wrbuf_putc(term_dict, ']');
830 wrbuf_putc(term_dict, '.');
831 wrbuf_putc(term_dict, '*');
833 wrbuf_putc(term_dict, ')');
836 if (!term_100(zm, term_sub, term_component, space_split, display_term))
838 wrbuf_destroy(term_component);
841 yaz_log(log_level_rpn, "Relation <=");
843 wrbuf_putc(term_dict, '(');
844 for (i = 0; i < wrbuf_len(term_component); )
849 string_rel_add_char(term_dict, term_component, &j);
850 wrbuf_putc(term_dict, '[');
852 wrbuf_putc(term_dict, '^');
854 wrbuf_putc(term_dict, 1);
855 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
857 string_rel_add_char(term_dict, term_component, &i);
858 wrbuf_putc(term_dict, '-');
860 wrbuf_putc(term_dict, ']');
861 wrbuf_putc(term_dict, '.');
862 wrbuf_putc(term_dict, '*');
864 wrbuf_putc(term_dict, '|');
866 for (i = 0; i < wrbuf_len(term_component); )
867 string_rel_add_char(term_dict, term_component, &i);
868 wrbuf_putc(term_dict, ')');
871 if (!term_100(zm, term_sub, term_component, space_split, display_term))
873 wrbuf_destroy(term_component);
876 yaz_log(log_level_rpn, "Relation >");
878 wrbuf_putc(term_dict, '(');
879 for (i = 0; i < wrbuf_len(term_component); )
884 string_rel_add_char(term_dict, term_component, &j);
885 wrbuf_putc(term_dict, '[');
887 wrbuf_putc(term_dict, '^');
888 wrbuf_putc(term_dict, '-');
889 string_rel_add_char(term_dict, term_component, &i);
891 wrbuf_putc(term_dict, ']');
892 wrbuf_putc(term_dict, '.');
893 wrbuf_putc(term_dict, '*');
895 wrbuf_putc(term_dict, '|');
897 for (i = 0; i < wrbuf_len(term_component); )
898 string_rel_add_char(term_dict, term_component, &i);
899 wrbuf_putc(term_dict, '.');
900 wrbuf_putc(term_dict, '+');
901 wrbuf_putc(term_dict, ')');
904 if (!term_100(zm, term_sub, term_component, space_split, display_term))
906 wrbuf_destroy(term_component);
909 yaz_log(log_level_rpn, "Relation >=");
911 wrbuf_putc(term_dict, '(');
912 for (i = 0; i < wrbuf_len(term_component); )
917 wrbuf_putc(term_dict, '|');
919 string_rel_add_char(term_dict, term_component, &j);
920 wrbuf_putc(term_dict, '[');
922 if (i < wrbuf_len(term_component)-1)
924 wrbuf_putc(term_dict, '^');
925 wrbuf_putc(term_dict, '-');
926 string_rel_add_char(term_dict, term_component, &i);
930 string_rel_add_char(term_dict, term_component, &i);
931 wrbuf_putc(term_dict, '-');
933 wrbuf_putc(term_dict, ']');
934 wrbuf_putc(term_dict, '.');
935 wrbuf_putc(term_dict, '*');
937 wrbuf_putc(term_dict, ')');
944 yaz_log(log_level_rpn, "Relation =");
945 if (!term_100(zm, term_sub, term_component, space_split, display_term))
947 wrbuf_destroy(term_component);
950 wrbuf_puts(term_dict, "(");
951 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
952 wrbuf_puts(term_dict, ")");
955 yaz_log(log_level_rpn, "Relation always matches");
956 /* skip to end of term (we don't care what it is) */
957 while (**term_sub != '\0')
961 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
962 wrbuf_destroy(term_component);
965 wrbuf_destroy(term_component);
/* Forward declaration: string_term() is defined after search_term(),
   which calls it. */
969 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
970 const char **term_sub,
972 const Odr_oid *attributeSet, NMEM stream,
973 struct grep_info *grep_info,
974 const char *index_type, int complete_flag,
976 const char *xpath_use,
977 struct ord_list **ol,
/* Read per-term limits from the attributes of zapt.
   Attribute type 11 (hits limit): when present (attr_find() result is
   not -1) it replaces *hits_limit_value; otherwise the caller's value is
   left alone.
   Attribute type 10 (term reference id): returned via *term_ref_id_str;
   a numeric id (>= 0) is formatted as decimal text into a 20-byte buffer
   allocated from nmem.
   The outcome is logged at YLOG_DEBUG. */
980 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
981 Z_AttributesPlusTerm *zapt,
982 zint *hits_limit_value,
983 const char **term_ref_id_str,
986 AttrType term_ref_id_attr;
987 AttrType hits_limit_attr;
989 zint hits_limit_from_attr;
991 attr_init_APT(&hits_limit_attr, zapt, 11);
992 hits_limit_from_attr = attr_find(&hits_limit_attr, NULL);
994 attr_init_APT(&term_ref_id_attr, zapt, 10);
995 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
996 if (term_ref_id_int >= 0)
998 char *res = nmem_malloc(nmem, 20);
999 sprintf(res, "%d", term_ref_id_int);
1000 *term_ref_id_str = res;
1002 if (hits_limit_from_attr != -1)
1003 *hits_limit_value = hits_limit_from_attr;
1005 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
1006 *term_ref_id_str ? *term_ref_id_str : "none",
/* Search for a single term and build its result set.
   hits_limit is the default hit limit; zebra_term_limits_APT() may
   replace it (and supply a term reference id) from the attributes of
   zapt.  string_term() fills grep_info with the matching ISAM positions,
   starting from isam_p_indx == 0.  When it returns ZEBRA_OK and
   *term_sub is non-NULL, the positions are turned into *rset with
   rset_trunc(), using display_term as the term text.  The two temporary
   WRBUFs are destroyed: term_dict right after string_term(), and
   display_term at the end. */
1011 /** \brief search for term (which may be truncated)
1013 static ZEBRA_RES search_term(ZebraHandle zh,
1014 Z_AttributesPlusTerm *zapt,
1015 const char **term_sub,
1016 const Odr_oid *attributeSet,
1017 zint hits_limit, NMEM stream,
1018 struct grep_info *grep_info,
1019 const char *index_type, int complete_flag,
1020 const char *rank_type,
1021 const char *xpath_use,
1024 struct rset_key_control *kc,
1028 struct ord_list *ol;
1029 zint hits_limit_value = hits_limit;
1030 const char *term_ref_id_str = 0;
1031 WRBUF term_dict = wrbuf_alloc();
1032 WRBUF display_term = wrbuf_alloc();
1034 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1036 grep_info->isam_p_indx = 0;
1037 res = string_term(zh, zapt, term_sub, term_dict,
1038 attributeSet, stream, grep_info,
1039 index_type, complete_flag,
1040 display_term, xpath_use, &ol, zm);
1041 wrbuf_destroy(term_dict);
1042 if (res == ZEBRA_OK && *term_sub)
1044 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
1045 *rset = rset_trunc(zh, grep_info->isam_p_buf,
1046 grep_info->isam_p_indx, wrbuf_buf(display_term),
1047 wrbuf_len(display_term), rank_type,
1048 1 /* preserve pos */,
1049 zapt->term->which, rset_nmem,
1050 kc, kc->scope, ol, index_type, hits_limit_value,
1055 wrbuf_destroy(display_term);
/* Build the dictionary regular expression for one term and run it.
 *
 * The index ordinal is resolved with zebra_apt_get_ord(), appended to
 * *ol and encoded with key_SU_encode().  term_dict is rewound and starts
 * with the ordinal bytes in parentheses, each preceded by the internal
 * regexp escape character 1.  The term part is then appended according
 * to the truncation attribute (type 5):
 *   - ICU maps use term_100_icu()/term_102_icu(), and only for the
 *     relation values 3, 102, -1 (103 "always matches" skips the term);
 *   - other maps use string_relation() and term_100() .. term_105().
 * Unsupported truncation or relation values are reported through
 * zebra_setError_zint().
 *
 * The finished expression is passed to dict_lookup_grep() together with
 * regex_range (the error count that term_103() may set through its
 * errors pointer) and ord_len as the number of "exact" characters;
 * matches are collected in grep_info.
 */
1059 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1060 const char **term_sub,
1062 const Odr_oid *attributeSet, NMEM stream,
1063 struct grep_info *grep_info,
1064 const char *index_type, int complete_flag,
1066 const char *xpath_use,
1067 struct ord_list **ol,
1071 AttrType truncation;
1072 int truncation_value;
1074 struct rpn_char_map_info rcmi;
1076 int space_split = complete_flag ? 0 : 1;
1078 int regex_range = 0;
1079 int max_pos, prefix_len = 0;
1084 *ol = ord_list_create(stream);
1086 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1087 attr_init_APT(&truncation, zapt, 5);
1088 truncation_value = attr_find(&truncation, NULL);
1089 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1091 termp = *term_sub; /* start of term for each database */
1093 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1094 attributeSet, &ord) != ZEBRA_OK)
1100 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1102 *ol = ord_list_append(stream, *ol, ord);
1103 ord_len = key_SU_encode(ord, ord_buf);
1105 wrbuf_putc(term_dict, '(');
1107 for (i = 0; i<ord_len; i++)
1109 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1110 wrbuf_putc(term_dict, ord_buf[i]);
1112 wrbuf_putc(term_dict, ')');
1114 prefix_len = wrbuf_len(term_dict);
1116 if (zebra_maps_is_icu(zm))
1121 attr_init_APT(&relation, zapt, 2);
1122 relation_value = attr_find(&relation, NULL);
1123 if (relation_value == 103) /* always matches */
1124 termp += strlen(termp); /* move to end of term */
1125 else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1128 switch (truncation_value)
1130 case -1: /* not specified */
1131 case 100: /* do not truncate */
1132 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1139 if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
1145 case 1: /* right truncation */
1146 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1153 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 2))
1160 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 3))
1167 zebra_setError_zint(zh,
1168 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1175 zebra_setError_zint(zh,
1176 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1183 /* non-ICU case. using string.chr and friends */
1184 switch (truncation_value)
1186 case -1: /* not specified */
1187 case 100: /* do not truncate */
1188 if (!string_relation(zh, zapt, &termp, term_dict,
1190 zm, space_split, display_term,
1195 zebra_setError(zh, relation_error, 0);
1202 case 1: /* right truncation */
1203 wrbuf_putc(term_dict, '(');
1204 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1209 wrbuf_puts(term_dict, ".*)");
1211 case 2: /* left truncation */
1212 wrbuf_puts(term_dict, "(.*");
1213 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1218 wrbuf_putc(term_dict, ')');
1220 case 3: /* left&right truncation */
1221 wrbuf_puts(term_dict, "(.*");
1222 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1227 wrbuf_puts(term_dict, ".*)");
1229 case 101: /* process # in term */
1230 wrbuf_putc(term_dict, '(');
1231 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1236 wrbuf_puts(term_dict, ")");
1238 case 102: /* Regexp-1 */
1239 wrbuf_putc(term_dict, '(');
1240 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1245 wrbuf_putc(term_dict, ')');
1247 case 103: /* Regexp-2 */
1249 wrbuf_putc(term_dict, '(');
1250 if (!term_103(zm, &termp, term_dict, &regex_range,
1251 space_split, display_term))
1256 wrbuf_putc(term_dict, ')');
1258 case 104: /* process ?n * # term */
1259 wrbuf_putc(term_dict, '(');
1260 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1265 wrbuf_putc(term_dict, ')');
1267 case 105: /* process * ! in term and right truncate */
1268 wrbuf_putc(term_dict, '(');
1269 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1274 wrbuf_putc(term_dict, ')');
1276 case 106: /* process * ! in term */
1277 wrbuf_putc(term_dict, '(');
1278 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1283 wrbuf_putc(term_dict, ')');
1286 zebra_setError_zint(zh,
1287 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1295 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1296 esc_str(buf, sizeof(buf), input, strlen(input));
1299 WRBUF pr_wr = wrbuf_alloc();
1301 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1302 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1303 wrbuf_destroy(pr_wr);
1305 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1306 grep_info, &max_pos,
1307 ord_len /* number of "exact" chars */,
1310 zebra_set_partial_result(zh);
1312 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1314 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the heap buffers held by grep_info: term_no and isam_p_buf. */
1320 static void grep_info_delete(struct grep_info *grep_info)
1323 xfree(grep_info->term_no);
1325 xfree(grep_info->isam_p_buf);
/* Initialise grep_info for searching index_type.
   The ISAM buffer starts empty (size 0, NULL pointer) and termset is
   cleared.  trunc_max is read from the resource "truncmax" (default
   "10000") and replaced by attribute type 13 of zapt when that attribute
   is present.  Attribute type 8 names a termset: the visible code either
   reports YAZ_BIB1_UNSUPP_SEARCH ("termset") or creates the set with
   resultSetAdd(), using the numeric value formatted as text or the
   string value, and reports YAZ_BIB1_ILLEGAL_RESULT_SET_NAME when
   creation fails.  NOTE(review): which of the two termset paths is
   compiled presumably depends on TERMSET_DISABLE -- confirm. */
1328 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1329 Z_AttributesPlusTerm *zapt,
1330 struct grep_info *grep_info,
1331 const char *index_type)
1334 grep_info->term_no = 0;
1336 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1337 grep_info->isam_p_size = 0;
1338 grep_info->isam_p_buf = NULL;
1340 grep_info->index_type = index_type;
1341 grep_info->termset = 0;
1347 attr_init_APT(&truncmax, zapt, 13);
1348 truncmax_value = attr_find(&truncmax, NULL);
1349 if (truncmax_value != -1)
1350 grep_info->trunc_max = truncmax_value;
1355 int termset_value_numeric;
1356 const char *termset_value_string;
1358 attr_init_APT(&termset, zapt, 8);
1359 termset_value_numeric =
1360 attr_find_ex(&termset, NULL, &termset_value_string);
1361 if (termset_value_numeric != -1)
1364 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1368 const char *termset_name = 0;
1369 if (termset_value_numeric != -2)
1372 sprintf(resname, "%d", termset_value_numeric);
1373 termset_name = resname;
1376 termset_name = termset_value_string;
1377 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1378 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1379 if (!grep_info->termset)
1381 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
/* Create one result set per term found in termz.
   grep_info is prepared once with grep_info_prepare() and released with
   grep_info_delete() on the visible exit paths.  search_term() is called
   repeatedly, each call advancing termp; its result is stored at index
   *num_result_sets, which is incremented afterwards (the test on a NULL
   result precedes it; what that test does is not visible in this
   listing).  The *result_sets array is allocated from stream and
   regrown, with the old entries copied, whenever it is full.  If
   search_term() fails, every result set created so far is deleted. */
1390 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1391 Z_AttributesPlusTerm *zapt,
1393 const Odr_oid *attributeSet,
1396 const char *index_type, int complete_flag,
1397 const char *rank_type,
1398 const char *xpath_use,
1400 RSET **result_sets, int *num_result_sets,
1401 struct rset_key_control *kc,
1404 struct grep_info grep_info;
1405 const char *termp = termz;
1408 *num_result_sets = 0;
1409 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1415 if (alloc_sets == *num_result_sets)
1418 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1421 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1422 alloc_sets = alloc_sets + add;
1423 *result_sets = rnew;
1425 res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1427 index_type, complete_flag,
1429 xpath_use, rset_nmem,
1430 &(*result_sets)[*num_result_sets],
1432 if (res != ZEBRA_OK)
1435 for (i = 0; i < *num_result_sets; i++)
1436 rset_delete((*result_sets)[i]);
1437 grep_info_delete(&grep_info);
1440 if ((*result_sets)[*num_result_sets] == 0)
1442 (*num_result_sets)++;
1447 grep_info_delete(&grep_info);
1452 \brief Create result set(s) for list of terms
1453 \param zh Zebra Handle
1454 \param zapt Attributes Plus Term (RPN leaf)
1455 \param termz term as used in query but converted to UTF-8
1456 \param attributeSet default attribute set
1457 \param stream memory for result
1458 \param index_type register type ("w", "p",..)
1459 \param complete_flag whether it's phrases or not
1460 \param rank_type term flags for ranking
1461 \param xpath_use use attribute for X-Path (-1 for no X-path)
1462 \param rset_nmem memory for result sets
1463 \param result_sets output result set for each term in list (output)
1464 \param num_result_sets number of output result sets
1465 \param kc rset key control to be used for created result sets
/* Thin wrapper: fetches (or creates) the zebra map for index_type with
   zebra_map_get_or_add() and delegates to search_terms_chrmap(). */
1467 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1468 Z_AttributesPlusTerm *zapt,
1470 const Odr_oid *attributeSet,
1473 const char *index_type, int complete_flag,
1474 const char *rank_type,
1475 const char *xpath_use,
1477 RSET **result_sets, int *num_result_sets,
1478 struct rset_key_control *kc)
1480 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1481 return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1482 stream, index_type, complete_flag,
1483 rank_type, xpath_use,
1484 rset_nmem, result_sets, num_result_sets,
/* Handles the position attribute (type 3) of zapt.
   Unsupported position values, and maps for which
   zebra_maps_is_first_in_field() is false, are reported with
   YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE.  Otherwise the index ordinal is
   resolved with zebra_apt_get_ord(), the dictionary key "encoded ordinal
   followed by FIRST_IN_FIELD_STR" is looked up, and when an entry is
   found its ISAM_P is used to create *rset with
   zebra_create_rset_isam(). */
1489 /** \brief limit a search by position - returns result set
1491 static ZEBRA_RES search_position(ZebraHandle zh,
1492 Z_AttributesPlusTerm *zapt,
1493 const Odr_oid *attributeSet,
1494 const char *index_type,
1497 struct rset_key_control *kc)
1503 char term_dict[100];
1507 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1509 attr_init_APT(&position, zapt, 3);
1510 position_value = attr_find(&position, NULL);
1511 switch(position_value)
1520 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1526 if (!zebra_maps_is_first_in_field(zm))
1528 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1533 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1534 attributeSet, &ord) != ZEBRA_OK)
1538 ord_len = key_SU_encode(ord, ord_buf);
1539 memcpy(term_dict, ord_buf, ord_len);
1540 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1541 val = dict_lookup(zh->reg->dict, term_dict);
1544 assert(*val == sizeof(ISAM_P));
1545 memcpy(&isam_p, val+1, sizeof(isam_p));
1547 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
/* Phrase search for one RPN leaf.
   The per-term result sets come from search_terms_list().  When there is
   at least one, search_position() is consulted; on failure all term sets
   are deleted, otherwise its set (first_set) is placed in front of the
   term sets in a new array allocated from stream (presumably only when
   first_set is non-NULL -- that test is not visible in this listing).
   Result: no sets gives a null result set, one set is returned as is,
   several sets are combined with an ordered proximity search of
   distance 1. */
1553 /** \brief returns result set for phrase search
1555 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1556 Z_AttributesPlusTerm *zapt,
1557 const char *termz_org,
1558 const Odr_oid *attributeSet,
1561 const char *index_type,
1563 const char *rank_type,
1564 const char *xpath_use,
1567 struct rset_key_control *kc)
1569 RSET *result_sets = 0;
1570 int num_result_sets = 0;
1572 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1573 stream, index_type, complete_flag,
1574 rank_type, xpath_use,
1576 &result_sets, &num_result_sets, kc);
1578 if (res != ZEBRA_OK)
1581 if (num_result_sets > 0)
1584 res = search_position(zh, zapt, attributeSet,
1586 rset_nmem, &first_set,
1588 if (res != ZEBRA_OK)
1591 for (i = 0; i<num_result_sets; i++)
1592 rset_delete(result_sets[i]);
1597 RSET *nsets = nmem_malloc(stream,
1598 sizeof(RSET) * (num_result_sets+1));
1599 nsets[0] = first_set;
1600 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1601 result_sets = nsets;
1605 if (num_result_sets == 0)
1606 *rset = rset_create_null(rset_nmem, kc, 0);
1607 else if (num_result_sets == 1)
1608 *rset = result_sets[0];
1610 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1611 num_result_sets, result_sets,
1612 1 /* ordered */, 0 /* exclusion */,
1613 3 /* relation */, 1 /* distance */);
/* Or-list search for one RPN leaf.
   The per-term result sets come from search_terms_list().  For each of
   them search_position() is consulted; on failure all term sets are
   deleted.  The visible code then pairs first_set with the term set in
   tmp_set and replaces the term set by an ordered proximity search of
   distance 1 (presumably only when first_set is non-NULL -- that test is
   not visible in this listing).  Result: no sets gives a null result
   set, one set is returned as is, several sets are combined with
   rset_create_or(). */
1619 /** \brief returns result set for or-list search
1621 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1622 Z_AttributesPlusTerm *zapt,
1623 const char *termz_org,
1624 const Odr_oid *attributeSet,
1627 const char *index_type,
1629 const char *rank_type,
1630 const char *xpath_use,
1633 struct rset_key_control *kc)
1635 RSET *result_sets = 0;
1636 int num_result_sets = 0;
1639 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1640 stream, index_type, complete_flag,
1641 rank_type, xpath_use,
1643 &result_sets, &num_result_sets, kc);
1644 if (res != ZEBRA_OK)
1647 for (i = 0; i<num_result_sets; i++)
1650 res = search_position(zh, zapt, attributeSet,
1652 rset_nmem, &first_set,
1654 if (res != ZEBRA_OK)
1656 for (i = 0; i<num_result_sets; i++)
1657 rset_delete(result_sets[i]);
1665 tmp_set[0] = first_set;
1666 tmp_set[1] = result_sets[i];
1668 result_sets[i] = rset_create_prox(
1669 rset_nmem, kc, kc->scope,
1671 1 /* ordered */, 0 /* exclusion */,
1672 3 /* relation */, 1 /* distance */);
1675 if (num_result_sets == 0)
1676 *rset = rset_create_null(rset_nmem, kc, 0);
1677 else if (num_result_sets == 1)
1678 *rset = result_sets[0];
1680 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1681 num_result_sets, result_sets);
/* And-list search for one RPN leaf.
   Same structure as the or-list variant: term sets come from
   search_terms_list(), search_position() is consulted for each of them
   (all term sets are deleted if it fails), and the visible code replaces
   a term set by an ordered proximity search of distance 1 over
   first_set and that term set (presumably only when first_set is
   non-NULL -- that test is not visible in this listing).  Result: no
   sets gives a null result set, one set is returned as is, several sets
   are combined with rset_create_and(). */
1687 /** \brief returns result set for and-list search
1689 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1690 Z_AttributesPlusTerm *zapt,
1691 const char *termz_org,
1692 const Odr_oid *attributeSet,
1695 const char *index_type,
1697 const char *rank_type,
1698 const char *xpath_use,
1701 struct rset_key_control *kc)
1703 RSET *result_sets = 0;
1704 int num_result_sets = 0;
1707 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1708 stream, index_type, complete_flag,
1709 rank_type, xpath_use,
1711 &result_sets, &num_result_sets,
1713 if (res != ZEBRA_OK)
1715 for (i = 0; i<num_result_sets; i++)
1718 res = search_position(zh, zapt, attributeSet,
1720 rset_nmem, &first_set,
1722 if (res != ZEBRA_OK)
1724 for (i = 0; i<num_result_sets; i++)
1725 rset_delete(result_sets[i]);
1733 tmp_set[0] = first_set;
1734 tmp_set[1] = result_sets[i];
1736 result_sets[i] = rset_create_prox(
1737 rset_nmem, kc, kc->scope,
1739 1 /* ordered */, 0 /* exclusion */,
1740 3 /* relation */, 1 /* distance */);
1745 if (num_result_sets == 0)
1746 *rset = rset_create_null(rset_nmem, kc, 0);
1747 else if (num_result_sets == 1)
1748 *rset = result_sets[0];
1750 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1751 num_result_sets, result_sets);
/* Translate a numeric comparison into a dictionary regular expression
   and run it.
   The relation attribute (type 2) of zapt selects the comparison.  The
   term is parsed with term_100() into the temporary buffer term_num and
   converted with atoi().  For the four inequalities gen_regular_rel()
   appends the expression to term_dict; the strict forms pass value - 1
   (for <) or value + 1 (for >) as the inclusive border.  Equality
   appends "(0*N)", so leading zeros are accepted.  One further relation
   skips to the end of the term without using it, and an unsupported
   relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in *error_code.
   The expression is then evaluated with dict_lookup_grep(), with
   grep_handle() collecting matches in grep_info.  term_num is destroyed
   on every visible exit path. */
1757 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1758 const char **term_sub,
1760 const Odr_oid *attributeSet,
1761 struct grep_info *grep_info,
1771 WRBUF term_num = wrbuf_alloc();
1774 attr_init_APT(&relation, zapt, 2);
1775 relation_value = attr_find(&relation, NULL);
1777 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1779 switch (relation_value)
1782 yaz_log(log_level_rpn, "Relation <");
1783 if (!term_100(zm, term_sub, term_num, 1, display_term))
1785 wrbuf_destroy(term_num);
1788 term_value = atoi(wrbuf_cstr(term_num));
1789 gen_regular_rel(term_dict, term_value-1, 1);
1792 yaz_log(log_level_rpn, "Relation <=");
1793 if (!term_100(zm, term_sub, term_num, 1, display_term))
1795 wrbuf_destroy(term_num);
1798 term_value = atoi(wrbuf_cstr(term_num));
1799 gen_regular_rel(term_dict, term_value, 1);
1802 yaz_log(log_level_rpn, "Relation >=");
1803 if (!term_100(zm, term_sub, term_num, 1, display_term))
1805 wrbuf_destroy(term_num);
1808 term_value = atoi(wrbuf_cstr(term_num));
1809 gen_regular_rel(term_dict, term_value, 0);
1812 yaz_log(log_level_rpn, "Relation >");
1813 if (!term_100(zm, term_sub, term_num, 1, display_term))
1815 wrbuf_destroy(term_num);
1818 term_value = atoi(wrbuf_cstr(term_num));
1819 gen_regular_rel(term_dict, term_value+1, 0);
1823 yaz_log(log_level_rpn, "Relation =");
1824 if (!term_100(zm, term_sub, term_num, 1, display_term))
1826 wrbuf_destroy(term_num);
1829 term_value = atoi(wrbuf_cstr(term_num));
1830 wrbuf_printf(term_dict, "(0*%d)", term_value);
1833 /* term_tmp untouched.. */
1834 while (**term_sub != '\0')
1838 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1839 wrbuf_destroy(term_num);
1842 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1843 0, grep_info, max_pos, 0, grep_handle);
1846 zebra_set_partial_result(zh);
1848 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1849 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1850 wrbuf_destroy(term_num);
/* numeric_term: prepare the dictionary pattern prefix for a numeric term
 * and delegate the relation handling to numeric_relation().
 *
 * Steps visible here:
 *  - obtain the zebra map for index_type and install the character map
 *    through rpn_char_map_prepare();
 *  - resolve the index ordinal with zebra_apt_get_ord() and append it to
 *    the ordinal list returned through *ol (allocated on stream);
 *  - start term_dict with '(' <encoded ordinal> ')', where each byte of
 *    the key_SU_encode() output is preceded by a byte of value 1;
 *  - call numeric_relation(); when it returns zero, relation_error may be
 *    reported through zebra_setError() (the exact condition should be
 *    confirmed).
 */
1854 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1855 const char **term_sub,
1857 const Odr_oid *attributeSet, NMEM stream,
1858 struct grep_info *grep_info,
1859 const char *index_type, int complete_flag,
1861 const char *xpath_use,
1862 struct ord_list **ol)
1865 struct rpn_char_map_info rcmi;
1867 int relation_error = 0;
1868 int ord, ord_len, i;
1870 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1872 *ol = ord_list_create(stream);
1874 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1878 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1879 attributeSet, &ord) != ZEBRA_OK)
/* start from an empty pattern before writing the ordinal prefix */
1884 wrbuf_rewind(term_dict);
1886 *ol = ord_list_append(stream, *ol, ord);
1888 ord_len = key_SU_encode(ord, ord_buf);
/* emit "(" + ordinal bytes + ")"; the byte 1 written before each ordinal
   byte is presumably an escape marker for the pattern matcher so the byte
   is taken literally - TODO confirm */
1890 wrbuf_putc(term_dict, '(');
1891 for (i = 0; i < ord_len; i++)
1893 wrbuf_putc(term_dict, 1);
1894 wrbuf_putc(term_dict, ord_buf[i]);
1896 wrbuf_putc(term_dict, ')');
1898 if (!numeric_relation(zh, zapt, &termp, term_dict,
1899 attributeSet, grep_info, &max_pos, zm,
1900 display_term, &relation_error))
1904 zebra_setError(zh, relation_error, 0);
1911 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* rpn_search_APT_numeric: evaluate a "numeric" search.
 *
 * Terms are consumed from termz through termp.  For each one,
 * numeric_term() fills grep_info with ISAM positions and rset_trunc()
 * turns them into a result set stored in result_sets[num_result_sets].
 * The result_sets array is grown on the stream NMEM whenever it is full.
 * (The iteration over several terms is presumably a loop whose header is
 * among lines not documented here - confirm.)
 *
 * Final result in *rset:
 *   no sets       -> a null result set
 *   exactly one   -> that set itself
 *   more than one -> the AND of all sets
 */
1916 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1917 Z_AttributesPlusTerm *zapt,
1919 const Odr_oid *attributeSet,
1922 const char *index_type,
1924 const char *rank_type,
1925 const char *xpath_use,
1928 struct rset_key_control *kc)
1930 const char *termp = termz;
1931 RSET *result_sets = 0;
1932 int num_result_sets = 0;
1934 struct grep_info grep_info;
/* local copy of the limit, passed by address to zebra_term_limits_APT()
   so the caller's hits_limit is not modified */
1936 zint hits_limit_value = hits_limit;
1937 const char *term_ref_id_str = 0;
1939 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1942 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1943 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1947 struct ord_list *ol;
1948 WRBUF term_dict = wrbuf_alloc();
1949 WRBUF display_term = wrbuf_alloc();
/* array full: allocate a larger block from stream and copy the existing
   entries over */
1950 if (alloc_sets == num_result_sets)
1953 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1956 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1957 alloc_sets = alloc_sets + add;
1960 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start with an empty ISAM position list for this term */
1961 grep_info.isam_p_indx = 0;
1962 res = numeric_term(zh, zapt, &termp, term_dict,
1963 attributeSet, stream, &grep_info,
1964 index_type, complete_flag,
1965 display_term, xpath_use, &ol);
/* the pattern buffer is only needed during numeric_term() */
1966 wrbuf_destroy(term_dict);
1967 if (res == ZEBRA_FAIL || termp == 0)
1969 wrbuf_destroy(display_term);
1972 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
/* convert the collected ISAM positions into a result set for this term */
1973 result_sets[num_result_sets] =
1974 rset_trunc(zh, grep_info.isam_p_buf,
1975 grep_info.isam_p_indx, wrbuf_buf(display_term),
1976 wrbuf_len(display_term), rank_type,
1977 0 /* preserve position */,
1978 zapt->term->which, rset_nmem,
1979 kc, kc->scope, ol, index_type,
1982 wrbuf_destroy(display_term);
1983 if (!result_sets[num_result_sets])
1989 grep_info_delete(&grep_info);
1991 if (res != ZEBRA_OK)
/* combine the per-term sets into the single set returned to the caller */
1993 if (num_result_sets == 0)
1994 *rset = rset_create_null(rset_nmem, kc, 0);
1995 else if (num_result_sets == 1)
1996 *rset = result_sets[0];
1998 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1999 num_result_sets, result_sets);
/* rpn_search_APT_local: evaluate a "local" search.
 *
 * termz is parsed with atozint() as a system number and the record is
 * fetched with rec_get().  Depending on the outcome (presumably whether
 * the record exists - confirm) *rset becomes either a null result set or
 * a temporary result set, created in the directory named by the
 * "setTmpDir" resource, into which one key is written.
 */
2005 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
2006 Z_AttributesPlusTerm *zapt,
2008 const Odr_oid *attributeSet,
2010 const char *rank_type, NMEM rset_nmem,
2012 struct rset_key_control *kc)
2015 zint sysno = atozint(termz);
2019 rec = rec_get(zh->reg->records, sysno);
2027 *rset = rset_create_null(rset_nmem, kc, 0);
2033 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
2034 res_get(zh->res, "setTmpDir"), 0);
/* open the temporary set for writing and store the key in it */
2035 rsfd = rset_open(*rset, RSETF_WRITE);
2040 rset_write(rsfd, &key);
/* rpn_sort_spec: turn an APT that carries a sort attribute into an entry
 * of sort_sequence instead of a real search.
 *
 * - The sort relation is read from attribute type 7: value 1 selects
 *   ascending, 2 descending, anything else falls back to ascending.
 * - sort_sequence->specs is allocated lazily on stream with 10 slots, all
 *   initialised to 0.
 * - The slot index i is parsed from the general term with atoi_n().
 * - A Z_SortKeySpec using the APT's own attribute list is built on stream
 *   and stored in sort_sequence->specs[i].
 * - *rset is set to a null result set, so the operand contributes no hits.
 */
2046 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2047 const Odr_oid *attributeSet, NMEM stream,
2048 Z_SortKeySpecList *sort_sequence,
2049 const char *rank_type,
2052 struct rset_key_control *kc)
2055 int sort_relation_value;
2056 AttrType sort_relation_type;
2061 attr_init_APT(&sort_relation_type, zapt, 7);
2062 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort operand seen: create the spec array with all slots empty */
2064 if (!sort_sequence->specs)
2066 sort_sequence->num_specs = 10;
2067 sort_sequence->specs = (Z_SortKeySpec **)
2068 nmem_malloc(stream, sort_sequence->num_specs *
2069 sizeof(*sort_sequence->specs));
2070 for (i = 0; i<sort_sequence->num_specs; i++)
2071 sort_sequence->specs[i] = 0;
2073 if (zapt->term->which != Z_Term_general)
/* the term text is the position of this key within the sort sequence */
2076 i = atoi_n((char *) zapt->term->u.general->buf,
2077 zapt->term->u.general->len);
/* NOTE(review): only the upper bound is checked in the code shown here;
   confirm that atoi_n() cannot yield a negative index, since i is used
   to index sort_sequence->specs below */
2078 if (i >= sort_sequence->num_specs)
2080 sprintf(termz, "%d", i);
/* build the sort key spec: generic sort element keyed by sort attributes */
2082 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2083 sks->sortElement = (Z_SortElement *)
2084 nmem_malloc(stream, sizeof(*sks->sortElement));
2085 sks->sortElement->which = Z_SortElement_generic;
2086 sk = sks->sortElement->u.generic = (Z_SortKey *)
2087 nmem_malloc(stream, sizeof(*sk));
2088 sk->which = Z_SortKey_sortAttributes;
2089 sk->u.sortAttributes = (Z_SortAttributes *)
2090 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
/* the attribute set OID is duplicated; the attribute list is shared with
   the APT rather than copied */
2092 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2093 sk->u.sortAttributes->list = zapt->attributes;
2095 sks->sortRelation = (Odr_int *)
2096 nmem_malloc(stream, sizeof(*sks->sortRelation));
2097 if (sort_relation_value == 1)
2098 *sks->sortRelation = Z_SortKeySpec_ascending;
2099 else if (sort_relation_value == 2)
2100 *sks->sortRelation = Z_SortKeySpec_descending;
2102 *sks->sortRelation = Z_SortKeySpec_ascending;
2104 sks->caseSensitivity = (Odr_int *)
2105 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2106 *sks->caseSensitivity = 0;
2108 sks->which = Z_SortKeySpec_null;
2109 sks->u.null = odr_nullval ();
2110 sort_sequence->specs[i] = sks;
2111 *rset = rset_create_null(rset_nmem, kc, 0);
/* rpn_check_xpath: decide whether the use attribute (type 1) of zapt is
 * an X-Path expression and, if so, parse it.
 *
 * The use attribute is fetched in its string form.  When there is no
 * string value, or it does not begin with '/', the attribute is not an
 * X-Path and parsing is skipped (presumably signalled by a negative
 * return value - confirm).  Otherwise the result of
 * zebra_parse_xpath_str() is returned, which fills at most max entries
 * of xpath.
 */
2116 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2117 const Odr_oid *attributeSet,
2118 struct xpath_location_step *xpath, int max,
2121 const Odr_oid *curAttributeSet = attributeSet;
2123 const char *use_string = 0;
2125 attr_init_APT(&use, zapt, 1);
2126 attr_find_ex(&use, &curAttributeSet, &use_string);
2128 if (!use_string || *use_string != '/')
2131 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* xpath_trunc: build a result set of all dictionary entries matching the
 * pattern `term` in the index identified by (index_type, xpath_use).
 *
 * The index ordinal is looked up with zebraExplain_lookup_attr_str().
 * A null result set is returned when grep_info_prepare() fails and in one
 * further case (presumably when the ordinal lookup fails - confirm).
 * Otherwise the pattern "(" <encoded ordinal> ")" <term> is run through
 * dict_lookup_grep() and the collected ISAM positions are converted with
 * rset_trunc().
 */
2136 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2137 const char *index_type, const char *term,
2138 const char *xpath_use,
2140 struct rset_key_control *kc)
2142 struct grep_info grep_info;
2143 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2144 zinfo_index_category_index,
2145 index_type, xpath_use);
/* grep_info is prepared without an APT and with the fixed index type "0",
   not with the index_type argument */
2146 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2147 return rset_create_null(rset_nmem, kc, 0);
2150 return rset_create_null(rset_nmem, kc, 0);
2156 WRBUF term_dict = wrbuf_alloc();
2157 int ord_len = key_SU_encode(ord, ord_buf);
2158 int term_type = Z_Term_characterString;
2159 const char *flags = "void";
/* ordinal prefix: every encoded byte is preceded by a byte of value 1 */
2161 wrbuf_putc(term_dict, '(');
2162 for (i = 0; i<ord_len; i++)
2164 wrbuf_putc(term_dict, 1);
2165 wrbuf_putc(term_dict, ord_buf[i]);
2167 wrbuf_putc(term_dict, ')');
2168 wrbuf_puts(term_dict, term);
2170 grep_info.isam_p_indx = 0;
/* the return value of the lookup is not inspected here; only the
   positions gathered in grep_info are used */
2171 dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2172 &grep_info, &max_pos, 0, grep_handle);
2173 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2174 grep_info.isam_p_indx);
2175 rset = rset_trunc(zh, grep_info.isam_p_buf,
2176 grep_info.isam_p_indx, term, strlen(term),
2177 flags, 1, term_type, rset_nmem,
2178 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2179 0 /* term_ref_id_str */);
2180 grep_info_delete(&grep_info);
2181 wrbuf_destroy(term_dict);
/* rpn_search_xpath: restrict a term result set to the element/attribute
 * context described by an X-Path.
 *
 * rset is the result set for the term itself; a null rset means there is
 * no term set ("always matches").  The path steps are processed from the
 * last one back to the first.  For each level a pattern for the path up
 * to that level is assembled in xpath_rev, begin-tag and end-tag sets are
 * fetched with xpath_trunc(), and an optional attribute predicate becomes
 * a further set.  The current set is then either replaced by the
 * begin-tag set or wrapped with rset_create_between(begin, current, end,
 * attr).
 */
2187 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2188 NMEM stream, const char *rank_type, RSET rset,
2189 int xpath_len, struct xpath_location_step *xpath,
2192 struct rset_key_control *kc)
2195 int always_matches = rset ? 0 : 1;
2203 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2204 for (i = 0; i<xpath_len; i++)
2206 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
/* the lines below list example X-Path expressions next to the nested
   range()/freetext() combination each one corresponds to */
2218 a[@attr = value]/b[@other = othervalue]
2220 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2221 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2222 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2223 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2224 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2225 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* install no character-map handler (both arguments 0) for the
   dictionary lookups done below */
2229 dict_grep_cmap(zh->reg->dict, 0, 0);
2232 int level = xpath_len;
/* walk the path from the innermost (last) step outwards */
2235 while (--level >= 0)
2237 WRBUF xpath_rev = wrbuf_alloc();
2239 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* assemble the pattern from the current level back towards step 1, so
   the path is written in reverse order */
2241 for (i = level; i >= 1; --i)
2243 const char *cp = xpath[i].part;
2249 wrbuf_puts(xpath_rev, "[^/]*");
2250 else if (*cp == ' ')
2251 wrbuf_puts(xpath_rev, "\001 ");
2253 wrbuf_putc(xpath_rev, *cp);
2255 /* wrbuf_putc does not null-terminate , but
2256 wrbuf_puts below ensures it does.. so xpath_rev
2257 is OK iff length is > 0 */
2259 wrbuf_puts(xpath_rev, "/");
2261 else if (i == 1) /* // case */
2262 wrbuf_puts(xpath_rev, ".*");
/* attribute predicate on this level: build "name" or "name=value" and
   look it up in the attribute-name index */
2264 if (xpath[level].predicate &&
2265 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2266 xpath[level].predicate->u.relation.name[0])
2268 WRBUF wbuf = wrbuf_alloc();
/* name+1 skips the first character of the relation name (presumably the
   leading '@' - confirm) */
2269 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2270 if (xpath[level].predicate->u.relation.value)
2272 const char *cp = xpath[level].predicate->u.relation.value;
2273 wrbuf_putc(wbuf, '=');
/* characters listed in REGEX_CHARS are backslash-escaped so the value is
   matched literally */
2277 if (strchr(REGEX_CHARS, *cp))
2278 wrbuf_putc(wbuf, '\\');
2279 wrbuf_putc(wbuf, *cp);
2283 rset_attr = xpath_trunc(
2284 zh, stream, "0", wrbuf_cstr(wbuf),
2285 ZEBRA_XPATH_ATTR_NAME,
2287 wrbuf_destroy(wbuf);
2293 wrbuf_destroy(xpath_rev);
2297 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2298 wrbuf_cstr(xpath_rev));
/* an empty pattern means there is nothing to look up for this level */
2299 if (wrbuf_len(xpath_rev))
2301 rset_start_tag = xpath_trunc(zh, stream, "0",
2302 wrbuf_cstr(xpath_rev),
2303 ZEBRA_XPATH_ELM_BEGIN,
2306 rset = rset_start_tag;
2309 rset_end_tag = xpath_trunc(zh, stream, "0",
2310 wrbuf_cstr(xpath_rev),
2311 ZEBRA_XPATH_ELM_END,
/* keep only hits located between the begin and end tag sets, optionally
   constrained by the attribute set */
2314 rset = rset_create_between(rset_nmem, kc, kc->scope,
2315 rset_start_tag, rset,
2316 rset_end_tag, rset_attr);
2319 wrbuf_destroy(xpath_rev);
/* capacity of the xpath step array declared in rpn_search_database() and
   passed as the limit to rpn_check_xpath() */
2327 #define MAX_XPATH_STEPS 10
/* forward declaration: rpn_search_APT() calls rpn_search_database()
   before its definition */
2329 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2330 Z_AttributesPlusTerm *zapt,
2331 const Odr_oid *attributeSet,
2332 zint hits_limit, NMEM stream,
2333 Z_SortKeySpecList *sort_sequence,
2336 struct rset_key_control *kc);
/* rpn_search_APT: evaluate one attributes-plus-term operand against every
 * database in basenames and merge the per-database results.
 *
 * One RSET slot per database is allocated on stream.  For each database,
 * zebraExplain_curDatabase() selects it; a non-zero result is reported as
 * YAZ_BIB1_DATABASE_UNAVAILABLE.  rpn_search_database() then fills the
 * slot.  If any database fails, the sets created so far are deleted.
 * On success *rset is a null set when num_bases is 0 and otherwise an OR
 * over the per-database sets (a branch ahead of the num_bases == 0 test
 * presumably returns the single set directly - confirm).
 */
2338 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2339 const Odr_oid *attributeSet,
2340 zint hits_limit, NMEM stream,
2341 Z_SortKeySpecList *sort_sequence,
2342 int num_bases, const char **basenames,
2345 struct rset_key_control *kc)
2347 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2348 ZEBRA_RES res = ZEBRA_OK;
2350 for (i = 0; i < num_bases; i++)
/* make basenames[i] the current database; non-zero means it cannot be
   used */
2353 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2355 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2360 res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2362 rset_nmem, rsets+i, kc);
2363 if (res != ZEBRA_OK)
2366 if (res != ZEBRA_OK)
2367 { /* must clean up the already created sets */
2369 rset_delete(rsets[i]);
2376 else if (num_bases == 0)
2377 *rset = rset_create_null(rset_nmem, kc, 0);
2379 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* rpn_search_database: evaluate one attributes-plus-term operand against
 * the currently selected database.
 *
 * Outline of the visible steps:
 *  - zebra_maps_attr() derives index_type, search_type, rank_type,
 *    complete_flag and sort_flag from the APT;
 *  - the term is converted to UTF-8 into termz;
 *  - a sort operand is handed to rpn_sort_spec() (presumably when
 *    sort_flag is set - confirm);
 *  - rpn_check_xpath() detects an X-Path use attribute; the last step
 *    decides between attribute data and element data as the index to
 *    search;
 *  - relation 103 (always matches) skips the term search and goes
 *    straight to rpn_search_xpath() with no term set;
 *  - otherwise search_type selects the strategy: "phrase", "and-list",
 *    "or-list", "local" or "numeric"; anything else is reported as
 *    YAZ_BIB1_UNSUPP_SEARCH;
 *  - the term result is finally passed through rpn_search_xpath().
 */
2385 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2386 Z_AttributesPlusTerm *zapt,
2387 const Odr_oid *attributeSet,
2388 zint hits_limit, NMEM stream,
2389 Z_SortKeySpecList *sort_sequence,
2392 struct rset_key_control *kc)
2394 ZEBRA_RES res = ZEBRA_OK;
2395 const char *index_type;
2396 char *search_type = NULL;
2397 char rank_type[128];
2400 char termz[IT_MAX_WORD+1];
2402 const char *xpath_use = 0;
2403 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2407 log_level_rpn = yaz_log_module_level("rpn");
/* NOTE(review): search_type starts as NULL and is later passed to "%s"
   logging and to strcmp(); this relies on zebra_maps_attr() always
   setting it - confirm */
2410 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2411 rank_type, &complete_flag, &sort_flag);
2413 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2414 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2415 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2416 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2418 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2422 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2423 rank_type, rset_nmem, rset, kc);
2424 /* consider if an X-Path query is used */
2425 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2426 xpath, MAX_XPATH_STEPS, stream);
2429 if (xpath[xpath_len-1].part[0] == '@')
2430 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2432 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2439 attr_init_APT(&relation, zapt, 2);
2440 relation_value = attr_find(&relation, NULL);
2442 if (relation_value == 103) /* alwaysmatches */
2444 *rset = 0; /* signal no "term" set */
2445 return rpn_search_xpath(zh, stream, rank_type, *rset,
2446 xpath_len, xpath, rset_nmem, rset, kc);
2451 /* search using one of the various search type strategies
2452 termz is our UTF-8 search term
2453 attributeSet is top-level default attribute set
2454 stream is ODR for search
2455 reg_id is the register type
2456 complete_flag is 1 for complete subfield, 0 for incomplete
2457 xpath_use is use-attribute to be used for X-Path search, 0 for none
2459 if (!strcmp(search_type, "phrase"))
2461 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2463 index_type, complete_flag, rank_type,
2468 else if (!strcmp(search_type, "and-list"))
2470 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2472 index_type, complete_flag, rank_type,
2477 else if (!strcmp(search_type, "or-list"))
2479 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2481 index_type, complete_flag, rank_type,
2486 else if (!strcmp(search_type, "local"))
2488 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2489 rank_type, rset_nmem, rset, kc);
2491 else if (!strcmp(search_type, "numeric"))
2493 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2495 index_type, complete_flag, rank_type,
2502 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2505 if (res != ZEBRA_OK)
2509 return rpn_search_xpath(zh, stream, rank_type, *rset,
2510 xpath_len, xpath, rset_nmem, rset, kc);
/* forward declaration: rpn_search_structure() is called by
   rpn_search_top() and calls itself for both children of a complex node */
2513 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2514 const Odr_oid *attributeSet,
2516 NMEM stream, NMEM rset_nmem,
2517 Z_SortKeySpecList *sort_sequence,
2518 int num_bases, const char **basenames,
2519 RSET **result_sets, int *num_result_sets,
2520 Z_Operator *parent_op,
2521 struct rset_key_control *kc);
/* rpn_get_top_approx_limit: walk an RPN query tree looking for attribute
 * type 12 on its APT operands.
 *
 * Complex nodes recurse into s1 and then, if that succeeded, into s2.
 * For a simple node holding an APT, attribute type 12 is looked up with
 * attr_find(); what is done with the value found is presumably to store
 * it as the approximation limit for the caller - confirm.
 */
2523 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2526 ZEBRA_RES res = ZEBRA_OK;
2527 if (zs->which == Z_RPNStructure_complex)
/* res is still ZEBRA_OK from its initialiser here, so this first test
   always passes; the second one stops the walk after a failed left
   subtree */
2529 if (res == ZEBRA_OK)
2530 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2532 if (res == ZEBRA_OK)
2533 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2536 else if (zs->which == Z_RPNStructure_simple)
2538 if (zs->u.simple->which == Z_Operand_APT)
2540 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2541 AttrType global_hits_limit_attr;
2544 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2546 l = attr_find(&global_hits_limit_attr, NULL);
/* rpn_search_top: entry point for evaluating a whole RPN query tree.
 *
 * A key control object is created for the search and the tree is
 * evaluated with rpn_search_structure(), passing no parent operator.
 * On failure every result set produced so far is deleted.  On success
 * the evaluation must have produced exactly one non-null set (asserted),
 * which is handed back through *result_set.
 */
2554 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2555 const Odr_oid *attributeSet,
2557 NMEM stream, NMEM rset_nmem,
2558 Z_SortKeySpecList *sort_sequence,
2559 int num_bases, const char **basenames,
2562 RSET *result_sets = 0;
2563 int num_result_sets = 0;
2565 struct rset_key_control *kc = zebra_key_control_create(zh);
2567 res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2570 num_bases, basenames,
2571 &result_sets, &num_result_sets,
2572 0 /* no parent op */,
2574 if (res != ZEBRA_OK)
2577 for (i = 0; i<num_result_sets; i++)
2578 rset_delete(result_sets[i]);
/* with no parent operator the root always combines its children, so a
   single set is expected here */
2583 assert(num_result_sets == 1);
2584 assert(result_sets);
2585 assert(*result_sets);
2586 *result_set = *result_sets;
/* rpn_search_structure: recursively evaluate one node of an RPN query
 * tree, returning a list of result sets in *result_sets and its length in
 * *num_result_sets.
 *
 * Complex node:
 *   Both children are evaluated.  If either fails, all sets produced so
 *   far for this node are deleted.  The children's lists are concatenated
 *   into a new array on stream.  If there is no parent operator, the
 *   parent's operator differs from this one, or this operator is neither
 *   AND nor OR, the list is combined right away into one set (AND, OR,
 *   AND-NOT or proximity) and a one-element list is returned.  Otherwise
 *   the uncombined list is passed up, so the parent can combine a chain
 *   of identical AND/OR operators in one step.
 *   Proximity requires a "known" unit equal to Z_ProxUnit_word; other
 *   units are reported as YAZ_BIB1_UNSUPP_PROX_UNIT_CODE.  Any other
 *   operator is reported as YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node:
 *   An APT operand is evaluated with rpn_search_APT(); a result set
 *   reference is resolved with resultSetRef(), and
 *   YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST is reported for it
 *   (presumably when the lookup yields nothing - confirm).  Other operand
 *   kinds are reported as YAZ_BIB1_UNSUPP_SEARCH.  The single set is
 *   returned as a one-element list.
 *
 * Any other node kind is reported as YAZ_BIB1_UNSUPP_SEARCH.
 */
2592 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2593 const Odr_oid *attributeSet, zint hits_limit,
2594 NMEM stream, NMEM rset_nmem,
2595 Z_SortKeySpecList *sort_sequence,
2596 int num_bases, const char **basenames,
2597 RSET **result_sets, int *num_result_sets,
2598 Z_Operator *parent_op,
2599 struct rset_key_control *kc)
2601 *num_result_sets = 0;
2602 if (zs->which == Z_RPNStructure_complex)
2605 Z_Operator *zop = zs->u.complex->roperator;
2606 RSET *result_sets_l = 0;
2607 int num_result_sets_l = 0;
2608 RSET *result_sets_r = 0;
2609 int num_result_sets_r = 0;
/* left operand */
2611 res = rpn_search_structure(zh, zs->u.complex->s1,
2612 attributeSet, hits_limit, stream, rset_nmem,
2614 num_bases, basenames,
2615 &result_sets_l, &num_result_sets_l,
2617 if (res != ZEBRA_OK)
2620 for (i = 0; i<num_result_sets_l; i++)
2621 rset_delete(result_sets_l[i]);
/* right operand */
2624 res = rpn_search_structure(zh, zs->u.complex->s2,
2625 attributeSet, hits_limit, stream, rset_nmem,
2627 num_bases, basenames,
2628 &result_sets_r, &num_result_sets_r,
/* the right side failed: the left side's sets must be released as well */
2630 if (res != ZEBRA_OK)
2633 for (i = 0; i<num_result_sets_l; i++)
2634 rset_delete(result_sets_l[i]);
2635 for (i = 0; i<num_result_sets_r; i++)
2636 rset_delete(result_sets_r[i]);
2640 /* make a new list of result for all children */
2641 *num_result_sets = num_result_sets_l + num_result_sets_r;
2642 *result_sets = nmem_malloc(stream, *num_result_sets *
2643 sizeof(**result_sets));
2644 memcpy(*result_sets, result_sets_l,
2645 num_result_sets_l * sizeof(**result_sets));
2646 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2647 num_result_sets_r * sizeof(**result_sets));
2649 if (!parent_op || parent_op->which != zop->which
2650 || (zop->which != Z_Operator_and &&
2651 zop->which != Z_Operator_or))
2653 /* parent node different from this one (or non-present) */
2654 /* we must combine result sets now */
2658 case Z_Operator_and:
2659 rset = rset_create_and(rset_nmem, kc,
2661 *num_result_sets, *result_sets);
2664 rset = rset_create_or(rset_nmem, kc,
2665 kc->scope, 0, /* termid */
2666 *num_result_sets, *result_sets);
2668 case Z_Operator_and_not:
2669 rset = rset_create_not(rset_nmem, kc,
2674 case Z_Operator_prox:
/* only proximity with a "known" unit is handled */
2675 if (zop->u.prox->which != Z_ProximityOperator_known)
2678 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* ... and of the known units only the word unit */
2682 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2684 zebra_setError_zint(zh,
2685 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2686 *zop->u.prox->u.known);
2691 rset = rset_create_prox(rset_nmem, kc,
2693 *num_result_sets, *result_sets,
2694 *zop->u.prox->ordered,
2695 (!zop->u.prox->exclusion ?
2696 0 : *zop->u.prox->exclusion),
2697 *zop->u.prox->relationType,
2698 *zop->u.prox->distance );
2702 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the children's list by a one-element list holding the combined
   set */
2705 *num_result_sets = 1;
2706 *result_sets = nmem_malloc(stream, *num_result_sets *
2707 sizeof(**result_sets));
2708 (*result_sets)[0] = rset;
2711 else if (zs->which == Z_RPNStructure_simple)
2716 if (zs->u.simple->which == Z_Operand_APT)
2718 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2719 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2720 attributeSet, hits_limit,
2721 stream, sort_sequence,
2722 num_bases, basenames, rset_nmem, &rset,
2724 if (res != ZEBRA_OK)
2727 else if (zs->u.simple->which == Z_Operand_resultSetId)
2729 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2730 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2734 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2735 zs->u.simple->u.resultSetId);
2742 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields exactly one set */
2745 *num_result_sets = 1;
2746 *result_sets = nmem_malloc(stream, *num_result_sets *
2747 sizeof(**result_sets));
2748 (*result_sets)[0] = rset;
2752 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2763 * c-file-style: "Stroustrup"
2764 * indent-tabs-mode: nil
2766 * vim: shiftwidth=4 tabstop=8 expandtab