1 /* $Id: rpnsearch.c,v 1.26 2007-12-03 14:33:22 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* File-scope logging state.  log_level_rpn is assigned from
   yaz_log_module_level("rpn") in add_isam_p() and is used as the level
   argument of the yaz_log() calls in this file.
   NOTE(review): log_level_set presumably guards that one-time
   initialisation -- its use is not visible in this extract. */
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
/* NOTE(review): no use of TERMSET_DISABLE is visible in this extract;
   presumably it gates the termset handling in grep_info_prepare(). */
43 #define TERMSET_DISABLE 1
/* Character-map callback installed on the dictionary by
   rpn_char_map_prepare() through dict_grep_cmap().
   vp   - opaque context, cast to struct rpn_char_map_info
   from - input position pointer, forwarded to zebra_maps_input()
   len  - number of bytes available at *from
   The mapping itself is done by zebra_maps_input(p->zm, from, len, 0).
   The yaz_log() calls dump the mapped bytes in hex; presumably they are
   debug-only (the guarding condition is not visible here). */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Select the character map that dictionary grep will use on reg->dict.
   reg      - register whose dictionary is configured
   zm       - zebra map for the index being searched
   map_info - context handed to rpn_char_map_handler()
   For ICU-based maps (zebra_maps_is_icu(zm)) the handler is cleared by
   passing 0, 0; the second dict_grep_cmap() call installs
   rpn_char_map_handler with map_info as context (presumably the else
   branch -- the keyword is not visible here). */
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 if (zebra_maps_is_icu(zm))
69 dict_grep_cmap(reg->dict, 0, 0);
71 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
85 const char *index_type;
/* Record one dictionary hit in the grep_info accumulator.
   name - dictionary key; it starts with an encoded ordinal (decoded with
          key_SU_decode) followed by the term bytes
   info - dictionary value; info[0] must equal sizeof(ISAM_P) and the
          ISAM pointer itself follows at info+1
   p    - accumulator (struct grep_info) that collects the ISAM pointers
   The buffers p->isam_p_buf and p->term_no are grown together to
   2*size + 100 entries whenever the index reaches the current size.
   When a termset is active the untranslated term is also reported via
   resultSetAddTerm(). */
89 static int add_isam_p(const char *name, const char *info,
94 log_level_rpn = yaz_log_module_level("rpn");
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
102 if (p->isam_p_indx == p->isam_p_size)
104 ISAM_P *new_isam_p_buf;
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
117 p->isam_p_buf = new_isam_p_buf;
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* carry over the old term numbers: the source must be p->term_no, not
   the ISAM pointer buffer (different element type and contents) */
123 memcpy(new_term_no, p->term_no,
124 p->isam_p_indx * sizeof(*p->term_no));
127 p->term_no = new_term_no;
/* the first info byte is the payload length; it must be one ISAM_P */
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
136 char term_tmp[IT_MAX_WORD];
138 const char *index_name;
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
/* name+len is the term part of the key; convert it back for display */
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
/* Callback handed to dict_lookup_grep(): forwards each dictionary hit
   to add_isam_p(), treating the opaque pointer p as a struct grep_info. */
154 static int grep_handle(char *name, const char *info, void *p)
156 return add_isam_p(name, info, (struct grep_info *) p);
/* Advance *src past leading white space before a term is parsed.
   zm    - zebra map used to classify input via zebra_maps_input()
   src   - in/out position in the query term
   ct1   - optional set of characters tested with strchr() against the
           current character
   ct2   - second optional character set, tested the same way
   first - forwarded to zebra_maps_input()
   Callers test the result as a boolean (zero means no term follows).
   NOTE(review): what happens when a ct1/ct2 character is matched is not
   visible in this extract; presumably scanning stops there. */
159 static int term_pre(zebra_map_t zm, const char **src,
160 const char *ct1, const char *ct2, int first)
162 const char *s1, *s0 = *src;
165 /* skip white space */
168 if (ct1 && strchr(ct1, *s0))
170 if (ct2 && strchr(ct2, *s0))
173 map = zebra_maps_input(zm, &s1, strlen(s1), first);
174 if (**map != *CHR_SPACE)
/* Format in_buf as a sequence of "HH:c " items (hex value and a
   character) into out_buf, for logging.
   out_buf  - destination; must be larger than 20 bytes (asserted)
   out_size - size of out_buf
   in_buf   - bytes to render
   in_size  - number of bytes in in_buf
   Bytes outside 32..126 get special treatment for the character part
   (the substitute value of pc is not visible here).  Once the output
   is within 20 bytes of out_size, ".." is appended; presumably the loop
   then stops. */
183 static void esc_str(char *out_buf, size_t out_size,
184 const char *in_buf, int in_size)
190 assert(out_size > 20);
192 for (k = 0; k<in_size; k++)
/* mask to 0..255 so that a signed char does not produce a negative c */
194 int c = in_buf[k] & 0xff;
196 if (c < 32 || c > 126)
200 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
201 if (strlen(out_buf) > out_size-20)
203 strcat(out_buf, "..");
/* Characters that get a backslash in front of them when raw term bytes
   are written to the dictionary regular expression (see add_non_space,
   term_100_icu and term_100). */
209 #define REGEX_CHARS " []()|.*+?!"
/* Append one non-space unit of the query term to both outputs.
   start, end  - raw input span [start, end)
   dst_term    - display term buffer; the raw span is copied to
                 dst_term + *dst_ptr
   dst_ptr     - in/out write index into dst_term
   map         - mapped form of the span; map[0] is what is written to
                 the dictionary expression in the mapped case
   q_map_match - flag from zebra_maps_search()
   The dictionary expression receives either the raw characters (with
   REGEX_CHARS backslash-escaped) or map[0].
   NOTE(review): the condition choosing between the two forms is not
   visible here; presumably it depends on q_map_match. */
211 static void add_non_space(const char *start, const char *end,
213 char *dst_term, int *dst_ptr,
214 const char **map, int q_map_match)
216 size_t sz = end - start;
217 memcpy(dst_term + *dst_ptr, start, sz);
223 if (strchr(REGEX_CHARS, *start))
224 wrbuf_putc(term_dict, '\\');
225 wrbuf_putc(term_dict, *start);
232 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
234 wrbuf_puts(term_dict, map[0]);
/* ICU variant of term_100(): the whole remaining input is handed to
   zebra_map_tokenize() and the resulting token bytes are appended to
   term_dict.
   zm          - zebra map (ICU based)
   src         - in/out query position; advanced to the end of input
   term_dict   - dictionary regular expression being built
   space_split - not referenced in the visible lines
   The raw input is appended to dst_term with strcat().
   NOTE(review): that strcat is unbounded; callers in this file pass
   buffers of IT_MAX_WORD+1 bytes -- confirm input length is limited.
   Token bytes found in REGEX_CHARS are preceded by a backslash; the
   byte value 1 written before a token byte is the internal regexp
   escape (the condition selecting it is not visible here). */
239 static int term_100_icu(zebra_map_t zm,
240 const char **src, WRBUF term_dict, int space_split,
244 const char *res_buf = 0;
246 int r = zebra_map_tokenize(zm, *src, strlen(*src),
249 yaz_log(YLOG_LOG, "term_100_icu r=%d", r);
251 strcat(dst_term, *src);
252 *src += strlen(*src);
257 for (i = 0; i < res_len; i++)
259 if (strchr(REGEX_CHARS, res_buf[i]))
260 wrbuf_putc(term_dict, '\\');
262 wrbuf_putc(term_dict, 1);
263 wrbuf_putc(term_dict, res_buf[i]);
/* a call with a null input fetches the next token of the same input */
265 r = zebra_map_tokenize(zm, 0, 0, &res_buf, &res_len);
/* term_100: handle term, where trunc = none (no operators at all).
   zm          - zebra map for the index
   src         - in/out query position
   term_dict   - dictionary regular expression being built
   space_split - non-zero when the term is split at white space; zero
                 for complete-subfield matching
   ICU maps are delegated to term_100_icu().  Callers test the result as
   a boolean; zero means no term was found (see the term_pre() check).
   In complete-subfield mode, spaces are remembered (space_start /
   space_end) and only written out when more non-space input follows. */
271 static int term_100(zebra_map_t zm,
272 const char **src, WRBUF term_dict, int space_split,
280 const char *space_start = 0;
281 const char *space_end = 0;
283 if (zebra_maps_is_icu(zm))
284 return term_100_icu(zm, src, term_dict, space_split, dst_term);
286 if (!term_pre(zm, src, NULL, NULL, !space_split))
293 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
296 if (**map == *CHR_SPACE)
299 else /* complete subfield only. */
301 if (**map == *CHR_SPACE)
302 { /* save space mapping for later .. */
307 else if (space_start)
308 { /* reload last space */
309 while (space_start < space_end)
311 if (strchr(REGEX_CHARS, *space_start))
312 wrbuf_putc(term_dict, '\\');
313 dst_term[j++] = *space_start;
314 wrbuf_putc(term_dict, *space_start);
319 space_start = space_end = 0;
324 add_non_space(s1, s0, term_dict, dst_term, &j,
/* term_101: handle term, where trunc = Process #.
   zm          - zebra map for the index
   src         - in/out query position
   term_dict   - dictionary regular expression being built
   space_split - non-zero when the term ends at the first mapped space
   "#" is the only operator character passed to term_pre(); the operator
   is written to the dictionary expression as ".*" and copied verbatim
   to dst_term.  Other input goes through zebra_maps_search() and
   add_non_space().  dst_term is NUL-terminated at the end. */
333 static int term_101(zebra_map_t zm,
334 const char **src, WRBUF term_dict, int space_split,
342 if (!term_pre(zm, src, "#", "#", !space_split))
350 wrbuf_puts(term_dict, ".*");
351 dst_term[j++] = *s0++;
357 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
358 if (space_split && **map == *CHR_SPACE)
362 add_non_space(s1, s0, term_dict, dst_term, &j,
366 dst_term[j++] = '\0';
/* term_103: handle term, where trunc = re-2 (regular expressions).
   zm          - zebra map for the index
   src         - in/out query position
   term_dict   - dictionary regular expression being built
   errors      - optional output; may be NULL (term_102 passes NULL)
   space_split - non-zero when the term ends at the first mapped space
   When errors is non-NULL and the term starts with "+D+" (D a single
   digit) followed by more input, *errors is set to D.
   Regular-expression operator characters are written to term_dict
   unchanged; other input goes through zebra_maps_search() and
   add_non_space(). */
372 static int term_103(zebra_map_t zm, const char **src,
373 WRBUF term_dict, int *errors, int space_split,
381 if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
384 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
385 isdigit(((const unsigned char *)s0)[1]))
387 *errors = s0[1] - '0';
394 if (strchr("^\\()[].*+?|-", *s0))
397 wrbuf_putc(term_dict, *s0);
405 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
406 if (space_split && **map == *CHR_SPACE)
410 add_non_space(s1, s0, term_dict, dst_term, &j,
/* term_102: handle term, where trunc = re-1 (regular expressions).
   Thin wrapper around term_103() that passes NULL for the errors
   output, so no "+D+" error prefix is interpreted. */
421 static int term_102(zebra_map_t zm, const char **src,
422 WRBUF term_dict, int space_split, char *dst_term)
424 return term_103(zm, src, term_dict, NULL, space_split, dst_term);
/* term_104: handle term, process # and !
   NOTE(review): the operator set actually passed to term_pre() is
   "?*#"; "!" is not in it -- the summary above may be out of date.
   zm          - zebra map for the index
   src         - in/out query position
   term_dict   - dictionary regular expression being built
   space_split - non-zero when the term ends at the first mapped space
   dst_term    - display form of the term; NUL-terminated at the end
   An operator may be followed by decimal digits, which are accumulated
   into limit and copied to dst_term.  The operators expand to ".?",
   ".*" or "." in the dictionary expression (which operator selects
   which expansion is not visible in this extract). */
429 static int term_104(zebra_map_t zm, const char **src,
430 WRBUF term_dict, int space_split, char *dst_term)
437 if (!term_pre(zm, src, "?*#", "?*#", !space_split))
445 dst_term[j++] = *s0++;
446 if (*s0 >= '0' && *s0 <= '9')
449 while (*s0 >= '0' && *s0 <= '9')
451 limit = limit * 10 + (*s0 - '0');
452 dst_term[j++] = *s0++;
458 wrbuf_puts(term_dict, ".?");
463 wrbuf_puts(term_dict, ".*");
469 wrbuf_puts(term_dict, ".*");
470 dst_term[j++] = *s0++;
475 wrbuf_puts(term_dict, ".");
476 dst_term[j++] = *s0++;
482 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
483 if (space_split && **map == *CHR_SPACE)
487 add_non_space(s1, s0, term_dict, dst_term, &j,
491 dst_term[j++] = '\0';
/* term_105/106: handle term, where trunc = Process * and ! and right trunc.
   zm             - zebra map for the index
   src            - in/out query position
   term_dict      - dictionary regular expression being built
   space_split    - non-zero when the term ends at the first mapped space
   dst_term       - display form of the term; NUL-terminated at the end
   right_truncate - string_term() passes 1 for truncation 105 and 0 for
                    106; presumably it controls the trailing ".*"
   The operators "*" and "!" expand to ".*" and "." in the dictionary
   expression (presumably in that order -- the tests are not visible). */
497 static int term_105(zebra_map_t zm, const char **src,
498 WRBUF term_dict, int space_split,
499 char *dst_term, int right_truncate)
506 if (!term_pre(zm, src, "*!", "*!", !space_split))
514 wrbuf_puts(term_dict, ".*");
515 dst_term[j++] = *s0++;
520 wrbuf_putc(term_dict, '.');
521 dst_term[j++] = *s0++;
527 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
528 if (space_split && **map == *CHR_SPACE)
532 add_non_space(s1, s0, term_dict, dst_term, &j,
537 wrbuf_puts(term_dict, ".*");
538 dst_term[j++] = '\0';
544 /* gen_regular_rel - generate regular expression from relation
545 * val: border value (inclusive)
546 * islt: 1 if <=; 0 if >=.
/* Build a regular expression that matches the decimal integers on one
   side of val and append it to term_dict.
   term_dict - output buffer; receives the finished expression
   val       - border value
   islt      - selects the direction of the comparison
   The expression is assembled in the fixed-size local dst_buf.  It
   starts with a branch covering all numbers of the opposite sign
   ("(-[0-9]+|(" or "([0-9]+|-("), then handles val digit by digit, and
   ends with a branch for numbers with fewer or more digits. */
548 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
550 char dst_buf[20*5*20]; /* assuming enough for expansion */
557 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
561 strcpy(dst, "(-[0-9]+|(");
569 strcpy(dst, "([0-9]+|-(");
580 sprintf(numstr, "%d", val);
/* walk the digits of val from the last one to the first */
581 for (w = strlen(numstr); --w >= 0; pos++)
600 strcpy(dst + dst_p, numstr);
601 dst_p = strlen(dst) - pos - 1;
629 for (i = 0; i<pos; i++)
642 /* match everything less than 10^(pos-1) */
644 for (i = 1; i<pos; i++)
645 strcat(dst, "[0-9]?");
649 /* match everything greater than 10^pos */
650 for (i = 0; i <= pos; i++)
651 strcat(dst, "[0-9]");
652 strcat(dst, "[0-9]*");
655 wrbuf_puts(term_dict, dst);
/* Copy one logical character from wsrc to term_p.
   term_p - destination buffer
   wsrc   - source buffer, read as a C string
   indx   - in/out read position in wsrc
   A backslash is copied together with the character after it, so an
   escaped character stays intact.
   NOTE(review): the increments of *indx are not visible in this
   extract; presumably it is advanced past every byte copied. */
658 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
660 const char *src = wrbuf_cstr(wsrc);
661 if (src[*indx] == '\\')
663 wrbuf_putc(term_p, src[*indx]);
666 wrbuf_putc(term_p, src[*indx]);
671 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
672 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
673 * >= abc ([b-].*|a[c-].*|ab[c-].*)
674 * ([^-a].*|a[^-b].*|ab[c-].*)
675 * < abc ([-0].*|a[-a].*|ab[-b].*)
676 * ([^a-].*|a[^b-].*|ab[^c-].*)
677 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
678 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Append to term_dict a regular expression that selects the dictionary
   entries standing in the requested relation to the next term.
   zh           - Zebra handle
   zapt         - query leaf; the relation is attribute type 2
   term_sub     - in/out position in the query term
   term_dict    - dictionary regular expression being built
   attributeSet - default attribute set
   zm           - zebra map for the index
   space_split  - forwarded to term_100()
   term_dst     - receives the display form of the term
   The term is first parsed with term_100() into the temporary buffer
   term_component, which is destroyed on every exit path visible here.
   The branches log "<", "<=", ">", ">=", "=" and "always matches"; an
   unsupported relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in
   *error_code.  Callers test the result as a boolean.
   The ordering branches emit one alternative per prefix of the term:
   the prefix, a character class bounded by the next character, then
   ".*".  The byte 1 before FIRST_IN_FIELD_CHAR is the internal regexp
   escape. */
680 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
681 const char **term_sub, WRBUF term_dict,
682 const Odr_oid *attributeSet,
683 zebra_map_t zm, int space_split, char *term_dst,
689 WRBUF term_component = wrbuf_alloc();
691 attr_init_APT(&relation, zapt, 2);
692 relation_value = attr_find(&relation, NULL);
695 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
696 switch (relation_value)
699 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
701 wrbuf_destroy(term_component);
704 yaz_log(log_level_rpn, "Relation <");
706 wrbuf_putc(term_dict, '(');
707 for (i = 0; i < wrbuf_len(term_component); )
712 wrbuf_putc(term_dict, '|');
714 string_rel_add_char(term_dict, term_component, &j);
716 wrbuf_putc(term_dict, '[');
718 wrbuf_putc(term_dict, '^');
720 wrbuf_putc(term_dict, 1);
721 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
723 string_rel_add_char(term_dict, term_component, &i);
724 wrbuf_putc(term_dict, '-');
726 wrbuf_putc(term_dict, ']');
727 wrbuf_putc(term_dict, '.');
728 wrbuf_putc(term_dict, '*');
730 wrbuf_putc(term_dict, ')');
733 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
735 wrbuf_destroy(term_component);
738 yaz_log(log_level_rpn, "Relation <=");
740 wrbuf_putc(term_dict, '(');
741 for (i = 0; i < wrbuf_len(term_component); )
746 string_rel_add_char(term_dict, term_component, &j);
747 wrbuf_putc(term_dict, '[');
749 wrbuf_putc(term_dict, '^');
751 wrbuf_putc(term_dict, 1);
752 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
754 string_rel_add_char(term_dict, term_component, &i);
755 wrbuf_putc(term_dict, '-');
757 wrbuf_putc(term_dict, ']');
758 wrbuf_putc(term_dict, '.');
759 wrbuf_putc(term_dict, '*');
761 wrbuf_putc(term_dict, '|');
/* final alternative: the term itself, for the "or equal" part */
763 for (i = 0; i < wrbuf_len(term_component); )
764 string_rel_add_char(term_dict, term_component, &i);
765 wrbuf_putc(term_dict, ')');
768 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
770 wrbuf_destroy(term_component);
773 yaz_log(log_level_rpn, "Relation >");
775 wrbuf_putc(term_dict, '(');
776 for (i = 0; i < wrbuf_len(term_component); )
781 string_rel_add_char(term_dict, term_component, &j);
782 wrbuf_putc(term_dict, '[');
784 wrbuf_putc(term_dict, '^');
785 wrbuf_putc(term_dict, '-');
786 string_rel_add_char(term_dict, term_component, &i);
788 wrbuf_putc(term_dict, ']');
789 wrbuf_putc(term_dict, '.');
790 wrbuf_putc(term_dict, '*');
792 wrbuf_putc(term_dict, '|');
/* final alternative: the term followed by at least one more character */
794 for (i = 0; i < wrbuf_len(term_component); )
795 string_rel_add_char(term_dict, term_component, &i);
796 wrbuf_putc(term_dict, '.');
797 wrbuf_putc(term_dict, '+');
798 wrbuf_putc(term_dict, ')');
801 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
803 wrbuf_destroy(term_component);
806 yaz_log(log_level_rpn, "Relation >=");
808 wrbuf_putc(term_dict, '(');
809 for (i = 0; i < wrbuf_len(term_component); )
814 wrbuf_putc(term_dict, '|');
816 string_rel_add_char(term_dict, term_component, &j);
817 wrbuf_putc(term_dict, '[');
/* the last character uses an inclusive class, earlier ones a negated one */
819 if (i < wrbuf_len(term_component)-1)
821 wrbuf_putc(term_dict, '^');
822 wrbuf_putc(term_dict, '-');
823 string_rel_add_char(term_dict, term_component, &i);
827 string_rel_add_char(term_dict, term_component, &i);
828 wrbuf_putc(term_dict, '-');
830 wrbuf_putc(term_dict, ']');
831 wrbuf_putc(term_dict, '.');
832 wrbuf_putc(term_dict, '*');
834 wrbuf_putc(term_dict, ')');
841 yaz_log(log_level_rpn, "Relation =");
842 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
844 wrbuf_destroy(term_component);
847 wrbuf_puts(term_dict, "(");
848 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
849 wrbuf_puts(term_dict, ")");
852 yaz_log(log_level_rpn, "Relation always matches");
853 /* skip to end of term (we don't care what it is) */
854 while (**term_sub != '\0')
858 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
859 wrbuf_destroy(term_component);
862 wrbuf_destroy(term_component);
/* Forward declaration: term_trunc() below calls string_term() before
   its definition appears in the file. */
866 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
867 const char **term_sub,
869 const Odr_oid *attributeSet, NMEM stream,
870 struct grep_info *grep_info,
871 const char *index_type, int complete_flag,
873 const char *xpath_use,
874 struct ord_list **ol);
/* Read the hits-limit and term-reference-id attributes of a query leaf.
   zh               - Zebra handle; zh->approx_limit is the global limit
   zapt             - query leaf
   hits_limit_value - output, from attribute type 11
   term_ref_id_str  - output, from attribute type 10; a numeric id is
                      formatted into a 20-byte buffer taken from nmem
   Resolution of the limit:
     -1 (not given) and a term reference present -> global limit
     -1 (not given) and no term reference        -> 0 (no counting)
      0                                          -> global limit
   Any other value is left as given. */
876 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
877 Z_AttributesPlusTerm *zapt,
878 zint *hits_limit_value,
879 const char **term_ref_id_str,
882 AttrType term_ref_id_attr;
883 AttrType hits_limit_attr;
886 attr_init_APT(&hits_limit_attr, zapt, 11);
887 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
889 attr_init_APT(&term_ref_id_attr, zapt, 10);
890 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
891 if (term_ref_id_int >= 0)
893 char *res = nmem_malloc(nmem, 20);
894 sprintf(res, "%d", term_ref_id_int);
895 *term_ref_id_str = res;
898 /* no limit given ? */
899 if (*hits_limit_value == -1)
901 if (*term_ref_id_str)
903 /* use global if term_ref is present */
904 *hits_limit_value = zh->approx_limit;
908 /* no counting if term_ref is not present */
909 *hits_limit_value = 0;
912 else if (*hits_limit_value == 0)
914 /* 0 is the same as global limit */
915 *hits_limit_value = zh->approx_limit;
917 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
918 *term_ref_id_str ? *term_ref_id_str : "none",
/* Look up the next term of a query leaf and build its result set.
   zh            - Zebra handle
   zapt          - query leaf
   term_sub      - in/out position in the query term; a null *term_sub
                   after the lookup means there are no more terms
   attributeSet  - default attribute set
   stream        - memory for the ord list
   grep_info     - accumulator for ISAM pointers; its index is reset to
                   0 before each lookup
   index_type    - register type
   complete_flag - forwarded to string_term()
   rank_type     - forwarded to rset_trunc()
   xpath_use     - forwarded to string_term()
   kc            - key control for the created result set
   The dictionary expression lives in a temporary WRBUF that is
   destroyed as soon as string_term() returns.  The collected ISAM
   pointers are then combined by rset_trunc() into *rset, using the hits
   limit read by zebra_term_limits_APT(). */
923 static ZEBRA_RES term_trunc(ZebraHandle zh,
924 Z_AttributesPlusTerm *zapt,
925 const char **term_sub,
926 const Odr_oid *attributeSet, NMEM stream,
927 struct grep_info *grep_info,
928 const char *index_type, int complete_flag,
930 const char *rank_type,
931 const char *xpath_use,
934 struct rset_key_control *kc)
938 zint hits_limit_value;
939 const char *term_ref_id_str = 0;
940 WRBUF term_dict = wrbuf_alloc();
943 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
945 grep_info->isam_p_indx = 0;
946 res = string_term(zh, zapt, term_sub, term_dict,
947 attributeSet, stream, grep_info,
948 index_type, complete_flag,
949 term_dst, xpath_use, &ol);
950 wrbuf_destroy(term_dict);
953 if (!*term_sub) /* no more terms ? */
955 yaz_log(log_level_rpn, "term: %s", term_dst);
956 *rset = rset_trunc(zh, grep_info->isam_p_buf,
957 grep_info->isam_p_indx, term_dst,
958 strlen(term_dst), rank_type, 1 /* preserve pos */,
959 zapt->term->which, rset_nmem,
960 kc, kc->scope, ol, index_type, hits_limit_value,
/* Translate the next term of a query leaf into a dictionary regular
   expression and run it against the dictionary.
   zh            - Zebra handle
   zapt          - query leaf; truncation is attribute type 5
   term_sub      - in/out position in the query term
   attributeSet  - default attribute set
   stream        - memory for the ord list
   grep_info     - accumulator filled by the dictionary callback
   index_type    - register type, selects the zebra map
   complete_flag - non-zero for complete-subfield matching; disables
                   splitting at white space
   xpath_use     - forwarded to zebra_apt_get_ord()
   ol            - output list of the index ordinals searched
   The expression is "(" ordinal-bytes ")" followed by a parenthesised
   term part whose shape depends on the truncation attribute:
     -1, 100  relation handling via string_relation()
     1, 2, 3  right, left, left-and-right truncation
     101      "#" processing        102, 103  regular expressions
     104      "#" and "!"           105, 106  "*" and "!"
   Any other value is reported as YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE.
   Every ordinal byte is preceded by the byte 1, the internal regexp
   escape, so that it is matched literally. */
967 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
968 const char **term_sub,
970 const Odr_oid *attributeSet, NMEM stream,
971 struct grep_info *grep_info,
972 const char *index_type, int complete_flag,
974 const char *xpath_use,
975 struct ord_list **ol)
979 int truncation_value;
981 struct rpn_char_map_info rcmi;
983 int space_split = complete_flag ? 0 : 1;
986 int max_pos, prefix_len = 0;
990 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
992 *ol = ord_list_create(stream);
994 rpn_char_map_prepare(zh->reg, zm, &rcmi);
995 attr_init_APT(&truncation, zapt, 5);
996 truncation_value = attr_find(&truncation, NULL);
997 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
999 termp = *term_sub; /* start of term for each database */
1001 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1002 attributeSet, &ord) != ZEBRA_OK)
1008 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1010 *ol = ord_list_append(stream, *ol, ord);
1011 ord_len = key_SU_encode(ord, ord_buf);
1013 wrbuf_putc(term_dict, '(');
1015 for (i = 0; i<ord_len; i++)
1017 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1018 wrbuf_putc(term_dict, ord_buf[i]);
1020 wrbuf_putc(term_dict, ')');
/* remember where the term part starts, for the log output below */
1022 prefix_len = wrbuf_len(term_dict);
1024 switch (truncation_value)
1026 case -1: /* not specified */
1027 case 100: /* do not truncate */
1028 if (!string_relation(zh, zapt, &termp, term_dict,
1030 zm, space_split, term_dst,
1035 zebra_setError(zh, relation_error, 0);
1042 case 1: /* right truncation */
1043 wrbuf_putc(term_dict, '(');
1044 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1049 wrbuf_puts(term_dict, ".*)");
1051 case 2: /* left truncation */
1052 wrbuf_puts(term_dict, "(.*");
1053 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1058 wrbuf_putc(term_dict, ')');
1060 case 3: /* left&right truncation */
1061 wrbuf_puts(term_dict, "(.*");
1062 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1067 wrbuf_puts(term_dict, ".*)");
1069 case 101: /* process # in term */
1070 wrbuf_putc(term_dict, '(');
1071 if (!term_101(zm, &termp, term_dict, space_split, term_dst))
1076 wrbuf_puts(term_dict, ")");
1078 case 102: /* Regexp-1 */
1079 wrbuf_putc(term_dict, '(');
1080 if (!term_102(zm, &termp, term_dict, space_split, term_dst))
1085 wrbuf_putc(term_dict, ')');
1087 case 103: /* Regexp-2 */
1089 wrbuf_putc(term_dict, '(');
/* term_103 stores the "+D+" error count in regex_range, which is
   then handed to dict_lookup_grep() below */
1090 if (!term_103(zm, &termp, term_dict, &regex_range,
1091 space_split, term_dst))
1096 wrbuf_putc(term_dict, ')');
1098 case 104: /* process # and ! in term */
1099 wrbuf_putc(term_dict, '(');
1100 if (!term_104(zm, &termp, term_dict, space_split, term_dst))
1105 wrbuf_putc(term_dict, ')');
1107 case 105: /* process * and ! in term */
1108 wrbuf_putc(term_dict, '(');
1109 if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1))
1114 wrbuf_putc(term_dict, ')');
1116 case 106: /* process * and ! in term */
1117 wrbuf_putc(term_dict, '(');
1118 if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0))
1123 wrbuf_putc(term_dict, ')');
1126 zebra_setError_zint(zh,
1127 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1134 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1135 esc_str(buf, sizeof(buf), input, strlen(input));
1138 WRBUF pr_wr = wrbuf_alloc();
1140 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1141 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1142 wrbuf_destroy(pr_wr);
1144 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1145 grep_info, &max_pos,
1146 ord_len /* number of "exact" chars */,
1149 zebra_set_partial_result(zh);
1151 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1153 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the two buffers owned by a grep_info: the term number array
   and the ISAM pointer array. */
1159 static void grep_info_delete(struct grep_info *grep_info)
1162 xfree(grep_info->term_no);
1164 xfree(grep_info->isam_p_buf);
/* Initialise a grep_info for one query leaf.
   zh         - Zebra handle; zh->res supplies the "truncmax" setting
   zapt       - query leaf
   grep_info  - structure to initialise; buffers start out empty
   index_type - register type stored in grep_info
   trunc_max comes from the "truncmax" resource (default "10000") and is
   overridden by attribute type 13 when that attribute is present.
   Attribute type 8 names a termset: a numeric value is formatted into a
   name, otherwise the string value is used, and the set is created with
   resultSetAdd().  Failure to create it is reported as
   YAZ_BIB1_ILLEGAL_RESULT_SET_NAME.
   NOTE(review): the "termset" YAZ_BIB1_UNSUPP_SEARCH error is
   presumably raised when termsets are compiled out -- the guard is not
   visible in this extract. */
1167 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1168 Z_AttributesPlusTerm *zapt,
1169 struct grep_info *grep_info,
1170 const char *index_type)
1173 grep_info->term_no = 0;
1175 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1176 grep_info->isam_p_size = 0;
1177 grep_info->isam_p_buf = NULL;
1179 grep_info->index_type = index_type;
1180 grep_info->termset = 0;
1186 attr_init_APT(&truncmax, zapt, 13);
1187 truncmax_value = attr_find(&truncmax, NULL);
1188 if (truncmax_value != -1)
1189 grep_info->trunc_max = truncmax_value;
1194 int termset_value_numeric;
1195 const char *termset_value_string;
1197 attr_init_APT(&termset, zapt, 8);
1198 termset_value_numeric =
1199 attr_find_ex(&termset, NULL, &termset_value_string);
1200 if (termset_value_numeric != -1)
1203 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1207 const char *termset_name = 0;
/* -2 presumably marks a string-valued attribute; any other value is
   formatted as a decimal set name */
1208 if (termset_value_numeric != -2)
1211 sprintf(resname, "%d", termset_value_numeric);
1212 termset_name = resname;
1215 termset_name = termset_value_string;
1216 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1217 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1218 if (!grep_info->termset)
1220 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1230 \brief Create result set(s) for list of terms
1231 \param zh Zebra Handle
1232 \param zapt Attributes Plus Term (RPN leaf)
1233 \param termz term as used in query but converted to UTF-8
1234 \param attributeSet default attribute set
1235 \param stream memory for result
1236 \param index_type register type ("w", "p",..)
1237 \param complete_flag whether it's phrases or not
1238 \param rank_type term flags for ranking
1239 \param xpath_use use attribute for X-Path (-1 for no X-path)
1240 \param rset_nmem memory for result sets
1241 \param result_sets output result set for each term in list (output)
1242 \param num_result_sets number of output result sets
1243 \param kc rset key control to be used for created result sets
/* Build one result set per term of the query string, by calling
   term_trunc() repeatedly.
   The output array *result_sets is allocated from stream and replaced
   by a larger copy whenever it is full.  If term_trunc() fails, every
   result set created so far is deleted and the grep_info is released
   before returning.  A term that produces a null result set ends the
   list.  The grep_info is also released on the normal exit path. */
1245 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1246 Z_AttributesPlusTerm *zapt,
1248 const Odr_oid *attributeSet,
1250 const char *index_type, int complete_flag,
1251 const char *rank_type,
1252 const char *xpath_use,
1254 RSET **result_sets, int *num_result_sets,
1255 struct rset_key_control *kc)
1257 char term_dst[IT_MAX_WORD+1];
1258 struct grep_info grep_info;
1259 const char *termp = termz;
1262 *num_result_sets = 0;
1264 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1270 if (alloc_sets == *num_result_sets)
1273 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1276 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1277 alloc_sets = alloc_sets + add;
1278 *result_sets = rnew;
1280 res = term_trunc(zh, zapt, &termp, attributeSet,
1282 index_type, complete_flag,
1283 term_dst, rank_type,
1284 xpath_use, rset_nmem,
1285 &(*result_sets)[*num_result_sets],
1287 if (res != ZEBRA_OK)
1290 for (i = 0; i < *num_result_sets; i++)
1291 rset_delete((*result_sets)[i]);
1292 grep_info_delete(&grep_info);
1295 if ((*result_sets)[*num_result_sets] == 0)
1297 (*num_result_sets)++;
1302 grep_info_delete(&grep_info);
/* Create a result set for the position attribute (type 3) of a query
   leaf.
   Unsupported position values, and a first-in-field request on an
   index whose map does not support it, are reported as
   YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE.
   Otherwise the dictionary key is the encoded index ordinal followed by
   FIRST_IN_FIELD_STR.  It is looked up with dict_lookup(), and the ISAM
   pointer stored in the value is turned into *rset with
   zebra_create_rset_isam(). */
1306 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1307 Z_AttributesPlusTerm *zapt,
1308 const Odr_oid *attributeSet,
1309 const char *index_type,
1312 struct rset_key_control *kc)
1318 char term_dict[100];
1322 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1324 attr_init_APT(&position, zapt, 3);
1325 position_value = attr_find(&position, NULL);
1326 switch(position_value)
1335 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1341 if (!zebra_maps_is_first_in_field(zm))
1343 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1348 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1349 attributeSet, &ord) != ZEBRA_OK)
1353 ord_len = key_SU_encode(ord, ord_buf);
1354 memcpy(term_dict, ord_buf, ord_len);
1355 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1356 val = dict_lookup(zh->reg->dict, term_dict);
/* the first value byte is the payload length; it must be one ISAM_P */
1359 assert(*val == sizeof(ISAM_P));
1360 memcpy(&isam_p, val+1, sizeof(isam_p));
1362 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
/* Search a query leaf as a phrase: its terms must occur in order and
   next to each other.
   The per-term result sets come from term_list_trunc().  When there is
   at least one, rpn_search_APT_position() is asked for a position set;
   a position set is placed in front of the term sets in a new array
   taken from stream.  If the position lookup fails, all term sets are
   deleted.
   Result: a null set for no terms, the single set for one term, and
   otherwise an ordered proximity set with relation 3 and distance 1. */
1368 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1369 Z_AttributesPlusTerm *zapt,
1370 const char *termz_org,
1371 const Odr_oid *attributeSet,
1373 const char *index_type, int complete_flag,
1374 const char *rank_type,
1375 const char *xpath_use,
1378 struct rset_key_control *kc)
1380 RSET *result_sets = 0;
1381 int num_result_sets = 0;
1383 term_list_trunc(zh, zapt, termz_org, attributeSet,
1384 stream, index_type, complete_flag,
1385 rank_type, xpath_use,
1387 &result_sets, &num_result_sets, kc);
1389 if (res != ZEBRA_OK)
1392 if (num_result_sets > 0)
1395 res = rpn_search_APT_position(zh, zapt, attributeSet,
1397 rset_nmem, &first_set,
1399 if (res != ZEBRA_OK)
1402 for (i = 0; i<num_result_sets; i++)
1403 rset_delete(result_sets[i]);
1408 RSET *nsets = nmem_malloc(stream,
1409 sizeof(RSET) * (num_result_sets+1));
1410 nsets[0] = first_set;
1411 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1412 result_sets = nsets;
1416 if (num_result_sets == 0)
1417 *rset = rset_create_null(rset_nmem, kc, 0);
1418 else if (num_result_sets == 1)
1419 *rset = result_sets[0];
1421 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1422 num_result_sets, result_sets,
1423 1 /* ordered */, 0 /* exclusion */,
1424 3 /* relation */, 1 /* distance */);
/* Search a query leaf as an OR-list: a record matches when any of the
   terms matches.
   The per-term result sets come from term_list_trunc().  For each term
   a position set is requested from rpn_search_APT_position(); a
   position set is combined with the term set by an ordered proximity
   set (relation 3, distance 1).  If the position lookup fails, all term
   sets are deleted.
   Result: a null set for no terms, the single set for one term, and
   otherwise an OR set over all term sets. */
1430 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1431 Z_AttributesPlusTerm *zapt,
1432 const char *termz_org,
1433 const Odr_oid *attributeSet,
1435 const char *index_type,
1437 const char *rank_type,
1438 const char *xpath_use,
1441 struct rset_key_control *kc)
1443 RSET *result_sets = 0;
1444 int num_result_sets = 0;
1447 term_list_trunc(zh, zapt, termz_org, attributeSet,
1448 stream, index_type, complete_flag,
1449 rank_type, xpath_use,
1451 &result_sets, &num_result_sets, kc);
1452 if (res != ZEBRA_OK)
1455 for (i = 0; i<num_result_sets; i++)
1458 res = rpn_search_APT_position(zh, zapt, attributeSet,
1460 rset_nmem, &first_set,
1462 if (res != ZEBRA_OK)
1464 for (i = 0; i<num_result_sets; i++)
1465 rset_delete(result_sets[i]);
1473 tmp_set[0] = first_set;
1474 tmp_set[1] = result_sets[i];
1476 result_sets[i] = rset_create_prox(
1477 rset_nmem, kc, kc->scope,
1479 1 /* ordered */, 0 /* exclusion */,
1480 3 /* relation */, 1 /* distance */);
1483 if (num_result_sets == 0)
1484 *rset = rset_create_null(rset_nmem, kc, 0);
1485 else if (num_result_sets == 1)
1486 *rset = result_sets[0];
1488 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1489 num_result_sets, result_sets);
/* Search a query leaf as an AND-list: a record matches only when all
   of the terms match.
   The per-term result sets come from term_list_trunc().  For each term
   a position set is requested from rpn_search_APT_position(); a
   position set is combined with the term set by an ordered proximity
   set (relation 3, distance 1).  If the position lookup fails, all term
   sets are deleted.
   Result: a null set for no terms, the single set for one term, and
   otherwise an AND set over all term sets. */
1495 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1496 Z_AttributesPlusTerm *zapt,
1497 const char *termz_org,
1498 const Odr_oid *attributeSet,
1500 const char *index_type,
1502 const char *rank_type,
1503 const char *xpath_use,
1506 struct rset_key_control *kc)
1508 RSET *result_sets = 0;
1509 int num_result_sets = 0;
1512 term_list_trunc(zh, zapt, termz_org, attributeSet,
1513 stream, index_type, complete_flag,
1514 rank_type, xpath_use,
1516 &result_sets, &num_result_sets,
1518 if (res != ZEBRA_OK)
1520 for (i = 0; i<num_result_sets; i++)
1523 res = rpn_search_APT_position(zh, zapt, attributeSet,
1525 rset_nmem, &first_set,
1527 if (res != ZEBRA_OK)
1529 for (i = 0; i<num_result_sets; i++)
1530 rset_delete(result_sets[i]);
1538 tmp_set[0] = first_set;
1539 tmp_set[1] = result_sets[i];
1541 result_sets[i] = rset_create_prox(
1542 rset_nmem, kc, kc->scope,
1544 1 /* ordered */, 0 /* exclusion */,
1545 3 /* relation */, 1 /* distance */);
1550 if (num_result_sets == 0)
1551 *rset = rset_create_null(rset_nmem, kc, 0);
1552 else if (num_result_sets == 1)
1553 *rset = result_sets[0];
1555 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1556 num_result_sets, result_sets);
/* Append a numeric-relation expression for the next term to term_dict
   and run the dictionary lookup.
   The relation is attribute type 2.  The term is parsed with term_100()
   (always with space splitting) into the temporary buffer term_num and
   converted with atoi().  The ordering relations are delegated to
   gen_regular_rel(): strict "<" and ">" are expressed as inclusive
   bounds on value-1 and value+1.  Equality emits "(0*N)", so leading
   zeros in the index still match.  An unsupported relation stores
   YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in *error_code.
   term_num is destroyed on every exit path visible here.  Callers test
   the result as a boolean. */
1562 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1563 const char **term_sub,
1565 const Odr_oid *attributeSet,
1566 struct grep_info *grep_info,
1576 WRBUF term_num = wrbuf_alloc();
1579 attr_init_APT(&relation, zapt, 2);
1580 relation_value = attr_find(&relation, NULL);
1582 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1584 switch (relation_value)
1587 yaz_log(log_level_rpn, "Relation <");
1588 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1590 wrbuf_destroy(term_num);
1593 term_value = atoi(wrbuf_cstr(term_num));
1594 gen_regular_rel(term_dict, term_value-1, 1);
1597 yaz_log(log_level_rpn, "Relation <=");
1598 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1600 wrbuf_destroy(term_num);
1603 term_value = atoi(wrbuf_cstr(term_num));
1604 gen_regular_rel(term_dict, term_value, 1);
1607 yaz_log(log_level_rpn, "Relation >=");
1608 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1610 wrbuf_destroy(term_num);
1613 term_value = atoi(wrbuf_cstr(term_num));
1614 gen_regular_rel(term_dict, term_value, 0);
1617 yaz_log(log_level_rpn, "Relation >");
1618 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1620 wrbuf_destroy(term_num);
1623 term_value = atoi(wrbuf_cstr(term_num));
1624 gen_regular_rel(term_dict, term_value+1, 0);
1628 yaz_log(log_level_rpn, "Relation =");
1629 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1631 wrbuf_destroy(term_num);
1634 term_value = atoi(wrbuf_cstr(term_num));
1635 wrbuf_printf(term_dict, "(0*%d)", term_value);
1638 /* term_tmp untouched.. */
1639 while (**term_sub != '\0')
1643 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1644 wrbuf_destroy(term_num);
1647 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1648 0, grep_info, max_pos, 0, grep_handle);
1651 zebra_set_partial_result(zh);
1653 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1654 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1655 wrbuf_destroy(term_num);
/* Numeric counterpart of string_term(): builds the dictionary
   expression for the next numeric term and looks it up.
   The expression starts with "(" ordinal-bytes ")", where each ordinal
   byte is preceded by the byte 1 (the internal regexp escape).  The
   numeric part and the dictionary lookup itself are done by
   numeric_relation().  When that fails and reports an error code, the
   code is passed to zebra_setError().
   ol receives the list of index ordinals searched. */
1659 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1660 const char **term_sub,
1662 const Odr_oid *attributeSet, NMEM stream,
1663 struct grep_info *grep_info,
1664 const char *index_type, int complete_flag,
1666 const char *xpath_use,
1667 struct ord_list **ol)
1670 struct rpn_char_map_info rcmi;
1672 int relation_error = 0;
1673 int ord, ord_len, i;
1675 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1677 *ol = ord_list_create(stream);
1679 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1683 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1684 attributeSet, &ord) != ZEBRA_OK)
1689 wrbuf_rewind(term_dict);
1691 *ol = ord_list_append(stream, *ol, ord);
1693 ord_len = key_SU_encode(ord, ord_buf);
1695 wrbuf_putc(term_dict, '(');
1696 for (i = 0; i < ord_len; i++)
1698 wrbuf_putc(term_dict, 1);
1699 wrbuf_putc(term_dict, ord_buf[i]);
1701 wrbuf_putc(term_dict, ')');
1703 if (!numeric_relation(zh, zapt, &termp, term_dict,
1704 attributeSet, grep_info, &max_pos, zm,
1705 term_dst, &relation_error))
1709 zebra_setError(zh, relation_error, 0);
1716 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Search a query leaf with numeric terms.
   Each term is looked up with numeric_term(), using a fresh WRBUF for
   the dictionary expression that is destroyed right after the call.
   The collected ISAM pointers are turned into a result set by
   rset_trunc(), without preserving positions.  The result set array is
   allocated from stream and replaced by a larger copy when full.
   The loop stops when numeric_term() fails or leaves termp null.
   Result: a null set for no terms, the single set for one term, and
   otherwise an AND set over all term sets.  The grep_info is released
   before the final result is assembled. */
1721 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1722 Z_AttributesPlusTerm *zapt,
1724 const Odr_oid *attributeSet,
1726 const char *index_type,
1728 const char *rank_type,
1729 const char *xpath_use,
1732 struct rset_key_control *kc)
1734 char term_dst[IT_MAX_WORD+1];
1735 const char *termp = termz;
1736 RSET *result_sets = 0;
1737 int num_result_sets = 0;
1739 struct grep_info grep_info;
1741 zint hits_limit_value;
1742 const char *term_ref_id_str = 0;
1744 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1747 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1748 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1752 struct ord_list *ol;
1753 WRBUF term_dict = wrbuf_alloc();
1754 if (alloc_sets == num_result_sets)
1757 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1760 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1761 alloc_sets = alloc_sets + add;
1764 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1765 grep_info.isam_p_indx = 0;
1766 res = numeric_term(zh, zapt, &termp, term_dict,
1767 attributeSet, stream, &grep_info,
1768 index_type, complete_flag,
1769 term_dst, xpath_use, &ol);
1770 wrbuf_destroy(term_dict);
1771 if (res == ZEBRA_FAIL || termp == 0)
1773 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1774 result_sets[num_result_sets] =
1775 rset_trunc(zh, grep_info.isam_p_buf,
1776 grep_info.isam_p_indx, term_dst,
1777 strlen(term_dst), rank_type,
1778 0 /* preserve position */,
1779 zapt->term->which, rset_nmem,
1780 kc, kc->scope, ol, index_type,
1783 if (!result_sets[num_result_sets])
1789 grep_info_delete(&grep_info);
1791 if (res != ZEBRA_OK)
1793 if (num_result_sets == 0)
1794 *rset = rset_create_null(rset_nmem, kc, 0);
1795 else if (num_result_sets == 1)
1796 *rset = result_sets[0];
1798 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1799 num_result_sets, result_sets);
/* rpn_search_APT_local: handle the "local" search strategy, where the term
 * text is a record system number.
 *
 * termz is converted with atozint() and the record is fetched with rec_get().
 * Two outcomes are visible: *rset is a null set, or *rset is a temporary set
 * (temp directory taken from the "setTmpDir" resource) that is opened for
 * writing and receives one key through rset_write().
 *
 * NOTE(review): the test that chooses between the two outcomes and the code
 * that fills in `key` are on lines not shown in this listing; presumably the
 * null set is used when the record does not exist -- confirm. */
1805 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1806 Z_AttributesPlusTerm *zapt,
1808 const Odr_oid *attributeSet,
1810 const char *rank_type, NMEM rset_nmem,
1812 struct rset_key_control *kc)
1815 zint sysno = atozint(termz);
1819 rec = rec_get(zh->reg->records, sysno);
1827 *rset = rset_create_null(rset_nmem, kc, 0);
1833 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1834 res_get(zh->res, "setTmpDir"), 0);
1835 rsfd = rset_open(*rset, RSETF_WRITE);
1840 rset_write(rsfd, &key);
/* rpn_sort_spec: turn an operand that carries a sort request into an entry
 * of sort_sequence instead of a real search.
 *
 * Visible flow:
 *  - attribute type 7 of the operand is read into sort_relation_value;
 *  - if sort_sequence->specs is unset, a 10-slot array is allocated from
 *    `stream` and every slot is cleared;
 *  - the general term is parsed with atoi_n() into the slot index i;
 *  - a Z_SortKeySpec is built (generic element, sort attributes copied from
 *    the operand, relation, case sensitivity 0, missing-value "null") and
 *    stored in specs[i];
 *  - *rset is set to a null result set.
 *
 * NOTE(review): short lines are missing from this listing, so the handling
 * of a non-general term and of i >= num_specs is not visible.  No lower-bound
 * check on i is visible either -- confirm that a negative parsed value cannot
 * reach specs[i]. */
1846 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1847 const Odr_oid *attributeSet, NMEM stream,
1848 Z_SortKeySpecList *sort_sequence,
1849 const char *rank_type,
1852 struct rset_key_control *kc)
1855 int sort_relation_value;
1856 AttrType sort_relation_type;
1861 attr_init_APT(&sort_relation_type, zapt, 7);
1862 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* Lazily create the spec array on first use. */
1864 if (!sort_sequence->specs)
1866 sort_sequence->num_specs = 10;
1867 sort_sequence->specs = (Z_SortKeySpec **)
1868 nmem_malloc(stream, sort_sequence->num_specs *
1869 sizeof(*sort_sequence->specs));
1870 for (i = 0; i<sort_sequence->num_specs; i++)
1871 sort_sequence->specs[i] = 0;
1873 if (zapt->term->which != Z_Term_general)
/* The term text is the numeric position of this key in the sort sequence. */
1876 i = atoi_n((char *) zapt->term->u.general->buf,
1877 zapt->term->u.general->len);
1878 if (i >= sort_sequence->num_specs)
1880 sprintf(termz, "%d", i);
/* Build the sort key spec; all pieces are allocated from `stream`. */
1882 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1883 sks->sortElement = (Z_SortElement *)
1884 nmem_malloc(stream, sizeof(*sks->sortElement));
1885 sks->sortElement->which = Z_SortElement_generic;
1886 sk = sks->sortElement->u.generic = (Z_SortKey *)
1887 nmem_malloc(stream, sizeof(*sk));
1888 sk->which = Z_SortKey_sortAttributes;
1889 sk->u.sortAttributes = (Z_SortAttributes *)
1890 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1892 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1893 sk->u.sortAttributes->list = zapt->attributes;
/* Relation value 1 selects ascending, 2 selects descending; the remaining
 * assignment (its `else` is not visible here) also selects ascending. */
1895 sks->sortRelation = (int *)
1896 nmem_malloc(stream, sizeof(*sks->sortRelation));
1897 if (sort_relation_value == 1)
1898 *sks->sortRelation = Z_SortKeySpec_ascending;
1899 else if (sort_relation_value == 2)
1900 *sks->sortRelation = Z_SortKeySpec_descending;
1902 *sks->sortRelation = Z_SortKeySpec_ascending;
1904 sks->caseSensitivity = (int *)
1905 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1906 *sks->caseSensitivity = 0;
1908 sks->which = Z_SortKeySpec_null;
1909 sks->u.null = odr_nullval ();
1910 sort_sequence->specs[i] = sks;
/* A sort operand contributes no hits of its own. */
1911 *rset = rset_create_null(rset_nmem, kc, 0);
/* rpn_check_xpath: decide whether the operand's use attribute (type 1) is an
 * X-Path expression and, if so, parse it.
 *
 * The string form of the use attribute is fetched with attr_find_ex().  When
 * it starts with '/', the result of zebra_parse_xpath_str() on that string is
 * returned, with the parsed steps written to `xpath` (at most `max`).
 *
 * NOTE(review): the value returned when there is no use string or it does not
 * start with '/' is on a line not shown in this listing. */
1916 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1917 const Odr_oid *attributeSet,
1918 struct xpath_location_step *xpath, int max,
1921 const Odr_oid *curAttributeSet = attributeSet;
1923 const char *use_string = 0;
1925 attr_init_APT(&use, zapt, 1);
1926 attr_find_ex(&use, &curAttributeSet, &use_string);
1928 if (!use_string || *use_string != '/')
1931 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* xpath_trunc: look up `term` as a regular expression in the dictionary for
 * the index identified by (index_type, xpath_use) and return the matching
 * positions as one RSET.
 *
 * The index ordinal comes from zebraExplain_lookup_attr_str().  A null set is
 * returned if grep_info_prepare() fails; a second null-set return follows
 * whose condition is not visible here (presumably an unknown ordinal).
 * Otherwise the dictionary pattern is built, dict_lookup_grep() fills
 * grep_info through grep_handle, and rset_trunc() builds the result with
 * flags "void" and term type Z_Term_characterString.  grep_info and the
 * pattern buffer are released before returning.
 *
 * NOTE(review): short lines (braces, some declarations, the final return)
 * are missing from this listing. */
1936 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1937 const char *index_type, const char *term,
1938 const char *xpath_use,
1940 struct rset_key_control *kc)
1942 struct grep_info grep_info;
1943 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1944 zinfo_index_category_index,
1945 index_type, xpath_use);
1946 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
1947 return rset_create_null(rset_nmem, kc, 0);
1950 return rset_create_null(rset_nmem, kc, 0);
1956 WRBUF term_dict = wrbuf_alloc();
1957 int ord_len = key_SU_encode(ord, ord_buf);
1958 int term_type = Z_Term_characterString;
1959 const char *flags = "void";
/* Pattern prefix: '(' , then every byte of the encoded ordinal preceded by a
 * byte with value 1, then ')'.  The caller's term is appended after it. */
1961 wrbuf_putc(term_dict, '(');
1962 for (i = 0; i<ord_len; i++)
1964 wrbuf_putc(term_dict, 1);
1965 wrbuf_putc(term_dict, ord_buf[i]);
1967 wrbuf_putc(term_dict, ')');
1968 wrbuf_puts(term_dict, term);
1970 grep_info.isam_p_indx = 0;
1971 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
1972 &grep_info, &max_pos, 0, grep_handle);
1973 yaz_log(YLOG_DEBUG, "%s %d positions", term,
1974 grep_info.isam_p_indx);
1975 rset = rset_trunc(zh, grep_info.isam_p_buf,
1976 grep_info.isam_p_indx, term, strlen(term),
1977 flags, 1, term_type, rset_nmem,
1978 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
1979 0 /* term_ref_id_str */);
1980 grep_info_delete(&grep_info);
1981 wrbuf_destroy(term_dict);
/* rpn_search_xpath: restrict a term result set to the element/attribute
 * context described by an X-Path, working from the innermost step outwards.
 *
 * `rset` is the incoming term set; when it is null, always_matches is set to
 * 1.  For each level (from xpath_len-1 down to 0) a reversed path pattern is
 * built in xpath_rev, an optional attribute predicate set is looked up, and
 * the start-tag / end-tag sets for that pattern are fetched with
 * xpath_trunc().  The visible code then either takes the start-tag set as the
 * new rset or wraps the current rset with rset_create_between().
 *
 * NOTE(review): this listing omits short lines (braces, else, inner loop
 * headers, early exits), so which branch each statement belongs to is partly
 * implied.  Presumably the plain "rset = rset_start_tag" assignment belongs
 * to the always_matches case -- confirm against the full source. */
1987 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1988 NMEM stream, const char *rank_type, RSET rset,
1989 int xpath_len, struct xpath_location_step *xpath,
1992 struct rset_key_control *kc)
1995 int always_matches = rset ? 0 : 1;
2003 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2004 for (i = 0; i<xpath_len; i++)
2006 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
/* The next seven lines appear to be the body of an explanatory comment whose
 * delimiters are not visible in this listing: example X-Paths and the nested
 * range() form each is evaluated as. */
2018 a[@attr = value]/b[@other = othervalue]
2020 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2021 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2022 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2023 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2024 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2025 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* Dictionary lookups below run with no character-map handler installed. */
2029 dict_grep_cmap(zh->reg->dict, 0, 0);
2032 int level = xpath_len;
2035 while (--level >= 0)
2037 WRBUF xpath_rev = wrbuf_alloc();
2039 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* Build the path pattern from step `level` back to step 1.  Each step's
 * characters are copied with two substitutions visible here: one emits the
 * regex "[^/]*" (its test is not shown), and a space becomes "\001 ". */
2041 for (i = level; i >= 1; --i)
2043 const char *cp = xpath[i].part;
2049 wrbuf_puts(xpath_rev, "[^/]*");
2050 else if (*cp == ' ')
2051 wrbuf_puts(xpath_rev, "\001 ");
2053 wrbuf_putc(xpath_rev, *cp);
2055 /* wrbuf_putc does not null-terminate , but
2056 wrbuf_puts below ensures it does.. so xpath_rev
2057 is OK iff length is > 0 */
2059 wrbuf_puts(xpath_rev, "/");
2061 else if (i == 1) /* // case */
2062 wrbuf_puts(xpath_rev, ".*");
/* Attribute predicate on this step: name (first character skipped),
 * optionally followed by '=' and the value with regex characters escaped. */
2064 if (xpath[level].predicate &&
2065 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2066 xpath[level].predicate->u.relation.name[0])
2068 WRBUF wbuf = wrbuf_alloc();
2069 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2070 if (xpath[level].predicate->u.relation.value)
2072 const char *cp = xpath[level].predicate->u.relation.value;
2073 wrbuf_putc(wbuf, '=');
2077 if (strchr(REGEX_CHARS, *cp))
2078 wrbuf_putc(wbuf, '\\');
2079 wrbuf_putc(wbuf, *cp);
2083 rset_attr = xpath_trunc(
2084 zh, stream, "0", wrbuf_cstr(wbuf),
2085 ZEBRA_XPATH_ATTR_NAME,
2087 wrbuf_destroy(wbuf);
2093 wrbuf_destroy(xpath_rev);
2097 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2098 wrbuf_cstr(xpath_rev));
/* Only a non-empty pattern produces tag lookups for this level. */
2099 if (wrbuf_len(xpath_rev))
2101 rset_start_tag = xpath_trunc(zh, stream, "0",
2102 wrbuf_cstr(xpath_rev),
2103 ZEBRA_XPATH_ELM_BEGIN,
2106 rset = rset_start_tag;
2109 rset_end_tag = xpath_trunc(zh, stream, "0",
2110 wrbuf_cstr(xpath_rev),
2111 ZEBRA_XPATH_ELM_END,
2114 rset = rset_create_between(rset_nmem, kc, kc->scope,
2115 rset_start_tag, rset,
2116 rset_end_tag, rset_attr);
2119 wrbuf_destroy(xpath_rev);
/* Capacity of the xpath step array declared in rpn_search_database and the
 * `max` value that function passes to rpn_check_xpath. */
2127 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT below calls rpn_search_database before
 * its definition.  (Some parameter lines are not visible in this listing.) */
2129 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2130 Z_AttributesPlusTerm *zapt,
2131 const Odr_oid *attributeSet, NMEM stream,
2132 Z_SortKeySpecList *sort_sequence,
2135 struct rset_key_control *kc);
/* rpn_search_APT: evaluate one attributes-plus-term operand against every
 * database in basenames[] and merge the per-database result sets.
 *
 * An array of num_bases RSETs is allocated from `stream`.  For each database
 * zebraExplain_curDatabase() selects it; a non-zero return is reported as
 * YAZ_BIB1_DATABASE_UNAVAILABLE, otherwise rpn_search_database() fills
 * rsets[i].  On failure the sets created so far are deleted.  On success
 * *rset is a null set when num_bases is 0, or an OR over all sets; the first
 * branch of that chain is not visible here (presumably the single-database
 * case).
 *
 * NOTE(review): short lines (braces, break/return, some declarations and
 * parameters) are missing from this listing. */
2137 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2138 const Odr_oid *attributeSet, NMEM stream,
2139 Z_SortKeySpecList *sort_sequence,
2140 int num_bases, const char **basenames,
2143 struct rset_key_control *kc)
2145 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2146 ZEBRA_RES res = ZEBRA_OK;
2148 for (i = 0; i < num_bases; i++)
2151 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2153 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2158 res = rpn_search_database(zh, zapt, attributeSet, stream,
2160 rset_nmem, rsets+i, kc);
2161 if (res != ZEBRA_OK)
2164 if (res != ZEBRA_OK)
2165 { /* must clean up the already created sets */
2167 rset_delete(rsets[i]);
2174 else if (num_bases == 0)
2175 *rset = rset_create_null(rset_nmem, kc, 0);
2177 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* rpn_search_database: evaluate one attributes-plus-term operand against the
 * currently selected database.
 *
 * Visible flow:
 *  - zebra_maps_attr() derives index_type, search_type, rank_type,
 *    complete_flag and sort_flag from the operand's attributes;
 *  - the term is converted to UTF-8 into termz;
 *  - a sort operand is delegated to rpn_sort_spec();
 *  - rpn_check_xpath() decides whether the use attribute is an X-Path and,
 *    if so, xpath_use is chosen from whether the last step starts with '@';
 *  - relation value 103 (alwaysmatches) skips the term search and goes
 *    straight to rpn_search_xpath() with a null term set;
 *  - otherwise the operand is dispatched on search_type ("phrase",
 *    "and-list", "or-list", "local", "numeric"); any other value is reported
 *    as YAZ_BIB1_UNSUPP_SEARCH;
 *  - on success the term set is passed through rpn_search_xpath().
 *
 * NOTE(review): short lines (braces, else, returns, the tests guarding the
 * sort and X-Path branches) are missing from this listing.  In particular
 * the guard in front of xpath[xpath_len-1] is not visible -- confirm that
 * xpath_len is greater than 0 whenever that element is read. */
2183 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2184 Z_AttributesPlusTerm *zapt,
2185 const Odr_oid *attributeSet, NMEM stream,
2186 Z_SortKeySpecList *sort_sequence,
2189 struct rset_key_control *kc)
2191 ZEBRA_RES res = ZEBRA_OK;
2192 const char *index_type;
2193 char *search_type = NULL;
2194 char rank_type[128];
2197 char termz[IT_MAX_WORD+1];
2199 const char *xpath_use = 0;
2200 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2204 log_level_rpn = yaz_log_module_level("rpn");
2207 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2208 rank_type, &complete_flag, &sort_flag);
2210 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2211 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2212 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2213 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2215 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2219 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2220 rank_type, rset_nmem, rset, kc);
2221 /* consider if an X-Path query is used */
2222 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2223 xpath, MAX_XPATH_STEPS, stream);
2226 if (xpath[xpath_len-1].part[0] == '@')
2227 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2229 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2236 attr_init_APT(&relation, zapt, 2);
2237 relation_value = attr_find(&relation, NULL);
2239 if (relation_value == 103) /* alwaysmatches */
2241 *rset = 0; /* signal no "term" set */
2242 return rpn_search_xpath(zh, stream, rank_type, *rset,
2243 xpath_len, xpath, rset_nmem, rset, kc);
2248 /* search using one of the various search type strategies
2249 termz is our UTF-8 search term
2250 attributeSet is top-level default attribute set
2251 stream is ODR for search
2252 reg_id is the register type
2253 complete_flag is 1 for complete subfield, 0 for incomplete
2254 xpath_use is use-attribute to be used for X-Path search, 0 for none
2256 if (!strcmp(search_type, "phrase"))
2258 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2259 index_type, complete_flag, rank_type,
2264 else if (!strcmp(search_type, "and-list"))
2266 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2267 index_type, complete_flag, rank_type,
2272 else if (!strcmp(search_type, "or-list"))
2274 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2275 index_type, complete_flag, rank_type,
2280 else if (!strcmp(search_type, "local"))
2282 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2283 rank_type, rset_nmem, rset, kc);
2285 else if (!strcmp(search_type, "numeric"))
2287 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2288 index_type, complete_flag, rank_type,
2295 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2298 if (res != ZEBRA_OK)
2302 return rpn_search_xpath(zh, stream, rank_type, *rset,
2303 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top calls rpn_search_structure before its
 * definition, and the function also calls itself for both children of a
 * complex node. */
2306 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2307 const Odr_oid *attributeSet,
2308 NMEM stream, NMEM rset_nmem,
2309 Z_SortKeySpecList *sort_sequence,
2310 int num_bases, const char **basenames,
2311 RSET **result_sets, int *num_result_sets,
2312 Z_Operator *parent_op,
2313 struct rset_key_control *kc);
/* rpn_get_top_approx_limit: walk an RPN query tree looking at attribute
 * type 12 on its attributes-plus-term operands.
 *
 * A complex node recurses into s1 and then, only if that returned ZEBRA_OK,
 * into s2.  For a simple node holding an APT operand, attribute type 12 is
 * looked up with attr_find() and the value is stored in `l`.
 *
 * NOTE(review): the remaining parameter(s) and the code that uses `l` are on
 * lines not shown in this listing; presumably the value is written to an
 * output limit argument -- confirm against the full source. */
2315 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2318 ZEBRA_RES res = ZEBRA_OK;
2319 if (zs->which == Z_RPNStructure_complex)
2321 if (res == ZEBRA_OK)
2322 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2324 if (res == ZEBRA_OK)
2325 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2328 else if (zs->which == Z_RPNStructure_simple)
2330 if (zs->u.simple->which == Z_Operand_APT)
2332 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2333 AttrType global_hits_limit_attr;
2336 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2338 l = attr_find(&global_hits_limit_attr, NULL);
/* rpn_search_top: entry point for evaluating a whole RPN query tree.
 *
 * A key control object is created with zebra_key_control_create() and the
 * tree is evaluated by rpn_search_structure() with no parent operator.  On
 * failure every result set produced so far is deleted.  On success exactly
 * one result set is expected (enforced by the asserts) and it is stored in
 * *result_set.
 *
 * NOTE(review): short lines (braces, else, the final return and any release
 * of `kc`) are missing from this listing. */
2346 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2347 const Odr_oid *attributeSet,
2348 NMEM stream, NMEM rset_nmem,
2349 Z_SortKeySpecList *sort_sequence,
2350 int num_bases, const char **basenames,
2353 RSET *result_sets = 0;
2354 int num_result_sets = 0;
2356 struct rset_key_control *kc = zebra_key_control_create(zh);
2358 res = rpn_search_structure(zh, zs, attributeSet,
2361 num_bases, basenames,
2362 &result_sets, &num_result_sets,
2363 0 /* no parent op */,
2365 if (res != ZEBRA_OK)
2368 for (i = 0; i<num_result_sets; i++)
2369 rset_delete(result_sets[i]);
2374 assert(num_result_sets == 1);
2375 assert(result_sets);
2376 assert(*result_sets);
2377 *result_set = *result_sets;
/* rpn_search_structure: recursively evaluate an RPN query node and return
 * its result set(s) through *result_sets / *num_result_sets.
 *
 * Complex node: both children are evaluated; if either fails, the sets
 * already produced are deleted.  The children's sets are concatenated into
 * one array allocated from `stream`.  When there is no parent operator, the
 * parent's operator differs from this node's, or this node's operator is
 * neither AND nor OR, the sets are combined now into a single RSET (and,
 * or, and-not, or proximity) and returned as a one-element array; otherwise
 * the concatenated list is handed up so the parent can combine it.
 * Proximity is rejected with YAZ_BIB1_UNSUPP_PROX_UNIT_CODE unless the unit
 * is a known unit equal to Z_ProxUnit_word.
 *
 * Simple node: an APT operand is evaluated by rpn_search_APT(); a result-set
 * reference is resolved with resultSetRef(); any other operand is reported
 * as YAZ_BIB1_UNSUPP_SEARCH.  The single RSET is returned as a one-element
 * array.
 *
 * Any other node kind is reported as YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): short lines (braces, else, break/return, `switch`, one case
 * label and some arguments) are missing from this listing, so branch
 * boundaries are partly implied. */
2383 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2384 const Odr_oid *attributeSet,
2385 NMEM stream, NMEM rset_nmem,
2386 Z_SortKeySpecList *sort_sequence,
2387 int num_bases, const char **basenames,
2388 RSET **result_sets, int *num_result_sets,
2389 Z_Operator *parent_op,
2390 struct rset_key_control *kc)
2392 *num_result_sets = 0;
2393 if (zs->which == Z_RPNStructure_complex)
2396 Z_Operator *zop = zs->u.complex->roperator;
2397 RSET *result_sets_l = 0;
2398 int num_result_sets_l = 0;
2399 RSET *result_sets_r = 0;
2400 int num_result_sets_r = 0;
/* Left child. */
2402 res = rpn_search_structure(zh, zs->u.complex->s1,
2403 attributeSet, stream, rset_nmem,
2405 num_bases, basenames,
2406 &result_sets_l, &num_result_sets_l,
2408 if (res != ZEBRA_OK)
2411 for (i = 0; i<num_result_sets_l; i++)
2412 rset_delete(result_sets_l[i]);
/* Right child; on failure both children's sets are released. */
2415 res = rpn_search_structure(zh, zs->u.complex->s2,
2416 attributeSet, stream, rset_nmem,
2418 num_bases, basenames,
2419 &result_sets_r, &num_result_sets_r,
2421 if (res != ZEBRA_OK)
2424 for (i = 0; i<num_result_sets_l; i++)
2425 rset_delete(result_sets_l[i]);
2426 for (i = 0; i<num_result_sets_r; i++)
2427 rset_delete(result_sets_r[i]);
2431 /* make a new list of result for all children */
2432 *num_result_sets = num_result_sets_l + num_result_sets_r;
2433 *result_sets = nmem_malloc(stream, *num_result_sets *
2434 sizeof(**result_sets));
2435 memcpy(*result_sets, result_sets_l,
2436 num_result_sets_l * sizeof(**result_sets));
2437 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2438 num_result_sets_r * sizeof(**result_sets));
2440 if (!parent_op || parent_op->which != zop->which
2441 || (zop->which != Z_Operator_and &&
2442 zop->which != Z_Operator_or))
2444 /* parent node different from this one (or non-present) */
2445 /* we must combine result sets now */
2449 case Z_Operator_and:
2450 rset = rset_create_and(rset_nmem, kc,
2452 *num_result_sets, *result_sets);
2455 rset = rset_create_or(rset_nmem, kc,
2456 kc->scope, 0, /* termid */
2457 *num_result_sets, *result_sets);
2459 case Z_Operator_and_not:
2460 rset = rset_create_not(rset_nmem, kc,
2465 case Z_Operator_prox:
2466 if (zop->u.prox->which != Z_ProximityOperator_known)
2469 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2473 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2475 zebra_setError_zint(zh,
2476 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2477 *zop->u.prox->u.known);
2482 rset = rset_create_prox(rset_nmem, kc,
2484 *num_result_sets, *result_sets,
2485 *zop->u.prox->ordered,
2486 (!zop->u.prox->exclusion ?
2487 0 : *zop->u.prox->exclusion),
2488 *zop->u.prox->relationType,
2489 *zop->u.prox->distance );
2493 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* The combined set replaces the child list as a one-element array. */
2496 *num_result_sets = 1;
2497 *result_sets = nmem_malloc(stream, *num_result_sets *
2498 sizeof(**result_sets));
2499 (*result_sets)[0] = rset;
2502 else if (zs->which == Z_RPNStructure_simple)
2507 if (zs->u.simple->which == Z_Operand_APT)
2509 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2510 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2511 attributeSet, stream, sort_sequence,
2512 num_bases, basenames, rset_nmem, &rset,
2514 if (res != ZEBRA_OK)
2517 else if (zs->u.simple->which == Z_Operand_resultSetId)
2519 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2520 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2524 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2525 zs->u.simple->u.resultSetId);
2532 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2535 *num_result_sets = 1;
2536 *result_sets = nmem_malloc(stream, *num_result_sets *
2537 sizeof(**result_sets));
2538 (*result_sets)[0] = rset;
2542 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2553 * indent-tabs-mode: nil
2555 * vim: shiftwidth=4 tabstop=8 expandtab