1 /* $Id: rpnsearch.c,v 1.24 2007-11-30 12:19:08 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* YAZ log level for the "rpn" module.  log_level_rpn is assigned from
   yaz_log_module_level("rpn") in add_isam_p() and is the level passed to
   most yaz_log() calls in this file.  log_level_set is presumably the
   "already initialised" flag for it -- the code that tests it is not
   visible in this listing; confirm. */
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
/* NOTE(review): no use of TERMSET_DISABLE is visible in this listing;
   presumably it compiles out termset handling -- confirm. */
43 #define TERMSET_DISABLE 1
/* Character-map callback that rpn_char_map_prepare() hands to
   dict_grep_cmap().  vp is the struct rpn_char_map_info client data; the
   input is mapped with zebra_maps_input(p->zm, from, len, 0).
   The yaz_log() calls dump the mapped bytes in hex -- presumably
   debug-only code; the lines guarding them and the return statement are
   not visible in this listing. */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Configure the character-map callback used by dictionary grep on
   reg->dict.  Two dict_grep_cmap() calls are visible: one, guarded by
   zebra_maps_is_icu(zm), passes (0, 0) i.e. no client data and no handler;
   the other installs rpn_char_map_handler with map_info as client data.
   Presumably they are the two arms of an if/else -- the "else" line is not
   visible in this listing. */
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 if (zebra_maps_is_icu(zm))
69 dict_grep_cmap(reg->dict, 0, 0);
71 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
85 const char *index_type;
/* Record one dictionary hit in the grep_info accumulator p.
   name is the dictionary key of the hit (an encoded ordinal, decoded with
   key_SU_decode(), followed by the term bytes); info points at a length
   byte, asserted to equal sizeof(ISAM_P), followed by the ISAM_P itself,
   which is copied into p->isam_p_buf at index p->isam_p_indx.
   When the buffer is full it is regrown to 2*size + 100 entries and the old
   contents are copied over.  The term/db/index lookup at the end feeds
   resultSetAddTerm() for p->termset (the condition guarding it is not
   visible in this listing). */
89 static int add_isam_p(const char *name, const char *info,
94 log_level_rpn = yaz_log_module_level("rpn");
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
102 if (p->isam_p_indx == p->isam_p_size)
104 ISAM_P *new_isam_p_buf;
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
117 p->isam_p_buf = new_isam_p_buf;
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* Carry the old term numbers over.  The source must be p->term_no: at this
   point p->isam_p_buf already refers to the new ISAM_P buffer, whose
   elements are not ints. */
123 memcpy(new_term_no, p->term_no,
124 p->isam_p_indx * sizeof(*p->term_no));
127 p->term_no = new_term_no;
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
136 char term_tmp[IT_MAX_WORD];
138 const char *index_name;
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
/* Callback adapter passed to dict_lookup_grep() (see numeric_relation()):
   casts the opaque client pointer p to struct grep_info * and forwards the
   hit to add_isam_p(), returning its result. */
154 static int grep_handle(char *name, const char *info, void *p)
156 return add_isam_p(name, info, (struct grep_info *) p);
/* Preparation step run by the term_1xx() parsers before they read a term
   from *src; callers treat a zero return as "no term".
   The visible loop skips white space: the current character is first tested
   against the operator sets ct1 and ct2 (either may be NULL), then mapped
   with zebra_maps_input(); a mapping other than CHR_SPACE is tested for.
   NOTE(review): what happens on each test (presumably leaving the loop) and
   the return value are not visible in this listing. */
159 static int term_pre(zebra_map_t zm, const char **src,
160 const char *ct1, const char *ct2, int first)
162 const char *s1, *s0 = *src;
165 /* skip white space */
168 if (ct1 && strchr(ct1, *s0))
170 if (ct2 && strchr(ct2, *s0))
173 map = zebra_maps_input(zm, &s1, strlen(s1), first);
174 if (**map != *CHR_SPACE)
/* Render in_buf[0 .. in_size) into out_buf for diagnostics, one "HH:c "
   item per input byte (two hex digits, a colon, the character pc and a
   space).  out_size must be larger than 20 (asserted).  Once the text grows
   beyond out_size-20 bytes ".." is appended; presumably the loop is then
   left -- that line is not visible in this listing.
   NOTE(review): pc is presumably a printable stand-in chosen when the byte
   is outside 32..126; its assignment is not visible here. */
183 static void esc_str(char *out_buf, size_t out_size,
184 const char *in_buf, int in_size)
190 assert(out_size > 20);
192 for (k = 0; k<in_size; k++)
194 int c = in_buf[k] & 0xff;
196 if (c < 32 || c > 126)
200 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
201 if (strlen(out_buf) > out_size-20)
203 strcat(out_buf, "..");
/* Characters that get a backslash written in front of them when raw term
   bytes are copied into the dictionary regular expression (see
   add_non_space(), term_100_icu() and term_100()). */
209 #define REGEX_CHARS " []()|.*+?!"
/* Append one non-space piece of the query term to both outputs.
   The raw bytes [start, end) are copied into dst_term at offset *dst_ptr.
   For term_dict two forms are visible: the raw bytes, each preceded by a
   backslash when it is one of REGEX_CHARS, or the mapped string map[0]
   (also formatted with esc_str() into tmpbuf, presumably for logging).
   NOTE(review): presumably q_map_match selects between the two forms; the
   branch lines, the term_dict parameter line and the update of *dst_ptr are
   not visible in this listing. */
211 static void add_non_space(const char *start, const char *end,
213 char *dst_term, int *dst_ptr,
214 const char **map, int q_map_match)
216 size_t sz = end - start;
217 memcpy(dst_term + *dst_ptr, start, sz);
223 if (strchr(REGEX_CHARS, *start))
224 wrbuf_putc(term_dict, '\\');
225 wrbuf_putc(term_dict, *start);
232 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
234 wrbuf_puts(term_dict, map[0]);
/* ICU variant of term_100(), used when zebra_maps_is_icu(zm) is true.
   The whole remaining input *src is handed to zebra_map_tokenize(); *src is
   appended to dst_term and then advanced to its terminating NUL.  Each byte
   of a token (res_buf, res_len) is written to term_dict, with a backslash
   in front of REGEX_CHARS members; byte value 1 is also written in front of
   some bytes (string_term() calls 1 "our internal regexp escape char") --
   the condition for that is not visible in this listing.  Further tokens
   are fetched by calling zebra_map_tokenize() with a null input buffer. */
239 static int term_100_icu(zebra_map_t zm,
240 const char **src, WRBUF term_dict, int space_split,
244 const char *res_buf = 0;
246 int r = zebra_map_tokenize(zm, *src, strlen(*src),
249 yaz_log(YLOG_LOG, "term_100_icu r=%d", r);
251 strcat(dst_term, *src);
252 *src += strlen(*src);
257 for (i = 0; i < res_len; i++)
259 if (strchr(REGEX_CHARS, res_buf[i]))
260 wrbuf_putc(term_dict, '\\');
262 wrbuf_putc(term_dict, 1);
263 wrbuf_putc(term_dict, res_buf[i]);
265 r = zebra_map_tokenize(zm, 0, 0, &res_buf, &res_len);
/* Parse one plain term from *src: the regular-expression form is appended
   to term_dict and the term text is stored in dst_term.  ICU maps are
   delegated to term_100_icu().  Callers test the result with
   "if (!term_100(...))" and treat zero as failure / no term.
   Input is consumed with zebra_maps_search(); pieces that do not map to
   CHR_SPACE are added through add_non_space(). */
270 /* term_100: handle term, where trunc = none(no operators at all) */
271 static int term_100(zebra_map_t zm,
272 const char **src, WRBUF term_dict, int space_split,
280 const char *space_start = 0;
281 const char *space_end = 0;
283 if (zebra_maps_is_icu(zm))
284 return term_100_icu(zm, src, term_dict, space_split, dst_term);
286 if (!term_pre(zm, src, NULL, NULL, !space_split))
293 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
296 if (**map == *CHR_SPACE)
299 else /* complete subfield only. */
301 if (**map == *CHR_SPACE)
302 { /* save space mapping for later .. */
307 else if (space_start)
308 { /* reload last space */
/* The remembered space run is emitted only now, when a non-space piece
   follows it; REGEX_CHARS members are backslash-escaped in term_dict. */
309 while (space_start < space_end)
311 if (strchr(REGEX_CHARS, *space_start))
312 wrbuf_putc(term_dict, '\\');
313 dst_term[j++] = *space_start;
314 wrbuf_putc(term_dict, *space_start);
319 space_start = space_end = 0;
324 add_non_space(s1, s0, term_dict, dst_term, &j,
/* Parse one term in which '#' is an operator ('#' is the operator set given
   to term_pre()).  ".*" is written to term_dict while the operator
   character itself is copied to dst_term -- presumably this is the handling
   of '#'; the test on the character is not visible in this listing.
   Other input goes through zebra_maps_search() and add_non_space(); with
   space_split set, a piece mapping to CHR_SPACE is tested for (presumably
   ending the term).  dst_term is NUL-terminated at the end. */
332 /* term_101: handle term, where trunc = Process # */
333 static int term_101(zebra_map_t zm,
334 const char **src, WRBUF term_dict, int space_split,
342 if (!term_pre(zm, src, "#", "#", !space_split))
350 wrbuf_puts(term_dict, ".*");
351 dst_term[j++] = *s0++;
357 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
358 if (space_split && **map == *CHR_SPACE)
362 add_non_space(s1, s0, term_dict, dst_term, &j,
366 dst_term[j++] = '\0';
/* Parse one term that is itself a regular expression.
   errors may be NULL (term_102() passes NULL).  When it is non-NULL and the
   term starts with "+d+" (d a decimal digit) followed by at least one more
   character, *errors is set to the value of d; string_term() passes the
   address of the value it later gives to dict_lookup_grep().
   Characters from the set "^\()[].*+?|-" are tested for and a character is
   written to term_dict unchanged (presumably exactly those characters);
   other input goes through zebra_maps_search() and add_non_space(). */
371 /* term_103: handle term, where trunc = re-2 (regular expressions) */
372 static int term_103(zebra_map_t zm, const char **src,
373 WRBUF term_dict, int *errors, int space_split,
381 if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
384 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
385 isdigit(((const unsigned char *)s0)[1]))
387 *errors = s0[1] - '0';
394 if (strchr("^\\()[].*+?|-", *s0))
397 wrbuf_putc(term_dict, *s0);
405 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
406 if (space_split && **map == *CHR_SPACE)
410 add_non_space(s1, s0, term_dict, dst_term, &j,
420 /* term_102: handle term, where trunc = re-1 (regular expressions).
   Thin wrapper: identical to term_103() called with errors == NULL, so no
   "+d+" error-count prefix is interpreted. */
421 static int term_102(zebra_map_t zm, const char **src,
422 WRBUF term_dict, int space_split, char *dst_term)
424 return term_103(zm, src, term_dict, NULL, space_split, dst_term);
428 /* term_104: handle term, process ?, * and # (the operator set passed to
   term_pre() below).
   The operator character is copied to dst_term; a run of decimal digits
   following an operator is accumulated into limit and copied to dst_term as
   well.  The regexp fragments written to term_dict are ".?", ".*" and ".".
   NOTE(review): which operator produces which fragment, and how limit
   selects between ".?" and ".*", is not visible in this listing.
   dst_term is NUL-terminated at the end. */
429 static int term_104(zebra_map_t zm, const char **src,
430 WRBUF term_dict, int space_split, char *dst_term)
437 if (!term_pre(zm, src, "?*#", "?*#", !space_split))
445 dst_term[j++] = *s0++;
446 if (*s0 >= '0' && *s0 <= '9')
449 while (*s0 >= '0' && *s0 <= '9')
451 limit = limit * 10 + (*s0 - '0');
452 dst_term[j++] = *s0++;
458 wrbuf_puts(term_dict, ".?");
463 wrbuf_puts(term_dict, ".*");
469 wrbuf_puts(term_dict, ".*");
470 dst_term[j++] = *s0++;
475 wrbuf_puts(term_dict, ".");
476 dst_term[j++] = *s0++;
482 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
483 if (space_split && **map == *CHR_SPACE)
487 add_non_space(s1, s0, term_dict, dst_term, &j,
491 dst_term[j++] = '\0';
/* Shared parser for truncation values 105 and 106: string_term() calls it
   with right_truncate == 1 for 105 and == 0 for 106.
   '*' and '!' form the operator set given to term_pre(); ".*" and "." are
   the fragments written to term_dict for operators (presumably ".*" for
   '*' and "." for '!'), and the operator is copied to dst_term.
   A final ".*" is written after the loop -- presumably only when
   right_truncate is set; the test is not visible in this listing. */
496 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
497 static int term_105(zebra_map_t zm, const char **src,
498 WRBUF term_dict, int space_split,
499 char *dst_term, int right_truncate)
506 if (!term_pre(zm, src, "*!", "*!", !space_split))
514 wrbuf_puts(term_dict, ".*");
515 dst_term[j++] = *s0++;
520 wrbuf_putc(term_dict, '.');
521 dst_term[j++] = *s0++;
527 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
528 if (space_split && **map == *CHR_SPACE)
532 add_non_space(s1, s0, term_dict, dst_term, &j,
537 wrbuf_puts(term_dict, ".*");
538 dst_term[j++] = '\0';
/* Appends to term_dict a regular expression over decimal integers for the
   relation described in the comment below.  The expression is assembled in
   the fixed-size local dst_buf (2000 bytes, "assuming enough") with
   strcpy/strcat and written out once at the end with wrbuf_puts().
   Two opening forms are visible, "(-[0-9]+|(" and "([0-9]+|-(", each making
   every number of one sign match outright; which combination of islt and
   the sign of val selects which form is not visible in this listing. */
544 /* gen_regular_rel - generate regular expression from relation
545 * val: border value (inclusive)
546 * islt: 1 if <=; 0 if >=.
548 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
550 char dst_buf[20*5*20]; /* assuming enough for expansion */
557 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
561 strcpy(dst, "(-[0-9]+|(");
569 strcpy(dst, "([0-9]+|-(");
580 sprintf(numstr, "%d", val);
581 for (w = strlen(numstr); --w >= 0; pos++)
600 strcpy(dst + dst_p, numstr);
601 dst_p = strlen(dst) - pos - 1;
629 for (i = 0; i<pos; i++)
642 /* match everything less than 10^(pos-1) */
644 for (i = 1; i<pos; i++)
645 strcat(dst, "[0-9]?");
649 /* match everything greater than 10^pos */
650 for (i = 0; i <= pos; i++)
651 strcat(dst, "[0-9]");
652 strcat(dst, "[0-9]*");
655 wrbuf_puts(term_dict, dst);
/* Copy the character at position *indx of wsrc to term_p.
   A backslash is treated specially: two wrbuf_putc() calls are visible, so
   presumably the backslash and the character it escapes are copied as one
   unit.  NOTE(review): the statements advancing *indx are not visible in
   this listing; callers loop on the index they pass in, so it is presumably
   advanced past everything copied. */
658 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
660 const char *src = wrbuf_cstr(wsrc);
661 if (src[*indx] == '\\')
663 wrbuf_putc(term_p, src[*indx]);
666 wrbuf_putc(term_p, src[*indx]);
671 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
672 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
673 * >= abc ([b-].*|a[c-].*|ab[c-].*)
674 * ([^-a].*|a[^-b].*|ab[c-].*)
675 * < abc ([-0].*|a[-a].*|ab[-b].*)
676 * ([^a-].*|a[^b-].*|ab[^c-].*)
677 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
678 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Append to term_dict a regular expression selecting the dictionary entries
   that stand in the requested relation to the query term.
   The relation is attribute type 2 of zapt.  For every relation handled
   here the term is first parsed by term_100() into the scratch buffer
   term_component, and the expression is then generated character by
   character from it with string_rel_add_char().  term_component is
   destroyed on every visible exit path.
   For an unsupported relation *error_code is set to
   YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.  string_term() treats a zero return
   as failure.
   NOTE(review): the case labels and return statements are not visible in
   this listing; the log messages below identify each branch. */
680 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
681 const char **term_sub, WRBUF term_dict,
682 const Odr_oid *attributeSet,
683 zebra_map_t zm, int space_split, char *term_dst,
689 WRBUF term_component = wrbuf_alloc();
691 attr_init_APT(&relation, zapt, 2);
692 relation_value = attr_find(&relation, NULL);
695 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
696 switch (relation_value)
699 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
701 wrbuf_destroy(term_component);
704 yaz_log(log_level_rpn, "Relation <");
706 wrbuf_putc(term_dict, '(');
707 for (i = 0; i < wrbuf_len(term_component); )
712 wrbuf_putc(term_dict, '|');
714 string_rel_add_char(term_dict, term_component, &j);
716 wrbuf_putc(term_dict, '[');
718 wrbuf_putc(term_dict, '^');
720 wrbuf_putc(term_dict, 1);
721 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
723 string_rel_add_char(term_dict, term_component, &i);
724 wrbuf_putc(term_dict, '-');
726 wrbuf_putc(term_dict, ']');
727 wrbuf_putc(term_dict, '.');
728 wrbuf_putc(term_dict, '*');
730 wrbuf_putc(term_dict, ')');
733 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
735 wrbuf_destroy(term_component);
738 yaz_log(log_level_rpn, "Relation <=");
740 wrbuf_putc(term_dict, '(');
741 for (i = 0; i < wrbuf_len(term_component); )
746 string_rel_add_char(term_dict, term_component, &j);
747 wrbuf_putc(term_dict, '[');
749 wrbuf_putc(term_dict, '^');
751 wrbuf_putc(term_dict, 1);
752 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
754 string_rel_add_char(term_dict, term_component, &i);
755 wrbuf_putc(term_dict, '-');
757 wrbuf_putc(term_dict, ']');
758 wrbuf_putc(term_dict, '.');
759 wrbuf_putc(term_dict, '*');
761 wrbuf_putc(term_dict, '|');
/* last alternative for "<=": the term itself, copied verbatim */
763 for (i = 0; i < wrbuf_len(term_component); )
764 string_rel_add_char(term_dict, term_component, &i);
765 wrbuf_putc(term_dict, ')');
768 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
770 wrbuf_destroy(term_component);
773 yaz_log(log_level_rpn, "Relation >");
775 wrbuf_putc(term_dict, '(');
776 for (i = 0; i < wrbuf_len(term_component); )
781 string_rel_add_char(term_dict, term_component, &j);
782 wrbuf_putc(term_dict, '[');
784 wrbuf_putc(term_dict, '^');
785 wrbuf_putc(term_dict, '-');
786 string_rel_add_char(term_dict, term_component, &i);
788 wrbuf_putc(term_dict, ']');
789 wrbuf_putc(term_dict, '.');
790 wrbuf_putc(term_dict, '*');
792 wrbuf_putc(term_dict, '|');
/* last alternative for ">": the term followed by ".+", i.e. at least one
   more character */
794 for (i = 0; i < wrbuf_len(term_component); )
795 string_rel_add_char(term_dict, term_component, &i);
796 wrbuf_putc(term_dict, '.');
797 wrbuf_putc(term_dict, '+');
798 wrbuf_putc(term_dict, ')');
801 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
803 wrbuf_destroy(term_component);
806 yaz_log(log_level_rpn, "Relation >=");
808 wrbuf_putc(term_dict, '(');
809 for (i = 0; i < wrbuf_len(term_component); )
814 wrbuf_putc(term_dict, '|');
816 string_rel_add_char(term_dict, term_component, &j);
817 wrbuf_putc(term_dict, '[');
/* ">=" builds a different character class for the final character than for
   the earlier ones: "[^-c]" form before the last position, "[c-]" form at
   it (presumably the two arms of this test; the else is not visible). */
819 if (i < wrbuf_len(term_component)-1)
821 wrbuf_putc(term_dict, '^');
822 wrbuf_putc(term_dict, '-');
823 string_rel_add_char(term_dict, term_component, &i);
827 string_rel_add_char(term_dict, term_component, &i);
828 wrbuf_putc(term_dict, '-');
830 wrbuf_putc(term_dict, ']');
831 wrbuf_putc(term_dict, '.');
832 wrbuf_putc(term_dict, '*');
834 wrbuf_putc(term_dict, ')');
841 yaz_log(log_level_rpn, "Relation =");
842 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
844 wrbuf_destroy(term_component);
/* equality: the parsed term wrapped in a single group */
847 wrbuf_puts(term_dict, "(");
848 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
849 wrbuf_puts(term_dict, ")");
852 yaz_log(log_level_rpn, "Relation always matches");
853 /* skip to end of term (we don't care what it is) */
854 while (**term_sub != '\0')
858 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
859 wrbuf_destroy(term_component);
862 wrbuf_destroy(term_component);
/* Forward declaration: string_term() is defined after term_trunc(), which
   calls it. */
866 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
867 const char **term_sub,
869 const Odr_oid *attributeSet, NMEM stream,
870 struct grep_info *grep_info,
871 const char *index_type, int complete_flag,
873 const char *xpath_use,
874 struct ord_list **ol);
/* Extract the per-term hit limit and term reference id from zapt.
   *hits_limit_value is read from attribute type 11 and *term_ref_id_str
   from attribute type 10.  A numeric reference id (>= 0) is formatted as
   decimal text into a 20-byte buffer obtained from nmem_malloc().
   Limit rules, as implemented below:
     -1 (not given) with a reference id    -> zh->approx_limit
     -1 (not given) without a reference id -> 0 (no counting)
      0                                    -> zh->approx_limit
   NOTE(review): the trailing parameter line(s) and the return statement
   are not visible in this listing. */
876 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
877 Z_AttributesPlusTerm *zapt,
878 zint *hits_limit_value,
879 const char **term_ref_id_str,
882 AttrType term_ref_id_attr;
883 AttrType hits_limit_attr;
886 attr_init_APT(&hits_limit_attr, zapt, 11);
887 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
889 attr_init_APT(&term_ref_id_attr, zapt, 10);
890 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
891 if (term_ref_id_int >= 0)
893 char *res = nmem_malloc(nmem, 20);
894 sprintf(res, "%d", term_ref_id_int);
895 *term_ref_id_str = res;
898 /* no limit given ? */
899 if (*hits_limit_value == -1)
901 if (*term_ref_id_str)
903 /* use global if term_ref is present */
904 *hits_limit_value = zh->approx_limit;
908 /* no counting if term_ref is not present */
909 *hits_limit_value = 0;
912 else if (*hits_limit_value == 0)
914 /* 0 is the same as global limit */
915 *hits_limit_value = zh->approx_limit;
917 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
918 *term_ref_id_str ? *term_ref_id_str : "none",
/* Build the result set for the next term of a term list.
   Steps visible below: fetch the hit limit and term reference id with
   zebra_term_limits_APT(); reset grep_info->isam_p_indx; let string_term()
   parse the term at *term_sub and collect matching ISAM positions in
   grep_info (using a temporary WRBUF for the dictionary regexp, destroyed
   straight after the call); finally turn the collected positions into a
   result set with rset_trunc(), stored in *rset.
   A null *term_sub after string_term() means there are no more terms.
   NOTE(review): several parameter lines (term_dst, rset_nmem, rset) and
   the return statements are not visible in this listing. */
923 static ZEBRA_RES term_trunc(ZebraHandle zh,
924 Z_AttributesPlusTerm *zapt,
925 const char **term_sub,
926 const Odr_oid *attributeSet, NMEM stream,
927 struct grep_info *grep_info,
928 const char *index_type, int complete_flag,
930 const char *rank_type,
931 const char *xpath_use,
934 struct rset_key_control *kc)
938 zint hits_limit_value;
939 const char *term_ref_id_str = 0;
940 WRBUF term_dict = wrbuf_alloc();
943 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
945 grep_info->isam_p_indx = 0;
946 res = string_term(zh, zapt, term_sub, term_dict,
947 attributeSet, stream, grep_info,
948 index_type, complete_flag,
949 term_dst, xpath_use, &ol);
950 wrbuf_destroy(term_dict);
953 if (!*term_sub) /* no more terms ? */
955 yaz_log(log_level_rpn, "term: %s", term_dst);
956 *rset = rset_trunc(zh, grep_info->isam_p_buf,
957 grep_info->isam_p_indx, term_dst,
958 strlen(term_dst), rank_type, 1 /* preserve pos */,
959 zapt->term->which, rset_nmem,
960 kc, kc->scope, ol, index_type, hits_limit_value,
/* Parse one term from *term_sub, translate it into a dictionary regular
   expression and collect the matching dictionary entries in grep_info.
   The expression built in term_dict is "(" <ord bytes> ")" followed by a
   group for the term: the ordinal returned by zebra_apt_get_ord() is
   encoded with key_SU_encode() and each of its bytes is written with the
   internal escape byte 1 in front of it.  prefix_len remembers the length
   of that prefix.  The term group depends on the truncation attribute
   (type 5 of zapt); see the cases below.  The finished expression is run
   with dict_lookup_grep(), passing regex_range (the error count that
   term_103() may set from a "+d+" prefix) and ord_len as the number of
   "exact" leading characters.
   *ol receives the list of ordinals searched.
   NOTE(review): several parameter lines, the error-return statements and
   the final update of *term_sub are not visible in this listing. */
967 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
968 const char **term_sub,
970 const Odr_oid *attributeSet, NMEM stream,
971 struct grep_info *grep_info,
972 const char *index_type, int complete_flag,
974 const char *xpath_use,
975 struct ord_list **ol)
979 int truncation_value;
981 struct rpn_char_map_info rcmi;
/* complete_flag (phrase/complete-field registers) disables splitting the
   input at white space */
983 int space_split = complete_flag ? 0 : 1;
986 int max_pos, prefix_len = 0;
990 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
992 *ol = ord_list_create(stream);
994 rpn_char_map_prepare(zh->reg, zm, &rcmi);
995 attr_init_APT(&truncation, zapt, 5);
996 truncation_value = attr_find(&truncation, NULL);
997 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
999 termp = *term_sub; /* start of term for each database */
1001 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1002 attributeSet, &ord) != ZEBRA_OK)
1008 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1010 *ol = ord_list_append(stream, *ol, ord);
1011 ord_len = key_SU_encode(ord, ord_buf);
1013 wrbuf_putc(term_dict, '(');
1015 for (i = 0; i<ord_len; i++)
1017 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1018 wrbuf_putc(term_dict, ord_buf[i]);
1020 wrbuf_putc(term_dict, ')');
1022 prefix_len = wrbuf_len(term_dict);
1024 switch (truncation_value)
1026 case -1: /* not specified */
1027 case 100: /* do not truncate */
1028 if (!string_relation(zh, zapt, &termp, term_dict,
1030 zm, space_split, term_dst,
1035 zebra_setError(zh, relation_error, 0);
1042 case 1: /* right truncation */
1043 wrbuf_putc(term_dict, '(');
1044 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1049 wrbuf_puts(term_dict, ".*)");
1051 case 2: /* left truncation */
1052 wrbuf_puts(term_dict, "(.*");
1053 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1058 wrbuf_putc(term_dict, ')');
1060 case 3: /* left&right truncation */
1061 wrbuf_puts(term_dict, "(.*");
1062 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1067 wrbuf_puts(term_dict, ".*)");
1069 case 101: /* process # in term */
1070 wrbuf_putc(term_dict, '(');
1071 if (!term_101(zm, &termp, term_dict, space_split, term_dst))
1076 wrbuf_puts(term_dict, ")");
1078 case 102: /* Regexp-1 */
1079 wrbuf_putc(term_dict, '(');
1080 if (!term_102(zm, &termp, term_dict, space_split, term_dst))
1085 wrbuf_putc(term_dict, ')');
1087 case 103: /* Regexp-2 */
1089 wrbuf_putc(term_dict, '(');
/* term_103() takes an int *errors; it stores the "+d+" error count in
   regex_range, which is handed to dict_lookup_grep() below */
1090 if (!term_103(zm, &termp, term_dict, &regex_range,
1091 space_split, term_dst))
1096 wrbuf_putc(term_dict, ')');
1098 case 104: /* process # and ! in term */
1099 wrbuf_putc(term_dict, '(');
1100 if (!term_104(zm, &termp, term_dict, space_split, term_dst))
1105 wrbuf_putc(term_dict, ')');
1107 case 105: /* process * and ! in term, right_truncate = 1 */
1108 wrbuf_putc(term_dict, '(');
1109 if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1))
1114 wrbuf_putc(term_dict, ')');
1116 case 106: /* process * and ! in term, right_truncate = 0 */
1117 wrbuf_putc(term_dict, '(');
1118 if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0))
1123 wrbuf_putc(term_dict, ')');
1126 zebra_setError_zint(zh,
1127 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1134 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1135 esc_str(buf, sizeof(buf), input, strlen(input));
1138 WRBUF pr_wr = wrbuf_alloc();
1140 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1141 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1142 wrbuf_destroy(pr_wr);
1144 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1145 grep_info, &max_pos,
1146 ord_len /* number of "exact" chars */,
1149 zebra_set_partial_result(zh);
1151 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1153 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the buffers owned by grep_info: the term_no array and the
   isam_p_buf array, both freed with xfree().  (add_isam_p() allocates them
   with xmalloc().) */
1159 static void grep_info_delete(struct grep_info *grep_info)
1162 xfree(grep_info->term_no);
1164 xfree(grep_info->isam_p_buf);
/* Initialise grep_info for a search on index_type.
   Buffers start out empty (size 0, null pointers).  trunc_max comes from
   the "truncmax" resource (default "10000") and is overridden by attribute
   type 13 of zapt when that attribute is present (value != -1).
   Attribute type 8 requests a termset: a value other than -1 means it was
   given, -2 marks a string value (termset_value_string), anything else is
   a number that is formatted as decimal text for the result set name.
   Two outcomes are visible for a requested termset: an error
   YAZ_BIB1_UNSUPP_SEARCH "termset", or creation through resultSetAdd() with
   YAZ_BIB1_ILLEGAL_RESULT_SET_NAME on failure.  Presumably a compile-time
   switch (TERMSET_DISABLE?) selects between them -- not visible in this
   listing; confirm.
   term_list_trunc() compares the return value with ZEBRA_FAIL. */
1167 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1168 Z_AttributesPlusTerm *zapt,
1169 struct grep_info *grep_info,
1170 const char *index_type)
1173 grep_info->term_no = 0;
1175 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1176 grep_info->isam_p_size = 0;
1177 grep_info->isam_p_buf = NULL;
1179 grep_info->index_type = index_type;
1180 grep_info->termset = 0;
1186 attr_init_APT(&truncmax, zapt, 13);
1187 truncmax_value = attr_find(&truncmax, NULL);
1188 if (truncmax_value != -1)
1189 grep_info->trunc_max = truncmax_value;
1194 int termset_value_numeric;
1195 const char *termset_value_string;
1197 attr_init_APT(&termset, zapt, 8);
1198 termset_value_numeric =
1199 attr_find_ex(&termset, NULL, &termset_value_string);
1200 if (termset_value_numeric != -1)
1203 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1207 const char *termset_name = 0;
1208 if (termset_value_numeric != -2)
1211 sprintf(resname, "%d", termset_value_numeric);
1212 termset_name = resname;
1215 termset_name = termset_value_string;
1216 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1217 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1218 if (!grep_info->termset)
1220 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1230 \brief Create result set(s) for list of terms
1231 \param zh Zebra Handle
1232 \param zapt Attributes Plus Term (RPN leaf)
1233 \param termz term as used in query but converted to UTF-8
1234 \param attributeSet default attribute set
1235 \param stream memory for result
1236 \param index_type register type ("w", "p",..)
1237 \param complete_flag whether it's phrases or not
1238 \param rank_type term flags for ranking
1239 \param xpath_use use attribute for X-Path (-1 for no X-path)
1240 \param rset_nmem memory for result sets
1241 \param result_sets output result set for each term in list (output)
1242 \param num_result_sets number of output result sets
1243 \param kc rset key control to be used for created result sets
/* Implementation notes.  Terms are taken one after another from termz by
   term_trunc(), which advances termp.  The output array *result_sets lives
   in the stream NMEM; when it is full a larger array is allocated and the
   existing entries are copied over.  If term_trunc() fails, every result
   set created so far is deleted with rset_delete() and grep_info is
   released before returning.  A null result set from term_trunc() is
   tested for (presumably marking the end of the term list -- the statement
   that follows is not visible in this listing).  grep_info is released on
   the normal path as well. */
1245 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1246 Z_AttributesPlusTerm *zapt,
1248 const Odr_oid *attributeSet,
1250 const char *index_type, int complete_flag,
1251 const char *rank_type,
1252 const char *xpath_use,
1254 RSET **result_sets, int *num_result_sets,
1255 struct rset_key_control *kc)
1257 char term_dst[IT_MAX_WORD+1];
1258 struct grep_info grep_info;
1259 const char *termp = termz;
1262 *num_result_sets = 0;
1264 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1270 if (alloc_sets == *num_result_sets)
1273 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1276 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1277 alloc_sets = alloc_sets + add;
1278 *result_sets = rnew;
1280 res = term_trunc(zh, zapt, &termp, attributeSet,
1282 index_type, complete_flag,
1283 term_dst, rank_type,
1284 xpath_use, rset_nmem,
1285 &(*result_sets)[*num_result_sets],
1287 if (res != ZEBRA_OK)
1290 for (i = 0; i < *num_result_sets; i++)
1291 rset_delete((*result_sets)[i]);
1292 grep_info_delete(&grep_info);
1295 if ((*result_sets)[*num_result_sets] == 0)
1297 (*num_result_sets)++;
1302 grep_info_delete(&grep_info);
/* Create, in *rset, a result set for the "first in field" marker of the
   index addressed by zapt, according to the position attribute (type 3).
   YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE is raised for a position value the
   switch rejects, when the register map has no first-in-field support
   (zebra_maps_is_first_in_field()), and when neither isamb nor isamc is in
   use.  Otherwise the dictionary key is the encoded ordinal followed by
   FIRST_IN_FIELD_STR; its ISAM_P (stored after a length byte that is
   asserted to be sizeof(ISAM_P)) is opened with rsisamb_create() or
   rsisamc_create().
   NOTE(review): the case labels, the handling of a failed dict_lookup()
   and the return statements are not visible in this listing. */
1306 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1307 Z_AttributesPlusTerm *zapt,
1308 const Odr_oid *attributeSet,
1309 const char *index_type,
1312 struct rset_key_control *kc)
1318 char term_dict[100];
1322 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1324 attr_init_APT(&position, zapt, 3);
1325 position_value = attr_find(&position, NULL);
1326 switch(position_value)
1335 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1341 if (!zebra_maps_is_first_in_field(zm))
1343 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1348 if (!zh->reg->isamb && !zh->reg->isamc)
1350 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1355 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1356 attributeSet, &ord) != ZEBRA_OK)
1360 ord_len = key_SU_encode(ord, ord_buf);
1361 memcpy(term_dict, ord_buf, ord_len);
1362 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1363 val = dict_lookup(zh->reg->dict, term_dict);
1366 assert(*val == sizeof(ISAM_P));
1367 memcpy(&isam_p, val+1, sizeof(isam_p));
1370 *rset = rsisamb_create(rset_nmem, kc, kc->scope,
1371 zh->reg->isamb, isam_p, 0);
1372 else if (zh->reg->isamc)
1373 *rset = rsisamc_create(rset_nmem, kc, kc->scope,
1374 zh->reg->isamc, isam_p, 0);
/* Phrase search: one result set per term (term_list_trunc()), combined
   with an ordered proximity operator of distance 1.
   When there is at least one term set, rpn_search_APT_position() is asked
   for a first-in-field set; a set obtained that way is placed in front of
   the term sets in a new array of num_result_sets+1 entries (the increment
   of num_result_sets and the test on first_set are not visible in this
   listing).  Result: a null set for no terms, the single set itself for
   one, otherwise rset_create_prox(). */
1379 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1380 Z_AttributesPlusTerm *zapt,
1381 const char *termz_org,
1382 const Odr_oid *attributeSet,
1384 const char *index_type, int complete_flag,
1385 const char *rank_type,
1386 const char *xpath_use,
1389 struct rset_key_control *kc)
1391 RSET *result_sets = 0;
1392 int num_result_sets = 0;
1394 term_list_trunc(zh, zapt, termz_org, attributeSet,
1395 stream, index_type, complete_flag,
1396 rank_type, xpath_use,
1398 &result_sets, &num_result_sets, kc);
1400 if (res != ZEBRA_OK)
1403 if (num_result_sets > 0)
1406 res = rpn_search_APT_position(zh, zapt, attributeSet,
1408 rset_nmem, &first_set,
1410 if (res != ZEBRA_OK)
1414 RSET *nsets = nmem_malloc(stream,
1415 sizeof(RSET) * (num_result_sets+1));
1416 nsets[0] = first_set;
1417 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1418 result_sets = nsets;
1422 if (num_result_sets == 0)
1423 *rset = rset_create_null(rset_nmem, kc, 0);
1424 else if (num_result_sets == 1)
1425 *rset = result_sets[0];
1427 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1428 num_result_sets, result_sets,
1429 1 /* ordered */, 0 /* exclusion */,
1430 3 /* relation */, 1 /* distance */);
/* OR-list search: one result set per term (term_list_trunc()), combined
   with rset_create_or().
   For each term set rpn_search_APT_position() is called; if it fails, all
   term sets are deleted (the inner cleanup loop reuses i, so presumably the
   function returns right after it -- not visible in this listing).  A
   first-in-field set, when obtained, is paired with the term set in an
   ordered distance-1 proximity set that replaces result_sets[i].
   Result: a null set for no terms, the single set itself for one,
   otherwise the OR of all sets. */
1436 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1437 Z_AttributesPlusTerm *zapt,
1438 const char *termz_org,
1439 const Odr_oid *attributeSet,
1441 const char *index_type,
1443 const char *rank_type,
1444 const char *xpath_use,
1447 struct rset_key_control *kc)
1449 RSET *result_sets = 0;
1450 int num_result_sets = 0;
1453 term_list_trunc(zh, zapt, termz_org, attributeSet,
1454 stream, index_type, complete_flag,
1455 rank_type, xpath_use,
1457 &result_sets, &num_result_sets, kc);
1458 if (res != ZEBRA_OK)
1461 for (i = 0; i<num_result_sets; i++)
1464 res = rpn_search_APT_position(zh, zapt, attributeSet,
1466 rset_nmem, &first_set,
1468 if (res != ZEBRA_OK)
1470 for (i = 0; i<num_result_sets; i++)
1471 rset_delete(result_sets[i]);
1479 tmp_set[0] = first_set;
1480 tmp_set[1] = result_sets[i];
1482 result_sets[i] = rset_create_prox(
1483 rset_nmem, kc, kc->scope,
1485 1 /* ordered */, 0 /* exclusion */,
1486 3 /* relation */, 1 /* distance */);
1489 if (num_result_sets == 0)
1490 *rset = rset_create_null(rset_nmem, kc, 0);
1491 else if (num_result_sets == 1)
1492 *rset = result_sets[0];
1494 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1495 num_result_sets, result_sets);
/* AND-list search: same structure as rpn_search_APT_or_list(), except that
   the per-term result sets are combined with rset_create_and().
   Each term set is optionally paired with a first-in-field set from
   rpn_search_APT_position() in an ordered distance-1 proximity set; on a
   position failure all term sets are deleted (presumably followed by a
   return, not visible in this listing).
   Result: a null set for no terms, the single set itself for one,
   otherwise the AND of all sets. */
1501 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1502 Z_AttributesPlusTerm *zapt,
1503 const char *termz_org,
1504 const Odr_oid *attributeSet,
1506 const char *index_type,
1508 const char *rank_type,
1509 const char *xpath_use,
1512 struct rset_key_control *kc)
1514 RSET *result_sets = 0;
1515 int num_result_sets = 0;
1518 term_list_trunc(zh, zapt, termz_org, attributeSet,
1519 stream, index_type, complete_flag,
1520 rank_type, xpath_use,
1522 &result_sets, &num_result_sets,
1524 if (res != ZEBRA_OK)
1526 for (i = 0; i<num_result_sets; i++)
1529 res = rpn_search_APT_position(zh, zapt, attributeSet,
1531 rset_nmem, &first_set,
1533 if (res != ZEBRA_OK)
1535 for (i = 0; i<num_result_sets; i++)
1536 rset_delete(result_sets[i]);
1544 tmp_set[0] = first_set;
1545 tmp_set[1] = result_sets[i];
1547 result_sets[i] = rset_create_prox(
1548 rset_nmem, kc, kc->scope,
1550 1 /* ordered */, 0 /* exclusion */,
1551 3 /* relation */, 1 /* distance */);
1556 if (num_result_sets == 0)
1557 *rset = rset_create_null(rset_nmem, kc, 0);
1558 else if (num_result_sets == 1)
1559 *rset = result_sets[0];
1561 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1562 num_result_sets, result_sets);
/* Numeric counterpart of string_relation(): parse a number from *term_sub,
   append a matching regular expression to term_dict and run the dictionary
   grep, collecting hits in grep_info.
   The relation is attribute type 2 of zapt.  The term is parsed by
   term_100() (always with space_split == 1) into the scratch buffer
   term_num and converted with atoi().  Expressions generated:
     <   gen_regular_rel(term_value-1, islt=1)
     <=  gen_regular_rel(term_value,   islt=1)
     >=  gen_regular_rel(term_value,   islt=0)
     >   gen_regular_rel(term_value+1, islt=0)
     =   "(0*%d)", i.e. the value with optional leading zeros
   For an unsupported relation *error_code is set to
   YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.  term_num is destroyed on every
   visible exit path.  numeric_term() treats a zero return as failure.
   NOTE(review): atoi() gives 0 for non-numeric input without reporting an
   error.  Case labels and return statements are not visible in this
   listing. */
1568 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1569 const char **term_sub,
1571 const Odr_oid *attributeSet,
1572 struct grep_info *grep_info,
1582 WRBUF term_num = wrbuf_alloc();
1585 attr_init_APT(&relation, zapt, 2);
1586 relation_value = attr_find(&relation, NULL);
1588 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1590 switch (relation_value)
1593 yaz_log(log_level_rpn, "Relation <");
1594 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1596 wrbuf_destroy(term_num);
1599 term_value = atoi(wrbuf_cstr(term_num));
1600 gen_regular_rel(term_dict, term_value-1, 1);
1603 yaz_log(log_level_rpn, "Relation <=");
1604 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1606 wrbuf_destroy(term_num);
1609 term_value = atoi(wrbuf_cstr(term_num));
1610 gen_regular_rel(term_dict, term_value, 1);
1613 yaz_log(log_level_rpn, "Relation >=");
1614 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1616 wrbuf_destroy(term_num);
1619 term_value = atoi(wrbuf_cstr(term_num));
1620 gen_regular_rel(term_dict, term_value, 0);
1623 yaz_log(log_level_rpn, "Relation >");
1624 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1626 wrbuf_destroy(term_num);
1629 term_value = atoi(wrbuf_cstr(term_num));
1630 gen_regular_rel(term_dict, term_value+1, 0);
1634 yaz_log(log_level_rpn, "Relation =");
1635 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1637 wrbuf_destroy(term_num);
1640 term_value = atoi(wrbuf_cstr(term_num));
1641 wrbuf_printf(term_dict, "(0*%d)", term_value);
1644 /* term_tmp untouched.. */
1645 while (**term_sub != '\0')
1649 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1650 wrbuf_destroy(term_num);
/* Unlike string_term(), the grep is run here, with no error tolerance (0)
   and no "exact" prefix length (0). */
1653 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1654 0, grep_info, max_pos, 0, grep_handle);
1657 zebra_set_partial_result(zh);
1659 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1660 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1661 wrbuf_destroy(term_num);
/* Numeric counterpart of string_term(): look up the ordinal for the index
   addressed by zapt, start term_dict with "(" <ord bytes> ")" (each ordinal
   byte preceded by the escape byte 1) and let numeric_relation() append the
   numeric expression and run the dictionary grep.
   *ol receives the list of ordinals searched.  When numeric_relation()
   fails, zebra_setError() is called with relation_error (presumably only
   when it is non-zero -- the test is not visible in this listing).
   NOTE(review): several parameter lines, the return statements and the
   final update of *term_sub are not visible here. */
1665 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1666 const char **term_sub,
1668 const Odr_oid *attributeSet, NMEM stream,
1669 struct grep_info *grep_info,
1670 const char *index_type, int complete_flag,
1672 const char *xpath_use,
1673 struct ord_list **ol)
1676 struct rpn_char_map_info rcmi;
1678 int relation_error = 0;
1679 int ord, ord_len, i;
1681 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1683 *ol = ord_list_create(stream);
1685 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1689 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1690 attributeSet, &ord) != ZEBRA_OK)
1695 wrbuf_rewind(term_dict);
1697 *ol = ord_list_append(stream, *ol, ord);
1699 ord_len = key_SU_encode(ord, ord_buf);
1701 wrbuf_putc(term_dict, '(');
1702 for (i = 0; i < ord_len; i++)
1704 wrbuf_putc(term_dict, 1);
1705 wrbuf_putc(term_dict, ord_buf[i]);
1707 wrbuf_putc(term_dict, ')');
1709 if (!numeric_relation(zh, zapt, &termp, term_dict,
1710 attributeSet, grep_info, &max_pos, zm,
1711 term_dst, &relation_error))
1715 zebra_setError(zh, relation_error, 0);
1722 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric search: one result set per number in termz, combined with
   rset_create_and().
   For each term a fresh WRBUF is allocated for the dictionary expression,
   numeric_term() collects the matching positions in grep_info, and
   rset_trunc() turns them into a result set (position information is not
   preserved here: the argument is 0).  The result_sets array lives in the
   stream NMEM and is regrown when full.  The loop ends when numeric_term()
   fails or leaves termp null.  grep_info is released afterwards.
   Result: a null set for no terms, the single set itself for one,
   otherwise the AND of all sets.
   NOTE(review): the loop header, the increment of num_result_sets and the
   cleanup on failure are not visible in this listing. */
1727 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1728 Z_AttributesPlusTerm *zapt,
1730 const Odr_oid *attributeSet,
1732 const char *index_type,
1734 const char *rank_type,
1735 const char *xpath_use,
1738 struct rset_key_control *kc)
1740 char term_dst[IT_MAX_WORD+1];
1741 const char *termp = termz;
1742 RSET *result_sets = 0;
1743 int num_result_sets = 0;
1745 struct grep_info grep_info;
1747 zint hits_limit_value;
1748 const char *term_ref_id_str = 0;
1750 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1753 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1754 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1758 struct ord_list *ol;
1759 WRBUF term_dict = wrbuf_alloc();
1760 if (alloc_sets == num_result_sets)
1763 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1766 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1767 alloc_sets = alloc_sets + add;
1770 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1771 grep_info.isam_p_indx = 0;
1772 res = numeric_term(zh, zapt, &termp, term_dict,
1773 attributeSet, stream, &grep_info,
1774 index_type, complete_flag,
1775 term_dst, xpath_use, &ol);
1776 wrbuf_destroy(term_dict);
1777 if (res == ZEBRA_FAIL || termp == 0)
1779 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1780 result_sets[num_result_sets] =
1781 rset_trunc(zh, grep_info.isam_p_buf,
1782 grep_info.isam_p_indx, term_dst,
1783 strlen(term_dst), rank_type,
1784 0 /* preserve position */,
1785 zapt->term->which, rset_nmem,
1786 kc, kc->scope, ol, index_type,
1789 if (!result_sets[num_result_sets])
1795 grep_info_delete(&grep_info);
1797 if (res != ZEBRA_OK)
1799 if (num_result_sets == 0)
1800 *rset = rset_create_null(rset_nmem, kc, 0);
1801 else if (num_result_sets == 1)
1802 *rset = result_sets[0];
1804 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1805 num_result_sets, result_sets);
/*
 * rpn_search_APT_local: handler for the "local" search type.
 *
 * termz is converted to a zint system number with atozint() and the record
 * is fetched with rec_get().  Two outcomes are visible: *rset becomes a
 * null result set, or a temporary result set (using the "setTmpDir"
 * resource) is created, opened for writing and one key is written to it.
 *
 * NOTE(review): the conditions selecting between the two outcomes, the
 * construction of `key` and the return statements are not visible in this
 * listing; presumably the null set is used when no record is found -
 * confirm against the full source.
 */
1811 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1812 Z_AttributesPlusTerm *zapt,
1814 const Odr_oid *attributeSet,
1816 const char *rank_type, NMEM rset_nmem,
1818 struct rset_key_control *kc)
/* the search term itself is the record system number */
1821 zint sysno = atozint(termz);
1825 rec = rec_get(zh->reg->records, sysno);
1833 *rset = rset_create_null(rset_nmem, kc, 0);
1839 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1840 res_get(zh->res, "setTmpDir"), 0);
/* populate the temporary set through a write handle */
1841 rsfd = rset_open(*rset, RSETF_WRITE);
1846 rset_write(rsfd, &key);
/*
 * rpn_sort_spec: record a sort key specification carried by an APT.
 *
 * The APT's general term is parsed as a decimal slot index into
 * sort_sequence->specs; a Z_SortKeySpec built from the APT's attribute
 * list is stored in that slot.  The direction comes from attribute type 7
 * (1 = ascending, 2 = descending, anything else = ascending).  *rset is
 * set to a null result set, so the APT itself contributes no hits.
 *
 * All allocations come from `stream`.
 *
 * NOTE(review): this listing has gaps; the fallback assignments for a
 * non-general term and for an out-of-range index, plus the return
 * statement, are not visible.
 */
1852 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1853 const Odr_oid *attributeSet, NMEM stream,
1854 Z_SortKeySpecList *sort_sequence,
1855 const char *rank_type,
1858 struct rset_key_control *kc)
1861 int sort_relation_value;
1862 AttrType sort_relation_type;
/* attribute type 7 selects the sort direction (see below) */
1867 attr_init_APT(&sort_relation_type, zapt, 7);
1868 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort APT seen: create a table of 10 empty slots */
1870 if (!sort_sequence->specs)
1872 sort_sequence->num_specs = 10;
1873 sort_sequence->specs = (Z_SortKeySpec **)
1874 nmem_malloc(stream, sort_sequence->num_specs *
1875 sizeof(*sort_sequence->specs));
1876 for (i = 0; i<sort_sequence->num_specs; i++)
1877 sort_sequence->specs[i] = 0;
/* slot index is taken from the term text when it is a general term */
1879 if (zapt->term->which != Z_Term_general)
1882 i = atoi_n((char *) zapt->term->u.general->buf,
1883 zapt->term->u.general->len);
/* NOTE(review): handling of an index beyond the table is not visible */
1884 if (i >= sort_sequence->num_specs)
1886 sprintf(termz, "%d", i);
/* build the spec: generic sort element -> sort key by attributes */
1888 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1889 sks->sortElement = (Z_SortElement *)
1890 nmem_malloc(stream, sizeof(*sks->sortElement));
1891 sks->sortElement->which = Z_SortElement_generic;
1892 sk = sks->sortElement->u.generic = (Z_SortKey *)
1893 nmem_malloc(stream, sizeof(*sk));
1894 sk->which = Z_SortKey_sortAttributes;
1895 sk->u.sortAttributes = (Z_SortAttributes *)
1896 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
/* the attribute set OID is duplicated; the attribute list is shared
   with the APT (pointer copy, not a deep copy) */
1898 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1899 sk->u.sortAttributes->list = zapt->attributes;
1901 sks->sortRelation = (int *)
1902 nmem_malloc(stream, sizeof(*sks->sortRelation));
/* 1 -> ascending, 2 -> descending, any other value -> ascending */
1903 if (sort_relation_value == 1)
1904 *sks->sortRelation = Z_SortKeySpec_ascending;
1905 else if (sort_relation_value == 2)
1906 *sks->sortRelation = Z_SortKeySpec_descending;
1908 *sks->sortRelation = Z_SortKeySpec_ascending;
1910 sks->caseSensitivity = (int *)
1911 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1912 *sks->caseSensitivity = 0;
1914 sks->which = Z_SortKeySpec_null;
1915 sks->u.null = odr_nullval ();
/* publish the spec and hand back an empty result set */
1916 sort_sequence->specs[i] = sks;
1917 *rset = rset_create_null(rset_nmem, kc, 0);
/*
 * rpn_check_xpath: detect and parse an XPath expression given as the
 * string value of attribute type 1 of the APT.
 *
 * attr_find_ex() is asked for the string form of attribute type 1; when a
 * string is present and starts with '/', it is parsed by
 * zebra_parse_xpath_str() into `xpath` (capacity `max`) and that
 * function's return value is returned.
 *
 * NOTE(review): the value returned when no XPath string is present is not
 * visible in this listing.
 */
1922 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1923 const Odr_oid *attributeSet,
1924 struct xpath_location_step *xpath, int max,
1927 const Odr_oid *curAttributeSet = attributeSet;
1929 const char *use_string = 0;
1931 attr_init_APT(&use, zapt, 1);
1932 attr_find_ex(&use, &curAttributeSet, &use_string);
/* only strings beginning with '/' are treated as XPath */
1934 if (!use_string || *use_string != '/')
1937 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc: build a result set of all dictionary entries matching
 * `term` within the index identified by (index_type, xpath_use).
 *
 * The index ordinal is looked up through zebraExplain_lookup_attr_str()
 * in category zinfo_index_category_index.  The dictionary pattern is
 * '(' + each encoded ordinal byte preceded by byte value 1 + ')' + term.
 * dict_lookup_grep() is run with grep_handle and &grep_info, and the
 * resulting grep_info.isam_p_buf / isam_p_indx are passed to rset_trunc()
 * with rank flags "void" and term type Z_Term_characterString.
 *
 * A null result set is returned when grep_info_prepare() fails.
 *
 * NOTE(review): a second null-set return is visible but its condition is
 * not (presumably the ordinal lookup failing - confirm); the final return
 * of `rset` is also outside this listing.
 */
1942 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1943 const char *index_type, const char *term,
1944 const char *xpath_use,
1946 struct rset_key_control *kc)
1948 struct grep_info grep_info;
1949 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1950 zinfo_index_category_index,
1951 index_type, xpath_use);
1952 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
1953 return rset_create_null(rset_nmem, kc, 0);
1956 return rset_create_null(rset_nmem, kc, 0);
1962 WRBUF term_dict = wrbuf_alloc();
1963 int ord_len = key_SU_encode(ord, ord_buf);
1964 int term_type = Z_Term_characterString;
1965 const char *flags = "void";
/* pattern prefix: parenthesised ordinal, every byte preceded by 0x01 */
1967 wrbuf_putc(term_dict, '(');
1968 for (i = 0; i<ord_len; i++)
1970 wrbuf_putc(term_dict, 1);
1971 wrbuf_putc(term_dict, ord_buf[i]);
1973 wrbuf_putc(term_dict, ')');
/* the caller-supplied term is appended verbatim after the prefix */
1974 wrbuf_puts(term_dict, term);
1976 grep_info.isam_p_indx = 0;
1977 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
1978 &grep_info, &max_pos, 0, grep_handle);
1979 yaz_log(YLOG_DEBUG, "%s %d positions", term,
1980 grep_info.isam_p_indx);
1981 rset = rset_trunc(zh, grep_info.isam_p_buf,
1982 grep_info.isam_p_indx, term, strlen(term),
1983 flags, 1, term_type, rset_nmem,
1984 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
1985 0 /* term_ref_id_str */);
/* release the grep state and the pattern buffer once the set is built */
1986 grep_info_delete(&grep_info);
1987 wrbuf_destroy(term_dict);
/*
 * rpn_search_xpath: restrict a term result set to the element/attribute
 * context described by a parsed XPath (xpath[0 .. xpath_len-1]).
 *
 * A null incoming `rset` sets always_matches.  For each level, walking
 * from the last step towards the first, the function builds a pattern
 * for the element path, looks up begin-tag and end-tag sets with
 * xpath_trunc() (ZEBRA_XPATH_ELM_BEGIN / ZEBRA_XPATH_ELM_END), optionally
 * an attribute set for a relation predicate (ZEBRA_XPATH_ATTR_NAME), and
 * wraps the current set with rset_create_between().
 *
 * NOTE(review): this listing has gaps - braces, several conditions, the
 * opener of the explanatory comment containing the range(...) examples,
 * and the return statements are not visible.  The comments added here
 * only describe visible statements.
 */
1993 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1994 NMEM stream, const char *rank_type, RSET rset,
1995 int xpath_len, struct xpath_location_step *xpath,
1998 struct rset_key_control *kc)
2001 int always_matches = rset ? 0 : 1;
2009 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2010 for (i = 0; i<xpath_len; i++)
2012 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2024 a[@attr = value]/b[@other = othervalue]
2026 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2027 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2028 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2029 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2030 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2031 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2035 dict_grep_cmap(zh->reg->dict, 0, 0);
2038 int level = xpath_len;
/* process steps from the last one down to index 0 */
2041 while (--level >= 0)
2043 WRBUF xpath_rev = wrbuf_alloc();
2045 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* xpath_rev is assembled from step `level` back to step 1, each part
   followed by "/", i.e. the path is written innermost element first */
2047 for (i = level; i >= 1; --i)
2049 const char *cp = xpath[i].part;
2055 wrbuf_puts(xpath_rev, "[^/]*");
2056 else if (*cp == ' ')
2057 wrbuf_puts(xpath_rev, "\001 ");
2059 wrbuf_putc(xpath_rev, *cp);
2061 /* wrbuf_putc does not null-terminate , but
2062 wrbuf_puts below ensures it does.. so xpath_rev
2063 is OK iff length is > 0 */
2065 wrbuf_puts(xpath_rev, "/");
2067 else if (i == 1) /* // case */
2068 wrbuf_puts(xpath_rev, ".*");
/* relation predicate on this step: look up "<name minus first char>" or
   "<name minus first char>=<value>" in the attribute-name index */
2070 if (xpath[level].predicate &&
2071 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2072 xpath[level].predicate->u.relation.name[0])
2074 WRBUF wbuf = wrbuf_alloc();
2075 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2076 if (xpath[level].predicate->u.relation.value)
2078 const char *cp = xpath[level].predicate->u.relation.value;
2079 wrbuf_putc(wbuf, '=');
/* characters listed in REGEX_CHARS are backslash-escaped in the value */
2083 if (strchr(REGEX_CHARS, *cp))
2084 wrbuf_putc(wbuf, '\\');
2085 wrbuf_putc(wbuf, *cp);
2089 rset_attr = xpath_trunc(
2090 zh, stream, "0", wrbuf_cstr(wbuf),
2091 ZEBRA_XPATH_ATTR_NAME,
2093 wrbuf_destroy(wbuf);
2099 wrbuf_destroy(xpath_rev);
2103 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2104 wrbuf_cstr(xpath_rev));
/* an empty xpath_rev yields no begin/end tag lookup for this level */
2105 if (wrbuf_len(xpath_rev))
2107 rset_start_tag = xpath_trunc(zh, stream, "0",
2108 wrbuf_cstr(xpath_rev),
2109 ZEBRA_XPATH_ELM_BEGIN,
/* NOTE(review): the condition guarding this assignment is not visible;
   presumably it applies when always_matches is set - confirm. */
2112 rset = rset_start_tag;
2115 rset_end_tag = xpath_trunc(zh, stream, "0",
2116 wrbuf_cstr(xpath_rev),
2117 ZEBRA_XPATH_ELM_END,
/* wrap the current set between begin tag and end tag (plus attribute set) */
2120 rset = rset_create_between(rset_nmem, kc, kc->scope,
2121 rset_start_tag, rset,
2122 rset_end_tag, rset_attr);
2125 wrbuf_destroy(xpath_rev);
/* Capacity of the xpath_location_step array that rpn_search_database()
   declares and passes to rpn_check_xpath() as its `max` argument. */
2133 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT() calls rpn_search_database()
   before its definition appears further down in the file. */
2135 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2136 Z_AttributesPlusTerm *zapt,
2137 const Odr_oid *attributeSet, NMEM stream,
2138 Z_SortKeySpecList *sort_sequence,
2141 struct rset_key_control *kc);
/*
 * rpn_search_APT: evaluate one APT against every database in basenames[]
 * and merge the per-database result sets.
 *
 * One RSET slot per database is allocated from `stream`.  For each
 * database, zebraExplain_curDatabase() selects it (a non-zero return
 * raises YAZ_BIB1_DATABASE_UNAVAILABLE) and rpn_search_database() fills
 * rsets[i].  On failure the sets created so far are deleted.  Otherwise
 * *rset becomes a null set when num_bases is 0, or an OR over all
 * per-database sets.
 *
 * NOTE(review): this listing has gaps; the branch preceding
 * "else if (num_bases == 0)" (presumably the single-database case), the
 * loop exits and the return statements are not visible.
 */
2143 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2144 const Odr_oid *attributeSet, NMEM stream,
2145 Z_SortKeySpecList *sort_sequence,
2146 int num_bases, const char **basenames,
2149 struct rset_key_control *kc)
2151 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2152 ZEBRA_RES res = ZEBRA_OK;
2154 for (i = 0; i < num_bases; i++)
/* make basenames[i] the current database; non-zero means unavailable */
2157 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2159 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2164 res = rpn_search_database(zh, zapt, attributeSet, stream,
2166 rset_nmem, rsets+i, kc);
2167 if (res != ZEBRA_OK)
2170 if (res != ZEBRA_OK)
2171 { /* must clean up the already created sets */
2173 rset_delete(rsets[i]);
2180 else if (num_bases == 0)
2181 *rset = rset_create_null(rset_nmem, kc, 0);
2183 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/*
 * rpn_search_database: evaluate one APT against the currently selected
 * database.
 *
 * Visible flow:
 *  - zebra_maps_attr() derives index_type, search_type, rank_type,
 *    complete_flag and sort_flag from the APT.
 *  - The term is converted to UTF-8 in termz (ZEBRA_FAIL is tested for).
 *  - One path returns rpn_sort_spec() directly (its condition is not
 *    visible; presumably sort_flag - confirm).
 *  - rpn_check_xpath() parses an XPath use attribute; if the last step
 *    starts with '@' the term is searched as attribute data
 *    (ZEBRA_XPATH_ATTR_CDATA), otherwise as ZEBRA_XPATH_CDATA.
 *  - Relation value 103 (alwaysmatches) skips the term search and goes
 *    straight to rpn_search_xpath() with a null term set.
 *  - Otherwise search_type selects the handler: "phrase", "and-list",
 *    "or-list", "local" or "numeric"; a further branch raises
 *    YAZ_BIB1_UNSUPP_SEARCH.
 *  - The term set is finally passed through rpn_search_xpath().
 *
 * NOTE(review): this listing has gaps (braces, several conditions and
 * return statements, and the terminator of the "search using one of the
 * various search type strategies" comment are not visible).
 */
2189 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2190 Z_AttributesPlusTerm *zapt,
2191 const Odr_oid *attributeSet, NMEM stream,
2192 Z_SortKeySpecList *sort_sequence,
2195 struct rset_key_control *kc)
2197 ZEBRA_RES res = ZEBRA_OK;
2198 const char *index_type;
2199 char *search_type = NULL;
2200 char rank_type[128];
2203 char termz[IT_MAX_WORD+1];
2205 const char *xpath_use = 0;
2206 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2210 log_level_rpn = yaz_log_module_level("rpn");
/* derive index/search/rank types and flags from the APT's attributes */
2213 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2214 rank_type, &complete_flag, &sort_flag);
2216 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2217 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2218 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2219 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2221 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2225 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2226 rank_type, rset_nmem, rset, kc);
2227 /* consider if an X-Path query is used */
2228 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2229 xpath, MAX_XPATH_STEPS, stream);
2232 if (xpath[xpath_len-1].part[0] == '@')
2233 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2235 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* attribute type 2 holds the relation; 103 needs no term lookup */
2242 attr_init_APT(&relation, zapt, 2);
2243 relation_value = attr_find(&relation, NULL);
2245 if (relation_value == 103) /* alwaysmatches */
2247 *rset = 0; /* signal no "term" set */
2248 return rpn_search_xpath(zh, stream, rank_type, *rset,
2249 xpath_len, xpath, rset_nmem, rset, kc);
2254 /* search using one of the various search type strategies
2255 termz is our UTF-8 search term
2256 attributeSet is top-level default attribute set
2257 stream is ODR for search
2258 reg_id is the register type
2259 complete_flag is 1 for complete subfield, 0 for incomplete
2260 xpath_use is use-attribute to be used for X-Path search, 0 for none
2262 if (!strcmp(search_type, "phrase"))
2264 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2265 index_type, complete_flag, rank_type,
2270 else if (!strcmp(search_type, "and-list"))
2272 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2273 index_type, complete_flag, rank_type,
2278 else if (!strcmp(search_type, "or-list"))
2280 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2281 index_type, complete_flag, rank_type,
2286 else if (!strcmp(search_type, "local"))
2288 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2289 rank_type, rset_nmem, rset, kc);
2291 else if (!strcmp(search_type, "numeric"))
2293 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2294 index_type, complete_flag, rank_type,
2301 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2304 if (res != ZEBRA_OK)
2308 return rpn_search_xpath(zh, stream, rank_type, *rset,
2309 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top() calls rpn_search_structure()
   before its definition, and the function also calls itself recursively.
   result_sets / num_result_sets are output parameters receiving the list
   of result sets produced for the node `zs`; parent_op is the operator of
   the enclosing complex node (0 at the top). */
2312 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2313 const Odr_oid *attributeSet,
2314 NMEM stream, NMEM rset_nmem,
2315 Z_SortKeySpecList *sort_sequence,
2316 int num_bases, const char **basenames,
2317 RSET **result_sets, int *num_result_sets,
2318 Z_Operator *parent_op,
2319 struct rset_key_control *kc);
/*
 * rpn_get_top_approx_limit: walk an RPN query tree looking at attribute
 * type 12 (held in local global_hits_limit_attr) on its APT leaves.
 *
 * Complex nodes recurse into s1 and then, if that succeeded, into s2.
 * For a simple node carrying an APT, attr_find() is used to read
 * attribute type 12 into `l`.
 *
 * NOTE(review): the remaining parameters, what is done with `l`, and the
 * return statement are not visible in this listing.
 */
2321 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2324 ZEBRA_RES res = ZEBRA_OK;
2325 if (zs->which == Z_RPNStructure_complex)
/* res is still ZEBRA_OK here, so the left subtree is always visited */
2327 if (res == ZEBRA_OK)
2328 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
/* the right subtree is only visited when the left one succeeded */
2330 if (res == ZEBRA_OK)
2331 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2334 else if (zs->which == Z_RPNStructure_simple)
2336 if (zs->u.simple->which == Z_Operand_APT)
2338 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2339 AttrType global_hits_limit_attr;
2342 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2344 l = attr_find(&global_hits_limit_attr, NULL);
/*
 * rpn_search_top: entry point for evaluating a whole RPN query tree.
 *
 * A key control object is created with zebra_key_control_create() and the
 * tree is evaluated by rpn_search_structure() with no parent operator.
 * On failure every result set produced so far is deleted.  On success
 * exactly one result set is expected (asserted) and it is stored in
 * *result_set.
 *
 * NOTE(review): some parameters, the release of `kc` (if any) and the
 * return statements are not visible in this listing.
 */
2352 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2353 const Odr_oid *attributeSet,
2354 NMEM stream, NMEM rset_nmem,
2355 Z_SortKeySpecList *sort_sequence,
2356 int num_bases, const char **basenames,
2359 RSET *result_sets = 0;
2360 int num_result_sets = 0;
2362 struct rset_key_control *kc = zebra_key_control_create(zh);
2364 res = rpn_search_structure(zh, zs, attributeSet,
2367 num_bases, basenames,
2368 &result_sets, &num_result_sets,
2369 0 /* no parent op */,
/* failure: dispose of whatever partial result sets were returned */
2371 if (res != ZEBRA_OK)
2374 for (i = 0; i<num_result_sets; i++)
2375 rset_delete(result_sets[i]);
/* with no parent operator the root must have combined everything
   into a single set */
2380 assert(num_result_sets == 1);
2381 assert(result_sets);
2382 assert(*result_sets);
2383 *result_set = *result_sets;
/*
 * rpn_search_structure: recursively evaluate an RPN query node into a
 * list of result sets (*result_sets, *num_result_sets).
 *
 * Complex node: the left (s1) and right (s2) operands are evaluated
 * recursively; if either fails, the sets obtained so far are deleted.
 * The two lists are concatenated into a new array from `stream`.  When
 * there is no parent operator, the parent operator differs from this
 * node's, or this operator is neither AND nor OR, the list is combined
 * immediately into a single set: AND -> rset_create_and, OR ->
 * rset_create_or, AND-NOT -> rset_create_not, PROX -> rset_create_prox
 * (only for a known proximity unit equal to Z_ProxUnit_word, otherwise
 * YAZ_BIB1_UNSUPP_PROX_UNIT_CODE is raised); another branch raises
 * YAZ_BIB1_OPERATOR_UNSUPP.  Otherwise the concatenated list is handed up
 * uncombined (presumably so that the parent can build one n-ary AND/OR -
 * confirm).
 *
 * Simple node: an APT is evaluated with rpn_search_APT(); a result set id
 * is resolved with resultSetRef() (a visible error path raises
 * YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST); other operands raise
 * YAZ_BIB1_UNSUPP_SEARCH.  The single set is returned as a one-element
 * list.
 *
 * NOTE(review): this listing has gaps - braces, the switch header, some
 * case labels, break/return statements and a few arguments are not
 * visible.
 */
2389 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2390 const Odr_oid *attributeSet,
2391 NMEM stream, NMEM rset_nmem,
2392 Z_SortKeySpecList *sort_sequence,
2393 int num_bases, const char **basenames,
2394 RSET **result_sets, int *num_result_sets,
2395 Z_Operator *parent_op,
2396 struct rset_key_control *kc)
2398 *num_result_sets = 0;
2399 if (zs->which == Z_RPNStructure_complex)
2402 Z_Operator *zop = zs->u.complex->roperator;
2403 RSET *result_sets_l = 0;
2404 int num_result_sets_l = 0;
2405 RSET *result_sets_r = 0;
2406 int num_result_sets_r = 0;
/* evaluate the left operand */
2408 res = rpn_search_structure(zh, zs->u.complex->s1,
2409 attributeSet, stream, rset_nmem,
2411 num_bases, basenames,
2412 &result_sets_l, &num_result_sets_l,
2414 if (res != ZEBRA_OK)
2417 for (i = 0; i<num_result_sets_l; i++)
2418 rset_delete(result_sets_l[i]);
/* evaluate the right operand */
2421 res = rpn_search_structure(zh, zs->u.complex->s2,
2422 attributeSet, stream, rset_nmem,
2424 num_bases, basenames,
2425 &result_sets_r, &num_result_sets_r,
/* right side failed: both the left and the right sets are released */
2427 if (res != ZEBRA_OK)
2430 for (i = 0; i<num_result_sets_l; i++)
2431 rset_delete(result_sets_l[i]);
2432 for (i = 0; i<num_result_sets_r; i++)
2433 rset_delete(result_sets_r[i]);
2437 /* make a new list of result for all children */
2438 *num_result_sets = num_result_sets_l + num_result_sets_r;
2439 *result_sets = nmem_malloc(stream, *num_result_sets *
2440 sizeof(**result_sets));
2441 memcpy(*result_sets, result_sets_l,
2442 num_result_sets_l * sizeof(**result_sets));
2443 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2444 num_result_sets_r * sizeof(**result_sets));
2446 if (!parent_op || parent_op->which != zop->which
2447 || (zop->which != Z_Operator_and &&
2448 zop->which != Z_Operator_or))
2450 /* parent node different from this one (or non-present) */
2451 /* we must combine result sets now */
2455 case Z_Operator_and:
2456 rset = rset_create_and(rset_nmem, kc,
2458 *num_result_sets, *result_sets);
2461 rset = rset_create_or(rset_nmem, kc,
2462 kc->scope, 0, /* termid */
2463 *num_result_sets, *result_sets);
2465 case Z_Operator_and_not:
2466 rset = rset_create_not(rset_nmem, kc,
2471 case Z_Operator_prox:
/* only "known" proximity units are accepted ... */
2472 if (zop->u.prox->which != Z_ProximityOperator_known)
2475 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* ... and of those only the word unit */
2479 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2481 zebra_setError_zint(zh,
2482 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2483 *zop->u.prox->u.known);
2488 rset = rset_create_prox(rset_nmem, kc,
2490 *num_result_sets, *result_sets,
2491 *zop->u.prox->ordered,
2492 (!zop->u.prox->exclusion ?
2493 0 : *zop->u.prox->exclusion),
2494 *zop->u.prox->relationType,
2495 *zop->u.prox->distance );
2499 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the list as its only element */
2502 *num_result_sets = 1;
2503 *result_sets = nmem_malloc(stream, *num_result_sets *
2504 sizeof(**result_sets));
2505 (*result_sets)[0] = rset;
2508 else if (zs->which == Z_RPNStructure_simple)
2513 if (zs->u.simple->which == Z_Operand_APT)
2515 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2516 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2517 attributeSet, stream, sort_sequence,
2518 num_bases, basenames, rset_nmem, &rset,
2520 if (res != ZEBRA_OK)
2523 else if (zs->u.simple->which == Z_Operand_resultSetId)
2525 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2526 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2530 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2531 zs->u.simple->u.resultSetId);
2538 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a leaf always yields a one-element list */
2541 *num_result_sets = 1;
2542 *result_sets = nmem_malloc(stream, *num_result_sets *
2543 sizeof(**result_sets));
2544 (*result_sets)[0] = rset;
2548 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2559 * indent-tabs-mode: nil
2561 * vim: shiftwidth=4 tabstop=8 expandtab