1 /* $Id: rpnsearch.c,v 1.20 2007-11-01 14:10:03 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
43 #define TERMSET_DISABLE 1
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
82 const char *index_type;
/* add_isam_p: record one dictionary hit found during a grep.
 *
 *  name  encoded dictionary key; key_SU_decode() yields the ordinal and
 *        the number of bytes it occupies, the term itself follows
 *  info  dictionary value: one length byte (asserted to equal
 *        sizeof(ISAM_P)) followed by the ISAM_P
 *  p     accumulator for the hits (grep_handle passes a struct grep_info *)
 *
 * The ISAM_P buffer, and the term-number array that parallels it, are
 * enlarged on demand before the new entry is stored.
 */
86 static int add_isam_p(const char *name, const char *info,
91 log_level_rpn = yaz_log_module_level("rpn");
94 /* we may have to stop this madness.. NOTE: -1 so that if
95 truncmax == trunxlimit we do *not* generate result sets */
96 if (p->isam_p_indx >= p->trunc_max - 1)
99 if (p->isam_p_indx == p->isam_p_size)
101 ISAM_P *new_isam_p_buf;
/* buffer is full: roughly double it (plus slack for the first growth) */
105 p->isam_p_size = 2*p->isam_p_size + 100;
106 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110 memcpy(new_isam_p_buf, p->isam_p_buf,
111 p->isam_p_indx * sizeof(*p->isam_p_buf));
112 xfree(p->isam_p_buf);
114 p->isam_p_buf = new_isam_p_buf;
117 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* carry the old term numbers over.  The source has to be the old
   term_no array: isam_p_buf holds ISAM_P values, not ints, and at this
   point it already refers to the freshly allocated buffer. */
120 memcpy(new_term_no, p->term_no,
121 p->isam_p_indx * sizeof(*p->term_no));
124 p->term_no = new_term_no;
/* info[0] is the payload size; the ISAM_P itself starts at info+1 */
127 assert(*info == sizeof(*p->isam_p_buf));
128 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
133 char term_tmp[IT_MAX_WORD];
135 const char *index_name;
/* len = number of bytes used by the encoded ordinal at the start of name */
136 int len = key_SU_decode(&ord, (const unsigned char *) name);
138 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
139 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
140 zebraExplain_lookup_ord(p->zh->reg->zei,
141 ord, 0 /* index_type */, &db, &index_name);
142 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
144 resultSetAddTerm(p->zh, p->termset, name[len], db,
145 index_name, term_tmp);
151 static int grep_handle(char *name, const char *info, void *p)
153 return add_isam_p(name, info, (struct grep_info *) p);
156 static int term_pre(zebra_map_t zm, const char **src,
157 const char *ct1, const char *ct2, int first)
159 const char *s1, *s0 = *src;
162 /* skip white space */
165 if (ct1 && strchr(ct1, *s0))
167 if (ct2 && strchr(ct2, *s0))
170 map = zebra_maps_input(zm, &s1, strlen(s1), first);
171 if (**map != *CHR_SPACE)
180 static void esc_str(char *out_buf, size_t out_size,
181 const char *in_buf, int in_size)
187 assert(out_size > 20);
189 for (k = 0; k<in_size; k++)
191 int c = in_buf[k] & 0xff;
193 if (c < 32 || c > 126)
197 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
198 if (strlen(out_buf) > out_size-20)
200 strcat(out_buf, "..");
206 #define REGEX_CHARS " []()|.*+?!"
208 static void add_non_space(const char *start, const char *end,
210 char *dst_term, int *dst_ptr,
211 const char **map, int q_map_match)
213 size_t sz = end - start;
214 memcpy(dst_term + *dst_ptr, start, sz);
220 if (strchr(REGEX_CHARS, *start))
221 wrbuf_putc(term_dict, '\\');
222 wrbuf_putc(term_dict, *start);
229 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231 wrbuf_puts(term_dict, map[0]);
235 /* term_100: handle term, where trunc = none(no operators at all) */
236 static int term_100(zebra_map_t zm,
237 const char **src, WRBUF term_dict, int space_split,
245 const char *space_start = 0;
246 const char *space_end = 0;
248 if (!term_pre(zm, src, NULL, NULL, !space_split))
255 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
258 if (**map == *CHR_SPACE)
261 else /* complete subfield only. */
263 if (**map == *CHR_SPACE)
264 { /* save space mapping for later .. */
269 else if (space_start)
270 { /* reload last space */
271 while (space_start < space_end)
273 if (strchr(REGEX_CHARS, *space_start))
274 wrbuf_putc(term_dict, '\\');
275 dst_term[j++] = *space_start;
276 wrbuf_putc(term_dict, *space_start);
281 space_start = space_end = 0;
286 add_non_space(s1, s0, term_dict, dst_term, &j,
294 /* term_101: handle term, where trunc = Process # */
295 static int term_101(zebra_map_t zm,
296 const char **src, WRBUF term_dict, int space_split,
304 if (!term_pre(zm, src, "#", "#", !space_split))
312 wrbuf_puts(term_dict, ".*");
313 dst_term[j++] = *s0++;
319 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
320 if (space_split && **map == *CHR_SPACE)
324 add_non_space(s1, s0, term_dict, dst_term, &j,
328 dst_term[j++] = '\0';
333 /* term_103: handle term, where trunc = re-2 (regular expressions) */
334 static int term_103(zebra_map_t zm, const char **src,
335 WRBUF term_dict, int *errors, int space_split,
343 if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
346 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
347 isdigit(((const unsigned char *)s0)[1]))
349 *errors = s0[1] - '0';
356 if (strchr("^\\()[].*+?|-", *s0))
359 wrbuf_putc(term_dict, *s0);
367 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
368 if (space_split && **map == *CHR_SPACE)
372 add_non_space(s1, s0, term_dict, dst_term, &j,
382 /* term_102: handle term, where trunc = re-1 (regular expressions) */
383 static int term_102(zebra_map_t zm, const char **src,
384 WRBUF term_dict, int space_split, char *dst_term)
386 return term_103(zm, src, term_dict, NULL, space_split, dst_term);
390 /* term_104: handle term, process # and ! */
391 static int term_104(zebra_map_t zm, const char **src,
392 WRBUF term_dict, int space_split, char *dst_term)
399 if (!term_pre(zm, src, "?*#", "?*#", !space_split))
407 dst_term[j++] = *s0++;
408 if (*s0 >= '0' && *s0 <= '9')
411 while (*s0 >= '0' && *s0 <= '9')
413 limit = limit * 10 + (*s0 - '0');
414 dst_term[j++] = *s0++;
420 wrbuf_puts(term_dict, ".?");
425 wrbuf_puts(term_dict, ".*");
431 wrbuf_puts(term_dict, ".*");
432 dst_term[j++] = *s0++;
437 wrbuf_puts(term_dict, ".");
438 dst_term[j++] = *s0++;
444 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
445 if (space_split && **map == *CHR_SPACE)
449 add_non_space(s1, s0, term_dict, dst_term, &j,
453 dst_term[j++] = '\0';
458 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
459 static int term_105(zebra_map_t zm, const char **src,
460 WRBUF term_dict, int space_split,
461 char *dst_term, int right_truncate)
468 if (!term_pre(zm, src, "*!", "*!", !space_split))
476 wrbuf_puts(term_dict, ".*");
477 dst_term[j++] = *s0++;
482 wrbuf_putc(term_dict, '.');
483 dst_term[j++] = *s0++;
489 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
490 if (space_split && **map == *CHR_SPACE)
494 add_non_space(s1, s0, term_dict, dst_term, &j,
499 wrbuf_puts(term_dict, ".*");
500 dst_term[j++] = '\0';
506 /* gen_regular_rel - generate regular expression from relation
507 * val: border value (inclusive)
508 * islt: 1 if <=; 0 if >=.
510 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
512 char dst_buf[20*5*20]; /* assuming enough for expansion */
519 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
523 strcpy(dst, "(-[0-9]+|(");
531 strcpy(dst, "([0-9]+|-(");
542 sprintf(numstr, "%d", val);
543 for (w = strlen(numstr); --w >= 0; pos++)
562 strcpy(dst + dst_p, numstr);
563 dst_p = strlen(dst) - pos - 1;
591 for (i = 0; i<pos; i++)
604 /* match everything less than 10^(pos-1) */
606 for (i = 1; i<pos; i++)
607 strcat(dst, "[0-9]?");
611 /* match everything greater than 10^pos */
612 for (i = 0; i <= pos; i++)
613 strcat(dst, "[0-9]");
614 strcat(dst, "[0-9]*");
617 wrbuf_puts(term_dict, dst);
620 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
622 const char *src = wrbuf_cstr(wsrc);
623 if (src[*indx] == '\\')
625 wrbuf_putc(term_p, src[*indx]);
628 wrbuf_putc(term_p, src[*indx]);
633 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
634 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
635 * >= abc ([b-].*|a[c-].*|ab[c-].*)
636 * ([^-a].*|a[^-b].*|ab[c-].*)
637 * < abc ([-0].*|a[-a].*|ab[-b].*)
638 * ([^a-].*|a[^b-].*|ab[^c-].*)
639 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
640 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
642 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
643 const char **term_sub, WRBUF term_dict,
644 const Odr_oid *attributeSet,
645 zebra_map_t zm, int space_split, char *term_dst,
651 WRBUF term_component = wrbuf_alloc();
653 attr_init_APT(&relation, zapt, 2);
654 relation_value = attr_find(&relation, NULL);
657 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
658 switch (relation_value)
661 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
663 wrbuf_destroy(term_component);
666 yaz_log(log_level_rpn, "Relation <");
668 wrbuf_putc(term_dict, '(');
669 for (i = 0; i < wrbuf_len(term_component); )
674 wrbuf_putc(term_dict, '|');
676 string_rel_add_char(term_dict, term_component, &j);
678 wrbuf_putc(term_dict, '[');
680 wrbuf_putc(term_dict, '^');
682 wrbuf_putc(term_dict, 1);
683 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
685 string_rel_add_char(term_dict, term_component, &i);
686 wrbuf_putc(term_dict, '-');
688 wrbuf_putc(term_dict, ']');
689 wrbuf_putc(term_dict, '.');
690 wrbuf_putc(term_dict, '*');
692 wrbuf_putc(term_dict, ')');
695 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
697 wrbuf_destroy(term_component);
700 yaz_log(log_level_rpn, "Relation <=");
702 wrbuf_putc(term_dict, '(');
703 for (i = 0; i < wrbuf_len(term_component); )
708 string_rel_add_char(term_dict, term_component, &j);
709 wrbuf_putc(term_dict, '[');
711 wrbuf_putc(term_dict, '^');
713 wrbuf_putc(term_dict, 1);
714 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
716 string_rel_add_char(term_dict, term_component, &i);
717 wrbuf_putc(term_dict, '-');
719 wrbuf_putc(term_dict, ']');
720 wrbuf_putc(term_dict, '.');
721 wrbuf_putc(term_dict, '*');
723 wrbuf_putc(term_dict, '|');
725 for (i = 0; i < wrbuf_len(term_component); )
726 string_rel_add_char(term_dict, term_component, &i);
727 wrbuf_putc(term_dict, ')');
730 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
732 wrbuf_destroy(term_component);
735 yaz_log(log_level_rpn, "Relation >");
737 wrbuf_putc(term_dict, '(');
738 for (i = 0; i < wrbuf_len(term_component); )
743 string_rel_add_char(term_dict, term_component, &j);
744 wrbuf_putc(term_dict, '[');
746 wrbuf_putc(term_dict, '^');
747 wrbuf_putc(term_dict, '-');
748 string_rel_add_char(term_dict, term_component, &i);
750 wrbuf_putc(term_dict, ']');
751 wrbuf_putc(term_dict, '.');
752 wrbuf_putc(term_dict, '*');
754 wrbuf_putc(term_dict, '|');
756 for (i = 0; i < wrbuf_len(term_component); )
757 string_rel_add_char(term_dict, term_component, &i);
758 wrbuf_putc(term_dict, '.');
759 wrbuf_putc(term_dict, '+');
760 wrbuf_putc(term_dict, ')');
763 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
765 wrbuf_destroy(term_component);
768 yaz_log(log_level_rpn, "Relation >=");
770 wrbuf_putc(term_dict, '(');
771 for (i = 0; i < wrbuf_len(term_component); )
776 wrbuf_putc(term_dict, '|');
778 string_rel_add_char(term_dict, term_component, &j);
779 wrbuf_putc(term_dict, '[');
781 if (i < wrbuf_len(term_component)-1)
783 wrbuf_putc(term_dict, '^');
784 wrbuf_putc(term_dict, '-');
785 string_rel_add_char(term_dict, term_component, &i);
789 string_rel_add_char(term_dict, term_component, &i);
790 wrbuf_putc(term_dict, '-');
792 wrbuf_putc(term_dict, ']');
793 wrbuf_putc(term_dict, '.');
794 wrbuf_putc(term_dict, '*');
796 wrbuf_putc(term_dict, ')');
803 yaz_log(log_level_rpn, "Relation =");
804 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
806 wrbuf_destroy(term_component);
809 wrbuf_puts(term_dict, "(");
810 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
811 wrbuf_puts(term_dict, ")");
814 yaz_log(log_level_rpn, "Relation always matches");
815 /* skip to end of term (we don't care what it is) */
816 while (**term_sub != '\0')
820 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
821 wrbuf_destroy(term_component);
824 wrbuf_destroy(term_component);
828 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
829 const char **term_sub,
831 const Odr_oid *attributeSet, NMEM stream,
832 struct grep_info *grep_info,
833 const char *index_type, int complete_flag,
835 const char *xpath_use,
836 struct ord_list **ol);
838 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
839 Z_AttributesPlusTerm *zapt,
840 zint *hits_limit_value,
841 const char **term_ref_id_str,
844 AttrType term_ref_id_attr;
845 AttrType hits_limit_attr;
848 attr_init_APT(&hits_limit_attr, zapt, 11);
849 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
851 attr_init_APT(&term_ref_id_attr, zapt, 10);
852 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
853 if (term_ref_id_int >= 0)
855 char *res = nmem_malloc(nmem, 20);
856 sprintf(res, "%d", term_ref_id_int);
857 *term_ref_id_str = res;
860 /* no limit given ? */
861 if (*hits_limit_value == -1)
863 if (*term_ref_id_str)
865 /* use global if term_ref is present */
866 *hits_limit_value = zh->approx_limit;
870 /* no counting if term_ref is not present */
871 *hits_limit_value = 0;
874 else if (*hits_limit_value == 0)
876 /* 0 is the same as global limit */
877 *hits_limit_value = zh->approx_limit;
879 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
880 *term_ref_id_str ? *term_ref_id_str : "none",
885 static ZEBRA_RES term_trunc(ZebraHandle zh,
886 Z_AttributesPlusTerm *zapt,
887 const char **term_sub,
888 const Odr_oid *attributeSet, NMEM stream,
889 struct grep_info *grep_info,
890 const char *index_type, int complete_flag,
892 const char *rank_type,
893 const char *xpath_use,
896 struct rset_key_control *kc)
900 zint hits_limit_value;
901 const char *term_ref_id_str = 0;
902 WRBUF term_dict = wrbuf_alloc();
905 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
907 grep_info->isam_p_indx = 0;
908 res = string_term(zh, zapt, term_sub, term_dict,
909 attributeSet, stream, grep_info,
910 index_type, complete_flag,
911 term_dst, xpath_use, &ol);
912 wrbuf_destroy(term_dict);
915 if (!*term_sub) /* no more terms ? */
917 yaz_log(log_level_rpn, "term: %s", term_dst);
918 *rset = rset_trunc(zh, grep_info->isam_p_buf,
919 grep_info->isam_p_indx, term_dst,
920 strlen(term_dst), rank_type, 1 /* preserve pos */,
921 zapt->term->which, rset_nmem,
922 kc, kc->scope, ol, index_type, hits_limit_value,
/* string_term: turn one term of an RPN leaf into a dictionary regular
 * expression and grep the dictionary with it.
 *
 * Steps visible here:
 *  - the truncation attribute (type 5) of zapt selects how the term text
 *    is converted (string_relation, term_100 .. term_105);
 *  - the ordinal from zebra_apt_get_ord() is appended to *ol and written
 *    first into term_dict as "(" <escaped ord bytes> ")";
 *  - the finished expression is passed to dict_lookup_grep(), which
 *    receives grep_info for the hits;
 *  - an unknown truncation value is reported with
 *    YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE.
 *
 * term_dst is handed to the term_xxx helpers, which copy the term into it.
 */
929 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
930 const char **term_sub,
932 const Odr_oid *attributeSet, NMEM stream,
933 struct grep_info *grep_info,
934 const char *index_type, int complete_flag,
936 const char *xpath_use,
937 struct ord_list **ol)
941 int truncation_value;
943 struct rpn_char_map_info rcmi;
945 int space_split = complete_flag ? 0 : 1;
948 int max_pos, prefix_len = 0;
952 zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type);
954 *ol = ord_list_create(stream);
956 rpn_char_map_prepare(zh->reg, zm, &rcmi);
957 attr_init_APT(&truncation, zapt, 5);
958 truncation_value = attr_find(&truncation, NULL);
959 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
961 termp = *term_sub; /* start of term for each database */
963 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
964 attributeSet, &ord) != ZEBRA_OK)
970 wrbuf_rewind(term_dict); /* new dictionary regexp term */
972 *ol = ord_list_append(stream, *ol, ord);
973 ord_len = key_SU_encode(ord, ord_buf);
/* prefix: the encoded ordinal, every byte escaped, wrapped in ( ) */
975 wrbuf_putc(term_dict, '(');
977 for (i = 0; i<ord_len; i++)
979 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
980 wrbuf_putc(term_dict, ord_buf[i]);
982 wrbuf_putc(term_dict, ')');
/* remember where the term part starts so logging can skip the prefix */
984 prefix_len = wrbuf_len(term_dict);
986 switch (truncation_value)
988 case -1: /* not specified */
989 case 100: /* do not truncate */
990 if (!string_relation(zh, zapt, &termp, term_dict,
992 zm, space_split, term_dst,
997 zebra_setError(zh, relation_error, 0);
1004 case 1: /* right truncation */
1005 wrbuf_putc(term_dict, '(');
1006 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1011 wrbuf_puts(term_dict, ".*)");
1013 case 2: /* left truncation */
1014 wrbuf_puts(term_dict, "(.*");
1015 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1020 wrbuf_putc(term_dict, ')');
1022 case 3: /* left&right truncation */
1023 wrbuf_puts(term_dict, "(.*");
1024 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1029 wrbuf_puts(term_dict, ".*)");
1031 case 101: /* process # in term */
1032 wrbuf_putc(term_dict, '(');
1033 if (!term_101(zm, &termp, term_dict, space_split, term_dst))
1038 wrbuf_puts(term_dict, ")");
1040 case 102: /* Regexp-1 */
1041 wrbuf_putc(term_dict, '(');
1042 if (!term_102(zm, &termp, term_dict, space_split, term_dst))
1047 wrbuf_putc(term_dict, ')');
1049 case 103: /* Regexp-2 */
/* term_103 stores a digit taken from a leading "+d+" in regex_range,
   which is later handed to dict_lookup_grep */
1051 wrbuf_putc(term_dict, '(');
1052 if (!term_103(zm, &termp, term_dict, &regex_range,
1053 space_split, term_dst))
1058 wrbuf_putc(term_dict, ')');
1060 case 104: /* process # and ! in term */
1061 wrbuf_putc(term_dict, '(');
1062 if (!term_104(zm, &termp, term_dict, space_split, term_dst))
1067 wrbuf_putc(term_dict, ')');
1069 case 105: /* process * and ! in term (right_truncate = 1) */
1070 wrbuf_putc(term_dict, '(');
1071 if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1))
1076 wrbuf_putc(term_dict, ')');
1078 case 106: /* process * and ! in term (right_truncate = 0) */
1079 wrbuf_putc(term_dict, '(');
1080 if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0))
1085 wrbuf_putc(term_dict, ')');
1088 zebra_setError_zint(zh,
1089 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1096 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1097 esc_str(buf, sizeof(buf), input, strlen(input));
1099 yaz_log(log_level_rpn, "dict_lookup_grep: %s",
1100 wrbuf_cstr(term_dict) + prefix_len);
1101 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1102 grep_info, &max_pos,
1103 ord_len /* number of "exact" chars */,
1106 zebra_set_partial_result(zh);
1108 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1110 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1116 static void grep_info_delete(struct grep_info *grep_info)
1119 xfree(grep_info->term_no);
1121 xfree(grep_info->isam_p_buf);
1124 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1125 Z_AttributesPlusTerm *zapt,
1126 struct grep_info *grep_info,
1127 const char *index_type)
1130 grep_info->term_no = 0;
1132 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1133 grep_info->isam_p_size = 0;
1134 grep_info->isam_p_buf = NULL;
1136 grep_info->index_type = index_type;
1137 grep_info->termset = 0;
1143 attr_init_APT(&truncmax, zapt, 13);
1144 truncmax_value = attr_find(&truncmax, NULL);
1145 if (truncmax_value != -1)
1146 grep_info->trunc_max = truncmax_value;
1151 int termset_value_numeric;
1152 const char *termset_value_string;
1154 attr_init_APT(&termset, zapt, 8);
1155 termset_value_numeric =
1156 attr_find_ex(&termset, NULL, &termset_value_string);
1157 if (termset_value_numeric != -1)
1160 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1164 const char *termset_name = 0;
1165 if (termset_value_numeric != -2)
1168 sprintf(resname, "%d", termset_value_numeric);
1169 termset_name = resname;
1172 termset_name = termset_value_string;
1173 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1174 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1175 if (!grep_info->termset)
1177 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1187 \brief Create result set(s) for list of terms
1188 \param zh Zebra Handle
1189 \param zapt Attributes Plus Term (RPN leaf)
1190 \param termz term as used in query but converted to UTF-8
1191 \param attributeSet default attribute set
1192 \param stream memory for result
1193 \param index_type register type ("w", "p",..)
1194 \param complete_flag whether it's phrases or not
1195 \param rank_type term flags for ranking
1196 \param xpath_use use attribute for X-Path (-1 for no X-path)
1197 \param rset_nmem memory for result sets
1198 \param result_sets output result set for each term in list (output)
1199 \param num_result_sets number of output result sets
1200 \param kc rset key control to be used for created result sets
1202 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1203 Z_AttributesPlusTerm *zapt,
1205 const Odr_oid *attributeSet,
1207 const char *index_type, int complete_flag,
1208 const char *rank_type,
1209 const char *xpath_use,
1211 RSET **result_sets, int *num_result_sets,
1212 struct rset_key_control *kc)
1214 char term_dst[IT_MAX_WORD+1];
1215 struct grep_info grep_info;
1216 const char *termp = termz;
1219 *num_result_sets = 0;
1221 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1227 if (alloc_sets == *num_result_sets)
1230 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1233 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1234 alloc_sets = alloc_sets + add;
1235 *result_sets = rnew;
1237 res = term_trunc(zh, zapt, &termp, attributeSet,
1239 index_type, complete_flag,
1240 term_dst, rank_type,
1241 xpath_use, rset_nmem,
1242 &(*result_sets)[*num_result_sets],
1244 if (res != ZEBRA_OK)
1247 for (i = 0; i < *num_result_sets; i++)
1248 rset_delete((*result_sets)[i]);
1249 grep_info_delete(&grep_info);
1252 if ((*result_sets)[*num_result_sets] == 0)
1254 (*num_result_sets)++;
1259 grep_info_delete(&grep_info);
1263 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1264 Z_AttributesPlusTerm *zapt,
1265 const Odr_oid *attributeSet,
1266 const char *index_type,
1269 struct rset_key_control *kc)
1275 char term_dict[100];
1279 zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type);
1281 attr_init_APT(&position, zapt, 3);
1282 position_value = attr_find(&position, NULL);
1283 switch(position_value)
1292 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1298 if (!zebra_maps_is_first_in_field(zm))
1300 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1305 if (!zh->reg->isamb && !zh->reg->isamc)
1307 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1312 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1313 attributeSet, &ord) != ZEBRA_OK)
1317 ord_len = key_SU_encode(ord, ord_buf);
1318 memcpy(term_dict, ord_buf, ord_len);
1319 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1320 val = dict_lookup(zh->reg->dict, term_dict);
1323 assert(*val == sizeof(ISAM_P));
1324 memcpy(&isam_p, val+1, sizeof(isam_p));
1327 *rset = rsisamb_create(rset_nmem, kc, kc->scope,
1328 zh->reg->isamb, isam_p, 0);
1329 else if (zh->reg->isamc)
1330 *rset = rsisamc_create(rset_nmem, kc, kc->scope,
1331 zh->reg->isamc, isam_p, 0);
1336 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1337 Z_AttributesPlusTerm *zapt,
1338 const char *termz_org,
1339 const Odr_oid *attributeSet,
1341 const char *index_type, int complete_flag,
1342 const char *rank_type,
1343 const char *xpath_use,
1346 struct rset_key_control *kc)
1348 RSET *result_sets = 0;
1349 int num_result_sets = 0;
1351 term_list_trunc(zh, zapt, termz_org, attributeSet,
1352 stream, index_type, complete_flag,
1353 rank_type, xpath_use,
1355 &result_sets, &num_result_sets, kc);
1357 if (res != ZEBRA_OK)
1360 if (num_result_sets > 0)
1363 res = rpn_search_APT_position(zh, zapt, attributeSet,
1365 rset_nmem, &first_set,
1367 if (res != ZEBRA_OK)
1371 RSET *nsets = nmem_malloc(stream,
1372 sizeof(RSET) * (num_result_sets+1));
1373 nsets[0] = first_set;
1374 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1375 result_sets = nsets;
1379 if (num_result_sets == 0)
1380 *rset = rset_create_null(rset_nmem, kc, 0);
1381 else if (num_result_sets == 1)
1382 *rset = result_sets[0];
1384 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1385 num_result_sets, result_sets,
1386 1 /* ordered */, 0 /* exclusion */,
1387 3 /* relation */, 1 /* distance */);
1393 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1394 Z_AttributesPlusTerm *zapt,
1395 const char *termz_org,
1396 const Odr_oid *attributeSet,
1398 const char *index_type,
1400 const char *rank_type,
1401 const char *xpath_use,
1404 struct rset_key_control *kc)
1406 RSET *result_sets = 0;
1407 int num_result_sets = 0;
1410 term_list_trunc(zh, zapt, termz_org, attributeSet,
1411 stream, index_type, complete_flag,
1412 rank_type, xpath_use,
1414 &result_sets, &num_result_sets, kc);
1415 if (res != ZEBRA_OK)
1418 for (i = 0; i<num_result_sets; i++)
1421 res = rpn_search_APT_position(zh, zapt, attributeSet,
1423 rset_nmem, &first_set,
1425 if (res != ZEBRA_OK)
1427 for (i = 0; i<num_result_sets; i++)
1428 rset_delete(result_sets[i]);
1436 tmp_set[0] = first_set;
1437 tmp_set[1] = result_sets[i];
1439 result_sets[i] = rset_create_prox(
1440 rset_nmem, kc, kc->scope,
1442 1 /* ordered */, 0 /* exclusion */,
1443 3 /* relation */, 1 /* distance */);
1446 if (num_result_sets == 0)
1447 *rset = rset_create_null(rset_nmem, kc, 0);
1448 else if (num_result_sets == 1)
1449 *rset = result_sets[0];
1451 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1452 num_result_sets, result_sets);
1458 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1459 Z_AttributesPlusTerm *zapt,
1460 const char *termz_org,
1461 const Odr_oid *attributeSet,
1463 const char *index_type,
1465 const char *rank_type,
1466 const char *xpath_use,
1469 struct rset_key_control *kc)
1471 RSET *result_sets = 0;
1472 int num_result_sets = 0;
1475 term_list_trunc(zh, zapt, termz_org, attributeSet,
1476 stream, index_type, complete_flag,
1477 rank_type, xpath_use,
1479 &result_sets, &num_result_sets,
1481 if (res != ZEBRA_OK)
1483 for (i = 0; i<num_result_sets; i++)
1486 res = rpn_search_APT_position(zh, zapt, attributeSet,
1488 rset_nmem, &first_set,
1490 if (res != ZEBRA_OK)
1492 for (i = 0; i<num_result_sets; i++)
1493 rset_delete(result_sets[i]);
1501 tmp_set[0] = first_set;
1502 tmp_set[1] = result_sets[i];
1504 result_sets[i] = rset_create_prox(
1505 rset_nmem, kc, kc->scope,
1507 1 /* ordered */, 0 /* exclusion */,
1508 3 /* relation */, 1 /* distance */);
1513 if (num_result_sets == 0)
1514 *rset = rset_create_null(rset_nmem, kc, 0);
1515 else if (num_result_sets == 1)
1516 *rset = result_sets[0];
1518 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1519 num_result_sets, result_sets);
1525 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1526 const char **term_sub,
1528 const Odr_oid *attributeSet,
1529 struct grep_info *grep_info,
1539 WRBUF term_num = wrbuf_alloc();
1542 attr_init_APT(&relation, zapt, 2);
1543 relation_value = attr_find(&relation, NULL);
1545 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1547 switch (relation_value)
1550 yaz_log(log_level_rpn, "Relation <");
1551 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1553 wrbuf_destroy(term_num);
1556 term_value = atoi(wrbuf_cstr(term_num));
1557 gen_regular_rel(term_dict, term_value-1, 1);
1560 yaz_log(log_level_rpn, "Relation <=");
1561 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1563 wrbuf_destroy(term_num);
1566 term_value = atoi(wrbuf_cstr(term_num));
1567 gen_regular_rel(term_dict, term_value, 1);
1570 yaz_log(log_level_rpn, "Relation >=");
1571 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1573 wrbuf_destroy(term_num);
1576 term_value = atoi(wrbuf_cstr(term_num));
1577 gen_regular_rel(term_dict, term_value, 0);
1580 yaz_log(log_level_rpn, "Relation >");
1581 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1583 wrbuf_destroy(term_num);
1586 term_value = atoi(wrbuf_cstr(term_num));
1587 gen_regular_rel(term_dict, term_value+1, 0);
1591 yaz_log(log_level_rpn, "Relation =");
1592 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1594 wrbuf_destroy(term_num);
1597 term_value = atoi(wrbuf_cstr(term_num));
1598 wrbuf_printf(term_dict, "(0*%d)", term_value);
1601 /* term_tmp untouched.. */
1602 while (**term_sub != '\0')
1606 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1607 wrbuf_destroy(term_num);
1610 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1611 0, grep_info, max_pos, 0, grep_handle);
1614 zebra_set_partial_result(zh);
1616 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1617 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1618 wrbuf_destroy(term_num);
1622 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1623 const char **term_sub,
1625 const Odr_oid *attributeSet, NMEM stream,
1626 struct grep_info *grep_info,
1627 const char *index_type, int complete_flag,
1629 const char *xpath_use,
1630 struct ord_list **ol)
1633 struct rpn_char_map_info rcmi;
1635 int relation_error = 0;
1636 int ord, ord_len, i;
1638 zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type);
1640 *ol = ord_list_create(stream);
1642 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1646 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1647 attributeSet, &ord) != ZEBRA_OK)
1652 wrbuf_rewind(term_dict);
1654 *ol = ord_list_append(stream, *ol, ord);
1656 ord_len = key_SU_encode(ord, ord_buf);
1658 wrbuf_putc(term_dict, '(');
1659 for (i = 0; i < ord_len; i++)
1661 wrbuf_putc(term_dict, 1);
1662 wrbuf_putc(term_dict, ord_buf[i]);
1664 wrbuf_putc(term_dict, ')');
1666 if (!numeric_relation(zh, zapt, &termp, term_dict,
1667 attributeSet, grep_info, &max_pos, zm,
1668 term_dst, &relation_error))
1672 zebra_setError(zh, relation_error, 0);
1679 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Search strategy for the "numeric" search type (selected by the
   strcmp(search_type, "numeric") branch in rpn_search_database).

   The term text is handed to numeric_term(), which collects ISAM
   positions in grep_info; rset_trunc() turns those positions into a
   result set that is stored in result_sets[num_result_sets].
   The collected sets are finally reduced to one result in *rset:
     - no sets:      a null result set
     - one set:      that set
     - several sets: rset_create_and() over all of them

   NOTE(review): the array-growing code and the "termp == 0" test suggest
   the middle part runs once per term in termz - confirm against the
   loop construct. */
1684 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1685 Z_AttributesPlusTerm *zapt,
1687 const Odr_oid *attributeSet,
1689 const char *index_type,
1691 const char *rank_type,
1692 const char *xpath_use,
1695 struct rset_key_control *kc)
1697 char term_dst[IT_MAX_WORD+1];
1698 const char *termp = termz;
1699 RSET *result_sets = 0;
1700 int num_result_sets = 0;
1702 struct grep_info grep_info;
1704 zint hits_limit_value;
1705 const char *term_ref_id_str = 0;
/* hits_limit_value and term_ref_id_str are passed by address and
   filled in by zebra_term_limits_APT() */
1707 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1710 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1711 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1715 struct ord_list *ol;
1716 WRBUF term_dict = wrbuf_alloc();
/* result_sets is full: take a larger array from the stream NMEM, copy
   the existing entries across and raise alloc_sets by add */
1717 if (alloc_sets == num_result_sets)
1720 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1723 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1724 alloc_sets = alloc_sets + add;
1727 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* reset the ISAM position count before numeric_term() collects matches;
   termp is passed by address and is tested for null after the call */
1728 grep_info.isam_p_indx = 0;
1729 res = numeric_term(zh, zapt, &termp, term_dict,
1730 attributeSet, stream, &grep_info,
1731 index_type, complete_flag,
1732 term_dst, xpath_use, &ol);
1733 wrbuf_destroy(term_dict);
1734 if (res == ZEBRA_FAIL || termp == 0)
1736 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
/* build a result set from the positions gathered in grep_info */
1737 result_sets[num_result_sets] =
1738 rset_trunc(zh, grep_info.isam_p_buf,
1739 grep_info.isam_p_indx, term_dst,
1740 strlen(term_dst), rank_type,
1741 0 /* preserve position */,
1742 zapt->term->which, rset_nmem,
1743 kc, kc->scope, ol, index_type,
1746 if (!result_sets[num_result_sets])
1752 grep_info_delete(&grep_info);
1754 if (res != ZEBRA_OK)
/* reduce the collected sets to the single result returned in *rset */
1756 if (num_result_sets == 0)
1757 *rset = rset_create_null(rset_nmem, kc, 0);
1758 else if (num_result_sets == 1)
1759 *rset = result_sets[0];
1761 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1762 num_result_sets, result_sets);
/* Search strategy for the "local" search type (selected by the
   strcmp(search_type, "local") branch in rpn_search_database).

   The term is parsed as a system number with atozint() and looked up
   with rec_get().  Two outcomes are visible for *rset:
     - a null result set, or
     - a temporary result set (temp directory taken from the "setTmpDir"
       resource) that is opened for writing and receives a key through
       rset_write().

   NOTE(review): presumably the null set is used when rec_get() finds no
   record for sysno - confirm against the condition guarding it. */
1768 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1769 Z_AttributesPlusTerm *zapt,
1771 const Odr_oid *attributeSet,
1773 const char *rank_type, NMEM rset_nmem,
1775 struct rset_key_control *kc)
1778 zint sysno = atozint(termz);
1782 rec = rec_get(zh->reg->records, sysno);
1790 *rset = rset_create_null(rset_nmem, kc, 0);
1796 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1797 res_get(zh->res, "setTmpDir"), 0);
1798 rsfd = rset_open(*rset, RSETF_WRITE);
1803 rset_write(rsfd, &key);
/* Record a sort criterion carried by an APT instead of searching.

   - Attribute type 7 of the APT is read as the sort relation.
   - On first use sort_sequence->specs is allocated from stream with
     10 slots, all cleared to 0.
   - The term must be of kind Z_Term_general; its text is converted with
     atoi_n() and used as the index i into sort_sequence->specs.
   - A Z_SortKeySpec is built from stream memory: a generic sort element
     whose sort attributes are the APT's own attribute list under a copy
     of attributeSet, case sensitivity 0, and a "null" missing-value
     action.
   - The spec is stored in sort_sequence->specs[i] and *rset is set to
     a null result set.

   NOTE(review): only an upper bound test (i >= num_specs) is visible for
   the index - confirm that a negative value from atoi_n() cannot reach
   the store into specs[i]. */
1809 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1810 const Odr_oid *attributeSet, NMEM stream,
1811 Z_SortKeySpecList *sort_sequence,
1812 const char *rank_type,
1815 struct rset_key_control *kc)
1818 int sort_relation_value;
1819 AttrType sort_relation_type;
1824 attr_init_APT(&sort_relation_type, zapt, 7);
1825 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* lazily create the table of sort specs: 10 empty slots */
1827 if (!sort_sequence->specs)
1829 sort_sequence->num_specs = 10;
1830 sort_sequence->specs = (Z_SortKeySpec **)
1831 nmem_malloc(stream, sort_sequence->num_specs *
1832 sizeof(*sort_sequence->specs));
1833 for (i = 0; i<sort_sequence->num_specs; i++)
1834 sort_sequence->specs[i] = 0;
1836 if (zapt->term->which != Z_Term_general)
/* the term text selects the slot in sort_sequence->specs */
1839 i = atoi_n((char *) zapt->term->u.general->buf,
1840 zapt->term->u.general->len);
1841 if (i >= sort_sequence->num_specs)
1843 sprintf(termz, "%d", i);
/* build the sort key spec; every piece comes from the stream NMEM */
1845 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1846 sks->sortElement = (Z_SortElement *)
1847 nmem_malloc(stream, sizeof(*sks->sortElement));
1848 sks->sortElement->which = Z_SortElement_generic;
1849 sk = sks->sortElement->u.generic = (Z_SortKey *)
1850 nmem_malloc(stream, sizeof(*sk));
1851 sk->which = Z_SortKey_sortAttributes;
1852 sk->u.sortAttributes = (Z_SortAttributes *)
1853 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1855 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1856 sk->u.sortAttributes->list = zapt->attributes;
/* sort relation 1 -> ascending, 2 -> descending; the remaining
   assignment below also selects ascending */
1858 sks->sortRelation = (int *)
1859 nmem_malloc(stream, sizeof(*sks->sortRelation));
1860 if (sort_relation_value == 1)
1861 *sks->sortRelation = Z_SortKeySpec_ascending;
1862 else if (sort_relation_value == 2)
1863 *sks->sortRelation = Z_SortKeySpec_descending;
1865 *sks->sortRelation = Z_SortKeySpec_ascending;
1867 sks->caseSensitivity = (int *)
1868 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1869 *sks->caseSensitivity = 0;
1871 sks->which = Z_SortKeySpec_null;
1872 sks->u.null = odr_nullval ();
1873 sort_sequence->specs[i] = sks;
/* a sort APT contributes no hits of its own */
1874 *rset = rset_create_null(rset_nmem, kc, 0);
/* Check whether the use attribute (type 1) of an APT holds an X-Path
   expression and, if so, parse it.

   The string value of the use attribute is fetched with attr_find_ex().
   A value that is present and starts with '/' is parsed by
   zebra_parse_xpath_str() into xpath[] (at most max steps, memory from
   mem) and that function's return value is returned.

   NOTE(review): the value returned when the use attribute is missing or
   does not start with '/' is not shown here - confirm what callers
   should expect in that case. */
1879 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1880 const Odr_oid *attributeSet,
1881 struct xpath_location_step *xpath, int max,
1884 const Odr_oid *curAttributeSet = attributeSet;
1886 const char *use_string = 0;
1888 attr_init_APT(&use, zapt, 1);
1889 attr_find_ex(&use, &curAttributeSet, &use_string);
1891 if (!use_string || *use_string != '/')
1894 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Build a result set for one pattern in one of the X-Path indexes.

   The index ordinal for (index_type, xpath_use) is looked up with
   zebraExplain_lookup_attr_str().  A dictionary pattern is then built in
   term_dict as
       '(' <encoded ordinal, each byte preceded by byte 1> ')' <term>
   and matched with dict_lookup_grep(); grep_handle gathers the ISAM
   positions in grep_info and rset_trunc() turns them into the result.

   A null result set is returned when grep_info_prepare() fails.
   NOTE(review): a second null-set return follows it; presumably it covers
   a failed ordinal lookup - confirm against its condition.
   NOTE(review): the byte 1 written before each ordinal byte presumably
   marks the following byte as literal for the dictionary matcher -
   confirm against the dict_lookup_grep pattern syntax. */
1899 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1900 const char *index_type, const char *term,
1901 const char *xpath_use,
1903 struct rset_key_control *kc)
1905 struct grep_info grep_info;
1906 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1907 zinfo_index_category_index,
1908 index_type, xpath_use);
1909 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
1910 return rset_create_null(rset_nmem, kc, 0);
1913 return rset_create_null(rset_nmem, kc, 0);
1919 WRBUF term_dict = wrbuf_alloc();
1920 int ord_len = key_SU_encode(ord, ord_buf);
1921 int term_type = Z_Term_characterString;
1922 const char *flags = "void";
/* pattern prefix: the encoded ordinal wrapped in parentheses */
1924 wrbuf_putc(term_dict, '(');
1925 for (i = 0; i<ord_len; i++)
1927 wrbuf_putc(term_dict, 1);
1928 wrbuf_putc(term_dict, ord_buf[i]);
1930 wrbuf_putc(term_dict, ')');
1931 wrbuf_puts(term_dict, term);
/* collect matching dictionary entries, then convert them to a set */
1933 grep_info.isam_p_indx = 0;
1934 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
1935 &grep_info, &max_pos, 0, grep_handle);
1936 yaz_log(YLOG_DEBUG, "%s %d positions", term,
1937 grep_info.isam_p_indx);
1938 rset = rset_trunc(zh, grep_info.isam_p_buf,
1939 grep_info.isam_p_indx, term, strlen(term),
1940 flags, 1, term_type, rset_nmem,
1941 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
1942 0 /* term_ref_id_str */);
1943 grep_info_delete(&grep_info);
1944 wrbuf_destroy(term_dict);
/* Restrict a result set to the elements selected by an X-Path.

   rset is the incoming set of term hits; a null rset sets
   always_matches.  For every step, from the last one back to the first,
   the function:
     - builds a pattern xpath_rev from the step names level..1 (so the
       innermost step comes first), each name followed by "/";
     - if the step carries a relation predicate with a non-empty name,
       builds "name=value" (first character of the name skipped, value
       characters found in REGEX_CHARS escaped with a backslash) and
       looks it up in the ZEBRA_XPATH_ATTR_NAME index;
     - when xpath_rev is non-empty, looks up the element begin and end
       tags (ZEBRA_XPATH_ELM_BEGIN / ZEBRA_XPATH_ELM_END) and wraps the
       current set with rset_create_between().

   All index lookups go through xpath_trunc() with index type "0". */
1950 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1951 NMEM stream, const char *rank_type, RSET rset,
1952 int xpath_len, struct xpath_location_step *xpath,
1955 struct rset_key_control *kc)
/* a null incoming set means there are no term hits to restrict */
1958 int always_matches = rset ? 0 : 1;
1966 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
1967 for (i = 0; i<xpath_len; i++)
1969 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
1981 a[@attr = value]/b[@other = othervalue]
1983 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
1984 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
1985 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1986 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
1987 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
1988 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* dict_grep_cmap() is given a null map and handler here, in contrast to
   rpn_char_map_prepare() which installs rpn_char_map_handler */
1992 dict_grep_cmap(zh->reg->dict, 0, 0);
1995 int level = xpath_len;
/* process the steps from the last one back to the first */
1998 while (--level >= 0)
2000 WRBUF xpath_rev = wrbuf_alloc();
2002 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* step 0 is not visited by this loop: i stops at 1 */
2004 for (i = level; i >= 1; --i)
2006 const char *cp = xpath[i].part;
2012 wrbuf_puts(xpath_rev, "[^/]*");
2013 else if (*cp == ' ')
2014 wrbuf_puts(xpath_rev, "\001 ");
2016 wrbuf_putc(xpath_rev, *cp);
2018 /* wrbuf_putc does not null-terminate , but
2019 wrbuf_puts below ensures it does.. so xpath_rev
2020 is OK iff length is > 0 */
2022 wrbuf_puts(xpath_rev, "/");
2024 else if (i == 1) /* // case */
2025 wrbuf_puts(xpath_rev, ".*");
/* attribute predicate on this step, written as [@name = value] */
2027 if (xpath[level].predicate &&
2028 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2029 xpath[level].predicate->u.relation.name[0])
2031 WRBUF wbuf = wrbuf_alloc();
2032 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2033 if (xpath[level].predicate->u.relation.value)
2035 const char *cp = xpath[level].predicate->u.relation.value;
2036 wrbuf_putc(wbuf, '=');
2040 if (strchr(REGEX_CHARS, *cp))
2041 wrbuf_putc(wbuf, '\\');
2042 wrbuf_putc(wbuf, *cp);
2046 rset_attr = xpath_trunc(
2047 zh, stream, "0", wrbuf_cstr(wbuf),
2048 ZEBRA_XPATH_ATTR_NAME,
2050 wrbuf_destroy(wbuf);
2056 wrbuf_destroy(xpath_rev);
2060 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2061 wrbuf_cstr(xpath_rev));
/* only a non-empty path pattern gets begin/end tag lookups */
2062 if (wrbuf_len(xpath_rev))
2064 rset_start_tag = xpath_trunc(zh, stream, "0",
2065 wrbuf_cstr(xpath_rev),
2066 ZEBRA_XPATH_ELM_BEGIN,
2069 rset = rset_start_tag;
2072 rset_end_tag = xpath_trunc(zh, stream, "0",
2073 wrbuf_cstr(xpath_rev),
2074 ZEBRA_XPATH_ELM_END,
2077 rset = rset_create_between(rset_nmem, kc, kc->scope,
2078 rset_start_tag, rset,
2079 rset_end_tag, rset_attr);
2082 wrbuf_destroy(xpath_rev);
/* Capacity of the xpath_location_step array declared in
   rpn_search_database; also passed to rpn_check_xpath() as the maximum
   number of steps to parse. */
2090 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT() calls rpn_search_database()
   before its definition appears further down in this file. */
2092 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2093 Z_AttributesPlusTerm *zapt,
2094 const Odr_oid *attributeSet, NMEM stream,
2095 Z_SortKeySpecList *sort_sequence,
2098 struct rset_key_control *kc);
/* Evaluate one APT against every database in basenames.

   An array of num_bases result sets is taken from stream.  For each
   database name, zebraExplain_curDatabase() selects the database (a
   non-zero return is reported as YAZ_BIB1_DATABASE_UNAVAILABLE) and
   rpn_search_database() fills rsets[i].  If any step fails, the sets
   already created are deleted.  With no databases *rset becomes a null
   result set; otherwise the per-database sets are combined with
   rset_create_or().

   NOTE(review): the "else if (num_bases == 0)" implies an earlier branch;
   presumably a single database returns its set directly - confirm. */
2100 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2101 const Odr_oid *attributeSet, NMEM stream,
2102 Z_SortKeySpecList *sort_sequence,
2103 int num_bases, char **basenames,
2106 struct rset_key_control *kc)
2108 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2109 ZEBRA_RES res = ZEBRA_OK;
/* one search per database; rsets[i] receives the result for basenames[i] */
2111 for (i = 0; i < num_bases; i++)
2114 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2116 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2121 res = rpn_search_database(zh, zapt, attributeSet, stream,
2123 rset_nmem, rsets+i, kc);
2124 if (res != ZEBRA_OK)
2127 if (res != ZEBRA_OK)
2128 { /* must clean up the already created sets */
2130 rset_delete(rsets[i]);
2137 else if (num_bases == 0)
2138 *rset = rset_create_null(rset_nmem, kc, 0);
2140 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* Evaluate one APT against the currently selected database.

   Steps visible in the body:
     1. zebra_maps_attr() derives index_type, search_type, rank_type,
        complete_flag and sort_flag from the APT's attributes.
     2. zapt_term_to_utf8() converts the term into termz.
     3. A sort APT is handed to rpn_sort_spec() and its result returned.
     4. rpn_check_xpath() looks for an X-Path in the use attribute; when
        one is found, xpath_use becomes ZEBRA_XPATH_ATTR_CDATA if the last
        step starts with '@', otherwise ZEBRA_XPATH_CDATA.
     5. With relation attribute (type 2) value 103 the term search is
        skipped: *rset is cleared and rpn_search_xpath() is called
        directly.
     6. Otherwise the search is dispatched on search_type: "phrase",
        "and-list", "or-list", "local" or "numeric"; any other value is
        reported as YAZ_BIB1_UNSUPP_SEARCH.
     7. The term result is finally passed through rpn_search_xpath().

   NOTE(review): the parameter summary inside the body still says
   "reg_id"; the variable playing that role here appears to be
   index_type - confirm and update that text. */
2146 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2147 Z_AttributesPlusTerm *zapt,
2148 const Odr_oid *attributeSet, NMEM stream,
2149 Z_SortKeySpecList *sort_sequence,
2152 struct rset_key_control *kc)
2154 ZEBRA_RES res = ZEBRA_OK;
2155 const char *index_type;
2156 char *search_type = NULL;
2157 char rank_type[128];
2160 char termz[IT_MAX_WORD+1];
2162 const char *xpath_use = 0;
2163 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2167 log_level_rpn = yaz_log_module_level("rpn");
2170 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2171 rank_type, &complete_flag, &sort_flag);
2173 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2174 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2175 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2176 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2178 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2182 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2183 rank_type, rset_nmem, rset, kc);
2184 /* consider if an X-Path query is used */
2185 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2186 xpath, MAX_XPATH_STEPS, stream);
2189 if (xpath[xpath_len-1].part[0] == '@')
2190 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2192 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2199 attr_init_APT(&relation, zapt, 2);
2200 relation_value = attr_find(&relation, NULL);
2202 if (relation_value == 103) /* alwaysmatches */
2204 *rset = 0; /* signal no "term" set */
2205 return rpn_search_xpath(zh, stream, rank_type, *rset,
2206 xpath_len, xpath, rset_nmem, rset, kc);
2211 /* search using one of the various search type strategies
2212 termz is our UTF-8 search term
2213 attributeSet is top-level default attribute set
2214 stream is ODR for search
2215 reg_id is the register type
2216 complete_flag is 1 for complete subfield, 0 for incomplete
2217 xpath_use is use-attribute to be used for X-Path search, 0 for none
2219 if (!strcmp(search_type, "phrase"))
2221 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2222 index_type, complete_flag, rank_type,
2227 else if (!strcmp(search_type, "and-list"))
2229 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2230 index_type, complete_flag, rank_type,
2235 else if (!strcmp(search_type, "or-list"))
2237 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2238 index_type, complete_flag, rank_type,
2243 else if (!strcmp(search_type, "local"))
2245 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2246 rank_type, rset_nmem, rset, kc);
2248 else if (!strcmp(search_type, "numeric"))
2250 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2251 index_type, complete_flag, rank_type,
2258 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2261 if (res != ZEBRA_OK)
2265 return rpn_search_xpath(zh, stream, rank_type, *rset,
2266 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top() calls rpn_search_structure()
   before its definition appears further down in this file.  The "static"
   here gives the function internal linkage; the later definition, which
   omits the keyword, keeps that linkage. */
2269 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2270 const Odr_oid *attributeSet,
2271 NMEM stream, NMEM rset_nmem,
2272 Z_SortKeySpecList *sort_sequence,
2273 int num_bases, char **basenames,
2274 RSET **result_sets, int *num_result_sets,
2275 Z_Operator *parent_op,
2276 struct rset_key_control *kc);
/* Walk an RPN tree looking at attribute type 12 on its APT operands.

   Complex nodes recurse into the left operand (s1) and then, while the
   result is still ZEBRA_OK, into the right operand (s2).  For a simple
   node holding an APT, attribute type 12 is read with attr_find() into l.

   NOTE(review): what is done with l after the lookup is not shown here;
   the function name suggests it feeds an approximation limit for the
   whole query - confirm. */
2278 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2281 ZEBRA_RES res = ZEBRA_OK;
2282 if (zs->which == Z_RPNStructure_complex)
2284 if (res == ZEBRA_OK)
2285 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2287 if (res == ZEBRA_OK)
2288 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2291 else if (zs->which == Z_RPNStructure_simple)
2293 if (zs->u.simple->which == Z_Operand_APT)
2295 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2296 AttrType global_hits_limit_attr;
2299 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2301 l = attr_find(&global_hits_limit_attr, NULL);
/* Entry point for evaluating a whole RPN tree.

   A key control object is created with zebra_key_control_create() and the
   tree is evaluated by rpn_search_structure() with no parent operator.
   On failure every result set returned so far is deleted.  On success the
   function asserts that exactly one non-null result set came back and
   stores it in *result_set. */
2309 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2310 const Odr_oid *attributeSet,
2311 NMEM stream, NMEM rset_nmem,
2312 Z_SortKeySpecList *sort_sequence,
2313 int num_bases, char **basenames,
2316 RSET *result_sets = 0;
2317 int num_result_sets = 0;
2319 struct rset_key_control *kc = zebra_key_control_create(zh);
2321 res = rpn_search_structure(zh, zs, attributeSet,
2324 num_bases, basenames,
2325 &result_sets, &num_result_sets,
2326 0 /* no parent op */,
2328 if (res != ZEBRA_OK)
2331 for (i = 0; i<num_result_sets; i++)
2332 rset_delete(result_sets[i]);
/* with no parent operator the tree must have been reduced to one set */
2337 assert(num_result_sets == 1);
2338 assert(result_sets);
2339 assert(*result_sets);
2340 *result_set = *result_sets;
/* Recursively evaluate an RPN structure into a list of result sets.

   Output: *result_sets / *num_result_sets (array taken from stream).

   Complex node:
     - both operands (s1, s2) are evaluated recursively; if either fails,
       the sets produced so far are deleted;
     - the left and right lists are concatenated into a new array;
     - the list is combined into ONE set right away when there is no
       parent operator, when the parent operator differs from this one,
       or when this operator is neither AND nor OR:
         and      -> rset_create_and()
         or       -> rset_create_or()
         and-not  -> rset_create_not()
         prox     -> rset_create_prox(); only a known unit equal to
                     Z_ProxUnit_word is accepted, anything else is
                     reported as YAZ_BIB1_UNSUPP_PROX_UNIT_CODE
         other    -> YAZ_BIB1_OPERATOR_UNSUPP
     - otherwise the concatenated list is returned as is.
       NOTE(review): presumably this lets a chain of identical AND/OR
       operators be merged by the parent into a single n-ary set -
       confirm that the children are given this node's operator as
       parent_op.

   Simple node:
     - an APT operand is evaluated with rpn_search_APT();
     - a result-set reference is resolved with resultSetRef(); a missing
       set is reported as YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST;
     - any other operand kind is reported as YAZ_BIB1_UNSUPP_SEARCH;
     - the single set is returned as a one-element list.

   Any other structure kind is reported as YAZ_BIB1_UNSUPP_SEARCH. */
2346 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2347 const Odr_oid *attributeSet,
2348 NMEM stream, NMEM rset_nmem,
2349 Z_SortKeySpecList *sort_sequence,
2350 int num_bases, char **basenames,
2351 RSET **result_sets, int *num_result_sets,
2352 Z_Operator *parent_op,
2353 struct rset_key_control *kc)
2355 *num_result_sets = 0;
2356 if (zs->which == Z_RPNStructure_complex)
2359 Z_Operator *zop = zs->u.complex->roperator;
2360 RSET *result_sets_l = 0;
2361 int num_result_sets_l = 0;
2362 RSET *result_sets_r = 0;
2363 int num_result_sets_r = 0;
/* left operand */
2365 res = rpn_search_structure(zh, zs->u.complex->s1,
2366 attributeSet, stream, rset_nmem,
2368 num_bases, basenames,
2369 &result_sets_l, &num_result_sets_l,
2371 if (res != ZEBRA_OK)
2374 for (i = 0; i<num_result_sets_l; i++)
2375 rset_delete(result_sets_l[i]);
/* right operand; on failure both lists are released */
2378 res = rpn_search_structure(zh, zs->u.complex->s2,
2379 attributeSet, stream, rset_nmem,
2381 num_bases, basenames,
2382 &result_sets_r, &num_result_sets_r,
2384 if (res != ZEBRA_OK)
2387 for (i = 0; i<num_result_sets_l; i++)
2388 rset_delete(result_sets_l[i]);
2389 for (i = 0; i<num_result_sets_r; i++)
2390 rset_delete(result_sets_r[i]);
2394 /* make a new list of result for all children */
2395 *num_result_sets = num_result_sets_l + num_result_sets_r;
2396 *result_sets = nmem_malloc(stream, *num_result_sets *
2397 sizeof(**result_sets));
2398 memcpy(*result_sets, result_sets_l,
2399 num_result_sets_l * sizeof(**result_sets));
2400 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2401 num_result_sets_r * sizeof(**result_sets));
2403 if (!parent_op || parent_op->which != zop->which
2404 || (zop->which != Z_Operator_and &&
2405 zop->which != Z_Operator_or))
2407 /* parent node different from this one (or non-present) */
2408 /* we must combine result sets now */
2412 case Z_Operator_and:
2413 rset = rset_create_and(rset_nmem, kc,
2415 *num_result_sets, *result_sets);
2418 rset = rset_create_or(rset_nmem, kc,
2419 kc->scope, 0, /* termid */
2420 *num_result_sets, *result_sets);
2422 case Z_Operator_and_not:
2423 rset = rset_create_not(rset_nmem, kc,
2428 case Z_Operator_prox:
2429 if (zop->u.prox->which != Z_ProximityOperator_known)
2432 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2436 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2438 zebra_setError_zint(zh,
2439 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2440 *zop->u.prox->u.known);
2445 rset = rset_create_prox(rset_nmem, kc,
2447 *num_result_sets, *result_sets,
2448 *zop->u.prox->ordered,
2449 (!zop->u.prox->exclusion ?
2450 0 : *zop->u.prox->exclusion),
2451 *zop->u.prox->relationType,
2452 *zop->u.prox->distance );
2456 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the child list by a one-element list holding the combined set */
2459 *num_result_sets = 1;
2460 *result_sets = nmem_malloc(stream, *num_result_sets *
2461 sizeof(**result_sets));
2462 (*result_sets)[0] = rset;
2465 else if (zs->which == Z_RPNStructure_simple)
2470 if (zs->u.simple->which == Z_Operand_APT)
2472 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2473 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2474 attributeSet, stream, sort_sequence,
2475 num_bases, basenames, rset_nmem, &rset,
2477 if (res != ZEBRA_OK)
2480 else if (zs->u.simple->which == Z_Operand_resultSetId)
2482 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2483 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2487 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2488 zs->u.simple->u.resultSetId);
2495 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields exactly one set */
2498 *num_result_sets = 1;
2499 *result_sets = nmem_malloc(stream, *num_result_sets *
2500 sizeof(**result_sets));
2501 (*result_sets)[0] = rset;
2505 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2516 * indent-tabs-mode: nil
2518 * vim: shiftwidth=4 tabstop=8 expandtab