1 /* $Id: rpnsearch.c,v 1.16 2007-10-29 16:57:53 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* Module log-level state. log_level_rpn is assigned from
   yaz_log_module_level("rpn") inside add_isam_p and is then passed as the
   level argument to most yaz_log calls in this file.
   NOTE(review): no use of log_level_set is visible in this excerpt;
   presumably it guards one-time initialisation of log_level_rpn - confirm. */
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
/* NOTE(review): no use of TERMSET_DISABLE is visible in this excerpt;
   presumably it gates the termset handling in grep_info_prepare - confirm. */
43 #define TERMSET_DISABLE 1
/* Character-map callback installed with dict_grep_cmap (see
   rpn_char_map_prepare). vp is the struct rpn_char_map_info prepared there;
   the input is mapped with zebra_maps_input for that register type.
   The yaz_log calls print the first mapped string byte by byte in hex.
   NOTE(review): this excerpt omits lines (braces, loop, return), so the
   conditions around the logging and the returned value are not visible. */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Fill map_info with the register's zebra_maps handle and the register
   type, then register rpn_char_map_handler (with map_info as its client
   data) as the character map used by dictionary grep on reg->dict. */
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65 struct rpn_char_map_info *map_info)
67 map_info->zm = reg->zebra_maps;
68 map_info->reg_type = reg_type;
69 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Record one dictionary-grep hit in p and log the matching term.
   name - dictionary key; key_SU_decode reads the ordinal prefix from it and
          the remainder (name+len) is the term.
   info - one length byte followed by the ISAM_P value (see the assert
          before the copy into isam_p_buf).
   NOTE(review): this excerpt omits lines (braces, some declarations, the
   return statements), so the returned values are not visible here. */
87 static int add_isam_p(const char *name, const char *info,
92 log_level_rpn = yaz_log_module_level("rpn");
95 /* we may have to stop this madness.. NOTE: -1 so that if
96 truncmax == trunxlimit we do *not* generate result sets */
97 if (p->isam_p_indx >= p->trunc_max - 1)
/* buffer full: grow to 2*size + 100 entries and copy the old contents */
100 if (p->isam_p_indx == p->isam_p_size)
102 ISAM_P *new_isam_p_buf;
106 p->isam_p_size = 2*p->isam_p_size + 100;
107 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
111 memcpy(new_isam_p_buf, p->isam_p_buf,
112 p->isam_p_indx * sizeof(*p->isam_p_buf));
113 xfree(p->isam_p_buf);
115 p->isam_p_buf = new_isam_p_buf;
118 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the source of this copy is p->isam_p_buf although the
   destination and the element size are those of term_no; p->term_no looks
   like the intended source - confirm. */
121 memcpy(new_term_no, p->isam_p_buf,
122 p->isam_p_indx * sizeof(*p->term_no));
125 p->term_no = new_term_no;
/* info[0] must equal sizeof(ISAM_P); the value itself starts at info+1 */
128 assert(*info == sizeof(*p->isam_p_buf));
129 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
134 char term_tmp[IT_MAX_WORD];
136 const char *index_name;
137 int len = key_SU_decode(&ord, (const unsigned char *) name);
/* translate the term part of the key back to display form for logging */
139 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
140 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141 zebraExplain_lookup_ord(p->zh->reg->zei,
142 ord, 0 /* index_type */, &db, &index_name);
143 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
145 resultSetAddTerm(p->zh, p->termset, name[len], db,
146 index_name, term_tmp);
/* Callback handed to dict_lookup_grep: casts the opaque client pointer p
   to struct grep_info * and forwards the hit to add_isam_p. */
152 static int grep_handle(char *name, const char *info, void *p)
154 return add_isam_p(name, info, (struct grep_info *) p);
/* Pre-scan of a term starting at *src: skips white space.
   ct1 / ct2 - optional (may be NULL) sets of characters that are tested
               against the current character with strchr.
   first     - passed on to zebra_maps_input.
   Input that maps to something other than CHR_SPACE ends the skipping.
   NOTE(review): the loop, the actions taken on a ct1/ct2 match and the
   return statements are not visible in this excerpt. */
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158 const char *ct1, const char *ct2, int first)
160 const char *s1, *s0 = *src;
163 /* skip white space */
166 if (ct1 && strchr(ct1, *s0))
168 if (ct2 && strchr(ct2, *s0))
171 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
/* Build a debug dump of in_buf[0..in_size-1] in out_buf: every byte is
   appended as "%02X:%c " (hex value plus a display character pc).
   out_size must be larger than 20 (asserted). Once the text comes within
   20 bytes of out_size, ".." is appended.
   NOTE(review): how pc is chosen for bytes outside 32..126 and whether the
   loop stops after "..", is not visible in this excerpt. */
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
/* mask to 0..255 so bytes >= 0x80 are not seen as negative */
192 int c = in_buf[k] & 0xff;
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
/* Characters that get a backslash in front of them when raw input is
   copied into a dictionary regular expression (see the strchr tests). */
207 #define REGEX_CHARS " []()|.*+?!"
/* Add one non-space unit of the term.
   The raw input bytes [start, end) are copied to dst_term at *dst_ptr.
   term_dict receives either the raw characters (with REGEX_CHARS
   backslash-escaped) or the mapped string map[0].
   NOTE(review): the condition choosing between the two forms (it involves
   q_map_match, presumably) and the update of *dst_ptr are not visible in
   this excerpt. */
209 static void add_non_space(const char *start, const char *end,
211 char *dst_term, int *dst_ptr,
212 const char **map, int q_map_match)
214 size_t sz = end - start;
215 memcpy(dst_term + *dst_ptr, start, sz);
221 if (strchr(REGEX_CHARS, *start))
222 wrbuf_putc(term_dict, '\\');
223 wrbuf_putc(term_dict, *start);
230 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
232 wrbuf_puts(term_dict, map[0]);
236 /* term_100: handle term, where trunc = none(no operators at all) */
/* Starts with term_pre (no special characters), then maps the input with
   zebra_maps_search. The raw term is collected in dst_term and the
   dictionary regexp in term_dict.
   NOTE(review): the loop structure, the condition selecting between the
   two CHR_SPACE branches (presumably space_split) and the return values
   are not visible in this excerpt. */
237 static int term_100(ZebraMaps zebra_maps, const char *index_type,
238 const char **src, WRBUF term_dict, int space_split,
/* [space_start, space_end) remembers a run of spaces that is only emitted
   if a non-space unit follows it */
246 const char *space_start = 0;
247 const char *space_end = 0;
249 if (!term_pre(zebra_maps, *index_type, src, NULL, NULL, !space_split))
256 map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0),
260 if (**map == *CHR_SPACE)
263 else /* complete subfield only. */
265 if (**map == *CHR_SPACE)
266 { /* save space mapping for later .. */
271 else if (space_start)
272 { /* reload last space */
273 while (space_start < space_end)
/* saved spaces are escaped like any other REGEX_CHARS member */
275 if (strchr(REGEX_CHARS, *space_start))
276 wrbuf_putc(term_dict, '\\');
277 dst_term[j++] = *space_start;
278 wrbuf_putc(term_dict, *space_start);
283 space_start = space_end = 0;
288 add_non_space(s1, s0, term_dict, dst_term, &j,
296 /* term_101: handle term, where trunc = Process # */
/* term_pre is told that '#' is special. Inside the term a special
   character is copied to dst_term and replaced by ".*" in term_dict
   (presumably this is the '#' case - the test itself is not visible in
   this excerpt); other input is mapped with zebra_maps_search and added
   with add_non_space. With space_split set, input mapping to CHR_SPACE is
   tested separately. dst_term is NUL-terminated at the end. */
297 static int term_101(ZebraMaps zebra_maps, const char *index_type,
298 const char **src, WRBUF term_dict, int space_split,
306 if (!term_pre(zebra_maps, *index_type, src, "#", "#", !space_split))
314 wrbuf_puts(term_dict, ".*");
315 dst_term[j++] = *s0++;
321 map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0),
323 if (space_split && **map == *CHR_SPACE)
327 add_non_space(s1, s0, term_dict, dst_term, &j,
331 dst_term[j++] = '\0';
336 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* errors - optional output (may be NULL; term_102 passes NULL). When the
            term starts with "+d+" (d a decimal digit, followed by more
            input) *errors is set to the value of d.
   Characters found in "^\\()[].*+?|-" are tested with strchr and a
   character is written to term_dict unmapped; other input goes through
   zebra_maps_search and add_non_space.
   NOTE(review): loop structure, advancing past the "+d+" prefix and the
   return values are not visible in this excerpt. */
337 static int term_103(ZebraMaps zebra_maps, const char *index_type,
339 WRBUF term_dict, int *errors, int space_split,
347 if (!term_pre(zebra_maps, *index_type, src, "^\\()[].*+?|", "(", !space_split))
350 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
351 isdigit(((const unsigned char *)s0)[1]))
353 *errors = s0[1] - '0';
360 if (strchr("^\\()[].*+?|-", *s0))
363 wrbuf_putc(term_dict, *s0);
371 map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0),
373 if (space_split && **map == *CHR_SPACE)
377 add_non_space(s1, s0, term_dict, dst_term, &j,
387 /* term_102: handle term, where trunc = re-1 (regular expressions).
   Delegates to term_103 with errors == NULL, i.e. without an output for
   the "+d+" error-count prefix. */
388 static int term_102(ZebraMaps zebra_maps, const char *index_type,
390 WRBUF term_dict, int space_split, char *dst_term)
392 return term_103(zebra_maps, index_type, src, term_dict, NULL, space_split,
397 /* term_104: handle term; the special characters given to term_pre
   are '?', '*' and '#'.
   Decimal digits that follow a special character are copied to dst_term
   and accumulated into limit. The regexp fragments written to term_dict
   are ".?", ".*" and ".".
   NOTE(review): which special character produces which fragment, and how
   limit is used, is not visible in this excerpt - confirm against the
   full source. */
398 static int term_104(ZebraMaps zebra_maps, const char *index_type,
399 const char **src, WRBUF term_dict, int space_split,
407 if (!term_pre(zebra_maps, *index_type, src, "?*#", "?*#", !space_split))
415 dst_term[j++] = *s0++;
416 if (*s0 >= '0' && *s0 <= '9')
419 while (*s0 >= '0' && *s0 <= '9')
421 limit = limit * 10 + (*s0 - '0');
422 dst_term[j++] = *s0++;
428 wrbuf_puts(term_dict, ".?");
433 wrbuf_puts(term_dict, ".*");
439 wrbuf_puts(term_dict, ".*");
440 dst_term[j++] = *s0++;
445 wrbuf_puts(term_dict, ".");
446 dst_term[j++] = *s0++;
452 map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0),
454 if (space_split && **map == *CHR_SPACE)
458 add_non_space(s1, s0, term_dict, dst_term, &j,
462 dst_term[j++] = '\0';
467 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* The special characters given to term_pre are '*' and '!'. The regexp
   fragments written to term_dict are ".*" and "."; the special character
   itself is copied to dst_term. string_term calls this with
   right_truncate = 1 for truncation 105 and 0 for truncation 106.
   NOTE(review): the trailing ".*" is presumably appended only when
   right_truncate is non-zero; the test is not visible in this excerpt. */
468 static int term_105(ZebraMaps zebra_maps, const char *index_type,
469 const char **src, WRBUF term_dict, int space_split,
470 char *dst_term, int right_truncate)
477 if (!term_pre(zebra_maps, *index_type, src, "*!", "*!", !space_split))
485 wrbuf_puts(term_dict, ".*");
486 dst_term[j++] = *s0++;
491 wrbuf_putc(term_dict, '.');
492 dst_term[j++] = *s0++;
498 map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0),
500 if (space_split && **map == *CHR_SPACE)
504 add_non_space(s1, s0, term_dict, dst_term, &j,
509 wrbuf_puts(term_dict, ".*");
510 dst_term[j++] = '\0';
/* NOTE(review): the expression is assembled in the fixed-size local
   dst_buf (20*5*20 bytes) with strcpy/strcat/sprintf and no bounds checks;
   the size is an assumption, as its own comment says. The finished
   expression is appended to term_dict at the end. */
516 /* gen_regular_rel - generate regular expression from relation
517 * val: border value (inclusive)
518 * islt: 1 if <=; 0 if >=.
520 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
522 char dst_buf[20*5*20]; /* assuming enough for expansion */
529 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
/* two alternative openings: one accepts every negative number, the other
   every non-negative number, before the digit-wise part */
533 strcpy(dst, "(-[0-9]+|(");
541 strcpy(dst, "([0-9]+|-(");
/* work on the decimal digits of val, last digit first */
552 sprintf(numstr, "%d", val);
553 for (w = strlen(numstr); --w >= 0; pos++)
572 strcpy(dst + dst_p, numstr);
573 dst_p = strlen(dst) - pos - 1;
601 for (i = 0; i<pos; i++)
614 /* match everything less than 10^(pos-1) */
616 for (i = 1; i<pos; i++)
617 strcat(dst, "[0-9]?");
621 /* match everything greater than 10^pos */
622 for (i = 0; i <= pos; i++)
623 strcat(dst, "[0-9]");
624 strcat(dst, "[0-9]*");
627 wrbuf_puts(term_dict, dst);
/* Append one character of wsrc, read at position *indx, to term_p. A
   backslash is written out and followed by a second write, so an escaped
   character is kept together with its backslash.
   NOTE(review): the increments of *indx are not visible in this excerpt;
   callers rely on *indx advancing past what was copied. */
630 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
632 const char *src = wrbuf_cstr(wsrc);
633 if (src[*indx] == '\\')
635 wrbuf_putc(term_p, src[*indx]);
638 wrbuf_putc(term_p, src[*indx]);
643 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
644 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
645 * >= abc ([b-].*|a[c-].*|ab[c-].*)
646 * ([^-a].*|a[^-b].*|ab[c-].*)
647 * < abc ([-0].*|a[-a].*|ab[-b].*)
648 * ([^a-].*|a[^b-].*|ab[^c-].*)
649 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
650 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Build the dictionary regexp for a string term according to the relation
   attribute (attribute type 2) of zapt.
   For each handled relation the term is first parsed by term_100 into the
   temporary term_component; the relation-specific expression is then
   written to term_dict. The log messages name the relation of each branch.
   An unsupported relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in
   *error_code. term_component is destroyed on every visible exit path.
   NOTE(review): the case labels, several conditions and the return
   statements are not visible in this excerpt. */
652 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
653 const char **term_sub, WRBUF term_dict,
654 const Odr_oid *attributeSet,
655 const char *index_type, int space_split, char *term_dst,
661 WRBUF term_component = wrbuf_alloc();
663 attr_init_APT(&relation, zapt, 2);
664 relation_value = attr_find(&relation, NULL);
667 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
668 switch (relation_value)
671 if (!term_100(zh->reg->zebra_maps, index_type,
672 term_sub, term_component,
673 space_split, term_dst))
675 wrbuf_destroy(term_component);
678 yaz_log(log_level_rpn, "Relation <");
/* one alternative per character position of the term: the prefix copied
   via index j, then a character class, then ".*" */
680 wrbuf_putc(term_dict, '(');
681 for (i = 0; i < wrbuf_len(term_component); )
686 wrbuf_putc(term_dict, '|');
688 string_rel_add_char(term_dict, term_component, &j);
690 wrbuf_putc(term_dict, '[');
692 wrbuf_putc(term_dict, '^');
/* byte 1 is the internal regexp escape char (see string_term) */
694 wrbuf_putc(term_dict, 1);
695 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
697 string_rel_add_char(term_dict, term_component, &i);
698 wrbuf_putc(term_dict, '-');
700 wrbuf_putc(term_dict, ']');
701 wrbuf_putc(term_dict, '.');
702 wrbuf_putc(term_dict, '*');
704 wrbuf_putc(term_dict, ')');
707 if (!term_100(zh->reg->zebra_maps, index_type,
708 term_sub, term_component,
709 space_split, term_dst))
711 wrbuf_destroy(term_component);
714 yaz_log(log_level_rpn, "Relation <=");
716 wrbuf_putc(term_dict, '(');
717 for (i = 0; i < wrbuf_len(term_component); )
722 string_rel_add_char(term_dict, term_component, &j);
723 wrbuf_putc(term_dict, '[');
725 wrbuf_putc(term_dict, '^');
727 wrbuf_putc(term_dict, 1);
728 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
730 string_rel_add_char(term_dict, term_component, &i);
731 wrbuf_putc(term_dict, '-');
733 wrbuf_putc(term_dict, ']');
734 wrbuf_putc(term_dict, '.');
735 wrbuf_putc(term_dict, '*');
737 wrbuf_putc(term_dict, '|');
/* final alternative: the term itself, so that equality also matches */
739 for (i = 0; i < wrbuf_len(term_component); )
740 string_rel_add_char(term_dict, term_component, &i);
741 wrbuf_putc(term_dict, ')');
744 if (!term_100(zh->reg->zebra_maps, index_type,
745 term_sub, term_component, space_split, term_dst))
747 wrbuf_destroy(term_component);
750 yaz_log(log_level_rpn, "Relation >");
752 wrbuf_putc(term_dict, '(');
753 for (i = 0; i < wrbuf_len(term_component); )
758 string_rel_add_char(term_dict, term_component, &j);
759 wrbuf_putc(term_dict, '[');
761 wrbuf_putc(term_dict, '^');
762 wrbuf_putc(term_dict, '-');
763 string_rel_add_char(term_dict, term_component, &i);
765 wrbuf_putc(term_dict, ']');
766 wrbuf_putc(term_dict, '.');
767 wrbuf_putc(term_dict, '*');
769 wrbuf_putc(term_dict, '|');
/* final alternative: the term followed by at least one more character */
771 for (i = 0; i < wrbuf_len(term_component); )
772 string_rel_add_char(term_dict, term_component, &i);
773 wrbuf_putc(term_dict, '.');
774 wrbuf_putc(term_dict, '+');
775 wrbuf_putc(term_dict, ')');
778 if (!term_100(zh->reg->zebra_maps, index_type, term_sub,
779 term_component, space_split, term_dst))
781 wrbuf_destroy(term_component);
784 yaz_log(log_level_rpn, "Relation >=");
786 wrbuf_putc(term_dict, '(');
787 for (i = 0; i < wrbuf_len(term_component); )
792 wrbuf_putc(term_dict, '|');
794 string_rel_add_char(term_dict, term_component, &j);
795 wrbuf_putc(term_dict, '[');
/* all positions but the last use a negated class; the last position uses
   an open range starting at the character itself (inclusive bound) */
797 if (i < wrbuf_len(term_component)-1)
799 wrbuf_putc(term_dict, '^');
800 wrbuf_putc(term_dict, '-');
801 string_rel_add_char(term_dict, term_component, &i);
805 string_rel_add_char(term_dict, term_component, &i);
806 wrbuf_putc(term_dict, '-');
808 wrbuf_putc(term_dict, ']');
809 wrbuf_putc(term_dict, '.');
810 wrbuf_putc(term_dict, '*');
812 wrbuf_putc(term_dict, ')');
819 yaz_log(log_level_rpn, "Relation =");
820 if (!term_100(zh->reg->zebra_maps, index_type, term_sub,
821 term_component, space_split, term_dst))
823 wrbuf_destroy(term_component);
/* equality: the parsed term in parentheses, nothing else */
826 wrbuf_puts(term_dict, "(");
827 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
828 wrbuf_puts(term_dict, ")");
831 yaz_log(log_level_rpn, "Relation always matches");
832 /* skip to end of term (we don't care what it is) */
833 while (**term_sub != '\0')
837 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
838 wrbuf_destroy(term_component);
841 wrbuf_destroy(term_component);
/* Forward declaration: string_term is defined further down but is called
   by term_trunc, which comes first in the file. */
845 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
846 const char **term_sub,
848 const Odr_oid *attributeSet, NMEM stream,
849 struct grep_info *grep_info,
850 const char *index_type, int complete_flag,
851 int num_bases, char **basenames,
853 const char *xpath_use,
854 struct ord_list **ol);
/* Read the hits-limit attribute (type 11) and the term reference id
   attribute (type 10) of zapt.
   hits_limit_value - output: the limit to use, after the defaulting
                      rules below have been applied.
   term_ref_id_str  - output: reference id as a string; a numeric id
                      (>= 0) is formatted into a 20-byte nmem buffer.
   Defaulting: no limit given (-1) -> zh->approx_limit if a reference id
   is present, otherwise 0 (no counting); limit 0 -> zh->approx_limit. */
856 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
857 Z_AttributesPlusTerm *zapt,
858 zint *hits_limit_value,
859 const char **term_ref_id_str,
862 AttrType term_ref_id_attr;
863 AttrType hits_limit_attr;
866 attr_init_APT(&hits_limit_attr, zapt, 11);
867 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
869 attr_init_APT(&term_ref_id_attr, zapt, 10);
870 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
871 if (term_ref_id_int >= 0)
873 char *res = nmem_malloc(nmem, 20);
874 sprintf(res, "%d", term_ref_id_int);
875 *term_ref_id_str = res;
878 /* no limit given ? */
879 if (*hits_limit_value == -1)
881 if (*term_ref_id_str)
883 /* use global if term_ref is present */
884 *hits_limit_value = zh->approx_limit;
888 /* no counting if term_ref is not present */
889 *hits_limit_value = 0;
892 else if (*hits_limit_value == 0)
894 /* 0 is the same as global limit */
895 *hits_limit_value = zh->approx_limit;
897 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
898 *term_ref_id_str ? *term_ref_id_str : "none",
/* Create the result set for one term of the query.
   The hit limit and term reference id are read with term_limits_APT, the
   dictionary is searched through string_term (which fills grep_info with
   ISAM positions and ol with the ordinals used), and the positions are
   turned into *rset with rset_trunc.
   The temporary term_dict buffer is destroyed right after string_term.
   NOTE(review): the return value of term_limits_APT is not used on the
   visible line; the handling of res and the return statements are not
   visible in this excerpt. */
903 static ZEBRA_RES term_trunc(ZebraHandle zh,
904 Z_AttributesPlusTerm *zapt,
905 const char **term_sub,
906 const Odr_oid *attributeSet, NMEM stream,
907 struct grep_info *grep_info,
908 const char *index_type, int complete_flag,
909 int num_bases, char **basenames,
911 const char *rank_type,
912 const char *xpath_use,
915 struct rset_key_control *kc)
919 zint hits_limit_value;
920 const char *term_ref_id_str = 0;
921 WRBUF term_dict = wrbuf_alloc();
924 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
/* start collecting positions for this term from scratch */
925 grep_info->isam_p_indx = 0;
926 res = string_term(zh, zapt, term_sub, term_dict,
927 attributeSet, stream, grep_info,
928 index_type, complete_flag, num_bases, basenames,
929 term_dst, xpath_use, &ol);
930 wrbuf_destroy(term_dict);
933 if (!*term_sub) /* no more terms ? */
935 yaz_log(log_level_rpn, "term: %s", term_dst);
936 *rset = rset_trunc(zh, grep_info->isam_p_buf,
937 grep_info->isam_p_indx, term_dst,
938 strlen(term_dst), rank_type, 1 /* preserve pos */,
939 zapt->term->which, rset_nmem,
940 kc, kc->scope, ol, index_type, hits_limit_value,
/* Search the dictionary for one string term in every database listed in
   basenames.
   For each database: select it, resolve the index ordinal with
   zebra_apt_get_ord, start term_dict with the encoded ordinal as an
   escaped prefix, append the term expression chosen by the truncation
   attribute (type 5), and run dict_lookup_grep with grep_handle so the
   hits accumulate in grep_info. *ol collects the ordinals used.
   space_split is the inverse of complete_flag.
   NOTE(review): braces, break/continue statements, error returns and the
   default label are not visible in this excerpt. */
947 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
948 const char **term_sub,
950 const Odr_oid *attributeSet, NMEM stream,
951 struct grep_info *grep_info,
952 const char *index_type, int complete_flag,
953 int num_bases, char **basenames,
955 const char *xpath_use,
956 struct ord_list **ol)
960 int truncation_value;
962 struct rpn_char_map_info rcmi;
963 int space_split = complete_flag ? 0 : 1;
965 int bases_ok = 0; /* no of databases with OK attribute */
967 *ol = ord_list_create(stream);
969 rpn_char_map_prepare(zh->reg, *index_type, &rcmi);
970 attr_init_APT(&truncation, zapt, 5);
971 truncation_value = attr_find(&truncation, NULL);
972 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
974 for (base_no = 0; base_no < num_bases; base_no++)
978 int max_pos, prefix_len = 0;
983 termp = *term_sub; /* start of term for each database */
986 if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no]))
988 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
993 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
994 attributeSet, &ord) != ZEBRA_OK)
998 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1002 *ol = ord_list_append(stream, *ol, ord);
1003 ord_len = key_SU_encode(ord, ord_buf);
/* prefix: "(" + each ordinal byte preceded by escape byte 1 + ")" */
1005 wrbuf_putc(term_dict, '(');
1007 for (i = 0; i<ord_len; i++)
1009 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1010 wrbuf_putc(term_dict, ord_buf[i]);
1012 wrbuf_putc(term_dict, ')');
/* remember where the term part starts; used for logging below */
1014 prefix_len = wrbuf_len(term_dict);
1016 switch (truncation_value)
1018 case -1: /* not specified */
1019 case 100: /* do not truncate */
1020 if (!string_relation(zh, zapt, &termp, term_dict,
1022 index_type, space_split, term_dst,
1027 zebra_setError(zh, relation_error, 0);
1034 case 1: /* right truncation */
1035 wrbuf_putc(term_dict, '(');
1036 if (!term_100(zh->reg->zebra_maps, index_type,
1037 &termp, term_dict, space_split, term_dst))
1042 wrbuf_puts(term_dict, ".*)");
1044 case 2: /* left truncation */
1045 wrbuf_puts(term_dict, "(.*");
1046 if (!term_100(zh->reg->zebra_maps, index_type,
1047 &termp, term_dict, space_split, term_dst))
1052 wrbuf_putc(term_dict, ')');
1054 case 3: /* left&right truncation */
1055 wrbuf_puts(term_dict, "(.*");
1056 if (!term_100(zh->reg->zebra_maps, index_type,
1057 &termp, term_dict, space_split, term_dst))
1062 wrbuf_puts(term_dict, ".*)");
1064 case 101: /* process # in term */
1065 wrbuf_putc(term_dict, '(');
1066 if (!term_101(zh->reg->zebra_maps, index_type,
1067 &termp, term_dict, space_split, term_dst))
1072 wrbuf_puts(term_dict, ")");
1074 case 102: /* Regexp-1 */
1075 wrbuf_putc(term_dict, '(');
1076 if (!term_102(zh->reg->zebra_maps, index_type,
1077 &termp, term_dict, space_split, term_dst))
1082 wrbuf_putc(term_dict, ')');
1084 case 103: /* Regexp-2 */
1086 wrbuf_putc(term_dict, '(');
1087 if (!term_103(zh->reg->zebra_maps, index_type,
/* NOTE(review): "®ex_range" on the next line looks like a mis-encoded
   "&regex_range" (an "&reg" sequence turned into the registered-sign
   character); regex_range is the variable passed to dict_lookup_grep
   below. Repair the encoding in the real file. */
1088 &termp, term_dict, ®ex_range,
1089 space_split, term_dst))
1094 wrbuf_putc(term_dict, ')');
1096 case 104: /* process # and ! in term */
1097 wrbuf_putc(term_dict, '(');
1098 if (!term_104(zh->reg->zebra_maps, index_type,
1099 &termp, term_dict, space_split, term_dst))
1104 wrbuf_putc(term_dict, ')');
1106 case 105: /* process * and ! in term (right_truncate = 1) */
1107 wrbuf_putc(term_dict, '(');
1108 if (!term_105(zh->reg->zebra_maps, index_type,
1109 &termp, term_dict, space_split, term_dst, 1))
1114 wrbuf_putc(term_dict, ')');
1116 case 106: /* process * and ! in term (right_truncate = 0) */
1117 wrbuf_putc(term_dict, '(');
1118 if (!term_105(zh->reg->zebra_maps, index_type,
1119 &termp, term_dict, space_split, term_dst, 0))
1124 wrbuf_putc(term_dict, ')');
1127 zebra_setError_zint(zh,
1128 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* debug dump of the term part of the expression (ordinal prefix skipped) */
1135 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1136 esc_str(buf, sizeof(buf), input, strlen(input));
1138 yaz_log(log_level_rpn, "dict_lookup_grep: %s",
1139 wrbuf_cstr(term_dict) + prefix_len);
1140 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1141 grep_info, &max_pos,
1142 ord_len /* number of "exact" chars */,
1145 zebra_set_partial_result(zh);
1147 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1152 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the buffers owned by grep_info: the term_no array and the
   isam_p_buf position buffer. The struct itself is not freed here. */
1158 static void grep_info_delete(struct grep_info *grep_info)
1161 xfree(grep_info->term_no);
1163 xfree(grep_info->isam_p_buf);
/* Initialise grep_info for a search on register type reg_type.
   Buffers start empty (size 0, NULL pointers). trunc_max comes from the
   "truncmax" resource (default "10000", converted with atoi) and is
   overridden by attribute type 13 of zapt when that attribute is present
   (value != -1).
   Attribute type 8 selects a termset: a numeric value other than -2 is
   formatted as the set name, otherwise the string value is used, and the
   set is created with resultSetAdd. Failure to create it raises
   YAZ_BIB1_ILLEGAL_RESULT_SET_NAME.
   NOTE(review): the "termset" YAZ_BIB1_UNSUPP_SEARCH error is presumably
   the branch taken when TERMSET_DISABLE is defined; the preprocessor
   lines and return statements are not visible in this excerpt. */
1166 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1167 Z_AttributesPlusTerm *zapt,
1168 struct grep_info *grep_info,
1172 grep_info->term_no = 0;
1174 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1175 grep_info->isam_p_size = 0;
1176 grep_info->isam_p_buf = NULL;
1178 grep_info->reg_type = reg_type;
1179 grep_info->termset = 0;
1185 attr_init_APT(&truncmax, zapt, 13);
1186 truncmax_value = attr_find(&truncmax, NULL);
1187 if (truncmax_value != -1)
1188 grep_info->trunc_max = truncmax_value;
1193 int termset_value_numeric;
1194 const char *termset_value_string;
1196 attr_init_APT(&termset, zapt, 8);
1197 termset_value_numeric =
1198 attr_find_ex(&termset, NULL, &termset_value_string);
1199 if (termset_value_numeric != -1)
1202 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1206 const char *termset_name = 0;
1207 if (termset_value_numeric != -2)
1210 sprintf(resname, "%d", termset_value_numeric);
1211 termset_name = resname;
1214 termset_name = termset_value_string;
1215 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1216 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1217 if (!grep_info->termset)
1219 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1229 \brief Create result set(s) for list of terms
1230 \param zh Zebra Handle
1231 \param zapt Attributes Plus Term (RPN leaf)
1232 \param termz term as used in query but converted to UTF-8
1233 \param attributeSet default attribute set
1234 \param stream memory for result
1235 \param reg_type register type ('w', 'p',..)
1236 \param complete_flag whether it's phrases or not
1237 \param rank_type term flags for ranking
1238 \param xpath_use use attribute for X-Path (-1 for no X-path)
1239 \param num_bases number of databases
1240 \param basenames array of databases
1241 \param rset_nmem memory for result sets
1242 \param result_sets output result set for each term in list (output)
1243 \param num_result_sets number of output result sets
1244 \param kc rset key control to be used for created result sets
/* Splits termz into terms by calling term_trunc repeatedly on the moving
   pointer termp and stores one result set per term in *result_sets.
   The array is allocated from stream and re-allocated (old entries copied)
   whenever it is full. When term_trunc fails, the result sets created so
   far are deleted and grep_info is released; grep_info is also released
   on the normal path.
   NOTE(review): the loop construct, the action taken when term_trunc
   yields a null result set and the return statements are not visible in
   this excerpt. */
1246 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1247 Z_AttributesPlusTerm *zapt,
1249 const Odr_oid *attributeSet,
1251 const char *index_type, int complete_flag,
1252 const char *rank_type,
1253 const char *xpath_use,
1254 int num_bases, char **basenames,
1256 RSET **result_sets, int *num_result_sets,
1257 struct rset_key_control *kc)
1259 char term_dst[IT_MAX_WORD+1];
1260 struct grep_info grep_info;
1261 const char *termp = termz;
1264 *num_result_sets = 0;
1266 if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL)
/* array full: allocate a larger one and carry the existing sets over */
1272 if (alloc_sets == *num_result_sets)
1275 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1278 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1279 alloc_sets = alloc_sets + add;
1280 *result_sets = rnew;
1282 res = term_trunc(zh, zapt, &termp, attributeSet,
1284 index_type, complete_flag,
1285 num_bases, basenames,
1286 term_dst, rank_type,
1287 xpath_use, rset_nmem,
1288 &(*result_sets)[*num_result_sets],
1290 if (res != ZEBRA_OK)
/* undo: drop every result set produced before the failure */
1293 for (i = 0; i < *num_result_sets; i++)
1294 rset_delete((*result_sets)[i]);
1295 grep_info_delete(&grep_info);
1298 if ((*result_sets)[*num_result_sets] == 0)
1300 (*num_result_sets)++;
1305 grep_info_delete(&grep_info);
/* Handle the position attribute (type 3) of zapt.
   Unsupported situations raise YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE: among
   them a register type that zebra_maps_is_first_in_field rejects and a
   register with neither isamb nor isamc.
   Otherwise, for each database the key "ordinal + FIRST_IN_FIELD_STR" is
   looked up in the dictionary; a hit supplies an ISAM_P (one length byte,
   then the value) from which an isamb or isamc result set is created.
   The per-database sets are combined into *rset with rset_create_or.
   NOTE(review): the case labels, the release of f_set and the return
   statements are not visible in this excerpt. */
1309 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1310 Z_AttributesPlusTerm *zapt,
1311 const Odr_oid *attributeSet,
1312 const char *index_type,
1313 int num_bases, char **basenames,
1316 struct rset_key_control *kc)
1324 attr_init_APT(&position, zapt, 3);
1325 position_value = attr_find(&position, NULL);
1326 switch(position_value)
1335 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1340 if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, *index_type))
1342 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1347 if (!zh->reg->isamb && !zh->reg->isamc)
1349 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
/* at most one result set per database */
1353 f_set = xmalloc(sizeof(RSET) * num_bases);
1354 for (base_no = 0; base_no < num_bases; base_no++)
1358 char term_dict[100];
1363 if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no]))
1365 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1366 basenames[base_no]);
1370 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1371 attributeSet, &ord) != ZEBRA_OK)
/* dictionary key = encoded ordinal followed by the first-in-field marker */
1374 ord_len = key_SU_encode(ord, ord_buf);
1375 memcpy(term_dict, ord_buf, ord_len);
1376 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1377 val = dict_lookup(zh->reg->dict, term_dict);
1380 assert(*val == sizeof(ISAM_P));
1381 memcpy(&isam_p, val+1, sizeof(isam_p));
1385 f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1386 zh->reg->isamb, isam_p, 0);
1387 else if (zh->reg->isamc)
1388 f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1389 zh->reg->isamc, isam_p, 0);
1393 *rset = rset_create_or(rset_nmem, kc, kc->scope,
1394 0 /* termid */, num_sets, f_set);
/* Phrase search: one result set per term (term_list_trunc), optionally
   preceded by the set from rpn_search_APT_position, combined into *rset.
   Zero sets give a null result set, one set is returned as is, several
   sets are joined with an ordered proximity set (relation 3, distance 1).
   When a position set exists, a new array with first_set in slot 0
   followed by the term sets is allocated from stream.
   NOTE(review): the test on first_set, the update of num_result_sets and
   the error-path bodies are not visible in this excerpt. */
1400 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1401 Z_AttributesPlusTerm *zapt,
1402 const char *termz_org,
1403 const Odr_oid *attributeSet,
1405 const char *index_type, int complete_flag,
1406 const char *rank_type,
1407 const char *xpath_use,
1408 int num_bases, char **basenames,
1411 struct rset_key_control *kc)
1413 RSET *result_sets = 0;
1414 int num_result_sets = 0;
1416 term_list_trunc(zh, zapt, termz_org, attributeSet,
1417 stream, index_type, complete_flag,
1418 rank_type, xpath_use,
1419 num_bases, basenames,
1421 &result_sets, &num_result_sets, kc);
1423 if (res != ZEBRA_OK)
1426 if (num_result_sets > 0)
1429 res = rpn_search_APT_position(zh, zapt, attributeSet,
1431 num_bases, basenames,
1432 rset_nmem, &first_set,
1434 if (res != ZEBRA_OK)
1438 RSET *nsets = nmem_malloc(stream,
1439 sizeof(RSET) * (num_result_sets+1));
1440 nsets[0] = first_set;
1441 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1442 result_sets = nsets;
1446 if (num_result_sets == 0)
1447 *rset = rset_create_null(rset_nmem, kc, 0);
1448 else if (num_result_sets == 1)
1449 *rset = result_sets[0];
1451 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1452 num_result_sets, result_sets,
1453 1 /* ordered */, 0 /* exclusion */,
1454 3 /* relation */, 1 /* distance */);
/* OR-list search: one result set per term (term_list_trunc); each term
   set may be paired with the set from rpn_search_APT_position in an
   ordered proximity set (relation 3, distance 1). The final *rset is a
   null set for zero terms, the single set for one term, and an OR of all
   sets otherwise. When the position lookup fails, all term result sets
   are deleted.
   NOTE(review): the test on first_set and the return statements are not
   visible in this excerpt. */
1460 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1461 Z_AttributesPlusTerm *zapt,
1462 const char *termz_org,
1463 const Odr_oid *attributeSet,
1465 const char *index_type,
1467 const char *rank_type,
1468 const char *xpath_use,
1469 int num_bases, char **basenames,
1472 struct rset_key_control *kc)
1474 RSET *result_sets = 0;
1475 int num_result_sets = 0;
1478 term_list_trunc(zh, zapt, termz_org, attributeSet,
1479 stream, index_type, complete_flag,
1480 rank_type, xpath_use,
1481 num_bases, basenames,
1483 &result_sets, &num_result_sets, kc);
1484 if (res != ZEBRA_OK)
1487 for (i = 0; i<num_result_sets; i++)
1490 res = rpn_search_APT_position(zh, zapt, attributeSet,
1492 num_bases, basenames,
1493 rset_nmem, &first_set,
1495 if (res != ZEBRA_OK)
1497 for (i = 0; i<num_result_sets; i++)
1498 rset_delete(result_sets[i]);
/* pair the position set with this term's set */
1506 tmp_set[0] = first_set;
1507 tmp_set[1] = result_sets[i];
1509 result_sets[i] = rset_create_prox(
1510 rset_nmem, kc, kc->scope,
1512 1 /* ordered */, 0 /* exclusion */,
1513 3 /* relation */, 1 /* distance */);
1516 if (num_result_sets == 0)
1517 *rset = rset_create_null(rset_nmem, kc, 0);
1518 else if (num_result_sets == 1)
1519 *rset = result_sets[0];
1521 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1522 num_result_sets, result_sets);
/* AND-list search: same shape as rpn_search_APT_or_list, but several term
   sets are combined with rset_create_and. One result set per term comes
   from term_list_trunc; each may be paired with the set from
   rpn_search_APT_position in an ordered proximity set (relation 3,
   distance 1). Zero terms give a null set, one term its single set. When
   the position lookup fails, all term result sets are deleted.
   NOTE(review): the test on first_set and the return statements are not
   visible in this excerpt. */
1528 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1529 Z_AttributesPlusTerm *zapt,
1530 const char *termz_org,
1531 const Odr_oid *attributeSet,
1533 const char *index_type,
1535 const char *rank_type,
1536 const char *xpath_use,
1537 int num_bases, char **basenames,
1540 struct rset_key_control *kc)
1542 RSET *result_sets = 0;
1543 int num_result_sets = 0;
1546 term_list_trunc(zh, zapt, termz_org, attributeSet,
1547 stream, index_type, complete_flag,
1548 rank_type, xpath_use,
1549 num_bases, basenames,
1551 &result_sets, &num_result_sets,
1553 if (res != ZEBRA_OK)
1555 for (i = 0; i<num_result_sets; i++)
1558 res = rpn_search_APT_position(zh, zapt, attributeSet,
1560 num_bases, basenames,
1561 rset_nmem, &first_set,
1563 if (res != ZEBRA_OK)
1565 for (i = 0; i<num_result_sets; i++)
1566 rset_delete(result_sets[i]);
/* pair the position set with this term's set */
1574 tmp_set[0] = first_set;
1575 tmp_set[1] = result_sets[i];
1577 result_sets[i] = rset_create_prox(
1578 rset_nmem, kc, kc->scope,
1580 1 /* ordered */, 0 /* exclusion */,
1581 3 /* relation */, 1 /* distance */);
1586 if (num_result_sets == 0)
1587 *rset = rset_create_null(rset_nmem, kc, 0);
1588 else if (num_result_sets == 1)
1589 *rset = result_sets[0];
1591 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1592 num_result_sets, result_sets);
/* Build and run the dictionary regexp for a numeric term according to the
   relation attribute (type 2) of zapt.
   The term is parsed with term_100 (space_split = 1) into term_num and
   converted with atoi. Strict relations are turned into inclusive ones by
   adjusting the value by one before gen_regular_rel is called:
   "<" uses value-1, ">" uses value+1. Equality uses "(0*N)", i.e. the
   number with any count of leading zeros. An unsupported relation stores
   YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in *error_code.
   The expression is then searched with dict_lookup_grep / grep_handle.
   term_num is destroyed on every visible exit path.
   NOTE(review): atoi gives no error indication for non-numeric input.
   The warning text "rel = gt" appears on the single lookup after the
   switch, so it is presumably logged for every relation - confirm.
   Case labels and return statements are not visible in this excerpt. */
1598 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1599 const char **term_sub,
1601 const Odr_oid *attributeSet,
1602 struct grep_info *grep_info,
1604 const char *index_type,
1612 WRBUF term_num = wrbuf_alloc();
1615 attr_init_APT(&relation, zapt, 2);
1616 relation_value = attr_find(&relation, NULL);
1618 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1620 switch (relation_value)
1623 yaz_log(log_level_rpn, "Relation <");
1624 if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1627 wrbuf_destroy(term_num);
1630 term_value = atoi(wrbuf_cstr(term_num));
1631 gen_regular_rel(term_dict, term_value-1, 1);
1634 yaz_log(log_level_rpn, "Relation <=");
1635 if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1638 wrbuf_destroy(term_num);
1641 term_value = atoi(wrbuf_cstr(term_num));
1642 gen_regular_rel(term_dict, term_value, 1);
1645 yaz_log(log_level_rpn, "Relation >=");
1646 if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1649 wrbuf_destroy(term_num);
1652 term_value = atoi(wrbuf_cstr(term_num));
1653 gen_regular_rel(term_dict, term_value, 0);
1656 yaz_log(log_level_rpn, "Relation >");
1657 if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1660 wrbuf_destroy(term_num);
1663 term_value = atoi(wrbuf_cstr(term_num));
1664 gen_regular_rel(term_dict, term_value+1, 0);
1668 yaz_log(log_level_rpn, "Relation =");
1669 if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1672 wrbuf_destroy(term_num);
1675 term_value = atoi(wrbuf_cstr(term_num));
1676 wrbuf_printf(term_dict, "(0*%d)", term_value);
1679 /* term_tmp untouched.. */
1680 while (**term_sub != '\0')
1684 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1685 wrbuf_destroy(term_num);
1688 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1689 0, grep_info, max_pos, 0, grep_handle);
1692 zebra_set_partial_result(zh);
1694 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1695 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1696 wrbuf_destroy(term_num);
/* Search the dictionary for one numeric term in every database listed in
   basenames. For each database: select it, resolve the index ordinal with
   zebra_apt_get_ord, start term_dict with the encoded ordinal as an
   escaped prefix (each byte preceded by byte 1), and let numeric_relation
   append the relation expression and run the lookup. *ol collects the
   ordinals used. A relation error reported by numeric_relation is raised
   with zebra_setError.
   NOTE(review): braces, continue/return statements and the use of
   bases_ok are not visible in this excerpt. */
1700 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1701 const char **term_sub,
1703 const Odr_oid *attributeSet, NMEM stream,
1704 struct grep_info *grep_info,
1705 const char *index_type, int complete_flag,
1706 int num_bases, char **basenames,
1708 const char *xpath_use,
1709 struct ord_list **ol)
1713 struct rpn_char_map_info rcmi;
1715 int bases_ok = 0; /* no of databases with OK attribute */
1717 *ol = ord_list_create(stream);
1719 rpn_char_map_prepare(zh->reg, *index_type, &rcmi);
1721 for (base_no = 0; base_no < num_bases; base_no++)
1724 int relation_error = 0;
1725 int ord, ord_len, i;
1730 if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no]))
1732 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1733 basenames[base_no]);
1737 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1738 attributeSet, &ord) != ZEBRA_OK)
/* start a fresh expression for this database */
1742 wrbuf_rewind(term_dict);
1744 *ol = ord_list_append(stream, *ol, ord);
1746 ord_len = key_SU_encode(ord, ord_buf);
1748 wrbuf_putc(term_dict, '(');
1749 for (i = 0; i < ord_len; i++)
1751 wrbuf_putc(term_dict, 1);
1752 wrbuf_putc(term_dict, ord_buf[i]);
1754 wrbuf_putc(term_dict, ')');
1756 if (!numeric_relation(zh, zapt, &termp, term_dict,
1757 attributeSet, grep_info, &max_pos, index_type,
1758 term_dst, &relation_error))
1762 zebra_setError(zh, relation_error, 0);
1772 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* rpn_search_APT_numeric: build the result set for a term searched with the
 * "numeric" strategy.
 *
 * termz is walked through termp by repeated calls to numeric_term(); the
 * ISAM positions each call leaves in grep_info are turned into one result
 * set with rset_trunc() and stored in result_sets, an array allocated on
 * stream that is enlarged when alloc_sets == num_result_sets.  The visible
 * tail then fills *rset: a null set when no sets were collected, the single
 * set when there is exactly one, and rset_create_and() over all of them in
 * the remaining visible branch.
 *
 * NOTE(review): this listing omits lines (braces, the loop header, break
 * and return statements, some declarations and parameters); the exact loop
 * exit and error-path cleanup cannot be confirmed from here.
 */
1777 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1778 Z_AttributesPlusTerm *zapt,
1780 const Odr_oid *attributeSet,
1782 const char *index_type,
1784 const char *rank_type,
1785 const char *xpath_use,
1786 int num_bases, char **basenames,
1789 struct rset_key_control *kc)
1791 char term_dst[IT_MAX_WORD+1];
1792 const char *termp = termz;
1793 RSET *result_sets = 0;
1794 int num_result_sets = 0;
1796 struct grep_info grep_info;
1798 zint hits_limit_value;
1799 const char *term_ref_id_str = 0;
/* fetch the hits limit and term reference id for this APT */
1801 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1803 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1804 if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL)
1808 struct ord_list *ol;
1809 WRBUF term_dict = wrbuf_alloc();
/* array full: allocate a larger one on stream and copy the old entries */
1810 if (alloc_sets == num_result_sets)
1813 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1816 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1817 alloc_sets = alloc_sets + add;
1820 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start collecting ISAM positions from index 0 for this term */
1821 grep_info.isam_p_indx = 0;
1822 res = numeric_term(zh, zapt, &termp, term_dict,
1823 attributeSet, stream, &grep_info,
1824 index_type, complete_flag, num_bases, basenames,
1825 term_dst, xpath_use, &ol);
/* term_dict is only needed for the lookup itself */
1826 wrbuf_destroy(term_dict);
/* failure, or numeric_term() cleared termp */
1827 if (res == ZEBRA_FAIL || termp == 0)
1829 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1830 result_sets[num_result_sets] =
1831 rset_trunc(zh, grep_info.isam_p_buf,
1832 grep_info.isam_p_indx, term_dst,
1833 strlen(term_dst), rank_type,
1834 0 /* preserve position */,
1835 zapt->term->which, rset_nmem,
1836 kc, kc->scope, ol, index_type,
1839 if (!result_sets[num_result_sets])
1845 grep_info_delete(&grep_info);
1847 if (res != ZEBRA_OK)
/* combine whatever was collected into the single output set */
1849 if (num_result_sets == 0)
1850 *rset = rset_create_null(rset_nmem, kc, 0);
1851 else if (num_result_sets == 1)
1852 *rset = result_sets[0];
1854 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1855 num_result_sets, result_sets);
/* rpn_search_APT_local: build the result set for the "local" search
 * strategy, where the term is a record system number.
 *
 * termz is converted with atozint() and the record is fetched with
 * rec_get().  Two outcomes are visible: *rset becomes a null set, or *rset
 * becomes a temporary set (directory taken from the "setTmpDir" resource)
 * that is opened for writing and receives one key through rset_write().
 *
 * NOTE(review): this listing omits the condition choosing between the two
 * outcomes, the construction of key, and the close/return statements;
 * presumably the null set is used when rec_get() finds nothing - confirm.
 */
1861 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1862 Z_AttributesPlusTerm *zapt,
1864 const Odr_oid *attributeSet,
1866 const char *rank_type, NMEM rset_nmem,
1868 struct rset_key_control *kc)
1871 zint sysno = atozint(termz);
1875 rec = rec_get(zh->reg->records, sysno);
1883 *rset = rset_create_null(rset_nmem, kc, 0);
1889 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1890 res_get(zh->res, "setTmpDir"), 0);
1891 rsfd = rset_open(*rset, RSETF_WRITE);
1896 rset_write(rsfd, &key);
/* rpn_sort_spec: turn an APT that carries a sort request into an entry of
 * sort_sequence, and give the caller a null result set for it.
 *
 * The sort relation is read from attribute type 7: value 1 selects
 * Z_SortKeySpec_ascending, value 2 Z_SortKeySpec_descending, and the last
 * visible branch selects ascending again.  The term (which must be
 * Z_Term_general, see the visible check) is parsed with atoi_n() into the
 * index i of the spec to fill.  The spec itself is allocated on stream with
 * a generic sort element whose sort attributes are a copy of attributeSet
 * and the APT's own attribute list, caseSensitivity 0 and a null "which".
 * sort_sequence->specs is created with 10 zeroed slots on first use.
 *
 * NOTE(review): this listing omits lines (braces, else, the statements
 * taken when the term is not general or when i is too large, the return).
 * Only an upper-bound test on i is visible; whether a negative value from
 * atoi_n() is possible or handled should be confirmed.
 */
1902 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1903 const Odr_oid *attributeSet, NMEM stream,
1904 Z_SortKeySpecList *sort_sequence,
1905 const char *rank_type,
1908 struct rset_key_control *kc)
1911 int sort_relation_value;
1912 AttrType sort_relation_type;
/* attribute type 7 carries the requested sort direction */
1917 attr_init_APT(&sort_relation_type, zapt, 7);
1918 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort key seen: create the spec array with every slot empty */
1920 if (!sort_sequence->specs)
1922 sort_sequence->num_specs = 10;
1923 sort_sequence->specs = (Z_SortKeySpec **)
1924 nmem_malloc(stream, sort_sequence->num_specs *
1925 sizeof(*sort_sequence->specs));
1926 for (i = 0; i<sort_sequence->num_specs; i++)
1927 sort_sequence->specs[i] = 0;
1929 if (zapt->term->which != Z_Term_general)
/* the term text is the position of this key within the sort sequence */
1932 i = atoi_n((char *) zapt->term->u.general->buf,
1933 zapt->term->u.general->len);
1934 if (i >= sort_sequence->num_specs)
1936 sprintf(termz, "%d", i);
1938 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1939 sks->sortElement = (Z_SortElement *)
1940 nmem_malloc(stream, sizeof(*sks->sortElement));
1941 sks->sortElement->which = Z_SortElement_generic;
1942 sk = sks->sortElement->u.generic = (Z_SortKey *)
1943 nmem_malloc(stream, sizeof(*sk));
1944 sk->which = Z_SortKey_sortAttributes;
1945 sk->u.sortAttributes = (Z_SortAttributes *)
1946 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
/* the attribute list is shared with the APT, not copied */
1948 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1949 sk->u.sortAttributes->list = zapt->attributes;
1951 sks->sortRelation = (int *)
1952 nmem_malloc(stream, sizeof(*sks->sortRelation));
1953 if (sort_relation_value == 1)
1954 *sks->sortRelation = Z_SortKeySpec_ascending;
1955 else if (sort_relation_value == 2)
1956 *sks->sortRelation = Z_SortKeySpec_descending;
1958 *sks->sortRelation = Z_SortKeySpec_ascending;
1960 sks->caseSensitivity = (int *)
1961 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1962 *sks->caseSensitivity = 0;
1964 sks->which = Z_SortKeySpec_null;
1965 sks->u.null = odr_nullval ();
1966 sort_sequence->specs[i] = sks;
/* a sort key contributes no hits of its own */
1967 *rset = rset_create_null(rset_nmem, kc, 0);
/* rpn_check_xpath: decide whether the use attribute (type 1) of zapt is an
 * X-Path expression and, if so, parse it.
 *
 * The string value of the use attribute is fetched with attr_find_ex().
 * When it is present and begins with '/', the result of
 * zebra_parse_xpath_str() on that string is returned, with the parsed steps
 * written to xpath (at most max entries).
 *
 * NOTE(review): the statement executed when the string is missing or does
 * not start with '/' is not visible in this listing, nor are the
 * declarations of use and mem; the caller tests the result with
 * xpath[xpath_len-1], so a distinct "not an xpath" value is presumably
 * returned - confirm.
 */
1972 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1973 const Odr_oid *attributeSet,
1974 struct xpath_location_step *xpath, int max,
1977 const Odr_oid *curAttributeSet = attributeSet;
1979 const char *use_string = 0;
1981 attr_init_APT(&use, zapt, 1);
1982 attr_find_ex(&use, &curAttributeSet, &use_string);
1984 if (!use_string || *use_string != '/')
1987 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* xpath_trunc: return the result set of all dictionary entries matching
 * the regular expression term in the register identified by index_type and
 * xpath_use.
 *
 * The register ordinal comes from zebraExplain_lookup_attr_str().  The grep
 * expression is "(" + key_SU_encode()d ordinal (each byte preceded by a
 * byte of value 1) + ")" followed by term; it is run with
 * dict_lookup_grep() and the collected ISAM positions are merged by
 * rset_trunc() using the flags string "void" and term type
 * Z_Term_characterString.  A null set is returned when
 * grep_info_prepare() fails, and on one further visible path.
 *
 * NOTE(review): this listing omits lines (the condition guarding the second
 * null-set return, declarations of ord_buf, r, max_pos, rset and i, the
 * final return); presumably the second null set covers an unknown ordinal -
 * confirm.  The return value r of dict_lookup_grep() is not examined in the
 * visible lines.
 */
1992 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1993 const char *index_type, const char *term,
1994 const char *xpath_use,
1996 struct rset_key_control *kc)
1998 struct grep_info grep_info;
1999 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2000 zinfo_index_category_index,
2001 index_type, xpath_use);
2002 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2003 return rset_create_null(rset_nmem, kc, 0);
2006 return rset_create_null(rset_nmem, kc, 0);
2012 WRBUF term_dict = wrbuf_alloc();
2013 int ord_len = key_SU_encode(ord, ord_buf);
2014 int term_type = Z_Term_characterString;
2015 const char *flags = "void";
/* register prefix: "(" + encoded ordinal + ")" */
2017 wrbuf_putc(term_dict, '(');
2018 for (i = 0; i<ord_len; i++)
2020 wrbuf_putc(term_dict, 1);
2021 wrbuf_putc(term_dict, ord_buf[i]);
2023 wrbuf_putc(term_dict, ')');
/* term is appended as-is, so the caller supplies it in grep syntax */
2024 wrbuf_puts(term_dict, term);
2026 grep_info.isam_p_indx = 0;
2027 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2028 &grep_info, &max_pos, 0, grep_handle);
2029 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2030 grep_info.isam_p_indx);
2031 rset = rset_trunc(zh, grep_info.isam_p_buf,
2032 grep_info.isam_p_indx, term, strlen(term),
2033 flags, 1, term_type, rset_nmem,
2034 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2035 0 /* term_ref_id_str */);
/* release the lookup scratch data before handing the set back */
2036 grep_info_delete(&grep_info);
2037 wrbuf_destroy(term_dict);
/* rpn_search_xpath: restrict a term result set to the element/attribute
 * context described by the parsed X-Path steps in xpath[0..xpath_len-1].
 *
 * For each database, and for each level counting down from xpath_len-1,
 * the visible code:
 *   - builds xpath_rev, a regular expression for the path written from
 *     step `level` back towards step 1, with steps separated by "/";
 *     a "[^/]*" wildcard, an escaped blank ("\001 ") and a ".*" for the
 *     "//" case are the special forms emitted;
 *   - when the step has a relation predicate with a non-empty name, looks
 *     up "<name without its first character>" optionally followed by
 *     "=<value>" (value characters found in REGEX_CHARS are backslash
 *     escaped) in the ZEBRA_XPATH_ATTR_NAME register;
 *   - looks up xpath_rev in the ZEBRA_XPATH_ELM_BEGIN and
 *     ZEBRA_XPATH_ELM_END registers and wraps the current rset with
 *     rset_create_between(start, rset, end, attr).
 * An incoming rset of 0 is recorded in always_matches.
 *
 * NOTE(review): this listing omits lines (braces, conditions, inner loop
 * headers, the opener of the explanatory comment whose body appears below,
 * return statements).  How always_matches selects between assigning
 * rset_start_tag directly and building the "between" set, and why
 * wrbuf_destroy(xpath_rev) appears twice, cannot be confirmed from here.
 */
2043 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2044 int num_bases, char **basenames,
2045 NMEM stream, const char *rank_type, RSET rset,
2046 int xpath_len, struct xpath_location_step *xpath,
2049 struct rset_key_control *kc)
2053 int always_matches = rset ? 0 : 1;
2061 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2062 for (i = 0; i<xpath_len; i++)
2064 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2076 a[@attr = value]/b[@other = othervalue]
2078 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2079 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2080 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2081 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2082 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2083 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* called with null map info and handler before the xpath lookups */
2087 dict_grep_cmap(zh->reg->dict, 0, 0);
2089 for (base_no = 0; base_no < num_bases; base_no++)
2091 int level = xpath_len;
2094 if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no]))
2096 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2097 basenames[base_no]);
/* innermost step first, working outwards */
2101 while (--level >= 0)
2103 WRBUF xpath_rev = wrbuf_alloc();
2105 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* step 0 is not part of the reversed path */
2107 for (i = level; i >= 1; --i)
2109 const char *cp = xpath[i].part;
2115 wrbuf_puts(xpath_rev, "[^/]*");
2116 else if (*cp == ' ')
2117 wrbuf_puts(xpath_rev, "\001 ");
2119 wrbuf_putc(xpath_rev, *cp);
2121 /* wrbuf_putc does not null-terminate , but
2122 wrbuf_puts below ensures it does.. so xpath_rev
2123 is OK iff length is > 0 */
2125 wrbuf_puts(xpath_rev, "/");
2127 else if (i == 1) /* // case */
2128 wrbuf_puts(xpath_rev, ".*");
/* attribute predicate on this step, e.g. [@c = y] */
2130 if (xpath[level].predicate &&
2131 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2132 xpath[level].predicate->u.relation.name[0])
2134 WRBUF wbuf = wrbuf_alloc();
/* name+1 drops the first character of the predicate name */
2135 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2136 if (xpath[level].predicate->u.relation.value)
2138 const char *cp = xpath[level].predicate->u.relation.value;
2139 wrbuf_putc(wbuf, '=');
/* the value must match literally, so regex metacharacters are escaped */
2143 if (strchr(REGEX_CHARS, *cp))
2144 wrbuf_putc(wbuf, '\\');
2145 wrbuf_putc(wbuf, *cp);
2149 rset_attr = xpath_trunc(
2150 zh, stream, "0", wrbuf_cstr(wbuf),
2151 ZEBRA_XPATH_ATTR_NAME,
2153 wrbuf_destroy(wbuf);
2159 wrbuf_destroy(xpath_rev);
2163 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2164 wrbuf_cstr(xpath_rev));
/* an empty path expression yields no tag lookups for this level */
2165 if (wrbuf_len(xpath_rev))
2167 rset_start_tag = xpath_trunc(zh, stream, "0",
2168 wrbuf_cstr(xpath_rev),
2169 ZEBRA_XPATH_ELM_BEGIN,
2172 rset = rset_start_tag;
2175 rset_end_tag = xpath_trunc(zh, stream, "0",
2176 wrbuf_cstr(xpath_rev),
2177 ZEBRA_XPATH_ELM_END,
/* keep only hits located between a start tag and its end tag */
2180 rset = rset_create_between(rset_nmem, kc, kc->scope,
2181 rset_start_tag, rset,
2182 rset_end_tag, rset_attr);
2185 wrbuf_destroy(xpath_rev);
/* Size of the xpath_location_step array that rpn_search_database() declares
 * and passes to rpn_check_xpath() as the maximum number of parsed steps. */
2193 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT() calls rpn_search_database() before
 * the latter is defined.  (Two parameter lines are not visible in this
 * listing.) */
2195 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2196 Z_AttributesPlusTerm *zapt,
2197 const Odr_oid *attributeSet, NMEM stream,
2198 Z_SortKeySpecList *sort_sequence,
2199 int num_bases, char **basenames,
2202 struct rset_key_control *kc);
/* rpn_search_APT: evaluate one attributes-plus-term operand against every
 * database in basenames, one database at a time, and merge the outcome
 * into *rset.
 *
 * rsets (num_bases entries, allocated on stream) receives one result set
 * per database from rpn_search_database(), which is always called with a
 * single database (basenames+i, count 1).  Selecting a database that
 * zebraExplain_curDatabase() rejects is reported as
 * YAZ_BIB1_DATABASE_UNAVAILABLE.
 *
 * If the loop stops with res != ZEBRA_OK, i is the index of the database
 * that failed, so rsets[0..i-1] are the sets already created and must be
 * deleted.  The cleanup loop previously read "for (i = 0; j < i; j++)",
 * which threw that count away and started from an unset j; it now starts
 * j at 0 and leaves i untouched.
 *
 * On success the visible branches set *rset to a null set when num_bases
 * is 0 and to a combination of all per-database sets otherwise.
 *
 * NOTE(review): this listing omits lines (two parameters, braces, the
 * declarations of i and j, break/else/return, the single-database branch
 * and the trailing arguments of the last call).  The per-database sets are
 * combined with rset_create_and(); confirm that an intersection, not a
 * union, across databases is what is intended.
 */
2204 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2205 const Odr_oid *attributeSet, NMEM stream,
2206 Z_SortKeySpecList *sort_sequence,
2207 int num_bases, char **basenames,
2210 struct rset_key_control *kc)
2212 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2213 ZEBRA_RES res = ZEBRA_OK;
2215 for (i = 0; i < num_bases; i++)
/* make basenames[i] current; non-zero means it is not available */
2218 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2220 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
/* search this one database only; its set lands in rsets[i] */
2225 res = rpn_search_database(zh, zapt, attributeSet, stream,
2226 sort_sequence, 1, basenames+i,
2227 rset_nmem, rsets+i, kc);
2228 if (res != ZEBRA_OK)
2231 if (res != ZEBRA_OK)
2232 { /* must clean up the already created sets */
/* i still holds the number of sets created before the failure */
2234 for (j = 0; j < i; j++)
2235 rset_delete(rsets[j]);
2242 else if (num_bases == 0)
2243 *rset = rset_create_null(rset_nmem, kc, 0);
2245 *rset = rset_create_and(rset_nmem, kc, kc->scope,
/* rpn_search_database: evaluate one attributes-plus-term operand for the
 * given databases and store the resulting set in *rset.
 *
 * Visible flow:
 *   1. zebra_maps_attr() derives index_type, search_type, rank_type,
 *      complete_flag and sort_flag from the APT's attributes.
 *   2. The term is converted to UTF-8 in termz by zapt_term_to_utf8().
 *   3. A sort request is delegated to rpn_sort_spec().
 *   4. rpn_check_xpath() parses an X-Path use attribute; if the last step
 *      begins with '@' the term is searched in ZEBRA_XPATH_ATTR_CDATA,
 *      otherwise in ZEBRA_XPATH_CDATA.  With relation attribute (type 2)
 *      value 103 ("alwaysmatches") no term set is built and
 *      rpn_search_xpath() is called with *rset == 0.
 *   5. search_type selects the strategy: "phrase", "and-list", "or-list",
 *      "local" or "numeric"; anything else is reported as
 *      YAZ_BIB1_UNSUPP_SEARCH.
 *   6. The term set is finally passed through rpn_search_xpath().
 *
 * NOTE(review): this listing omits lines (two parameters, several
 * declarations, braces, the conditions around steps 3, 4 and 6, return
 * statements, trailing call arguments); which of these steps are
 * conditional on sort_flag or xpath_len cannot be confirmed from here.
 */
2251 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2252 Z_AttributesPlusTerm *zapt,
2253 const Odr_oid *attributeSet, NMEM stream,
2254 Z_SortKeySpecList *sort_sequence,
2255 int num_bases, char **basenames,
2258 struct rset_key_control *kc)
2260 ZEBRA_RES res = ZEBRA_OK;
2261 const char *index_type;
2262 char *search_type = NULL;
2263 char rank_type[128];
2266 char termz[IT_MAX_WORD+1];
2268 const char *xpath_use = 0;
2269 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2273 log_level_rpn = yaz_log_module_level("rpn");
/* map the APT's attributes to register, strategy, ranking and flags */
2276 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2277 rank_type, &complete_flag, &sort_flag);
2279 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2280 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2281 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2282 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2284 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2288 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2289 rank_type, rset_nmem, rset, kc);
2290 /* consider if an X-Path query is used */
2291 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2292 xpath, MAX_XPATH_STEPS, stream);
2295 if (xpath[xpath_len-1].part[0] == '@')
2296 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2298 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* attribute type 2 is the relation */
2305 attr_init_APT(&relation, zapt, 2);
2306 relation_value = attr_find(&relation, NULL);
2308 if (relation_value == 103) /* alwaysmatches */
2310 *rset = 0; /* signal no "term" set */
2311 return rpn_search_xpath(zh, num_bases, basenames,
2312 stream, rank_type, *rset,
2313 xpath_len, xpath, rset_nmem, rset, kc);
2318 /* search using one of the various search type strategies
2319 termz is our UTF-8 search term
2320 attributeSet is top-level default attribute set
2321 stream is ODR for search
2322 reg_id is the register type
2323 complete_flag is 1 for complete subfield, 0 for incomplete
2324 xpath_use is use-attribute to be used for X-Path search, 0 for none
2326 if (!strcmp(search_type, "phrase"))
2328 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2329 index_type, complete_flag, rank_type,
2331 num_bases, basenames, rset_nmem,
2334 else if (!strcmp(search_type, "and-list"))
2336 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2337 index_type, complete_flag, rank_type,
2339 num_bases, basenames, rset_nmem,
2342 else if (!strcmp(search_type, "or-list"))
2344 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2345 index_type, complete_flag, rank_type,
2347 num_bases, basenames, rset_nmem,
2350 else if (!strcmp(search_type, "local"))
2352 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2353 rank_type, rset_nmem, rset, kc);
2355 else if (!strcmp(search_type, "numeric"))
2357 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2358 index_type, complete_flag, rank_type,
2360 num_bases, basenames, rset_nmem,
2365 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2368 if (res != ZEBRA_OK)
2372 return rpn_search_xpath(zh, num_bases, basenames,
2373 stream, rank_type, *rset,
2374 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top() calls rpn_search_structure()
 * before its definition, and the function also calls itself for the two
 * children of a complex node. */
2377 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2378 const Odr_oid *attributeSet,
2379 NMEM stream, NMEM rset_nmem,
2380 Z_SortKeySpecList *sort_sequence,
2381 int num_bases, char **basenames,
2382 RSET **result_sets, int *num_result_sets,
2383 Z_Operator *parent_op,
2384 struct rset_key_control *kc);
/* rpn_get_top_approx_limit: walk an RPN query tree looking for the
 * attribute of type 12 on its attributes-plus-term leaves.
 *
 * A complex node recurses into s1 and then, only while res is still
 * ZEBRA_OK, into s2.  A simple node that is a Z_Operand_APT has attribute
 * type 12 looked up with attr_find(); the value is stored in l.
 *
 * NOTE(review): this listing omits the remaining parameters, the
 * declaration of l, what is done with l and the return statement.  The
 * local is named global_hits_limit_attr, so presumably l is written to an
 * output parameter as a hits limit - confirm.
 */
2386 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2389 ZEBRA_RES res = ZEBRA_OK;
2390 if (zs->which == Z_RPNStructure_complex)
/* left subtree first; res is ZEBRA_OK here by initialisation */
2392 if (res == ZEBRA_OK)
2393 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2395 if (res == ZEBRA_OK)
2396 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2399 else if (zs->which == Z_RPNStructure_simple)
2401 if (zs->u.simple->which == Z_Operand_APT)
2403 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2404 AttrType global_hits_limit_attr;
2407 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2409 l = attr_find(&global_hits_limit_attr, NULL);
/* rpn_search_top: entry point that evaluates a whole RPN query tree and
 * hands back a single result set in *result_set.
 *
 * A key control object is created with zebra_key_control_create() and the
 * tree is evaluated by rpn_search_structure() with no parent operator.  On
 * failure every result set returned so far is deleted.  On success the
 * code asserts that exactly one non-null set came back and stores it in
 * *result_set.
 *
 * NOTE(review): this listing omits lines (the last parameters, braces,
 * else, the release of kc if any, the return); what *result_set holds on
 * the failure path cannot be confirmed from here.
 */
2417 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2418 const Odr_oid *attributeSet,
2419 NMEM stream, NMEM rset_nmem,
2420 Z_SortKeySpecList *sort_sequence,
2421 int num_bases, char **basenames,
2424 RSET *result_sets = 0;
2425 int num_result_sets = 0;
2427 struct rset_key_control *kc = zebra_key_control_create(zh);
2429 res = rpn_search_structure(zh, zs, attributeSet,
2432 num_bases, basenames,
2433 &result_sets, &num_result_sets,
2434 0 /* no parent op */,
2436 if (res != ZEBRA_OK)
/* drop whatever partial sets the failed evaluation returned */
2439 for (i = 0; i<num_result_sets; i++)
2440 rset_delete(result_sets[i]);
/* with no parent operator the root always combines down to one set */
2445 assert(num_result_sets == 1);
2446 assert(result_sets);
2447 assert(*result_sets);
2448 *result_set = *result_sets;
/* rpn_search_structure: recursively evaluate an RPN query node, returning
 * an array of result sets in *result_sets / *num_result_sets.
 *
 * Complex node: both children are evaluated with this node's operator
 * passed as their parent_op.  If either fails, the sets produced so far
 * are deleted.  The children's sets are concatenated into one array
 * allocated on stream.  They are combined into a single set immediately
 * unless the parent has the same operator and that operator is AND or OR;
 * in that case the flat list is returned so the parent can build one
 * n-ary set.  Visible combinators: rset_create_and, rset_create_or,
 * rset_create_not and rset_create_prox.  Proximity is only accepted for a
 * known unit equal to Z_ProxUnit_word (otherwise
 * YAZ_BIB1_UNSUPP_PROX_UNIT_CODE); any other operator is reported as
 * YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node: an APT operand is evaluated with rpn_search_APT(); a
 * result-set reference is resolved with resultSetRef() (a missing set is
 * reported as YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST); other operand
 * kinds are reported as YAZ_BIB1_UNSUPP_SEARCH.  The single resulting set
 * is returned as a one-element array.
 *
 * Any other node kind is reported as YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): this listing omits lines (braces, the switch header, the
 * OR case label, break/return statements, some call arguments and the
 * declarations of res, rset and i); return values and the exact statements
 * on each error path cannot be confirmed from here.
 */
2454 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2455 const Odr_oid *attributeSet,
2456 NMEM stream, NMEM rset_nmem,
2457 Z_SortKeySpecList *sort_sequence,
2458 int num_bases, char **basenames,
2459 RSET **result_sets, int *num_result_sets,
2460 Z_Operator *parent_op,
2461 struct rset_key_control *kc)
2463 *num_result_sets = 0;
2464 if (zs->which == Z_RPNStructure_complex)
2467 Z_Operator *zop = zs->u.complex->roperator;
2468 RSET *result_sets_l = 0;
2469 int num_result_sets_l = 0;
2470 RSET *result_sets_r = 0;
2471 int num_result_sets_r = 0;
/* left operand */
2473 res = rpn_search_structure(zh, zs->u.complex->s1,
2474 attributeSet, stream, rset_nmem,
2476 num_bases, basenames,
2477 &result_sets_l, &num_result_sets_l,
2479 if (res != ZEBRA_OK)
2482 for (i = 0; i<num_result_sets_l; i++)
2483 rset_delete(result_sets_l[i]);
/* right operand */
2486 res = rpn_search_structure(zh, zs->u.complex->s2,
2487 attributeSet, stream, rset_nmem,
2489 num_bases, basenames,
2490 &result_sets_r, &num_result_sets_r,
2492 if (res != ZEBRA_OK)
/* both sides must be released when the right side fails */
2495 for (i = 0; i<num_result_sets_l; i++)
2496 rset_delete(result_sets_l[i]);
2497 for (i = 0; i<num_result_sets_r; i++)
2498 rset_delete(result_sets_r[i]);
2502 /* make a new list of result for all children */
2503 *num_result_sets = num_result_sets_l + num_result_sets_r;
2504 *result_sets = nmem_malloc(stream, *num_result_sets *
2505 sizeof(**result_sets));
2506 memcpy(*result_sets, result_sets_l,
2507 num_result_sets_l * sizeof(**result_sets));
2508 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2509 num_result_sets_r * sizeof(**result_sets));
/* defer combining only when the parent repeats this AND/OR operator */
2511 if (!parent_op || parent_op->which != zop->which
2512 || (zop->which != Z_Operator_and &&
2513 zop->which != Z_Operator_or))
2515 /* parent node different from this one (or non-present) */
2516 /* we must combine result sets now */
2520 case Z_Operator_and:
2521 rset = rset_create_and(rset_nmem, kc,
2523 *num_result_sets, *result_sets);
2526 rset = rset_create_or(rset_nmem, kc,
2527 kc->scope, 0, /* termid */
2528 *num_result_sets, *result_sets);
2530 case Z_Operator_and_not:
2531 rset = rset_create_not(rset_nmem, kc,
2536 case Z_Operator_prox:
/* only a known proximity unit is accepted ... */
2537 if (zop->u.prox->which != Z_ProximityOperator_known)
2540 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* ... and of the known units only "word" */
2544 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2546 zebra_setError_zint(zh,
2547 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2548 *zop->u.prox->u.known);
2553 rset = rset_create_prox(rset_nmem, kc,
2555 *num_result_sets, *result_sets,
2556 *zop->u.prox->ordered,
2557 (!zop->u.prox->exclusion ?
2558 0 : *zop->u.prox->exclusion),
2559 *zop->u.prox->relationType,
2560 *zop->u.prox->distance );
2564 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the children's list */
2567 *num_result_sets = 1;
2568 *result_sets = nmem_malloc(stream, *num_result_sets *
2569 sizeof(**result_sets));
2570 (*result_sets)[0] = rset;
2573 else if (zs->which == Z_RPNStructure_simple)
2578 if (zs->u.simple->which == Z_Operand_APT)
2580 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2581 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2582 attributeSet, stream, sort_sequence,
2583 num_bases, basenames, rset_nmem, &rset,
2585 if (res != ZEBRA_OK)
2588 else if (zs->u.simple->which == Z_Operand_resultSetId)
2590 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2591 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2595 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2596 zs->u.simple->u.resultSetId);
2603 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a leaf always yields exactly one set */
2606 *num_result_sets = 1;
2607 *result_sets = nmem_malloc(stream, *num_result_sets *
2608 sizeof(**result_sets));
2609 (*result_sets)[0] = rset;
2613 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2624 * indent-tabs-mode: nil
2626 * vim: shiftwidth=4 tabstop=8 expandtab