1 /* $Id: rpnsearch.c,v 1.17 2007-10-29 20:07:04 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* File-local logging state.  log_level_rpn is assigned from
   yaz_log_module_level("rpn") in add_isam_p(); log_level_set presumably
   guards that one-time lookup (the guard itself is not visible in this
   listing). */
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
/* Compile-time switch for the termset code; NOTE(review): the #if sites
   that test it fall in lines not shown here. */
43 #define TERMSET_DISABLE 1
/* Character-map callback that rpn_char_map_prepare() registers with
   dict_grep_cmap().
   vp   - the struct rpn_char_map_info supplied at registration
   from - input cursor passed through to zebra_maps_input()
   len  - number of input bytes available
   The input is mapped with zebra_maps_input() using the info's zm and
   reg_type.  The yaz_log() calls dump "---" and the mapped bytes in hex;
   NOTE(review): they look like a debug-only section, but the enclosing
   lines (and the return statement) are not visible here. */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Prepare dictionary grep character mapping for a register type.
   Stores reg->zebra_maps and reg_type in map_info, then installs
   rpn_char_map_handler on reg->dict via dict_grep_cmap() with map_info as
   the handler argument.  map_info is handed to the dictionary by pointer,
   so the caller must keep it alive while the mapping is in use. */
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65 struct rpn_char_map_info *map_info)
67 map_info->zm = reg->zebra_maps;
68 map_info->reg_type = reg_type;
69 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Record one dictionary hit in a grep_info (called through grep_handle()
   with the grep_info as the third argument, p).
   name - dictionary key of the hit: an encoded ordinal (decoded with
          key_SU_decode) followed by the term bytes
   info - dictionary value: first byte is the payload length, which must
          equal sizeof(ISAM_P); the ISAM pointer follows at info+1
   The ISAM pointer is appended to p->isam_p_buf.  When the buffer is full
   it is regrown to 2*size+100 entries and the old contents are copied over.
   Collection is cut off once isam_p_indx reaches trunc_max - 1.
   NOTE(review): return values and the conditions around the termset
   section are in lines not visible in this listing. */
87 static int add_isam_p(const char *name, const char *info,
92 log_level_rpn = yaz_log_module_level("rpn");
95 /* we may have to stop this madness.. NOTE: -1 so that if
96 truncmax == trunxlimit we do *not* generate result sets */
97 if (p->isam_p_indx >= p->trunc_max - 1)
100 if (p->isam_p_indx == p->isam_p_size)
102 ISAM_P *new_isam_p_buf;
106 p->isam_p_size = 2*p->isam_p_size + 100;
107 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
111 memcpy(new_isam_p_buf, p->isam_p_buf,
112 p->isam_p_indx * sizeof(*p->isam_p_buf));
113 xfree(p->isam_p_buf);
115 p->isam_p_buf = new_isam_p_buf;
118 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* carry over the old term numbers; the source must be p->term_no, not the
   ISAM pointer buffer (element types and sizes differ) */
121 memcpy(new_term_no, p->term_no,
122 p->isam_p_indx * sizeof(*p->term_no));
125 p->term_no = new_term_no;
/* info[0] is the payload length; the ISAM pointer itself starts at info+1 */
128 assert(*info == sizeof(*p->isam_p_buf));
129 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
134 char term_tmp[IT_MAX_WORD];
136 const char *index_name;
/* name = encoded ordinal (len bytes) followed by the term */
137 int len = key_SU_decode(&ord, (const unsigned char *) name);
139 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
140 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141 zebraExplain_lookup_ord(p->zh->reg->zei,
142 ord, 0 /* index_type */, &db, &index_name);
143 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
145 resultSetAddTerm(p->zh, p->termset, name[len], db,
146 index_name, term_tmp);
/* Grep callback adapter: forwards a dictionary hit (name, info) to
   add_isam_p(), casting the opaque client pointer p to struct grep_info. */
152 static int grep_handle(char *name, const char *info, void *p)
154 return add_isam_p(name, info, (struct grep_info *) p);
/* Pre-scan of a term: works on *src to skip leading white space.
   ct1, ct2 - optional (may be NULL) sets of special characters; the current
              character is tested against each with strchr() before the
              input mapping is consulted
   first    - passed through to zebra_maps_input()
   A character counts as white space when its zebra_maps_input() mapping
   starts with *CHR_SPACE.
   NOTE(review): the loop structure, the action taken on each test and the
   return value are in lines not visible here; callers treat a zero return
   as "no term". */
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158 const char *ct1, const char *ct2, int first)
160 const char *s1, *s0 = *src;
163 /* skip white space */
166 if (ct1 && strchr(ct1, *s0))
168 if (ct2 && strchr(ct2, *s0))
171 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
/* Render in_buf (in_size bytes) into out_buf as a readable dump for logging.
   Each byte is appended as "%02X:%c " (hex value, then a character).
   Bytes below 32 or above 126 get special treatment for the %c part
   (presumably a placeholder character; that line is not visible).
   out_size must be greater than 20.  Once the output grows to within 20
   bytes of out_size, ".." is appended (presumably the loop then stops). */
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
/* mask to 0..255 so bytes with the high bit set do not go negative */
192 int c = in_buf[k] & 0xff;
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
/* Characters that are preceded by a backslash when copied literally into a
   dictionary regular expression (see add_non_space() and term_100()). */
207 #define REGEX_CHARS " []()|.*+?!"
/* Emit one non-space token of a term.
   start, end  - the source bytes [start, end) of the token
   dst_term    - plain-text copy of the term; the token bytes are copied in
                 at offset *dst_ptr
   map         - mapping result for the token; map[0] is its mapped string
   q_map_match - NOTE(review): presumably selects between the two output
                 forms below; the test itself is not visible
   The dictionary expression (term_dict) receives either the raw source
   characters, each REGEX_CHARS member escaped with a backslash, or the
   mapped string map[0]; esc_str() produces a loggable dump of map[0]. */
209 static void add_non_space(const char *start, const char *end,
211 char *dst_term, int *dst_ptr,
212 const char **map, int q_map_match)
214 size_t sz = end - start;
215 memcpy(dst_term + *dst_ptr, start, sz);
221 if (strchr(REGEX_CHARS, *start))
222 wrbuf_putc(term_dict, '\\');
223 wrbuf_putc(term_dict, *start);
230 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
232 wrbuf_puts(term_dict, map[0]);
236 /* term_100: handle term, where trunc = none(no operators at all) */
/* Parameters visible in this listing:
   index_type  - register type string; its first character selects the maps
   src         - in/out cursor into the query term
   term_dict   - WRBUF receiving the dictionary regular expression
   space_split - non-zero: a space mapping is tested right after the map
                 lookup (presumably ending the term); zero ("complete
                 subfield"): spaces are kept inside the term
   In the complete-subfield case a run of spaces is remembered (space_start
   to space_end) and only written out, with REGEX_CHARS escaped, once a
   following non-space token shows up, so trailing spaces are dropped.
   Callers treat a zero return as "no term found".
   NOTE(review): several lines of the body are not visible here. */
237 static int term_100(ZebraMaps zebra_maps, const char *index_type,
238 const char **src, WRBUF term_dict, int space_split,
246 const char *space_start = 0;
247 const char *space_end = 0;
249 if (!term_pre(zebra_maps, *index_type, src, NULL, NULL, !space_split))
256 map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0),
260 if (**map == *CHR_SPACE)
263 else /* complete subfield only. */
265 if (**map == *CHR_SPACE)
266 { /* save space mapping for later .. */
271 else if (space_start)
272 { /* reload last space */
273 while (space_start < space_end)
275 if (strchr(REGEX_CHARS, *space_start))
276 wrbuf_putc(term_dict, '\\');
277 dst_term[j++] = *space_start;
278 wrbuf_putc(term_dict, *space_start);
283 space_start = space_end = 0;
288 add_non_space(s1, s0, term_dict, dst_term, &j,
296 /* term_101: handle term, where trunc = Process # */
/* '#' is the only special character passed to term_pre().  The visible
   code writes ".*" to term_dict and copies the source character to
   dst_term (presumably when the current character is '#'; that test is not
   visible).  Other input goes through zebra_maps_search() and
   add_non_space(); with space_split set, a space mapping is tested
   (presumably ending the term).  dst_term is NUL-terminated at the end. */
297 static int term_101(ZebraMaps zebra_maps, const char *index_type,
298 const char **src, WRBUF term_dict, int space_split,
306 if (!term_pre(zebra_maps, *index_type, src, "#", "#", !space_split))
314 wrbuf_puts(term_dict, ".*");
315 dst_term[j++] = *s0++;
321 map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0),
323 if (space_split && **map == *CHR_SPACE)
327 add_non_space(s1, s0, term_dict, dst_term, &j,
331 dst_term[j++] = '\0';
336 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* errors - optional (may be NULL) output.  When the term starts with
            '+', one digit, '+' and at least one more character, *errors
            is set to that digit's value.
   Characters from the set "^\()[].*+?|-" are written to term_dict
   unchanged, so they keep their regular-expression meaning; other input
   goes through zebra_maps_search() and add_non_space().
   NOTE(review): loop structure and return values are not visible here. */
337 static int term_103(ZebraMaps zebra_maps, const char *index_type,
339 WRBUF term_dict, int *errors, int space_split,
347 if (!term_pre(zebra_maps, *index_type, src, "^\\()[].*+?|", "(", !space_split))
350 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
351 isdigit(((const unsigned char *)s0)[1]))
353 *errors = s0[1] - '0';
360 if (strchr("^\\()[].*+?|-", *s0))
363 wrbuf_putc(term_dict, *s0);
371 map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0),
373 if (space_split && **map == *CHR_SPACE)
377 add_non_space(s1, s0, term_dict, dst_term, &j,
387 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Regexp-1 term handler: delegates to term_103() with a NULL errors
   pointer, so no error count is reported back to the caller. */
388 static int term_102(ZebraMaps zebra_maps, const char *index_type,
390 WRBUF term_dict, int space_split, char *dst_term)
392 return term_103(zebra_maps, index_type, src, term_dict, NULL, space_split,
397 /* term_104: handle term, process ?, * and # (the set passed to term_pre) */
/* Visible behaviour: after one special character is copied to dst_term,
   an optional run of decimal digits is read into 'limit' and also copied
   to dst_term.  The expression pieces written to term_dict are ".?", ".*"
   and ".".
   NOTE(review): which special character produces which piece, and how
   'limit' controls the ".?" repetition, is decided in lines not visible
   here.  Other input goes through zebra_maps_search() and add_non_space().
   dst_term is NUL-terminated at the end. */
398 static int term_104(ZebraMaps zebra_maps, const char *index_type,
399 const char **src, WRBUF term_dict, int space_split,
407 if (!term_pre(zebra_maps, *index_type, src, "?*#", "?*#", !space_split))
415 dst_term[j++] = *s0++;
416 if (*s0 >= '0' && *s0 <= '9')
419 while (*s0 >= '0' && *s0 <= '9')
421 limit = limit * 10 + (*s0 - '0');
422 dst_term[j++] = *s0++;
428 wrbuf_puts(term_dict, ".?");
433 wrbuf_puts(term_dict, ".*");
439 wrbuf_puts(term_dict, ".*");
440 dst_term[j++] = *s0++;
445 wrbuf_puts(term_dict, ".");
446 dst_term[j++] = *s0++;
452 map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0),
454 if (space_split && **map == *CHR_SPACE)
458 add_non_space(s1, s0, term_dict, dst_term, &j,
462 dst_term[j++] = '\0';
467 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* right_truncate - string_term() passes 1 for truncation 105 and 0 for 106.
   The trailing ".*" written before dst_term is NUL-terminated is presumably
   conditional on this flag (the test is not visible).
   Within the term, ".*" and "." are written to term_dict for the special
   characters (presumably '*' and '!' respectively; the tests are not
   visible), and the source character is copied to dst_term. */
468 static int term_105(ZebraMaps zebra_maps, const char *index_type,
469 const char **src, WRBUF term_dict, int space_split,
470 char *dst_term, int right_truncate)
477 if (!term_pre(zebra_maps, *index_type, src, "*!", "*!", !space_split))
485 wrbuf_puts(term_dict, ".*");
486 dst_term[j++] = *s0++;
491 wrbuf_putc(term_dict, '.');
492 dst_term[j++] = *s0++;
498 map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0),
500 if (space_split && **map == *CHR_SPACE)
504 add_non_space(s1, s0, term_dict, dst_term, &j,
509 wrbuf_puts(term_dict, ".*");
510 dst_term[j++] = '\0';
516 /* gen_regular_rel - generate regular expression from relation
517 * val: border value (inclusive)
518 * islt: 1 if <=; 0 if >=.
/* Build a regular expression matching decimal integers on one side of the
   border value val (inclusive) and append it to term_dict.
   islt - 1 for "<= val", 0 for ">= val"
   The expression is assembled in the fixed-size stack buffer dst_buf with
   strcpy/strcat and no bounds checks; per the original comment the size is
   "assuming enough for expansion".
   The two visible prefixes are "(-[0-9]+|(" and "([0-9]+|-(".
   NOTE(review): the conditions choosing between them are not visible;
   presumably they depend on the sign of val and on islt. */
520 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
522 char dst_buf[20*5*20]; /* assuming enough for expansion */
529 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
533 strcpy(dst, "(-[0-9]+|(");
541 strcpy(dst, "([0-9]+|-(");
552 sprintf(numstr, "%d", val);
/* walk the digits of val from the last to the first; pos counts the steps */
553 for (w = strlen(numstr); --w >= 0; pos++)
572 strcpy(dst + dst_p, numstr);
573 dst_p = strlen(dst) - pos - 1;
601 for (i = 0; i<pos; i++)
614 /* match everything less than 10^(pos-1) */
616 for (i = 1; i<pos; i++)
617 strcat(dst, "[0-9]?");
621 /* match everything greater than 10^pos */
622 for (i = 0; i <= pos; i++)
623 strcat(dst, "[0-9]");
624 strcat(dst, "[0-9]*");
627 wrbuf_puts(term_dict, dst);
/* Copy one logical character from wsrc, at offset *indx, to term_p.
   A backslash is copied and then a further character is copied, so an
   escaped pair is presumably moved as a unit.
   NOTE(review): the increments of *indx are in lines not visible here;
   callers rely on *indx being advanced past what was copied. */
630 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
632 const char *src = wrbuf_cstr(wsrc);
633 if (src[*indx] == '\\')
635 wrbuf_putc(term_p, src[*indx]);
638 wrbuf_putc(term_p, src[*indx]);
643 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
644 * ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
645 * >= abc ([b-].*|a[c-].*|ab[c-].*)
646 * ([^-a].*|a[^-b].*|ab[c-].*)
647 * < abc ([-0].*|a[-a].*|ab[-b].*)
648 * ([^a-].*|a[^b-].*|ab[^c-].*)
649 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
650 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Translate the relation attribute (type 2) of zapt into a dictionary
   regular expression for a string term and append it to term_dict.
   term_sub   - in/out cursor into the query term
   term_dst   - receives the plain-text term (filled by term_100())
   error_code - set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for a relation
                value that is not handled
   For the relations logged as "<", "<=", ">" and ">=" the term is first
   produced into the scratch buffer term_component via term_100().  An
   alternation "( ... | ... )" is then built with one branch per character
   position: the prefix up to that position followed by a character class
   and ".*".  "<=" also adds the term itself as a branch; ">" also adds the
   term followed by ".+".  "=" wraps the term_100() output in parentheses.
   "always matches" just consumes the rest of the term.
   term_component is destroyed on every visible exit path.
   NOTE(review): case labels, loop bodies and return values are partly in
   lines not visible in this listing. */
652 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
653 const char **term_sub, WRBUF term_dict,
654 const Odr_oid *attributeSet,
655 const char *index_type, int space_split, char *term_dst,
661 WRBUF term_component = wrbuf_alloc();
663 attr_init_APT(&relation, zapt, 2);
664 relation_value = attr_find(&relation, NULL);
667 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
668 switch (relation_value)
671 if (!term_100(zh->reg->zebra_maps, index_type,
672 term_sub, term_component,
673 space_split, term_dst))
675 wrbuf_destroy(term_component);
678 yaz_log(log_level_rpn, "Relation <");
680 wrbuf_putc(term_dict, '(');
681 for (i = 0; i < wrbuf_len(term_component); )
686 wrbuf_putc(term_dict, '|');
688 string_rel_add_char(term_dict, term_component, &j);
690 wrbuf_putc(term_dict, '[');
692 wrbuf_putc(term_dict, '^');
694 wrbuf_putc(term_dict, 1);
695 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
697 string_rel_add_char(term_dict, term_component, &i);
698 wrbuf_putc(term_dict, '-');
700 wrbuf_putc(term_dict, ']');
701 wrbuf_putc(term_dict, '.');
702 wrbuf_putc(term_dict, '*');
704 wrbuf_putc(term_dict, ')');
707 if (!term_100(zh->reg->zebra_maps, index_type,
708 term_sub, term_component,
709 space_split, term_dst))
711 wrbuf_destroy(term_component);
714 yaz_log(log_level_rpn, "Relation <=");
716 wrbuf_putc(term_dict, '(');
717 for (i = 0; i < wrbuf_len(term_component); )
722 string_rel_add_char(term_dict, term_component, &j);
723 wrbuf_putc(term_dict, '[');
725 wrbuf_putc(term_dict, '^');
727 wrbuf_putc(term_dict, 1);
728 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
730 string_rel_add_char(term_dict, term_component, &i);
731 wrbuf_putc(term_dict, '-');
733 wrbuf_putc(term_dict, ']');
734 wrbuf_putc(term_dict, '.');
735 wrbuf_putc(term_dict, '*');
737 wrbuf_putc(term_dict, '|');
/* final branch: the term itself, so that equality also matches */
739 for (i = 0; i < wrbuf_len(term_component); )
740 string_rel_add_char(term_dict, term_component, &i);
741 wrbuf_putc(term_dict, ')');
744 if (!term_100(zh->reg->zebra_maps, index_type,
745 term_sub, term_component, space_split, term_dst))
747 wrbuf_destroy(term_component);
750 yaz_log(log_level_rpn, "Relation >");
752 wrbuf_putc(term_dict, '(');
753 for (i = 0; i < wrbuf_len(term_component); )
758 string_rel_add_char(term_dict, term_component, &j);
759 wrbuf_putc(term_dict, '[');
761 wrbuf_putc(term_dict, '^');
762 wrbuf_putc(term_dict, '-');
763 string_rel_add_char(term_dict, term_component, &i);
765 wrbuf_putc(term_dict, ']');
766 wrbuf_putc(term_dict, '.');
767 wrbuf_putc(term_dict, '*');
769 wrbuf_putc(term_dict, '|');
/* final branch: the term followed by at least one more character */
771 for (i = 0; i < wrbuf_len(term_component); )
772 string_rel_add_char(term_dict, term_component, &i);
773 wrbuf_putc(term_dict, '.');
774 wrbuf_putc(term_dict, '+');
775 wrbuf_putc(term_dict, ')');
778 if (!term_100(zh->reg->zebra_maps, index_type, term_sub,
779 term_component, space_split, term_dst))
781 wrbuf_destroy(term_component);
784 yaz_log(log_level_rpn, "Relation >=");
786 wrbuf_putc(term_dict, '(');
787 for (i = 0; i < wrbuf_len(term_component); )
792 wrbuf_putc(term_dict, '|');
794 string_rel_add_char(term_dict, term_component, &j);
795 wrbuf_putc(term_dict, '[');
/* the last position uses an inclusive class so that the term itself matches */
797 if (i < wrbuf_len(term_component)-1)
799 wrbuf_putc(term_dict, '^');
800 wrbuf_putc(term_dict, '-');
801 string_rel_add_char(term_dict, term_component, &i);
805 string_rel_add_char(term_dict, term_component, &i);
806 wrbuf_putc(term_dict, '-');
808 wrbuf_putc(term_dict, ']');
809 wrbuf_putc(term_dict, '.');
810 wrbuf_putc(term_dict, '*');
812 wrbuf_putc(term_dict, ')');
819 yaz_log(log_level_rpn, "Relation =");
820 if (!term_100(zh->reg->zebra_maps, index_type, term_sub,
821 term_component, space_split, term_dst))
823 wrbuf_destroy(term_component);
826 wrbuf_puts(term_dict, "(");
827 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
828 wrbuf_puts(term_dict, ")");
831 yaz_log(log_level_rpn, "Relation always matches");
832 /* skip to end of term (we don't care what it is) */
833 while (**term_sub != '\0')
837 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
838 wrbuf_destroy(term_component);
841 wrbuf_destroy(term_component);
/* Forward declaration: string_term() is defined after term_trunc(), which
   calls it. */
845 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
846 const char **term_sub,
848 const Odr_oid *attributeSet, NMEM stream,
849 struct grep_info *grep_info,
850 const char *index_type, int complete_flag,
852 const char *xpath_use,
853 struct ord_list **ol);
/* Read the per-term hit limit (attribute type 11) and the term reference
   id (attribute type 10) from zapt.
   hits_limit_value - out: the effective limit, see the rules below
   term_ref_id_str  - out: the reference id as a string; a numeric id
                      (>= 0) is formatted into a 20-byte nmem buffer
   Limit rules:
     attribute absent (-1), ref id present -> zh->approx_limit
     attribute absent (-1), no ref id      -> 0 (no counting)
     attribute is 0                        -> zh->approx_limit
     any other value                       -> used as given */
855 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
856 Z_AttributesPlusTerm *zapt,
857 zint *hits_limit_value,
858 const char **term_ref_id_str,
861 AttrType term_ref_id_attr;
862 AttrType hits_limit_attr;
865 attr_init_APT(&hits_limit_attr, zapt, 11);
866 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
868 attr_init_APT(&term_ref_id_attr, zapt, 10);
869 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
870 if (term_ref_id_int >= 0)
872 char *res = nmem_malloc(nmem, 20);
873 sprintf(res, "%d", term_ref_id_int);
874 *term_ref_id_str = res;
877 /* no limit given ? */
878 if (*hits_limit_value == -1)
880 if (*term_ref_id_str)
882 /* use global if term_ref is present */
883 *hits_limit_value = zh->approx_limit;
887 /* no counting if term_ref is not present */
888 *hits_limit_value = 0;
891 else if (*hits_limit_value == 0)
893 /* 0 is the same as global limit */
894 *hits_limit_value = zh->approx_limit;
896 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
897 *term_ref_id_str ? *term_ref_id_str : "none",
/* Build the result set for one term of a term list.
   Steps visible here:
     1. read hit limit and term reference id with term_limits_APT()
     2. reset grep_info->isam_p_indx and collect matching ISAM pointers via
        string_term(), using a temporary WRBUF for the dictionary expression
     3. unless *term_sub became NULL ("no more terms"), create *rset with
        rset_trunc() from the collected pointers, preserving positions
   NOTE(review): error handling between these steps and the return value
   are in lines not visible in this listing. */
902 static ZEBRA_RES term_trunc(ZebraHandle zh,
903 Z_AttributesPlusTerm *zapt,
904 const char **term_sub,
905 const Odr_oid *attributeSet, NMEM stream,
906 struct grep_info *grep_info,
907 const char *index_type, int complete_flag,
909 const char *rank_type,
910 const char *xpath_use,
913 struct rset_key_control *kc)
917 zint hits_limit_value;
918 const char *term_ref_id_str = 0;
919 WRBUF term_dict = wrbuf_alloc();
922 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
923 grep_info->isam_p_indx = 0;
924 res = string_term(zh, zapt, term_sub, term_dict,
925 attributeSet, stream, grep_info,
926 index_type, complete_flag,
927 term_dst, xpath_use, &ol);
928 wrbuf_destroy(term_dict);
931 if (!*term_sub) /* no more terms ? */
933 yaz_log(log_level_rpn, "term: %s", term_dst);
934 *rset = rset_trunc(zh, grep_info->isam_p_buf,
935 grep_info->isam_p_indx, term_dst,
936 strlen(term_dst), rank_type, 1 /* preserve pos */,
937 zapt->term->which, rset_nmem,
938 kc, kc->scope, ol, index_type, hits_limit_value,
/* Expand one string term into dictionary matches.
   term_sub  - in/out cursor into the query term
   term_dict - scratch WRBUF for the dictionary regular expression; it is
               rewound before use
   grep_info - collects the ISAM pointers of matching dictionary entries
   ol        - out: ordinal list created here, with the term's index
               ordinal appended
   Steps visible here:
     1. resolve the index ordinal with zebra_apt_get_ord()
     2. write the ordinal prefix "(<ord bytes>)", each byte preceded by the
        internal escape character 1
     3. append a term expression chosen by the truncation attribute
        (type 5); an unsupported value raises
        YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE
     4. run dict_lookup_grep() over the expression; regex_range is the
        error count that term_103() may set for Regexp-2 terms
   NOTE(review): break statements, error returns and the final update of
   *term_sub are in lines not visible in this listing. */
945 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
946 const char **term_sub,
948 const Odr_oid *attributeSet, NMEM stream,
949 struct grep_info *grep_info,
950 const char *index_type, int complete_flag,
952 const char *xpath_use,
953 struct ord_list **ol)
957 int truncation_value;
959 struct rpn_char_map_info rcmi;
961 int space_split = complete_flag ? 0 : 1;
964 int max_pos, prefix_len = 0;
969 *ol = ord_list_create(stream);
971 rpn_char_map_prepare(zh->reg, *index_type, &rcmi);
972 attr_init_APT(&truncation, zapt, 5);
973 truncation_value = attr_find(&truncation, NULL);
974 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
976 termp = *term_sub; /* start of term for each database */
978 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
979 attributeSet, &ord) != ZEBRA_OK)
985 wrbuf_rewind(term_dict); /* new dictionary regexp term */
987 *ol = ord_list_append(stream, *ol, ord);
988 ord_len = key_SU_encode(ord, ord_buf);
990 wrbuf_putc(term_dict, '(');
992 for (i = 0; i<ord_len; i++)
994 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
995 wrbuf_putc(term_dict, ord_buf[i]);
997 wrbuf_putc(term_dict, ')');
/* remember where the term expression starts so logging can skip the prefix */
999 prefix_len = wrbuf_len(term_dict);
1001 switch (truncation_value)
1003 case -1: /* not specified */
1004 case 100: /* do not truncate */
1005 if (!string_relation(zh, zapt, &termp, term_dict,
1007 index_type, space_split, term_dst,
1012 zebra_setError(zh, relation_error, 0);
1019 case 1: /* right truncation */
1020 wrbuf_putc(term_dict, '(');
1021 if (!term_100(zh->reg->zebra_maps, index_type,
1022 &termp, term_dict, space_split, term_dst))
1027 wrbuf_puts(term_dict, ".*)");
1029 case 2: /* left truncation */
1030 wrbuf_puts(term_dict, "(.*");
1031 if (!term_100(zh->reg->zebra_maps, index_type,
1032 &termp, term_dict, space_split, term_dst))
1037 wrbuf_putc(term_dict, ')');
1039 case 3: /* left&right truncation */
1040 wrbuf_puts(term_dict, "(.*");
1041 if (!term_100(zh->reg->zebra_maps, index_type,
1042 &termp, term_dict, space_split, term_dst))
1047 wrbuf_puts(term_dict, ".*)");
1049 case 101: /* process # in term */
1050 wrbuf_putc(term_dict, '(');
1051 if (!term_101(zh->reg->zebra_maps, index_type,
1052 &termp, term_dict, space_split, term_dst))
1057 wrbuf_puts(term_dict, ")");
1059 case 102: /* Regexp-1 */
1060 wrbuf_putc(term_dict, '(');
1061 if (!term_102(zh->reg->zebra_maps, index_type,
1062 &termp, term_dict, space_split, term_dst))
1067 wrbuf_putc(term_dict, ')');
1069 case 103: /* Regexp-2 */
1071 wrbuf_putc(term_dict, '(');
1072 if (!term_103(zh->reg->zebra_maps, index_type,
1073 &termp, term_dict, &regex_range,
1074 space_split, term_dst))
1079 wrbuf_putc(term_dict, ')');
1081 case 104: /* process # and ! in term */
1082 wrbuf_putc(term_dict, '(');
1083 if (!term_104(zh->reg->zebra_maps, index_type,
1084 &termp, term_dict, space_split, term_dst))
1089 wrbuf_putc(term_dict, ')');
1091 case 105: /* process * and ! in term */
1092 wrbuf_putc(term_dict, '(');
1093 if (!term_105(zh->reg->zebra_maps, index_type,
1094 &termp, term_dict, space_split, term_dst, 1))
1099 wrbuf_putc(term_dict, ')');
1101 case 106: /* process * and ! in term */
1102 wrbuf_putc(term_dict, '(');
1103 if (!term_105(zh->reg->zebra_maps, index_type,
1104 &termp, term_dict, space_split, term_dst, 0))
1109 wrbuf_putc(term_dict, ')');
1112 zebra_setError_zint(zh,
1113 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1120 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1121 esc_str(buf, sizeof(buf), input, strlen(input));
1123 yaz_log(log_level_rpn, "dict_lookup_grep: %s",
1124 wrbuf_cstr(term_dict) + prefix_len);
1125 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1126 grep_info, &max_pos,
1127 ord_len /* number of "exact" chars */,
1130 zebra_set_partial_result(zh);
1132 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1134 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the buffers owned by a grep_info: the term number array and the
   ISAM pointer array.  NOTE(review): the term_no release is presumably
   conditional on TERMSET_DISABLE; that directive is not visible here. */
1140 static void grep_info_delete(struct grep_info *grep_info)
1143 xfree(grep_info->term_no);
1145 xfree(grep_info->isam_p_buf);
/* Initialise a grep_info for one query leaf.
   trunc_max defaults to the "truncmax" resource (default "10000") and is
   overridden by attribute type 13 when that attribute is present.
   Buffers start out empty (size 0, NULL pointers) and grow on demand.
   Attribute type 8 names a termset.  The visible code contains both an
   error path (YAZ_BIB1_UNSUPP_SEARCH, "termset") and a path that creates
   the set with resultSetAdd(); the latter fails with
   YAZ_BIB1_ILLEGAL_RESULT_SET_NAME.
   NOTE(review): which path is compiled presumably depends on
   TERMSET_DISABLE; the directives are not visible here.
   The caller in this file checks the result against ZEBRA_FAIL. */
1148 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1149 Z_AttributesPlusTerm *zapt,
1150 struct grep_info *grep_info,
1154 grep_info->term_no = 0;
1156 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1157 grep_info->isam_p_size = 0;
1158 grep_info->isam_p_buf = NULL;
1160 grep_info->reg_type = reg_type;
1161 grep_info->termset = 0;
1167 attr_init_APT(&truncmax, zapt, 13);
1168 truncmax_value = attr_find(&truncmax, NULL);
1169 if (truncmax_value != -1)
1170 grep_info->trunc_max = truncmax_value;
1175 int termset_value_numeric;
1176 const char *termset_value_string;
1178 attr_init_APT(&termset, zapt, 8);
1179 termset_value_numeric =
1180 attr_find_ex(&termset, NULL, &termset_value_string);
1181 if (termset_value_numeric != -1)
1184 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1188 const char *termset_name = 0;
/* a value other than -2 is a numeric name; otherwise the string value is used */
1189 if (termset_value_numeric != -2)
1192 sprintf(resname, "%d", termset_value_numeric);
1193 termset_name = resname;
1196 termset_name = termset_value_string;
1197 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1198 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1199 if (!grep_info->termset)
1201 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1211 \brief Create result set(s) for list of terms
1212 \param zh Zebra Handle
1213 \param zapt Attributes Plus Term (RPN leaf)
1214 \param termz term as used in query but converted to UTF-8
1215 \param attributeSet default attribute set
1216 \param stream memory for result
1217 \param index_type register type ("w", "p",..)
1218 \param complete_flag whether it's phrases or not
1219 \param rank_type term flags for ranking
1220 \param xpath_use use attribute for X-Path (-1 for no X-path)
1221 \param rset_nmem memory for result sets
1222 \param result_sets output result set for each term in list (output)
1223 \param num_result_sets number of output result sets
1224 \param kc rset key control to be used for created result sets
/* Implementation notes:
   - the result_sets array lives in 'stream' memory and is regrown (new
     block, old entries copied) whenever alloc_sets is exhausted
   - each term is produced by term_trunc(); on failure every result set
     created so far is deleted and the grep_info released
   - a NULL result set from term_trunc() is tested (presumably ending the
     list; that line is not visible)
   - the grep_info is released before the normal return */
1226 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1227 Z_AttributesPlusTerm *zapt,
1229 const Odr_oid *attributeSet,
1231 const char *index_type, int complete_flag,
1232 const char *rank_type,
1233 const char *xpath_use,
1235 RSET **result_sets, int *num_result_sets,
1236 struct rset_key_control *kc)
1238 char term_dst[IT_MAX_WORD+1];
1239 struct grep_info grep_info;
1240 const char *termp = termz;
1243 *num_result_sets = 0;
1245 if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL)
1251 if (alloc_sets == *num_result_sets)
1254 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1257 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1258 alloc_sets = alloc_sets + add;
1259 *result_sets = rnew;
1261 res = term_trunc(zh, zapt, &termp, attributeSet,
1263 index_type, complete_flag,
1264 term_dst, rank_type,
1265 xpath_use, rset_nmem,
1266 &(*result_sets)[*num_result_sets],
1268 if (res != ZEBRA_OK)
1271 for (i = 0; i < *num_result_sets; i++)
1272 rset_delete((*result_sets)[i]);
1273 grep_info_delete(&grep_info);
1276 if ((*result_sets)[*num_result_sets] == 0)
1278 (*num_result_sets)++;
1283 grep_info_delete(&grep_info);
/* Produce the "first in field" result set for a position attribute
   (type 3).
   YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE is raised in three places: next to
   the switch on the position value, when the register type has no
   first-in-field support (zebra_maps_is_first_in_field), and when neither
   isamb nor isamc is available.
   Otherwise the dictionary key <encoded ord> + FIRST_IN_FIELD_STR is
   looked up; its value holds a length byte equal to sizeof(ISAM_P)
   followed by the ISAM pointer, from which *rset is created
   (rsisamb or rsisamc).
   NOTE(review): the case labels selecting which position values reach the
   lookup are not visible in this listing. */
1287 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1288 Z_AttributesPlusTerm *zapt,
1289 const Odr_oid *attributeSet,
1290 const char *index_type,
1293 struct rset_key_control *kc)
1299 char term_dict[100];
1304 attr_init_APT(&position, zapt, 3);
1305 position_value = attr_find(&position, NULL);
1306 switch(position_value)
1315 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1320 if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, *index_type))
1322 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1327 if (!zh->reg->isamb && !zh->reg->isamc)
1329 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1334 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1335 attributeSet, &ord) != ZEBRA_OK)
1339 ord_len = key_SU_encode(ord, ord_buf);
/* dictionary key = encoded ordinal followed by the first-in-field marker */
1340 memcpy(term_dict, ord_buf, ord_len);
1341 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1342 val = dict_lookup(zh->reg->dict, term_dict);
1345 assert(*val == sizeof(ISAM_P));
1346 memcpy(&isam_p, val+1, sizeof(isam_p));
1349 *rset = rsisamb_create(rset_nmem, kc, kc->scope,
1350 zh->reg->isamb, isam_p, 0);
1351 else if (zh->reg->isamc)
1352 *rset = rsisamc_create(rset_nmem, kc, kc->scope,
1353 zh->reg->isamc, isam_p, 0);
/* Phrase search: one result set per term (term_list_trunc), combined with
   an ordered proximity operator (relation 3, distance 1).
   When there is at least one term, rpn_search_APT_position() is consulted;
   a position set it returns is placed in front of the term sets in a new
   array.
   Result: a null set for zero sets, the set itself for exactly one,
   otherwise the proximity combination.
   NOTE(review): the condition guarding the prepend, and the matching
   update of num_result_sets, are in lines not visible here. */
1358 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1359 Z_AttributesPlusTerm *zapt,
1360 const char *termz_org,
1361 const Odr_oid *attributeSet,
1363 const char *index_type, int complete_flag,
1364 const char *rank_type,
1365 const char *xpath_use,
1368 struct rset_key_control *kc)
1370 RSET *result_sets = 0;
1371 int num_result_sets = 0;
1373 term_list_trunc(zh, zapt, termz_org, attributeSet,
1374 stream, index_type, complete_flag,
1375 rank_type, xpath_use,
1377 &result_sets, &num_result_sets, kc);
1379 if (res != ZEBRA_OK)
1382 if (num_result_sets > 0)
1385 res = rpn_search_APT_position(zh, zapt, attributeSet,
1387 rset_nmem, &first_set,
1389 if (res != ZEBRA_OK)
1393 RSET *nsets = nmem_malloc(stream,
1394 sizeof(RSET) * (num_result_sets+1));
1395 nsets[0] = first_set;
1396 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1397 result_sets = nsets;
1401 if (num_result_sets == 0)
1402 *rset = rset_create_null(rset_nmem, kc, 0);
1403 else if (num_result_sets == 1)
1404 *rset = result_sets[0];
1406 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1407 num_result_sets, result_sets,
1408 1 /* ordered */, 0 /* exclusion */,
1409 3 /* relation */, 1 /* distance */);
/* OR-list search: one result set per term (term_list_trunc), combined
   with rset_create_or().
   For each term set rpn_search_APT_position() is consulted; a position
   set it returns is paired with the term set (tmp_set[0] = position set,
   tmp_set[1] = term set) in an ordered proximity set that replaces the
   term set.  If the position lookup fails, all term sets are deleted.
   Result: a null set for zero sets, the set itself for exactly one,
   otherwise the OR combination.
   NOTE(review): the condition guarding the pairing is not visible here. */
1415 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1416 Z_AttributesPlusTerm *zapt,
1417 const char *termz_org,
1418 const Odr_oid *attributeSet,
1420 const char *index_type,
1422 const char *rank_type,
1423 const char *xpath_use,
1426 struct rset_key_control *kc)
1428 RSET *result_sets = 0;
1429 int num_result_sets = 0;
1432 term_list_trunc(zh, zapt, termz_org, attributeSet,
1433 stream, index_type, complete_flag,
1434 rank_type, xpath_use,
1436 &result_sets, &num_result_sets, kc);
1437 if (res != ZEBRA_OK)
1440 for (i = 0; i<num_result_sets; i++)
1443 res = rpn_search_APT_position(zh, zapt, attributeSet,
1445 rset_nmem, &first_set,
1447 if (res != ZEBRA_OK)
1449 for (i = 0; i<num_result_sets; i++)
1450 rset_delete(result_sets[i]);
1458 tmp_set[0] = first_set;
1459 tmp_set[1] = result_sets[i];
1461 result_sets[i] = rset_create_prox(
1462 rset_nmem, kc, kc->scope,
1464 1 /* ordered */, 0 /* exclusion */,
1465 3 /* relation */, 1 /* distance */);
1468 if (num_result_sets == 0)
1469 *rset = rset_create_null(rset_nmem, kc, 0);
1470 else if (num_result_sets == 1)
1471 *rset = result_sets[0];
1473 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1474 num_result_sets, result_sets);
/* AND-list search: same shape as rpn_search_APT_or_list(), but the
   per-term result sets are combined with rset_create_and().
   For each term set rpn_search_APT_position() is consulted; a position
   set it returns is paired with the term set in an ordered proximity set
   that replaces the term set.  If the position lookup fails, all term
   sets are deleted.
   Result: a null set for zero sets, the set itself for exactly one,
   otherwise the AND combination.
   NOTE(review): the condition guarding the pairing is not visible here. */
1480 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1481 Z_AttributesPlusTerm *zapt,
1482 const char *termz_org,
1483 const Odr_oid *attributeSet,
1485 const char *index_type,
1487 const char *rank_type,
1488 const char *xpath_use,
1491 struct rset_key_control *kc)
1493 RSET *result_sets = 0;
1494 int num_result_sets = 0;
1497 term_list_trunc(zh, zapt, termz_org, attributeSet,
1498 stream, index_type, complete_flag,
1499 rank_type, xpath_use,
1501 &result_sets, &num_result_sets,
1503 if (res != ZEBRA_OK)
1505 for (i = 0; i<num_result_sets; i++)
1508 res = rpn_search_APT_position(zh, zapt, attributeSet,
1510 rset_nmem, &first_set,
1512 if (res != ZEBRA_OK)
1514 for (i = 0; i<num_result_sets; i++)
1515 rset_delete(result_sets[i]);
1523 tmp_set[0] = first_set;
1524 tmp_set[1] = result_sets[i];
1526 result_sets[i] = rset_create_prox(
1527 rset_nmem, kc, kc->scope,
1529 1 /* ordered */, 0 /* exclusion */,
1530 3 /* relation */, 1 /* distance */);
1535 if (num_result_sets == 0)
1536 *rset = rset_create_null(rset_nmem, kc, 0);
1537 else if (num_result_sets == 1)
1538 *rset = result_sets[0];
1540 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1541 num_result_sets, result_sets);
/* Translate the relation attribute (type 2) of zapt for a numeric term
   into a dictionary regular expression, append it to term_dict and run
   the dictionary grep.
   The term text is produced by term_100() into a scratch WRBUF and
   converted with atoi().  NOTE(review): non-numeric input therefore
   yields 0 without any diagnostic.
   Strict relations are turned into inclusive ones for gen_regular_rel():
     <   ->  <= value-1        >   ->  >= value+1
   "=" becomes "(0*<value>)", which accepts leading zeros.
   An unhandled relation sets *error_code to
   YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.
   term_num is destroyed on every visible exit path.
   NOTE(review): case labels and return values are in lines not visible in
   this listing. */
1547 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1548 const char **term_sub,
1550 const Odr_oid *attributeSet,
1551 struct grep_info *grep_info,
1553 const char *index_type,
1561 WRBUF term_num = wrbuf_alloc();
1564 attr_init_APT(&relation, zapt, 2);
1565 relation_value = attr_find(&relation, NULL);
1567 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1569 switch (relation_value)
1572 yaz_log(log_level_rpn, "Relation <");
1573 if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1576 wrbuf_destroy(term_num);
1579 term_value = atoi(wrbuf_cstr(term_num));
1580 gen_regular_rel(term_dict, term_value-1, 1);
1583 yaz_log(log_level_rpn, "Relation <=");
1584 if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1587 wrbuf_destroy(term_num);
1590 term_value = atoi(wrbuf_cstr(term_num));
1591 gen_regular_rel(term_dict, term_value, 1);
1594 yaz_log(log_level_rpn, "Relation >=");
1595 if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1598 wrbuf_destroy(term_num);
1601 term_value = atoi(wrbuf_cstr(term_num));
1602 gen_regular_rel(term_dict, term_value, 0);
1605 yaz_log(log_level_rpn, "Relation >");
1606 if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1609 wrbuf_destroy(term_num);
1612 term_value = atoi(wrbuf_cstr(term_num));
1613 gen_regular_rel(term_dict, term_value+1, 0);
1617 yaz_log(log_level_rpn, "Relation =");
1618 if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1621 wrbuf_destroy(term_num);
1624 term_value = atoi(wrbuf_cstr(term_num));
1625 wrbuf_printf(term_dict, "(0*%d)", term_value);
1628 /* term_tmp untouched.. */
1629 while (**term_sub != '\0')
1633 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1634 wrbuf_destroy(term_num);
1637 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1638 0, grep_info, max_pos, 0, grep_handle);
1641 zebra_set_partial_result(zh);
1643 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1644 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1645 wrbuf_destroy(term_num);
/* Expand one numeric term into dictionary matches.
   Mirrors string_term(): resolves the index ordinal, writes the ordinal
   prefix "(<ord bytes>)" with each byte preceded by the escape character
   1, then lets numeric_relation() append the numeric expression and run
   the dictionary grep.
   ol - out: ordinal list created here, with the ordinal appended
   A zero return from numeric_relation() leads to zebra_setError() with the
   relation error.  NOTE(review): presumably only when relation_error is
   non-zero; that test and the return statements are not visible here. */
1649 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1650 const char **term_sub,
1652 const Odr_oid *attributeSet, NMEM stream,
1653 struct grep_info *grep_info,
1654 const char *index_type, int complete_flag,
1656 const char *xpath_use,
1657 struct ord_list **ol)
1660 struct rpn_char_map_info rcmi;
1662 int relation_error = 0;
1663 int ord, ord_len, i;
1666 *ol = ord_list_create(stream);
1668 rpn_char_map_prepare(zh->reg, *index_type, &rcmi);
1672 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1673 attributeSet, &ord) != ZEBRA_OK)
1678 wrbuf_rewind(term_dict);
1680 *ol = ord_list_append(stream, *ol, ord);
1682 ord_len = key_SU_encode(ord, ord_buf);
1684 wrbuf_putc(term_dict, '(');
1685 for (i = 0; i < ord_len; i++)
1687 wrbuf_putc(term_dict, 1);
1688 wrbuf_putc(term_dict, ord_buf[i]);
1690 wrbuf_putc(term_dict, ')');
1692 if (!numeric_relation(zh, zapt, &termp, term_dict,
1693 attributeSet, grep_info, &max_pos, index_type,
1694 term_dst, &relation_error))
1698 zebra_setError(zh, relation_error, 0);
1705 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric search: each term of termz is expanded with numeric_term() and
   turned into a result set by rset_trunc() (positions not preserved).
   The sets are combined with rset_create_and().
   The result_sets array lives in 'stream' memory and is regrown when
   alloc_sets is exhausted.  A fresh WRBUF is allocated and destroyed for
   each term.
   The term loop ends when numeric_term() fails or termp becomes NULL.
   Result: a null set for zero sets, the set itself for exactly one,
   otherwise the AND combination.
   NOTE(review): cleanup of already-built sets when res != ZEBRA_OK is in
   lines not visible in this listing. */
1710 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1711 Z_AttributesPlusTerm *zapt,
1713 const Odr_oid *attributeSet,
1715 const char *index_type,
1717 const char *rank_type,
1718 const char *xpath_use,
1721 struct rset_key_control *kc)
1723 char term_dst[IT_MAX_WORD+1];
1724 const char *termp = termz;
1725 RSET *result_sets = 0;
1726 int num_result_sets = 0;
1728 struct grep_info grep_info;
1730 zint hits_limit_value;
1731 const char *term_ref_id_str = 0;
1733 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1735 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1736 if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL)
1740 struct ord_list *ol;
1741 WRBUF term_dict = wrbuf_alloc();
1742 if (alloc_sets == num_result_sets)
1745 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1748 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1749 alloc_sets = alloc_sets + add;
1752 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1753 grep_info.isam_p_indx = 0;
1754 res = numeric_term(zh, zapt, &termp, term_dict,
1755 attributeSet, stream, &grep_info,
1756 index_type, complete_flag,
1757 term_dst, xpath_use, &ol);
1758 wrbuf_destroy(term_dict);
1759 if (res == ZEBRA_FAIL || termp == 0)
1761 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1762 result_sets[num_result_sets] =
1763 rset_trunc(zh, grep_info.isam_p_buf,
1764 grep_info.isam_p_indx, term_dst,
1765 strlen(term_dst), rank_type,
1766 0 /* preserve position */,
1767 zapt->term->which, rset_nmem,
1768 kc, kc->scope, ol, index_type,
1771 if (!result_sets[num_result_sets])
1777 grep_info_delete(&grep_info);
1779 if (res != ZEBRA_OK)
1781 if (num_result_sets == 0)
1782 *rset = rset_create_null(rset_nmem, kc, 0);
1783 else if (num_result_sets == 1)
1784 *rset = result_sets[0];
1786 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1787 num_result_sets, result_sets);
/*
 * rpn_search_APT_local: "local" search strategy.
 *
 * termz is converted to a system number with atozint() and the record is
 * fetched with rec_get().  The visible code then produces either a null
 * result set, or a temporary result set (stored under the "setTmpDir"
 * resource) that is opened for writing and receives one key.
 *
 * NOTE(review): the condition choosing between the two outcomes and the
 * initialisation of `key` are not visible here; presumably the null set is
 * used when rec_get() finds no record -- confirm.
 */
1793 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1794 Z_AttributesPlusTerm *zapt,
1796 const Odr_oid *attributeSet,
1798 const char *rank_type, NMEM rset_nmem,
1800 struct rset_key_control *kc)
1803 zint sysno = atozint(termz);
1807 rec = rec_get(zh->reg->records, sysno);
1815 *rset = rset_create_null(rset_nmem, kc, 0);
1821 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1822 res_get(zh->res, "setTmpDir"), 0);
1823 rsfd = rset_open(*rset, RSETF_WRITE);
1828 rset_write(rsfd, &key);
/*
 * rpn_sort_spec: record a sort key specification taken from an APT.
 *
 * The sort relation is read from attribute type 7.  sort_sequence->specs is
 * allocated on first use with 10 slots, all set to 0.  For a general term
 * the slot index i is parsed from the term bytes with atoi_n().  A
 * Z_SortKeySpec is then built from stream memory (generic sort element,
 * sort attributes taken from zapt->attributes with a copy of attributeSet
 * as id, caseSensitivity 0, which = Z_SortKeySpec_null) and stored in
 * sort_sequence->specs[i].  *rset is set to a null result set.
 *
 * NOTE(review): what happens for non-general terms and for
 * i >= num_specs is not visible here; presumably both fall back to slot 0
 * -- confirm.
 */
1834 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1835 const Odr_oid *attributeSet, NMEM stream,
1836 Z_SortKeySpecList *sort_sequence,
1837 const char *rank_type,
1840 struct rset_key_control *kc)
1843 int sort_relation_value;
1844 AttrType sort_relation_type;
1849 attr_init_APT(&sort_relation_type, zapt, 7);
1850 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort key seen: create the spec array with every slot empty */
1852 if (!sort_sequence->specs)
1854 sort_sequence->num_specs = 10;
1855 sort_sequence->specs = (Z_SortKeySpec **)
1856 nmem_malloc(stream, sort_sequence->num_specs *
1857 sizeof(*sort_sequence->specs));
1858 for (i = 0; i<sort_sequence->num_specs; i++)
1859 sort_sequence->specs[i] = 0;
1861 if (zapt->term->which != Z_Term_general)
1864 i = atoi_n((char *) zapt->term->u.general->buf,
1865 zapt->term->u.general->len);
1866 if (i >= sort_sequence->num_specs)
1868 sprintf(termz, "%d", i);
1870 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1871 sks->sortElement = (Z_SortElement *)
1872 nmem_malloc(stream, sizeof(*sks->sortElement));
1873 sks->sortElement->which = Z_SortElement_generic;
1874 sk = sks->sortElement->u.generic = (Z_SortKey *)
1875 nmem_malloc(stream, sizeof(*sk));
1876 sk->which = Z_SortKey_sortAttributes;
1877 sk->u.sortAttributes = (Z_SortAttributes *)
1878 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1880 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1881 sk->u.sortAttributes->list = zapt->attributes;
1883 sks->sortRelation = (int *)
1884 nmem_malloc(stream, sizeof(*sks->sortRelation));
/* relation 1 selects ascending, 2 descending; the remaining visible
   assignment also selects ascending */
1885 if (sort_relation_value == 1)
1886 *sks->sortRelation = Z_SortKeySpec_ascending;
1887 else if (sort_relation_value == 2)
1888 *sks->sortRelation = Z_SortKeySpec_descending;
1890 *sks->sortRelation = Z_SortKeySpec_ascending;
1892 sks->caseSensitivity = (int *)
1893 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1894 *sks->caseSensitivity = 0;
1896 sks->which = Z_SortKeySpec_null;
1897 sks->u.null = odr_nullval ();
1898 sort_sequence->specs[i] = sks;
/* a sort spec contributes no hits of its own */
1899 *rset = rset_create_null(rset_nmem, kc, 0);
/*
 * rpn_check_xpath: decide whether the use attribute (type 1) of zapt is an
 * XPath expression and, if so, parse it.
 *
 * The string value of the use attribute is fetched with attr_find_ex().
 * When it is present and begins with '/', it is parsed into at most `max`
 * steps in xpath[] by zebra_parse_xpath_str(), whose return value is
 * returned.
 *
 * NOTE(review): the value returned when the string is missing or does not
 * start with '/' is not visible here; presumably a negative "no XPath"
 * indicator -- confirm.
 */
1904 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1905 const Odr_oid *attributeSet,
1906 struct xpath_location_step *xpath, int max,
1909 const Odr_oid *curAttributeSet = attributeSet;
1911 const char *use_string = 0;
1913 attr_init_APT(&use, zapt, 1);
1914 attr_find_ex(&use, &curAttributeSet, &use_string);
1916 if (!use_string || *use_string != '/')
1919 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc: look up `term` in the dictionary under the ordinal found for
 * (index_type, xpath_use) and return the matches as an RSET.
 *
 * A null result set is returned when grep_info_prepare() fails.  A second
 * null-set return is visible below; NOTE(review): its condition is not
 * shown -- presumably the ordinal lookup failed -- confirm.  Otherwise the
 * dictionary pattern is "(" + encoded ordinal bytes + ")" + term, matched
 * with dict_lookup_grep(); the positions gathered in grep_info are passed
 * to rset_trunc() with flags "void" and term type
 * Z_Term_characterString.  grep_info and the pattern buffer are released
 * before returning.
 */
1924 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1925 const char *index_type, const char *term,
1926 const char *xpath_use,
1928 struct rset_key_control *kc)
1930 struct grep_info grep_info;
1931 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1932 zinfo_index_category_index,
1933 index_type, xpath_use);
1934 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
1935 return rset_create_null(rset_nmem, kc, 0);
1938 return rset_create_null(rset_nmem, kc, 0);
1944 WRBUF term_dict = wrbuf_alloc();
1945 int ord_len = key_SU_encode(ord, ord_buf);
1946 int term_type = Z_Term_characterString;
1947 const char *flags = "void";
1949 wrbuf_putc(term_dict, '(');
/* every byte of the encoded ordinal is preceded by a byte of value 1;
   presumably an escape marker for the dictionary pattern -- confirm */
1950 for (i = 0; i<ord_len; i++)
1952 wrbuf_putc(term_dict, 1);
1953 wrbuf_putc(term_dict, ord_buf[i]);
1955 wrbuf_putc(term_dict, ')');
1956 wrbuf_puts(term_dict, term);
1958 grep_info.isam_p_indx = 0;
1959 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
1960 &grep_info, &max_pos, 0, grep_handle);
1961 yaz_log(YLOG_DEBUG, "%s %d positions", term,
1962 grep_info.isam_p_indx);
1963 rset = rset_trunc(zh, grep_info.isam_p_buf,
1964 grep_info.isam_p_indx, term, strlen(term),
1965 flags, 1, term_type, rset_nmem,
1966 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
1967 0 /* term_ref_id_str */);
1968 grep_info_delete(&grep_info);
1969 wrbuf_destroy(term_dict);
/*
 * rpn_search_xpath: restrict a term result set to the XPath location steps
 * in xpath[0..xpath_len-1].
 *
 * A null `rset` on entry sets always_matches to 1.  Levels are processed
 * from the last one down to 0.  For each level a path pattern (xpath_rev)
 * is assembled from xpath[level] back to xpath[1]; the begin-tag and
 * end-tag sets for that pattern are fetched with xpath_trunc()
 * (ZEBRA_XPATH_ELM_BEGIN and ZEBRA_XPATH_ELM_END) and combined with the
 * current set through rset_create_between().  When the level carries a
 * relation predicate with a non-empty name, an attribute set
 * (ZEBRA_XPATH_ATTR_NAME) is looked up too and handed to
 * rset_create_between().  One visible branch takes the start-tag set as
 * the result directly; presumably that is the always-matches case --
 * confirm.
 *
 * NOTE(review): many lines (braces, the per-character loops, continue and
 * return statements, the delimiters of the explanatory comment) are not
 * visible here; the lines of the form "/a/b val range(...)" below appear
 * to be examples from that comment, not code.
 */
1975 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1976 NMEM stream, const char *rank_type, RSET rset,
1977 int xpath_len, struct xpath_location_step *xpath,
1980 struct rset_key_control *kc)
1983 int always_matches = rset ? 0 : 1;
1991 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
1992 for (i = 0; i<xpath_len; i++)
1994 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2006 a[@attr = value]/b[@other = othervalue]
2008 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2009 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2010 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2011 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2012 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2013 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2017 dict_grep_cmap(zh->reg->dict, 0, 0);
2020 int level = xpath_len;
2023 while (--level >= 0)
2025 WRBUF xpath_rev = wrbuf_alloc();
2027 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* assemble the pattern from the current level back towards level 1 */
2029 for (i = level; i >= 1; --i)
2031 const char *cp = xpath[i].part;
2037 wrbuf_puts(xpath_rev, "[^/]*");
2038 else if (*cp == ' ')
2039 wrbuf_puts(xpath_rev, "\001 ");
2041 wrbuf_putc(xpath_rev, *cp);
2043 /* wrbuf_putc does not null-terminate , but
2044 wrbuf_puts below ensures it does.. so xpath_rev
2045 is OK iff length is > 0 */
2047 wrbuf_puts(xpath_rev, "/");
2049 else if (i == 1) /* // case */
2050 wrbuf_puts(xpath_rev, ".*");
/* attribute predicate: the lookup pattern is the relation name with its
   first character skipped, optionally followed by '=' and the value with
   every character found in REGEX_CHARS backslash-escaped */
2052 if (xpath[level].predicate &&
2053 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2054 xpath[level].predicate->u.relation.name[0])
2056 WRBUF wbuf = wrbuf_alloc();
2057 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2058 if (xpath[level].predicate->u.relation.value)
2060 const char *cp = xpath[level].predicate->u.relation.value;
2061 wrbuf_putc(wbuf, '=');
2065 if (strchr(REGEX_CHARS, *cp))
2066 wrbuf_putc(wbuf, '\\');
2067 wrbuf_putc(wbuf, *cp);
2071 rset_attr = xpath_trunc(
2072 zh, stream, "0", wrbuf_cstr(wbuf),
2073 ZEBRA_XPATH_ATTR_NAME,
2075 wrbuf_destroy(wbuf);
2081 wrbuf_destroy(xpath_rev);
2085 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2086 wrbuf_cstr(xpath_rev));
/* an empty pattern yields no tag lookups for this level */
2087 if (wrbuf_len(xpath_rev))
2089 rset_start_tag = xpath_trunc(zh, stream, "0",
2090 wrbuf_cstr(xpath_rev),
2091 ZEBRA_XPATH_ELM_BEGIN,
2094 rset = rset_start_tag;
2097 rset_end_tag = xpath_trunc(zh, stream, "0",
2098 wrbuf_cstr(xpath_rev),
2099 ZEBRA_XPATH_ELM_END,
2102 rset = rset_create_between(rset_nmem, kc, kc->scope,
2103 rset_start_tag, rset,
2104 rset_end_tag, rset_attr);
2107 wrbuf_destroy(xpath_rev);
/* Capacity of the xpath_location_step array that rpn_search_database()
   declares and passes to rpn_check_xpath(). */
2115 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT() below calls rpn_search_database()
   before its definition appears. */
2117 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2118 Z_AttributesPlusTerm *zapt,
2119 const Odr_oid *attributeSet, NMEM stream,
2120 Z_SortKeySpecList *sort_sequence,
2123 struct rset_key_control *kc);
/*
 * rpn_search_APT: evaluate one attributes-plus-term operand against each of
 * the num_bases databases named in basenames[].
 *
 * One RSET slot per database is allocated from stream.  For every database
 * zebraExplain_curDatabase() selects it (a non-zero return leads to
 * YAZ_BIB1_DATABASE_UNAVAILABLE) and rpn_search_database() fills
 * rsets[i].  On failure the sets created so far are deleted.  On success
 * *rset is a null set when num_bases is 0, or an OR (rset_create_or) of the
 * per-database sets.
 *
 * NOTE(review): the loop exits, the bounds of the clean-up loop and one
 * branch of the final selection (presumably num_bases == 1 using rsets[0]
 * directly) are not visible here -- confirm.
 */
2125 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2126 const Odr_oid *attributeSet, NMEM stream,
2127 Z_SortKeySpecList *sort_sequence,
2128 int num_bases, char **basenames,
2131 struct rset_key_control *kc)
2133 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2134 ZEBRA_RES res = ZEBRA_OK;
2136 for (i = 0; i < num_bases; i++)
2139 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2141 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2146 res = rpn_search_database(zh, zapt, attributeSet, stream,
2148 rset_nmem, rsets+i, kc);
2149 if (res != ZEBRA_OK)
2152 if (res != ZEBRA_OK)
2153 { /* must clean up the already created sets */
2155 rset_delete(rsets[i]);
2162 else if (num_bases == 0)
2163 *rset = rset_create_null(rset_nmem, kc, 0);
2165 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/*
 * rpn_search_database: evaluate one attributes-plus-term operand against
 * the currently selected database.
 *
 * zebra_maps_attr() derives index_type, search_type, rank_type,
 * complete_flag and sort_flag from the APT, and the term is converted to
 * UTF-8 in termz.  A sort request is delegated to rpn_sort_spec().  The use
 * attribute is checked for an XPath expression with rpn_check_xpath();
 * xpath_use becomes ZEBRA_XPATH_ATTR_CDATA when the last step starts with
 * '@' and ZEBRA_XPATH_CDATA otherwise, and with relation value 103 the
 * XPath search is run with no term set.  Otherwise the search is
 * dispatched on search_type ("phrase", "and-list", "or-list", "local",
 * "numeric"); a visible fallback sets YAZ_BIB1_UNSUPP_SEARCH.  The
 * resulting set is finally passed through rpn_search_xpath().
 *
 * NOTE(review): the guarding conditions (for example on sort_flag and
 * xpath_len) and the early returns are not visible here.
 */
2171 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2172 Z_AttributesPlusTerm *zapt,
2173 const Odr_oid *attributeSet, NMEM stream,
2174 Z_SortKeySpecList *sort_sequence,
2177 struct rset_key_control *kc)
2179 ZEBRA_RES res = ZEBRA_OK;
2180 const char *index_type;
2181 char *search_type = NULL;
2182 char rank_type[128];
2185 char termz[IT_MAX_WORD+1];
2187 const char *xpath_use = 0;
2188 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2192 log_level_rpn = yaz_log_module_level("rpn");
2195 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2196 rank_type, &complete_flag, &sort_flag);
2198 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2199 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2200 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2201 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2203 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2207 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2208 rank_type, rset_nmem, rset, kc);
2209 /* consider if an X-Path query is used */
2210 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2211 xpath, MAX_XPATH_STEPS, stream);
/* NOTE(review): xpath[xpath_len-1] reads out of bounds if this point can
   be reached with xpath_len == 0; verify the guard that is not visible
   here excludes that case */
2214 if (xpath[xpath_len-1].part[0] == '@')
2215 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2217 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2224 attr_init_APT(&relation, zapt, 2);
2225 relation_value = attr_find(&relation, NULL);
2227 if (relation_value == 103) /* alwaysmatches */
2229 *rset = 0; /* signal no "term" set */
2230 return rpn_search_xpath(zh, stream, rank_type, *rset,
2231 xpath_len, xpath, rset_nmem, rset, kc);
2236 /* search using one of the various search type strategies
2237 termz is our UTF-8 search term
2238 attributeSet is top-level default attribute set
2239 stream is ODR for search
2240 reg_id is the register type
2241 complete_flag is 1 for complete subfield, 0 for incomplete
2242 xpath_use is use-attribute to be used for X-Path search, 0 for none
2244 if (!strcmp(search_type, "phrase"))
2246 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2247 index_type, complete_flag, rank_type,
2252 else if (!strcmp(search_type, "and-list"))
2254 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2255 index_type, complete_flag, rank_type,
2260 else if (!strcmp(search_type, "or-list"))
2262 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2263 index_type, complete_flag, rank_type,
2268 else if (!strcmp(search_type, "local"))
2270 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2271 rank_type, rset_nmem, rset, kc);
2273 else if (!strcmp(search_type, "numeric"))
2275 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2276 index_type, complete_flag, rank_type,
2283 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2286 if (res != ZEBRA_OK)
2290 return rpn_search_xpath(zh, stream, rank_type, *rset,
2291 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top() calls rpn_search_structure()
   before its definition, and the function also calls itself for the two
   children of a complex node. */
2294 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2295 const Odr_oid *attributeSet,
2296 NMEM stream, NMEM rset_nmem,
2297 Z_SortKeySpecList *sort_sequence,
2298 int num_bases, char **basenames,
2299 RSET **result_sets, int *num_result_sets,
2300 Z_Operator *parent_op,
2301 struct rset_key_control *kc);
/*
 * rpn_get_top_approx_limit: walk an RPN query tree looking at attribute
 * type 12 of its APT operands.
 *
 * For a complex node the function recurses into s1 and, while the result
 * is still ZEBRA_OK, into s2.  For a simple node holding an APT, attribute
 * type 12 is looked up with attr_find() (the local is named
 * global_hits_limit_attr).
 *
 * NOTE(review): the remaining parameter(s) and what is done with the value
 * found are not visible here; presumably it is stored through an output
 * parameter as the approximation limit -- confirm.
 */
2303 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2306 ZEBRA_RES res = ZEBRA_OK;
2307 if (zs->which == Z_RPNStructure_complex)
2309 if (res == ZEBRA_OK)
2310 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2312 if (res == ZEBRA_OK)
2313 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2316 else if (zs->which == Z_RPNStructure_simple)
2318 if (zs->u.simple->which == Z_Operand_APT)
2320 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2321 AttrType global_hits_limit_attr;
2324 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2326 l = attr_find(&global_hits_limit_attr, NULL);
/*
 * rpn_search_top: entry point for evaluating a whole RPN query tree.
 *
 * A key control object is created with zebra_key_control_create() and the
 * tree is evaluated by rpn_search_structure() with no parent operator.  On
 * failure every result set returned so far is deleted.  On success exactly
 * one result set is expected (enforced with assert) and is stored in
 * *result_set.
 *
 * NOTE(review): the release of kc and the return statements are not
 * visible here.
 */
2334 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2335 const Odr_oid *attributeSet,
2336 NMEM stream, NMEM rset_nmem,
2337 Z_SortKeySpecList *sort_sequence,
2338 int num_bases, char **basenames,
2341 RSET *result_sets = 0;
2342 int num_result_sets = 0;
2344 struct rset_key_control *kc = zebra_key_control_create(zh);
2346 res = rpn_search_structure(zh, zs, attributeSet,
2349 num_bases, basenames,
2350 &result_sets, &num_result_sets,
2351 0 /* no parent op */,
2353 if (res != ZEBRA_OK)
2356 for (i = 0; i<num_result_sets; i++)
2357 rset_delete(result_sets[i]);
/* with no parent operator the tree must have been reduced to one set */
2362 assert(num_result_sets == 1);
2363 assert(result_sets);
2364 assert(*result_sets);
2365 *result_set = *result_sets;
/*
 * rpn_search_structure: recursively evaluate an RPN query node and return
 * its result set(s) in *result_sets / *num_result_sets.
 *
 * Complex node: both children (s1, s2) are evaluated; if either fails, the
 * sets produced so far are deleted.  The children's sets are concatenated
 * into one array from stream.  When there is no parent operator, the
 * parent's operator differs from this node's, or this node's operator is
 * neither AND nor OR, the sets are combined immediately (and / or / not /
 * prox) into one set which becomes the only result; otherwise the
 * concatenated list is left for the caller.
 *
 * Simple node: an APT operand is evaluated by rpn_search_APT(), a result
 * set id is resolved by resultSetRef(); the single resulting set is
 * returned in a one-element array.  Other cases set
 * YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): braces, switch header, break and return statements and
 * several call arguments are not visible here.
 */
2371 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2372 const Odr_oid *attributeSet,
2373 NMEM stream, NMEM rset_nmem,
2374 Z_SortKeySpecList *sort_sequence,
2375 int num_bases, char **basenames,
2376 RSET **result_sets, int *num_result_sets,
2377 Z_Operator *parent_op,
2378 struct rset_key_control *kc)
2380 *num_result_sets = 0;
2381 if (zs->which == Z_RPNStructure_complex)
2384 Z_Operator *zop = zs->u.complex->roperator;
2385 RSET *result_sets_l = 0;
2386 int num_result_sets_l = 0;
2387 RSET *result_sets_r = 0;
2388 int num_result_sets_r = 0;
2390 res = rpn_search_structure(zh, zs->u.complex->s1,
2391 attributeSet, stream, rset_nmem,
2393 num_bases, basenames,
2394 &result_sets_l, &num_result_sets_l,
2396 if (res != ZEBRA_OK)
2399 for (i = 0; i<num_result_sets_l; i++)
2400 rset_delete(result_sets_l[i]);
2403 res = rpn_search_structure(zh, zs->u.complex->s2,
2404 attributeSet, stream, rset_nmem,
2406 num_bases, basenames,
2407 &result_sets_r, &num_result_sets_r,
/* right side failed: drop the sets from both children */
2409 if (res != ZEBRA_OK)
2412 for (i = 0; i<num_result_sets_l; i++)
2413 rset_delete(result_sets_l[i]);
2414 for (i = 0; i<num_result_sets_r; i++)
2415 rset_delete(result_sets_r[i]);
2419 /* make a new list of result for all children */
2420 *num_result_sets = num_result_sets_l + num_result_sets_r;
2421 *result_sets = nmem_malloc(stream, *num_result_sets *
2422 sizeof(**result_sets));
2423 memcpy(*result_sets, result_sets_l,
2424 num_result_sets_l * sizeof(**result_sets));
2425 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2426 num_result_sets_r * sizeof(**result_sets));
2428 if (!parent_op || parent_op->which != zop->which
2429 || (zop->which != Z_Operator_and &&
2430 zop->which != Z_Operator_or))
2432 /* parent node different from this one (or non-present) */
2433 /* we must combine result sets now */
2437 case Z_Operator_and:
2438 rset = rset_create_and(rset_nmem, kc,
2440 *num_result_sets, *result_sets);
2443 rset = rset_create_or(rset_nmem, kc,
2444 kc->scope, 0, /* termid */
2445 *num_result_sets, *result_sets);
2447 case Z_Operator_and_not:
2448 rset = rset_create_not(rset_nmem, kc,
/* proximity: errors are raised unless the unit is a known one and that
   known unit is Z_ProxUnit_word */
2453 case Z_Operator_prox:
2454 if (zop->u.prox->which != Z_ProximityOperator_known)
2457 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2461 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2463 zebra_setError_zint(zh,
2464 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2465 *zop->u.prox->u.known);
2470 rset = rset_create_prox(rset_nmem, kc,
2472 *num_result_sets, *result_sets,
2473 *zop->u.prox->ordered,
2474 (!zop->u.prox->exclusion ?
2475 0 : *zop->u.prox->exclusion),
2476 *zop->u.prox->relationType,
2477 *zop->u.prox->distance );
2481 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the children's list as the only result */
2484 *num_result_sets = 1;
2485 *result_sets = nmem_malloc(stream, *num_result_sets *
2486 sizeof(**result_sets));
2487 (*result_sets)[0] = rset;
2490 else if (zs->which == Z_RPNStructure_simple)
2495 if (zs->u.simple->which == Z_Operand_APT)
2497 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2498 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2499 attributeSet, stream, sort_sequence,
2500 num_bases, basenames, rset_nmem, &rset,
2502 if (res != ZEBRA_OK)
2505 else if (zs->u.simple->which == Z_Operand_resultSetId)
2507 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2508 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2512 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2513 zs->u.simple->u.resultSetId);
2520 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields a one-element result list */
2523 *num_result_sets = 1;
2524 *result_sets = nmem_malloc(stream, *num_result_sets *
2525 sizeof(**result_sets));
2526 (*result_sets)[0] = rset;
2530 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2541 * indent-tabs-mode: nil
2543 * vim: shiftwidth=4 tabstop=8 expandtab