1 /* This file is part of the Zebra server.
2 Copyright (C) Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
43 #define TERMSET_DISABLE 1
/* Character-map callback registered with dict_grep_cmap() by
 * rpn_char_map_prepare(). vp is the rpn_char_map_info context; the input is
 * mapped with zebra_maps_input() on its zm member, with the last argument
 * fixed at 0.
 * NOTE(review): interior lines are missing from this listing; the yaz_log
 * calls look like a debug dump of the mapped bytes -- confirm. */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp); /* one mapped byte, in hex */
/* Select the character-map callback that dictionary grep on reg->dict uses.
 * When zebra_maps_is_icu(zm) holds, dict_grep_cmap() is called with 0, 0;
 * the other call registers rpn_char_map_handler with map_info as context.
 * NOTE(review): the else between the two calls and any assignment of
 * map_info->zm are not visible in this listing -- confirm against the file. */
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 if (zebra_maps_is_icu(zm))
69 dict_grep_cmap(reg->dict, 0, 0);
71 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
85 const char *index_type;
/* Record one dictionary hit in a grep_info result buffer.
 *   name - dictionary key; key_SU_decode() reads an ordinal from its start
 *          and the term text follows at name+len
 *   info - first byte is asserted to equal the size of one ISAM_P entry;
 *          the entry itself starts at info+1
 *   p    - grep_info owning isam_p_buf / term_no and their fill index
 * Both arrays are regrown to 2*size+100 entries when the buffer is full.
 * Fix: the regrown term_no array is now filled from the old p->term_no;
 * it used to be filled from p->isam_p_buf, i.e. with ISAM pointer bytes.
 * NOTE(review): several lines of this function are missing from the listing;
 * only the visible statements are described. */
89 static int add_isam_p(const char *name, const char *info,
94 log_level_rpn = yaz_log_module_level("rpn");
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
102 if (p->isam_p_indx == p->isam_p_size) /* buffer full: grow it */
104 ISAM_P *new_isam_p_buf;
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
117 p->isam_p_buf = new_isam_p_buf;
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* carry over the old term numbers, not the ISAM pointers */
123 memcpy(new_term_no, p->term_no,
124 p->isam_p_indx * sizeof(*p->term_no));
127 p->term_no = new_term_no;
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
136 char term_tmp[IT_MAX_WORD];
138 const char *index_name;
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
/* Callback handed to dict_lookup_grep(): forwards to add_isam_p() with the
 * opaque client pointer p cast to struct grep_info, and returns its result. */
154 static int grep_handle(char *name, const char *info, void *p)
156 return add_isam_p(name, info, (struct grep_info *) p);
/* Prepare *src for reading the next term by skipping leading white space.
 *   zm    - character map used to classify input via zebra_maps_input()
 *   src   - pointer to the cursor into the query term (read into s0)
 *   ct1   - optional set of characters tested with strchr() before any
 *           mapping is attempted (callers pass their operator characters)
 *   first - passed through as the last argument of zebra_maps_input()
 * Callers test the result with !term_pre(...) and give up when it is zero.
 * NOTE(review): the loop and the return statements are not visible in this
 * listing. */
159 static int term_pre(zebra_map_t zm, const char **src,
160 const char *ct1, int first)
162 const char *s1, *s0 = *src;
165 /* skip white space */
168 if (ct1 && strchr(ct1, *s0))
171 map = zebra_maps_input(zm, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
/* Build a printable dump of in_buf[0 .. in_size) in out_buf, for logging.
 * Every input byte is appended as two hex digits, a colon, a character and
 * a space. Bytes outside 32..126 take the branch below (the statement that
 * picks the substitute character is not visible in this listing).
 * out_size must exceed 20 (asserted); once the text comes within 20 bytes of
 * out_size, two dots are appended.
 * NOTE(review): presumably the loop stops after the dots -- confirm. */
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k < in_size; k++)
192 int c = in_buf[k] & 0xff; /* treat the byte as unsigned */
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
207 #define REGEX_CHARS " ^[]()|.*+?!\"$\\"
/* Append one non-space token to the dictionary regexp and the display term.
 * The source bytes [start, end) are written to display_term. For term_dict
 * the visible code has two paths: the raw characters, each preceded by a
 * backslash when it occurs in REGEX_CHARS, and the mapped string map[0].
 * NOTE(review): the condition choosing between the two paths (presumably
 * q_map_match) is on a line missing from this listing -- confirm. */
209 static void add_non_space(const char *start, const char *end,
212 const char **map, int q_map_match)
214 size_t sz = end - start;
216 wrbuf_write(display_term, start, sz);
221 if (strchr(REGEX_CHARS, *start))
222 wrbuf_putc(term_dict, '\\');
223 wrbuf_putc(term_dict, *start);
230 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
232 wrbuf_puts(term_dict, map[0]);
237 /* ICU sort keys seem to be of the form
238 basechars \x01 accents \x01 length
239 For now we'll just right truncate from basechars . This
240 may give false hits due to accents not being used.
/* Return the length of the leading part of buf that precedes the last two
 * \x01 separators, scanning backwards from index i. If fewer separators
 * are found the scan stops at 0. */
242 static size_t icu_basechars(const char *buf, size_t i)
244 while (i > 0 && buf[--i] != '\x01') /* skip length */
246 while (i > 0 && buf[--i] != '\x01') /* skip accents */
248 return i; /* only basechars left */
/* ICU-map term handler that leaves regexp operators in place; called from
 * the ICU branch of string_term().
 * Visible behavior: the input is scanned from *src; a space is tested
 * together with space_split; characters that are neither NUL nor in
 * REGEX_CHARS plus '-' belong to a plain run [s0, s1). Such a run is
 * tokenized, cut down with icu_basechars(), and copied to term_dict with a
 * backslash before bytes found in REGEX_CHARS or equal to a backslash; its
 * display form goes to display_term. Other characters are copied to both
 * buffers as they are. The text \x01\x01.* is appended to term_dict.
 * NOTE(review): loop structure, the condition guarding the \x01 byte and
 * the return values are not visible in this listing. */
251 static int term_102_icu(zebra_map_t zm,
252 const char **src, WRBUF term_dict, int space_split,
256 const char *s0 = *src, *s1;
262 if (*s1 == ' ' && space_split)
264 else if (*s1 && !strchr(REGEX_CHARS "-", *s1))
268 /* EOF or regex reserved char */
271 const char *res_buf = 0;
273 const char *display_buf;
276 zebra_map_tokenize_start(zm, s0, s1 - s0);
278 if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
279 &display_buf, &display_len))
282 res_len = icu_basechars(res_buf, res_len);
283 for (i = 0; i < res_len; i++)
285 if (strchr(REGEX_CHARS "\\", res_buf[i]))
286 wrbuf_putc(term_dict, '\\');
288 wrbuf_putc(term_dict, '\x01');
290 wrbuf_putc(term_dict, res_buf[i]);
292 wrbuf_write(display_term, display_buf, display_len);
300 wrbuf_putc(term_dict, *s1);
301 wrbuf_putc(display_term, *s1);
308 wrbuf_puts(term_dict, "\x01\x01.*");
/* ICU-map term handler for plain and truncated terms; string_term() calls
 * it with a mode argument of 0, 1, 2 or 3 (1 is labelled right truncation
 * at the call site) and a token_number.
 * Visible behavior: the whole of *src is tokenized and tokens 0 ..
 * token_number are read; the display form is written to display_term; the
 * sort key is cut down with icu_basechars() and copied to term_dict with a
 * backslash before bytes found in REGEX_CHARS or equal to a backslash.
 * A .* may be written before and after the key, and \x01\x01.* is an
 * alternative ending.
 * NOTE(review): the conditions selecting the .* prefix/suffix and the
 * guard on the \x01 byte are on lines missing from this listing. */
313 static int term_100_icu(zebra_map_t zm,
314 const char **src, WRBUF term_dict,
320 const char *res_buf = 0;
322 const char *display_buf;
325 zebra_map_tokenize_start(zm, *src, strlen(*src));
326 for (i = 0; i <= token_number; i++)
328 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
329 &display_buf, &display_len))
332 wrbuf_write(display_term, display_buf, display_len);
335 res_len = icu_basechars(res_buf, res_len);
338 wrbuf_puts(term_dict, ".*");
339 for (i = 0; i < res_len; i++)
341 if (strchr(REGEX_CHARS "\\", res_buf[i]))
342 wrbuf_putc(term_dict, '\\');
344 wrbuf_putc(term_dict, '\x01');
346 wrbuf_putc(term_dict, res_buf[i]);
349 wrbuf_puts(term_dict, ".*");
351 wrbuf_puts(term_dict, "\x01\x01.*");
/* term_100: handle term, where trunc = none(no operators at all) */
/* Reads the term at *src through zebra_maps_search() and appends it to
 * term_dict (regexp-escaped) and display_term. A zero result from
 * term_pre() is tested first. Mapped spaces are handled in two ways: one
 * branch tests **map against CHR_SPACE directly, the other (complete
 * subfield) remembers the span of spaces and replays it, escaped, before
 * the next non-space token.
 * NOTE(review): which branch depends on space_split, and the return
 * values, are on lines missing from this listing. */
356 static int term_100(zebra_map_t zm,
357 const char **src, WRBUF term_dict, int space_split,
364 const char *space_start = 0;
365 const char *space_end = 0;
367 if (!term_pre(zm, src, 0, !space_split))
374 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
377 if (**map == *CHR_SPACE)
380 else /* complete subfield only. */
382 if (**map == *CHR_SPACE)
383 { /* save space mapping for later .. */
388 else if (space_start)
389 { /* reload last space */
390 while (space_start < space_end)
392 if (strchr(REGEX_CHARS, *space_start))
393 wrbuf_putc(term_dict, '\\');
394 wrbuf_putc(display_term, *space_start);
395 wrbuf_putc(term_dict, *space_start);
400 space_start = space_end = 0;
405 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* term_101: handle term, where trunc = Process # */
/* The operator character passed to term_pre() is #. In the visible code
 * one branch appends .* to term_dict and copies the current character to
 * display_term; other input is mapped with zebra_maps_search() and added
 * through add_non_space(). A mapped space is tested together with
 * space_split.
 * NOTE(review): the test that selects the .* branch is not visible in
 * this listing -- presumably *s0 equals # -- confirm. */
412 static int term_101(zebra_map_t zm,
413 const char **src, WRBUF term_dict, int space_split,
420 if (!term_pre(zm, src, "#", !space_split))
428 wrbuf_puts(term_dict, ".*");
429 wrbuf_putc(display_term, *s0);
436 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
437 if (space_split && **map == *CHR_SPACE)
441 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* term_103: handle term, where trunc = re-2 (regular expressions) */
/* errors may be NULL (term_102 passes NULL). When it is not NULL and the
 * term starts with plus, digit, plus and at least one more character, the
 * digit value is stored in *errors. Regexp operator characters are copied
 * unchanged to both term_dict and display_term; other input is mapped with
 * zebra_maps_search() and added through add_non_space().
 * NOTE(review): how far s0 is advanced after the prefix, and the return
 * values, are on lines missing from this listing. */
449 static int term_103(zebra_map_t zm, const char **src,
450 WRBUF term_dict, int *errors, int space_split,
457 if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
460 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
461 isdigit(((const unsigned char *)s0)[1]))
463 *errors = s0[1] - '0';
470 if (strchr("^\\()[].*+?|-", *s0))
472 wrbuf_putc(display_term, *s0);
473 wrbuf_putc(term_dict, *s0);
481 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
482 if (space_split && **map == *CHR_SPACE)
486 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
494 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Thin wrapper: same processing as term_103() but with errors passed as
 * NULL, so no value is ever stored through the errors pointer. */
495 static int term_102(zebra_map_t zm, const char **src,
496 WRBUF term_dict, int space_split, WRBUF display_term)
498 return term_103(zm, src, term_dict, NULL, space_split, display_term);
/* term_104: handle term, process ?n * # */
/* The operator characters passed to term_pre() are ? * and #. Visible
 * behavior: operator characters are echoed to display_term; a run of
 * decimal digits is accumulated into limit (and echoed as well); the
 * regexp fragments written to term_dict are .? and .* in the digit
 * handling, .* in a second branch and a single dot in a third. Other input
 * is mapped with zebra_maps_search() and added through add_non_space().
 * NOTE(review): the tests that tie each fragment to its operator, and the
 * loop that uses limit, are on lines missing from this listing. */
503 static int term_104(zebra_map_t zm, const char **src,
504 WRBUF term_dict, int space_split, WRBUF display_term)
510 if (!term_pre(zm, src, "?*#", !space_split))
518 wrbuf_putc(display_term, *s0);
520 if (*s0 >= '0' && *s0 <= '9')
523 while (*s0 >= '0' && *s0 <= '9')
525 limit = limit * 10 + (*s0 - '0');
526 wrbuf_putc(display_term, *s0);
533 wrbuf_puts(term_dict, ".?");
538 wrbuf_puts(term_dict, ".*");
544 wrbuf_puts(term_dict, ".*");
545 wrbuf_putc(display_term, *s0);
551 wrbuf_puts(term_dict, ".");
552 wrbuf_putc(display_term, *s0);
559 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
560 if (space_split && **map == *CHR_SPACE)
564 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* term_105/106: handle term, process * ! and possibly right_truncate */
/* The operator characters passed to term_pre() are backslash, * and !.
 * Visible behavior: one branch writes .* to term_dict, one writes a single
 * dot, and the backslash branch writes an escaped backslash; each echoes
 * the current character to display_term. Other input is mapped with
 * zebra_maps_search() and added through add_non_space(). A final .* is
 * written to term_dict.
 * NOTE(review): the tests for the first two branches and the condition on
 * the final .* (presumably right_truncate) are not visible in this
 * listing -- confirm. */
572 static int term_105(zebra_map_t zm, const char **src,
573 WRBUF term_dict, int space_split,
574 WRBUF display_term, int right_truncate)
580 if (!term_pre(zm, src, "\\*!", !space_split))
588 wrbuf_puts(term_dict, ".*");
589 wrbuf_putc(display_term, *s0);
595 wrbuf_putc(term_dict, '.');
596 wrbuf_putc(display_term, *s0);
599 else if (*s0 == '\\')
602 wrbuf_puts(term_dict, "\\\\");
603 wrbuf_putc(display_term, *s0);
610 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
611 if (space_split && **map == *CHR_SPACE)
615 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
619 wrbuf_puts(term_dict, ".*");
625 /* gen_regular_rel - generate regular expression from relation
626 * val: border value (inclusive)
627 * islt: 1 if <=; 0 if >=.
/* Build, in a local buffer, a regular expression for an integer relation
 * against val and append it to term_dict. The expression starts with one of
 * two sign-aware prefixes, then uses the decimal digits of val (numstr),
 * and ends with digit-count based alternatives: optional digits for shorter
 * numbers, or pos+1 mandatory digits followed by any digits for longer ones.
 * NOTE(review): dst_buf is a fixed 20*5*20 byte buffer filled with strcpy
 * and strcat; the original comment only assumes it is large enough. The
 * conditions selecting each prefix and tail are not visible in this
 * listing. */
629 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
631 char dst_buf[20*5*20]; /* assuming enough for expansion */
638 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
642 strcpy(dst, "(-[0-9]+|(");
650 strcpy(dst, "([0-9]+|-(");
661 sprintf(numstr, "%d", val);
662 for (w = strlen(numstr); --w >= 0; pos++) /* walk digits right to left */
681 strcpy(dst + dst_p, numstr);
682 dst_p = strlen(dst) - pos - 1;
710 for (i = 0; i < pos; i++)
723 /* match everything less than 10^(pos-1) */
725 for (i = 1; i < pos; i++)
726 strcat(dst, "[0-9]?");
730 /* match everything greater than 10^pos */
731 for (i = 0; i <= pos; i++)
732 strcat(dst, "[0-9]");
733 strcat(dst, "[0-9]*");
736 wrbuf_puts(term_dict, dst);
/* Copy one character of wsrc, starting at index *indx, to term_p. When the
 * character at *indx is a backslash it is copied too, so an escaped
 * character is emitted as a pair.
 * NOTE(review): the statements advancing *indx are not visible in this
 * listing; callers loop on the index and rely on it moving forward. */
739 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
741 const char *src = wrbuf_cstr(wsrc);
742 if (src[*indx] == '\\')
744 wrbuf_putc(term_p, src[*indx]);
747 wrbuf_putc(term_p, src[*indx]);
752 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
753 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
754 * >= abc ([b-].*|a[c-].*|ab[c-].*)
755 * ([^-a].*|a[^-b].*|ab[c-].*)
756 * < abc ([-0].*|a[-a].*|ab[-b].*)
757 * ([^a-].*|a[^b-].*|ab[^c-].*)
758 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
759 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Translate the relation attribute (type 2) of zapt plus the term at
 * *term_sub into a regular expression appended to term_dict.
 * For the ordering relations the term is first read into a scratch WRBUF
 * (term_component) with term_100(), then one alternative is generated per
 * character prefix using string_rel_add_char(). For equality the term is
 * wrapped in parentheses. Always-matches skips to the end of the term.
 * An unsupported relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in
 * *error_code. term_component is destroyed on every visible exit path.
 * NOTE(review): the case labels and return statements are on lines missing
 * from this listing; the log messages identify each section. */
761 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
762 const char **term_sub, WRBUF term_dict,
763 const Odr_oid *attributeSet,
764 zebra_map_t zm, int space_split,
771 WRBUF term_component = wrbuf_alloc();
773 attr_init_APT(&relation, zapt, 2);
774 relation_value = attr_find(&relation, NULL);
777 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
778 switch (relation_value)
781 if (!term_100(zm, term_sub, term_component, space_split, display_term))
783 wrbuf_destroy(term_component);
786 yaz_log(log_level_rpn, "Relation <");
788 wrbuf_putc(term_dict, '(');
789 for (i = 0; i < wrbuf_len(term_component); )
794 wrbuf_putc(term_dict, '|');
796 string_rel_add_char(term_dict, term_component, &j);
798 wrbuf_putc(term_dict, '[');
800 wrbuf_putc(term_dict, '^');
802 wrbuf_putc(term_dict, 1);
803 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
805 string_rel_add_char(term_dict, term_component, &i);
806 wrbuf_putc(term_dict, '-');
808 wrbuf_putc(term_dict, ']');
809 wrbuf_putc(term_dict, '.');
810 wrbuf_putc(term_dict, '*');
812 wrbuf_putc(term_dict, ')');
815 if (!term_100(zm, term_sub, term_component, space_split, display_term))
817 wrbuf_destroy(term_component);
820 yaz_log(log_level_rpn, "Relation <=");
822 wrbuf_putc(term_dict, '(');
823 for (i = 0; i < wrbuf_len(term_component); )
828 string_rel_add_char(term_dict, term_component, &j);
829 wrbuf_putc(term_dict, '[');
831 wrbuf_putc(term_dict, '^');
833 wrbuf_putc(term_dict, 1);
834 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
836 string_rel_add_char(term_dict, term_component, &i);
837 wrbuf_putc(term_dict, '-');
839 wrbuf_putc(term_dict, ']');
840 wrbuf_putc(term_dict, '.');
841 wrbuf_putc(term_dict, '*');
843 wrbuf_putc(term_dict, '|');
845 for (i = 0; i < wrbuf_len(term_component); ) /* the term itself */
846 string_rel_add_char(term_dict, term_component, &i);
847 wrbuf_putc(term_dict, ')');
850 if (!term_100(zm, term_sub, term_component, space_split, display_term))
852 wrbuf_destroy(term_component);
855 yaz_log(log_level_rpn, "Relation >");
857 wrbuf_putc(term_dict, '(');
858 for (i = 0; i < wrbuf_len(term_component); )
863 string_rel_add_char(term_dict, term_component, &j);
864 wrbuf_putc(term_dict, '[');
866 wrbuf_putc(term_dict, '^');
867 wrbuf_putc(term_dict, '-');
868 string_rel_add_char(term_dict, term_component, &i);
870 wrbuf_putc(term_dict, ']');
871 wrbuf_putc(term_dict, '.');
872 wrbuf_putc(term_dict, '*');
874 wrbuf_putc(term_dict, '|');
876 for (i = 0; i < wrbuf_len(term_component); ) /* the term plus .+ */
877 string_rel_add_char(term_dict, term_component, &i);
878 wrbuf_putc(term_dict, '.');
879 wrbuf_putc(term_dict, '+');
880 wrbuf_putc(term_dict, ')');
883 if (!term_100(zm, term_sub, term_component, space_split, display_term))
885 wrbuf_destroy(term_component);
888 yaz_log(log_level_rpn, "Relation >=");
890 wrbuf_putc(term_dict, '(');
891 for (i = 0; i < wrbuf_len(term_component); )
896 wrbuf_putc(term_dict, '|');
898 string_rel_add_char(term_dict, term_component, &j);
899 wrbuf_putc(term_dict, '[');
901 if (i < wrbuf_len(term_component)-1)
903 wrbuf_putc(term_dict, '^');
904 wrbuf_putc(term_dict, '-');
905 string_rel_add_char(term_dict, term_component, &i);
909 string_rel_add_char(term_dict, term_component, &i);
910 wrbuf_putc(term_dict, '-');
912 wrbuf_putc(term_dict, ']');
913 wrbuf_putc(term_dict, '.');
914 wrbuf_putc(term_dict, '*');
916 wrbuf_putc(term_dict, ')');
923 yaz_log(log_level_rpn, "Relation =");
924 if (!term_100(zm, term_sub, term_component, space_split, display_term))
926 wrbuf_destroy(term_component);
929 wrbuf_puts(term_dict, "(");
930 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
931 wrbuf_puts(term_dict, ")");
934 yaz_log(log_level_rpn, "Relation always matches");
935 /* skip to end of term (we don't care what it is) */
936 while (**term_sub != '\0')
940 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
941 wrbuf_destroy(term_component);
944 wrbuf_destroy(term_component);
/* Forward declaration of string_term(); search_term() calls it before the
 * definition appears further down in this file. */
948 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
949 const char **term_sub,
951 const Odr_oid *attributeSet, NMEM stream,
952 struct grep_info *grep_info,
953 const char *index_type, int complete_flag,
955 const char *xpath_use,
956 struct ord_list **ol,
957 zebra_map_t zm, size_t token_number);
/* Read the per-term hits limit (attribute type 11) and term reference id
 * (attribute type 10) from zapt.
 *   hits_limit_value - overwritten only when attribute 11 is present
 *                      (attr_find result differs from -1)
 *   term_ref_id_str  - receives the string form of attribute 10; a
 *                      non-negative numeric id is formatted into a 20-byte
 *                      nmem allocation
 * NOTE(review): the nmem parameter and the return statement are on lines
 * missing from this listing. */
959 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
960 Z_AttributesPlusTerm *zapt,
961 zint *hits_limit_value,
962 const char **term_ref_id_str,
965 AttrType term_ref_id_attr;
966 AttrType hits_limit_attr;
968 zint hits_limit_from_attr;
970 attr_init_APT(&hits_limit_attr, zapt, 11);
971 hits_limit_from_attr = attr_find(&hits_limit_attr, NULL);
973 attr_init_APT(&term_ref_id_attr, zapt, 10);
974 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
975 if (term_ref_id_int >= 0)
977 char *res = nmem_malloc(nmem, 20);
978 sprintf(res, "%d", term_ref_id_int);
979 *term_ref_id_str = res;
981 if (hits_limit_from_attr != -1)
982 *hits_limit_value = hits_limit_from_attr;
984 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
985 *term_ref_id_str ? *term_ref_id_str : "none",
990 /** \brief search for term (which may be truncated)
/* Look up one term and build its result set.
 * Steps visible here: per-term limits are read with zebra_term_limits_APT()
 * (hits_limit is the default), the grep_info fill index is reset,
 * string_term() runs the dictionary lookup, and on ZEBRA_OK with a non-NULL
 * *term_sub the collected ISAM pointers are turned into *rset by
 * rset_trunc(). Both scratch WRBUFs are destroyed before returning.
 * NOTE(review): the return statement is not visible in this listing. */
992 static ZEBRA_RES search_term(ZebraHandle zh,
993 Z_AttributesPlusTerm *zapt,
994 const char **term_sub,
995 const Odr_oid *attributeSet,
996 zint hits_limit, NMEM stream,
997 struct grep_info *grep_info,
998 const char *index_type, int complete_flag,
999 const char *rank_type,
1000 const char *xpath_use,
1003 struct rset_key_control *kc,
1005 size_t token_number)
1008 struct ord_list *ol;
1009 zint hits_limit_value = hits_limit;
1010 const char *term_ref_id_str = 0;
1011 WRBUF term_dict = wrbuf_alloc();
1012 WRBUF display_term = wrbuf_alloc();
1014 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1016 grep_info->isam_p_indx = 0; /* start a fresh hit list for this term */
1017 res = string_term(zh, zapt, term_sub, term_dict,
1018 attributeSet, stream, grep_info,
1019 index_type, complete_flag,
1020 display_term, xpath_use, &ol, zm, token_number);
1021 wrbuf_destroy(term_dict);
1022 if (res == ZEBRA_OK && *term_sub)
1024 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
1025 *rset = rset_trunc(zh, grep_info->isam_p_buf,
1026 grep_info->isam_p_indx, wrbuf_buf(display_term),
1027 wrbuf_len(display_term), rank_type,
1028 1 /* preserve pos */,
1029 zapt->term->which, rset_nmem,
1030 kc, kc->scope, ol, index_type, hits_limit_value,
1035 wrbuf_destroy(display_term);
/* Build the dictionary regexp for one term and run dict_lookup_grep().
 * The regexp in term_dict starts with the index ordinal, each byte escaped
 * with the internal escape byte 1 and wrapped in parentheses; prefix_len
 * remembers where the term part begins. The term part depends on the
 * truncation attribute (type 5): ICU maps go through term_100_icu() /
 * term_102_icu() and also check the relation attribute (type 2); other maps
 * use string_relation() and term_100() .. term_105(). Unsupported
 * truncation or relation values are reported with zebra_setError_zint().
 * Hits are collected into grep_info by the lookup.
 * Fix: the errors argument of term_103() is &regex_range again; the
 * ampersand sequence had been corrupted into a single non-ASCII character,
 * which is not valid C.
 * NOTE(review): many lines (case labels, returns, the grep callback
 * argument) are missing from this listing; only visible statements are
 * described. */
1039 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1040 const char **term_sub,
1042 const Odr_oid *attributeSet, NMEM stream,
1043 struct grep_info *grep_info,
1044 const char *index_type, int complete_flag,
1046 const char *xpath_use,
1047 struct ord_list **ol,
1048 zebra_map_t zm, size_t token_number)
1051 AttrType truncation;
1052 int truncation_value;
1054 struct rpn_char_map_info rcmi;
1056 int space_split = complete_flag ? 0 : 1;
1058 int regex_range = 0;
1059 int max_pos, prefix_len = 0;
1064 *ol = ord_list_create(stream);
1066 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1067 attr_init_APT(&truncation, zapt, 5);
1068 truncation_value = attr_find(&truncation, NULL);
1069 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1071 termp = *term_sub; /* start of term for each database */
1073 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1074 attributeSet, &ord) != ZEBRA_OK)
1080 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1082 *ol = ord_list_append(stream, *ol, ord);
1083 ord_len = key_SU_encode(ord, ord_buf);
1085 wrbuf_putc(term_dict, '(');
1087 for (i = 0; i < ord_len; i++)
1089 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1090 wrbuf_putc(term_dict, ord_buf[i]);
1092 wrbuf_putc(term_dict, ')');
1094 prefix_len = wrbuf_len(term_dict);
1096 if (zebra_maps_is_icu(zm))
1101 attr_init_APT(&relation, zapt, 2);
1102 relation_value = attr_find(&relation, NULL);
1103 if (relation_value == 103) /* always matches */
1104 termp += strlen(termp); /* move to end of term */
1105 else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1108 switch (truncation_value)
1110 case -1: /* not specified */
1111 case 100: /* do not truncate */
1112 if (!term_100_icu(zm, &termp, term_dict, display_term, 0, token_number))
1119 if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
1125 case 1: /* right truncation */
1126 if (!term_100_icu(zm, &termp, term_dict, display_term, 1, token_number))
1133 if (!term_100_icu(zm, &termp, term_dict, display_term, 2, token_number))
1140 if (!term_100_icu(zm, &termp, term_dict, display_term, 3, token_number))
1147 zebra_setError_zint(zh,
1148 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1155 zebra_setError_zint(zh,
1156 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1163 /* non-ICU case. using string.chr and friends */
1164 switch (truncation_value)
1166 case -1: /* not specified */
1167 case 100: /* do not truncate */
1168 if (!string_relation(zh, zapt, &termp, term_dict,
1170 zm, space_split, display_term,
1175 zebra_setError(zh, relation_error, 0);
1182 case 1: /* right truncation */
1183 wrbuf_putc(term_dict, '(');
1184 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1189 wrbuf_puts(term_dict, ".*)");
1191 case 2: /* left truncation */
1192 wrbuf_puts(term_dict, "(.*");
1193 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1198 wrbuf_putc(term_dict, ')');
1200 case 3: /* left&right truncation */
1201 wrbuf_puts(term_dict, "(.*");
1202 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1207 wrbuf_puts(term_dict, ".*)");
1209 case 101: /* process # in term */
1210 wrbuf_putc(term_dict, '(');
1211 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1216 wrbuf_puts(term_dict, ")");
1218 case 102: /* Regexp-1 */
1219 wrbuf_putc(term_dict, '(');
1220 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1225 wrbuf_putc(term_dict, ')');
1227 case 103: /* Regexp-2 */
1229 wrbuf_putc(term_dict, '(');
1230 if (!term_103(zm, &termp, term_dict, &regex_range,
1231 space_split, display_term))
1236 wrbuf_putc(term_dict, ')');
1238 case 104: /* process ?n * # term */
1239 wrbuf_putc(term_dict, '(');
1240 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1245 wrbuf_putc(term_dict, ')');
1247 case 105: /* process * ! in term and right truncate */
1248 wrbuf_putc(term_dict, '(');
1249 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1254 wrbuf_putc(term_dict, ')');
1256 case 106: /* process * ! in term */
1257 wrbuf_putc(term_dict, '(');
1258 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1263 wrbuf_putc(term_dict, ')');
1266 zebra_setError_zint(zh,
1267 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1275 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1276 esc_str(buf, sizeof(buf), input, strlen(input));
1279 WRBUF pr_wr = wrbuf_alloc();
1281 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1282 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1283 wrbuf_destroy(pr_wr);
1285 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1286 grep_info, &max_pos,
1287 ord_len /* number of "exact" chars */,
1290 zebra_set_partial_result(zh);
1292 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1294 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the two heap arrays owned by grep_info (term_no and isam_p_buf).
 * The struct itself is not freed. */
1300 static void grep_info_delete(struct grep_info *grep_info)
1303 xfree(grep_info->term_no);
1305 xfree(grep_info->isam_p_buf);
/* Initialise grep_info for a search on index_type.
 * The buffers start empty. trunc_max comes from the truncmax resource
 * (default 10000) and is overridden by attribute type 13 when present.
 * Attribute type 8 names a term set: a numeric value is formatted with %d,
 * the value -2 selects the string form; the set is created with
 * resultSetAdd() and failure is reported as
 * YAZ_BIB1_ILLEGAL_RESULT_SET_NAME. One visible branch rejects term sets
 * with YAZ_BIB1_UNSUPP_SEARCH.
 * NOTE(review): that rejecting branch is presumably compiled in when
 * TERMSET_DISABLE is set; the preprocessor lines and returns are not
 * visible in this listing -- confirm. */
1308 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1309 Z_AttributesPlusTerm *zapt,
1310 struct grep_info *grep_info,
1311 const char *index_type)
1314 grep_info->term_no = 0;
1316 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1317 grep_info->isam_p_size = 0;
1318 grep_info->isam_p_buf = NULL;
1320 grep_info->index_type = index_type;
1321 grep_info->termset = 0;
1327 attr_init_APT(&truncmax, zapt, 13);
1328 truncmax_value = attr_find(&truncmax, NULL);
1329 if (truncmax_value != -1)
1330 grep_info->trunc_max = truncmax_value;
1335 int termset_value_numeric;
1336 const char *termset_value_string;
1338 attr_init_APT(&termset, zapt, 8);
1339 termset_value_numeric =
1340 attr_find_ex(&termset, NULL, &termset_value_string);
1341 if (termset_value_numeric != -1)
1344 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1348 const char *termset_name = 0;
1349 if (termset_value_numeric != -2)
1352 sprintf(resname, "%d", termset_value_numeric);
1353 termset_name = resname;
1356 termset_name = termset_value_string;
1357 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1358 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1359 if (!grep_info->termset)
1361 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
/* Split termz into terms using one character map and create a result set
 * for each term.
 * *result_sets is grown in stream memory (nmem_malloc plus memcpy of the
 * old entries) whenever it is full. Each term is handled by search_term().
 * On failure all result sets created so far are deleted with rset_delete().
 * A zero result set is tested before the count is incremented. grep_info
 * is released on both the failure and the normal path.
 * NOTE(review): the loop header, the growth step (add) and the return
 * statements are on lines missing from this listing. */
1370 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1371 Z_AttributesPlusTerm *zapt,
1373 const Odr_oid *attributeSet,
1376 const char *index_type, int complete_flag,
1377 const char *rank_type,
1378 const char *xpath_use,
1380 RSET **result_sets, int *num_result_sets,
1381 struct rset_key_control *kc,
1384 struct grep_info grep_info;
1385 const char *termp = termz;
1388 *num_result_sets = 0;
1389 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1395 if (alloc_sets == *num_result_sets) /* array full: grow it */
1398 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1401 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1402 alloc_sets = alloc_sets + add;
1403 *result_sets = rnew;
1405 res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1407 index_type, complete_flag,
1409 xpath_use, rset_nmem,
1410 &(*result_sets)[*num_result_sets],
1413 if (res != ZEBRA_OK)
1416 for (i = 0; i < *num_result_sets; i++)
1417 rset_delete((*result_sets)[i]);
1418 grep_info_delete(&grep_info);
1421 if ((*result_sets)[*num_result_sets] == 0)
1423 (*num_result_sets)++;
1428 grep_info_delete(&grep_info);
1433 \brief Create result set(s) for list of terms
1434 \param zh Zebra Handle
1435 \param zapt Attributes Plus Term (RPN leaf)
1436 \param termz term as used in query but converted to UTF-8
1437 \param attributeSet default attribute set
1438 \param stream memory for result
1439 \param index_type register type ("w", "p",..)
1440 \param complete_flag whether it's phrases or not
1441 \param rank_type term flags for ranking
1442 \param xpath_use use attribute for X-Path (-1 for no X-path)
1443 \param rset_nmem memory for result sets
1444 \param result_sets output result set for each term in list (output)
1445 \param num_result_sets number of output result sets
1446 \param kc rset key control to be used for created result sets
/* Resolve the character map for index_type with zebra_map_get_or_add() and
 * delegate to search_terms_chrmap() with the same arguments.
 * NOTE(review): the trailing arguments of the call are on a line missing
 * from this listing. */
1448 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1449 Z_AttributesPlusTerm *zapt,
1451 const Odr_oid *attributeSet,
1454 const char *index_type, int complete_flag,
1455 const char *rank_type,
1456 const char *xpath_use,
1458 RSET **result_sets, int *num_result_sets,
1459 struct rset_key_control *kc)
1461 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1462 return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1463 stream, index_type, complete_flag,
1464 rank_type, xpath_use,
1465 rset_nmem, result_sets, num_result_sets,
1470 /** \brief limit a search by position - returns result set
/* Build a result set for the position attribute (type 3) of zapt.
 * Unsupported position values, and maps for which
 * zebra_maps_is_first_in_field() is false, are reported as
 * YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE. Otherwise the dictionary key is the
 * encoded index ordinal followed by FIRST_IN_FIELD_STR; the looked-up value
 * is asserted to hold one ISAM_P, which is turned into *rset with
 * zebra_create_rset_isam().
 * NOTE(review): the case labels, the NULL check on val and the returns are
 * on lines missing from this listing. term_dict is a fixed 100-byte buffer
 * filled with memcpy and strcpy. */
1472 static ZEBRA_RES search_position(ZebraHandle zh,
1473 Z_AttributesPlusTerm *zapt,
1474 const Odr_oid *attributeSet,
1475 const char *index_type,
1478 struct rset_key_control *kc)
1484 char term_dict[100];
1488 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1490 attr_init_APT(&position, zapt, 3);
1491 position_value = attr_find(&position, NULL);
1492 switch(position_value)
1501 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1507 if (!zebra_maps_is_first_in_field(zm))
1509 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1514 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1515 attributeSet, &ord) != ZEBRA_OK)
1519 ord_len = key_SU_encode(ord, ord_buf);
1520 memcpy(term_dict, ord_buf, ord_len);
1521 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1522 val = dict_lookup(zh->reg->dict, term_dict);
1525 assert(*val == sizeof(ISAM_P));
1526 memcpy(&isam_p, val+1, sizeof(isam_p));
1528 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1534 /** \brief returns result set for phrase search
/* Phrase search: one result set per term from search_terms_list(), then an
 * optional position result set from search_position() placed in front of
 * them in a new array. The final *rset is a null set for zero sets, the set
 * itself for one, and otherwise an ordered proximity set with relation 3
 * and distance 1. When search_position() fails the term result sets are
 * deleted.
 * NOTE(review): the test that decides whether first_set is prepended, the
 * matching increment of num_result_sets and the returns are on lines
 * missing from this listing. */
1536 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1537 Z_AttributesPlusTerm *zapt,
1538 const char *termz_org,
1539 const Odr_oid *attributeSet,
1542 const char *index_type,
1544 const char *rank_type,
1545 const char *xpath_use,
1548 struct rset_key_control *kc)
1550 RSET *result_sets = 0;
1551 int num_result_sets = 0;
1553 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1554 stream, index_type, complete_flag,
1555 rank_type, xpath_use,
1557 &result_sets, &num_result_sets, kc);
1559 if (res != ZEBRA_OK)
1562 if (num_result_sets > 0)
1565 res = search_position(zh, zapt, attributeSet,
1567 rset_nmem, &first_set,
1569 if (res != ZEBRA_OK)
1572 for (i = 0; i < num_result_sets; i++)
1573 rset_delete(result_sets[i]);
1578 RSET *nsets = nmem_malloc(stream,
1579 sizeof(RSET) * (num_result_sets+1));
1580 nsets[0] = first_set;
1581 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1582 result_sets = nsets;
1586 if (num_result_sets == 0)
1587 *rset = rset_create_null(rset_nmem, kc, 0);
1588 else if (num_result_sets == 1)
1589 *rset = result_sets[0];
1591 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1592 num_result_sets, result_sets,
1593 1 /* ordered */, 0 /* exclusion */,
1594 3 /* relation */, 1 /* distance */);
1600 /** \brief returns result set for or-list search
/* Or-list search: one result set per term from search_terms_list(). For
 * each of them search_position() is called, and the pair first_set plus
 * the term set can be replaced by an ordered proximity set (relation 3,
 * distance 1). The final *rset is a null set for zero sets, the set itself
 * for one, and otherwise rset_create_or() over all of them. When
 * search_position() fails all term result sets are deleted.
 * NOTE(review): the test that decides whether the proximity set is built
 * and the returns are on lines missing from this listing. */
1602 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1603 Z_AttributesPlusTerm *zapt,
1604 const char *termz_org,
1605 const Odr_oid *attributeSet,
1608 const char *index_type,
1610 const char *rank_type,
1611 const char *xpath_use,
1614 struct rset_key_control *kc)
1616 RSET *result_sets = 0;
1617 int num_result_sets = 0;
1620 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1621 stream, index_type, complete_flag,
1622 rank_type, xpath_use,
1624 &result_sets, &num_result_sets, kc);
1625 if (res != ZEBRA_OK)
1628 for (i = 0; i < num_result_sets; i++)
1631 res = search_position(zh, zapt, attributeSet,
1633 rset_nmem, &first_set,
1635 if (res != ZEBRA_OK)
1637 for (i = 0; i < num_result_sets; i++)
1638 rset_delete(result_sets[i]);
1646 tmp_set[0] = first_set;
1647 tmp_set[1] = result_sets[i];
1649 result_sets[i] = rset_create_prox(
1650 rset_nmem, kc, kc->scope,
1652 1 /* ordered */, 0 /* exclusion */,
1653 3 /* relation */, 1 /* distance */);
1656 if (num_result_sets == 0)
1657 *rset = rset_create_null(rset_nmem, kc, 0);
1658 else if (num_result_sets == 1)
1659 *rset = result_sets[0];
1661 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1662 num_result_sets, result_sets);
1668 /** \brief returns result set for and-list search
/* And-list search: one result set per term from search_terms_list(). For
 * each of them search_position() is called, and the pair first_set plus
 * the term set can be replaced by an ordered proximity set (relation 3,
 * distance 1). The final *rset is a null set for zero sets, the set itself
 * for one, and otherwise rset_create_and() over all of them. When
 * search_position() fails all term result sets are deleted.
 * NOTE(review): the test that decides whether the proximity set is built
 * and the returns are on lines missing from this listing. */
1670 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1671 Z_AttributesPlusTerm *zapt,
1672 const char *termz_org,
1673 const Odr_oid *attributeSet,
1676 const char *index_type,
1678 const char *rank_type,
1679 const char *xpath_use,
1682 struct rset_key_control *kc)
1684 RSET *result_sets = 0;
1685 int num_result_sets = 0;
1688 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1689 stream, index_type, complete_flag,
1690 rank_type, xpath_use,
1692 &result_sets, &num_result_sets,
1694 if (res != ZEBRA_OK)
1696 for (i = 0; i < num_result_sets; i++)
1699 res = search_position(zh, zapt, attributeSet,
1701 rset_nmem, &first_set,
1703 if (res != ZEBRA_OK)
1705 for (i = 0; i < num_result_sets; i++)
1706 rset_delete(result_sets[i]);
1714 tmp_set[0] = first_set;
1715 tmp_set[1] = result_sets[i];
1717 result_sets[i] = rset_create_prox(
1718 rset_nmem, kc, kc->scope,
1720 1 /* ordered */, 0 /* exclusion */,
1721 3 /* relation */, 1 /* distance */);
1726 if (num_result_sets == 0)
1727 *rset = rset_create_null(rset_nmem, kc, 0);
1728 else if (num_result_sets == 1)
1729 *rset = result_sets[0];
1731 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1732 num_result_sets, result_sets);
/* Translate the relation attribute (type 2) of zapt and a numeric term
 * into a regular expression in term_dict, then run dict_lookup_grep() with
 * grep_handle collecting hits into grep_info.
 * The term is read with term_100() (space_split fixed at 1) and converted
 * with atoi(). Bounds passed to gen_regular_rel():
 *   <   term_value-1, islt 1      <=  term_value,   islt 1
 *   >=  term_value,   islt 0      >   term_value+1, islt 0
 * Equality produces the pattern (0*N). An unsupported relation stores
 * YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in *error_code. The scratch WRBUF
 * term_num is destroyed on every visible exit path.
 * NOTE(review): atoi() gives no error indication for non-numeric input;
 * the case labels and returns are on lines missing from this listing. */
1738 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1739 const char **term_sub,
1741 const Odr_oid *attributeSet,
1742 struct grep_info *grep_info,
1752 WRBUF term_num = wrbuf_alloc();
1755 attr_init_APT(&relation, zapt, 2);
1756 relation_value = attr_find(&relation, NULL);
1758 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1760 switch (relation_value)
1763 yaz_log(log_level_rpn, "Relation <");
1764 if (!term_100(zm, term_sub, term_num, 1, display_term))
1766 wrbuf_destroy(term_num);
1769 term_value = atoi(wrbuf_cstr(term_num));
1770 gen_regular_rel(term_dict, term_value-1, 1);
1773 yaz_log(log_level_rpn, "Relation <=");
1774 if (!term_100(zm, term_sub, term_num, 1, display_term))
1776 wrbuf_destroy(term_num);
1779 term_value = atoi(wrbuf_cstr(term_num));
1780 gen_regular_rel(term_dict, term_value, 1);
1783 yaz_log(log_level_rpn, "Relation >=");
1784 if (!term_100(zm, term_sub, term_num, 1, display_term))
1786 wrbuf_destroy(term_num);
1789 term_value = atoi(wrbuf_cstr(term_num));
1790 gen_regular_rel(term_dict, term_value, 0);
1793 yaz_log(log_level_rpn, "Relation >");
1794 if (!term_100(zm, term_sub, term_num, 1, display_term))
1796 wrbuf_destroy(term_num);
1799 term_value = atoi(wrbuf_cstr(term_num));
1800 gen_regular_rel(term_dict, term_value+1, 0);
1804 yaz_log(log_level_rpn, "Relation =");
1805 if (!term_100(zm, term_sub, term_num, 1, display_term))
1807 wrbuf_destroy(term_num);
1810 term_value = atoi(wrbuf_cstr(term_num));
1811 wrbuf_printf(term_dict, "(0*%d)", term_value); /* allow leading zeros */
1814 /* term_tmp untouched.. */
1815 while (**term_sub != '\0')
1819 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1820 wrbuf_destroy(term_num);
1823 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1824 0, grep_info, max_pos, 0, grep_handle);
1827 zebra_set_partial_result(zh);
1829 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1830 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1831 wrbuf_destroy(term_num);
/* Turn one numeric term into dictionary hits.
 *
 * Sets *ol to a fresh ord_list, obtains the character map for
 * index_type, resolves the index ordinal of the APT with
 * zebra_apt_get_ord(), writes the encoded ordinal as a prefix into
 * term_dict and lets numeric_relation() add the relation pattern and
 * perform the lookup.  The error code numeric_relation() stores in
 * relation_error is passed on through zebra_setError().
 */
1835 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1836 const char **term_sub,
1838 const Odr_oid *attributeSet, NMEM stream,
1839 struct grep_info *grep_info,
1840 const char *index_type, int complete_flag,
1842 const char *xpath_use,
1843 struct ord_list **ol)
1846 struct rpn_char_map_info rcmi;
1848 int relation_error = 0;
1849 int ord, ord_len, i;
1851 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1853 *ol = ord_list_create(stream);
/* install the character mapping used by dictionary grep for this map */
1855 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1859 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1860 attributeSet, &ord) != ZEBRA_OK)
/* start the pattern from scratch and remember the ordinal in *ol */
1865 wrbuf_rewind(term_dict);
1867 *ol = ord_list_append(stream, *ol, ord);
1869 ord_len = key_SU_encode(ord, ord_buf);
/* pattern prefix: '(' , then each byte of the encoded ordinal preceded
   by a byte of value 1, then ')' */
1871 wrbuf_putc(term_dict, '(');
1872 for (i = 0; i < ord_len; i++)
1874 wrbuf_putc(term_dict, 1);
1875 wrbuf_putc(term_dict, ord_buf[i]);
1877 wrbuf_putc(term_dict, ')');
1879 if (!numeric_relation(zh, zapt, &termp, term_dict,
1880 attributeSet, grep_info, &max_pos, zm,
1881 display_term, &relation_error))
1885 zebra_setError(zh, relation_error, 0);
1892 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric search for one APT.
 *
 * Term limits are read with zebra_term_limits_APT() and grep_info is
 * prepared for index_type.  For each term handled, numeric_term()
 * collects ISAM positions in grep_info and rset_trunc() turns them into
 * a result set stored in result_sets[num_result_sets]; the array is
 * enlarged on stream when it is full.  grep_info is released afterwards.
 * The final *rset is a null set when no set was produced, the single
 * set when there is exactly one, and otherwise the AND of all sets.
 */
1897 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1898 Z_AttributesPlusTerm *zapt,
1900 const Odr_oid *attributeSet,
1903 const char *index_type,
1905 const char *rank_type,
1906 const char *xpath_use,
1909 struct rset_key_control *kc)
/* termp is the cursor that numeric_term() is given to advance */
1911 const char *termp = termz;
1912 RSET *result_sets = 0;
1913 int num_result_sets = 0;
1915 struct grep_info grep_info;
1917 zint hits_limit_value = hits_limit;
1918 const char *term_ref_id_str = 0;
1920 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1923 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1924 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1928 struct ord_list *ol;
1929 WRBUF term_dict = wrbuf_alloc();
1930 WRBUF display_term = wrbuf_alloc();
/* result_sets is full: allocate alloc_sets+add entries on stream and
   copy the existing entries into the new array */
1931 if (alloc_sets == num_result_sets)
1934 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1937 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1938 alloc_sets = alloc_sets + add;
1941 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* reset the position count before collecting hits for this term */
1942 grep_info.isam_p_indx = 0;
1943 res = numeric_term(zh, zapt, &termp, term_dict,
1944 attributeSet, stream, &grep_info,
1945 index_type, complete_flag,
1946 display_term, xpath_use, &ol);
/* the pattern buffer is not needed once the lookup has run */
1947 wrbuf_destroy(term_dict);
1948 if (res == ZEBRA_FAIL || termp == 0)
1950 wrbuf_destroy(display_term);
1953 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
/* build the result set for this term from the collected positions */
1954 result_sets[num_result_sets] =
1955 rset_trunc(zh, grep_info.isam_p_buf,
1956 grep_info.isam_p_indx, wrbuf_buf(display_term),
1957 wrbuf_len(display_term), rank_type,
1958 0 /* preserve position */,
1959 zapt->term->which, rset_nmem,
1960 kc, kc->scope, ol, index_type,
1963 wrbuf_destroy(display_term);
1964 if (!result_sets[num_result_sets])
1970 grep_info_delete(&grep_info);
1972 if (res != ZEBRA_OK)
/* merge: none -> null set, one -> that set, several -> AND of all */
1974 if (num_result_sets == 0)
1975 *rset = rset_create_null(rset_nmem, kc, 0);
1976 else if (num_result_sets == 1)
1977 *rset = result_sets[0];
1979 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1980 num_result_sets, result_sets);
/* "local" search: the term is interpreted as a record system number.
 *
 * termz is converted with atozint() and the record is fetched with
 * rec_get().  *rset becomes either a null result set or a temporary
 * result set, created with the value of the "setTmpDir" resource, that
 * is opened for writing and receives a key.  Presumably the null set is
 * chosen when rec_get() finds no record - TODO confirm.
 */
1986 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1987 Z_AttributesPlusTerm *zapt,
1989 const Odr_oid *attributeSet,
1991 const char *rank_type, NMEM rset_nmem,
1993 struct rset_key_control *kc)
1996 zint sysno = atozint(termz);
2000 rec = rec_get(zh->reg->records, sysno);
2008 *rset = rset_create_null(rset_nmem, kc, 0);
/* temporary set that is filled through the write handle rsfd */
2014 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
2015 res_get(zh->res, "setTmpDir"), 0);
2016 rsfd = rset_open(*rset, RSETF_WRITE);
2021 rset_write(rsfd, &key);
/* Record a sort criterion carried by an APT in sort_sequence.
 *
 * Attribute type 7 supplies the sort relation.  The specs array of
 * sort_sequence is allocated on first use with 10 entries, all cleared.
 * The term is checked against Z_Term_general and parsed with atoi_n()
 * to obtain the slot index i.  A Z_SortKeySpec is then built on stream
 * (generic sort element, sort-attributes key reusing zapt->attributes,
 * case sensitivity 0, "null" missing-value action) and stored in
 * sort_sequence->specs[i].  *rset is set to a null result set.
 */
2027 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2028 const Odr_oid *attributeSet, NMEM stream,
2029 Z_SortKeySpecList *sort_sequence,
2030 const char *rank_type,
2033 struct rset_key_control *kc)
2036 int sort_relation_value;
2037 AttrType sort_relation_type;
/* attr_find() is given &attributeSet, so it may replace the local
   attributeSet pointer that is duplicated further down */
2042 attr_init_APT(&sort_relation_type, zapt, 7);
2043 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort key seen: create the table of 10 empty slots */
2045 if (!sort_sequence->specs)
2047 sort_sequence->num_specs = 10;
2048 sort_sequence->specs = (Z_SortKeySpec **)
2049 nmem_malloc(stream, sort_sequence->num_specs *
2050 sizeof(*sort_sequence->specs));
2051 for (i = 0; i < sort_sequence->num_specs; i++)
2052 sort_sequence->specs[i] = 0;
2054 if (zapt->term->which != Z_Term_general)
/* the numeric value of the term selects the slot */
2057 i = atoi_n((char *) zapt->term->u.general->buf,
2058 zapt->term->u.general->len);
2059 if (i >= sort_sequence->num_specs)
2061 sprintf(termz, "%d", i);
/* assemble the sort key specification, all memory taken from stream */
2063 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2064 sks->sortElement = (Z_SortElement *)
2065 nmem_malloc(stream, sizeof(*sks->sortElement));
2066 sks->sortElement->which = Z_SortElement_generic;
2067 sk = sks->sortElement->u.generic = (Z_SortKey *)
2068 nmem_malloc(stream, sizeof(*sk));
2069 sk->which = Z_SortKey_sortAttributes;
2070 sk->u.sortAttributes = (Z_SortAttributes *)
2071 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
/* the attribute list is shared with the APT, not copied */
2073 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2074 sk->u.sortAttributes->list = zapt->attributes;
/* relation 1 -> ascending, 2 -> descending; the remaining assignment
   (presumably the else branch) selects ascending as well */
2076 sks->sortRelation = (Odr_int *)
2077 nmem_malloc(stream, sizeof(*sks->sortRelation));
2078 if (sort_relation_value == 1)
2079 *sks->sortRelation = Z_SortKeySpec_ascending;
2080 else if (sort_relation_value == 2)
2081 *sks->sortRelation = Z_SortKeySpec_descending;
2083 *sks->sortRelation = Z_SortKeySpec_ascending;
2085 sks->caseSensitivity = (Odr_int *)
2086 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2087 *sks->caseSensitivity = 0;
2089 sks->which = Z_SortKeySpec_null;
2090 sks->u.null = odr_nullval ();
2091 sort_sequence->specs[i] = sks;
/* a sort specification contributes no hits of its own */
2092 *rset = rset_create_null(rset_nmem, kc, 0);
/* Check whether the use attribute (type 1) of an APT is an XPath.
 *
 * The string value of the use attribute is fetched with attr_find_ex().
 * A missing string, or one that does not begin with '/', is filtered
 * out by the test below; otherwise the string is parsed into at most
 * max steps of xpath and the result of zebra_parse_xpath_str() is
 * returned.
 */
2097 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2098 const Odr_oid *attributeSet,
2099 struct xpath_location_step *xpath, int max,
/* local copy so that attr_find_ex() can update it without touching
   the caller's attributeSet */
2102 const Odr_oid *curAttributeSet = attributeSet;
2104 const char *use_string = 0;
2106 attr_init_APT(&use, zapt, 1);
2107 attr_find_ex(&use, &curAttributeSet, &use_string);
2109 if (!use_string || *use_string != '/')
2112 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Look up one XPath related pattern and return its result set.
 *
 * The ordinal for (index_type, xpath_use) is resolved with
 * zebraExplain_lookup_attr_str().  Two early exits return a null result
 * set; the first is taken when grep_info_prepare() fails.  Otherwise
 * the encoded ordinal followed by term forms the dictionary pattern,
 * dict_lookup_grep() collects the positions and rset_trunc() builds the
 * result set from them (flags "void", term type
 * Z_Term_characterString, no hits limit, no term reference id).
 */
2117 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2118 const char *index_type, const char *term,
2119 const char *xpath_use,
2121 struct rset_key_control *kc)
2123 struct grep_info grep_info;
2124 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2125 zinfo_index_category_index,
2126 index_type, xpath_use);
2127 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2128 return rset_create_null(rset_nmem, kc, 0);
2131 return rset_create_null(rset_nmem, kc, 0);
2137 WRBUF term_dict = wrbuf_alloc();
2138 int ord_len = key_SU_encode(ord, ord_buf);
2139 int term_type = Z_Term_characterString;
2140 const char *flags = "void";
/* pattern: '(' , each encoded ordinal byte preceded by a byte of
   value 1, ')' , then the caller supplied term */
2142 wrbuf_putc(term_dict, '(');
2143 for (i = 0; i < ord_len; i++)
2145 wrbuf_putc(term_dict, 1);
2146 wrbuf_putc(term_dict, ord_buf[i]);
2148 wrbuf_putc(term_dict, ')');
2149 wrbuf_puts(term_dict, term);
2151 grep_info.isam_p_indx = 0;
/* NOTE(review): the value returned by dict_lookup_grep() is discarded
   here - confirm that a failed lookup may be ignored */
2152 dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2153 &grep_info, &max_pos, 0, grep_handle);
2154 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2155 grep_info.isam_p_indx);
2156 rset = rset_trunc(zh, grep_info.isam_p_buf,
2157 grep_info.isam_p_indx, term, strlen(term),
2158 flags, 1, term_type, rset_nmem,
2159 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2160 0 /* term_ref_id_str */);
/* release the lookup state and the pattern buffer */
2161 grep_info_delete(&grep_info);
2162 wrbuf_destroy(term_dict);
/* Restrict a result set to the elements named by a parsed XPath.
 *
 * rset is the set of term hits; a null rset sets always_matches.  The
 * steps are processed from the last level down to level 0.  For each
 * level a pattern xpath_rev is assembled from the step names, an
 * optional attribute predicate is looked up, and the begin-tag and
 * end-tag sets for the pattern are fetched with xpath_trunc().  The
 * current rset is then wrapped with rset_create_between() using the
 * begin set, the end set and the attribute set.
 */
2168 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2169 NMEM stream, const char *rank_type, RSET rset,
2170 int xpath_len, struct xpath_location_step *xpath,
2173 struct rset_key_control *kc)
/* no incoming term set means every element matches */
2176 int always_matches = rset ? 0 : 1;
2184 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2185 for (i = 0; i < xpath_len; i++)
2187 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
/* the lines below appear to be examples showing how an XPath maps onto
   nested range operations */
2199 a[@attr = value]/b[@other = othervalue]
2201 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2202 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2203 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2204 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2205 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2206 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* no character map handler is installed for the lookups that follow */
2210 dict_grep_cmap(zh->reg->dict, 0, 0);
2213 int level = xpath_len;
/* innermost step first */
2216 while (--level >= 0)
2218 WRBUF xpath_rev = wrbuf_alloc();
2220 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* walk the steps from this level down to 1, so the names end up in
   reverse order, each one followed by a slash */
2222 for (i = level; i >= 1; --i)
2224 const char *cp = xpath[i].part;
/* copy the step name character by character; one case is replaced by a
   pattern matching any run of non-slash characters, and a blank is
   emitted preceded by a byte of value 1 */
2230 wrbuf_puts(xpath_rev, "[^/]*");
2231 else if (*cp == ' ')
2232 wrbuf_puts(xpath_rev, "\001 ");
2234 wrbuf_putc(xpath_rev, *cp);
2236 /* wrbuf_putc does not null-terminate , but
2237 wrbuf_puts below ensures it does.. so xpath_rev
2238 is OK iff length is > 0 */
2240 wrbuf_puts(xpath_rev, "/");
2242 else if (i == 1) /* // case */
2243 wrbuf_puts(xpath_rev, ".*");
/* relation predicate with a non-empty name: build name (without its
   first character), '=' and the value, escaping every character found
   in REGEX_CHARS with a backslash, then look the string up in the
   attribute-name index */
2245 if (xpath[level].predicate &&
2246 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2247 xpath[level].predicate->u.relation.name[0])
2249 WRBUF wbuf = wrbuf_alloc();
2250 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2251 if (xpath[level].predicate->u.relation.value)
2253 const char *cp = xpath[level].predicate->u.relation.value;
2254 wrbuf_putc(wbuf, '=');
2258 if (strchr(REGEX_CHARS, *cp))
2259 wrbuf_putc(wbuf, '\\');
2260 wrbuf_putc(wbuf, *cp);
2264 rset_attr = xpath_trunc(
2265 zh, stream, "0", wrbuf_cstr(wbuf),
2266 ZEBRA_XPATH_ATTR_NAME,
2268 wrbuf_destroy(wbuf);
2274 wrbuf_destroy(xpath_rev);
2278 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2279 wrbuf_cstr(xpath_rev));
/* only a non-empty pattern is looked up */
2280 if (wrbuf_len(xpath_rev))
2282 rset_start_tag = xpath_trunc(zh, stream, "0",
2283 wrbuf_cstr(xpath_rev),
2284 ZEBRA_XPATH_ELM_BEGIN,
/* presumably taken when always_matches is set: the begin-tag set itself
   becomes the result - TODO confirm */
2287 rset = rset_start_tag;
2290 rset_end_tag = xpath_trunc(zh, stream, "0",
2291 wrbuf_cstr(xpath_rev),
2292 ZEBRA_XPATH_ELM_END,
/* keep the hits of rset enclosed by a begin/end tag pair, with the
   attribute set passed as the last argument */
2295 rset = rset_create_between(rset_nmem, kc, kc->scope,
2296 rset_start_tag, rset,
2297 rset_end_tag, rset_attr);
2300 wrbuf_destroy(xpath_rev);
/* Capacity of the xpath_location_step array that rpn_search_database()
   declares and passes to rpn_check_xpath() as the step limit. */
2308 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT() calls rpn_search_database()
   ahead of its definition. */
2310 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2311 Z_AttributesPlusTerm *zapt,
2312 const Odr_oid *attributeSet,
2313 zint hits_limit, NMEM stream,
2314 Z_SortKeySpecList *sort_sequence,
2317 struct rset_key_control *kc);
/* Run one APT against every database listed in basenames.
 *
 * An array of num_bases result sets is allocated on stream.  Each
 * database is selected with zebraExplain_curDatabase(); a non-zero
 * return raises YAZ_BIB1_DATABASE_UNAVAILABLE.  rpn_search_database()
 * then stores its result in rsets[i].  After a failure the sets already
 * created are deleted.  With zero databases *rset is a null set; the
 * last branch shown combines the per-database sets with
 * rset_create_or().
 */
2319 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2320 const Odr_oid *attributeSet,
2321 zint hits_limit, NMEM stream,
2322 Z_SortKeySpecList *sort_sequence,
2323 int num_bases, const char **basenames,
2326 struct rset_key_control *kc)
/* one slot per database */
2328 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2329 ZEBRA_RES res = ZEBRA_OK;
2331 for (i = 0; i < num_bases; i++)
2334 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2336 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2341 res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2343 rset_nmem, rsets+i, kc);
2344 if (res != ZEBRA_OK)
2347 if (res != ZEBRA_OK)
2348 { /* must clean up the already created sets */
2350 rset_delete(rsets[i]);
2357 else if (num_bases == 0)
2358 *rset = rset_create_null(rset_nmem, kc, 0);
2360 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* Search the currently selected database for one APT.
 *
 * zebra_maps_attr() derives the index type, search type, rank type and
 * the complete/sort flags from the APT, and zapt_term_to_utf8() places
 * the term in termz.  A sort request is delegated to rpn_sort_spec()
 * (presumably when sort_flag is set).  rpn_check_xpath() decides
 * whether the use attribute is an XPath; if so xpath_use selects the
 * attribute-cdata or cdata index depending on whether the last step
 * starts with '@', and relation value 103 (always matches) goes
 * straight to rpn_search_xpath() with no term set.
 * Otherwise search_type ("phrase", "and-list", "or-list", "local" or
 * "numeric") picks the matching rpn_search_APT_* routine and any other
 * value raises YAZ_BIB1_UNSUPP_SEARCH.  The function ends by returning
 * the result of rpn_search_xpath() applied to the set produced.
 */
2366 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2367 Z_AttributesPlusTerm *zapt,
2368 const Odr_oid *attributeSet,
2369 zint hits_limit, NMEM stream,
2370 Z_SortKeySpecList *sort_sequence,
2373 struct rset_key_control *kc)
2375 ZEBRA_RES res = ZEBRA_OK;
2376 const char *index_type;
2377 char *search_type = NULL;
2378 char rank_type[128];
2381 char termz[IT_MAX_WORD+1];
2383 const char *xpath_use = 0;
2384 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2388 log_level_rpn = yaz_log_module_level("rpn");
2391 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2392 rank_type, &complete_flag, &sort_flag);
2394 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2395 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2396 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2397 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2399 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2403 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2404 rank_type, rset_nmem, rset, kc);
2405 /* consider if an X-Path query is used */
2406 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2407 xpath, MAX_XPATH_STEPS, stream);
/* NOTE(review): xpath[xpath_len-1] needs xpath_len >= 1 - confirm that
   the guard in front of this test excludes 0 */
2410 if (xpath[xpath_len-1].part[0] == '@')
2411 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2413 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2420 attr_init_APT(&relation, zapt, 2);
2421 relation_value = attr_find(&relation, NULL);
2423 if (relation_value == 103) /* alwaysmatches */
2425 *rset = 0; /* signal no "term" set */
2426 return rpn_search_xpath(zh, stream, rank_type, *rset,
2427 xpath_len, xpath, rset_nmem, rset, kc);
2432 /* search using one of the various search type strategies
2433 termz is our UTF-8 search term
2434 attributeSet is top-level default attribute set
2435 stream is ODR for search
2436 reg_id is the register type
2437 complete_flag is 1 for complete subfield, 0 for incomplete
2438 xpath_use is use-attribute to be used for X-Path search, 0 for none
2440 if (!strcmp(search_type, "phrase"))
2442 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2444 index_type, complete_flag, rank_type,
2449 else if (!strcmp(search_type, "and-list"))
2451 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2453 index_type, complete_flag, rank_type,
2458 else if (!strcmp(search_type, "or-list"))
2460 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2462 index_type, complete_flag, rank_type,
2467 else if (!strcmp(search_type, "local"))
2469 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2470 rank_type, rset_nmem, rset, kc);
2472 else if (!strcmp(search_type, "numeric"))
2474 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2476 index_type, complete_flag, rank_type,
2483 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2486 if (res != ZEBRA_OK)
2490 return rpn_search_xpath(zh, stream, rank_type, *rset,
2491 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration of the recursive RPN tree evaluator.  The
   declaration is static; the definition further down omits the keyword
   but keeps internal linkage because this declaration is visible
   first. */
2494 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2495 const Odr_oid *attributeSet,
2497 NMEM stream, NMEM rset_nmem,
2498 Z_SortKeySpecList *sort_sequence,
2499 int num_bases, const char **basenames,
2500 RSET **result_sets, int *num_result_sets,
2501 Z_Operator *parent_op,
2502 struct rset_key_control *kc);
/* Walk an RPN tree looking for attribute type 12 on its APT operands.
 *
 * A complex node recurses into s1 and then into s2; the second call is
 * made only while the result is still ZEBRA_OK.  For a simple node
 * holding an APT the value of attribute type 12 (held in
 * global_hits_limit_attr) is read with attr_find().  What is done with
 * that value is presumably to report it to the caller as the
 * approximation limit - TODO confirm.
 */
2504 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2507 ZEBRA_RES res = ZEBRA_OK;
2508 if (zs->which == Z_RPNStructure_complex)
2510 if (res == ZEBRA_OK)
2511 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2513 if (res == ZEBRA_OK)
2514 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2517 else if (zs->which == Z_RPNStructure_simple)
2519 if (zs->u.simple->which == Z_Operand_APT)
2521 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2522 AttrType global_hits_limit_attr;
2525 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2527 l = attr_find(&global_hits_limit_attr, NULL);
/* Entry point for evaluating a whole RPN query.
 *
 * A key control object is created for zh and the tree is evaluated by
 * rpn_search_structure() with no parent operator.  On failure every
 * result set returned so far is deleted.  On success exactly one
 * non-null result set is expected (checked with assert) and it is
 * stored in *result_set.
 */
2535 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2536 const Odr_oid *attributeSet,
2538 NMEM stream, NMEM rset_nmem,
2539 Z_SortKeySpecList *sort_sequence,
2540 int num_bases, const char **basenames,
2543 RSET *result_sets = 0;
2544 int num_result_sets = 0;
2546 struct rset_key_control *kc = zebra_key_control_create(zh);
2548 res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2551 num_bases, basenames,
2552 &result_sets, &num_result_sets,
2553 0 /* no parent op */,
2555 if (res != ZEBRA_OK)
/* failure: dispose of whatever sets were produced */
2558 for (i = 0; i < num_result_sets; i++)
2559 rset_delete(result_sets[i]);
/* with no parent operator the root call leaves a single set */
2564 assert(num_result_sets == 1);
2565 assert(result_sets);
2566 assert(*result_sets);
2567 *result_set = *result_sets;
/* Evaluate one node of the RPN tree and return its result set list.
 *
 * Complex node: both operands are evaluated recursively with this
 * node's operator passed down as parent_op; when either side fails the
 * sets obtained so far are deleted.  The two lists are concatenated
 * into a new array on stream.  If the parent operator is the same AND
 * or OR as this node the concatenated list is handed up unchanged, so
 * that the parent can combine all operands in one step.  In every
 * other case the list is combined here (and, or, and-not, proximity)
 * and replaced by a one-element list.  Proximity is accepted only for
 * a known unit equal to Z_ProxUnit_word; other units raise
 * YAZ_BIB1_UNSUPP_PROX_UNIT_CODE.  One branch of the switch raises
 * YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node: an APT operand is searched with rpn_search_APT(), a
 * result set id is resolved with resultSetRef(); the single set is
 * returned as a one-element list.  Other operand kinds, and other node
 * kinds, raise YAZ_BIB1_UNSUPP_SEARCH.
 */
2573 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2574 const Odr_oid *attributeSet, zint hits_limit,
2575 NMEM stream, NMEM rset_nmem,
2576 Z_SortKeySpecList *sort_sequence,
2577 int num_bases, const char **basenames,
2578 RSET **result_sets, int *num_result_sets,
2579 Z_Operator *parent_op,
2580 struct rset_key_control *kc)
2582 *num_result_sets = 0;
2583 if (zs->which == Z_RPNStructure_complex)
2586 Z_Operator *zop = zs->u.complex->roperator;
2587 RSET *result_sets_l = 0;
2588 int num_result_sets_l = 0;
2589 RSET *result_sets_r = 0;
2590 int num_result_sets_r = 0;
/* left operand */
2592 res = rpn_search_structure(zh, zs->u.complex->s1,
2593 attributeSet, hits_limit, stream, rset_nmem,
2595 num_bases, basenames,
2596 &result_sets_l, &num_result_sets_l,
2598 if (res != ZEBRA_OK)
2601 for (i = 0; i < num_result_sets_l; i++)
2602 rset_delete(result_sets_l[i]);
/* right operand */
2605 res = rpn_search_structure(zh, zs->u.complex->s2,
2606 attributeSet, hits_limit, stream, rset_nmem,
2608 num_bases, basenames,
2609 &result_sets_r, &num_result_sets_r,
2611 if (res != ZEBRA_OK)
/* the right side failed: both lists have to be released */
2614 for (i = 0; i < num_result_sets_l; i++)
2615 rset_delete(result_sets_l[i]);
2616 for (i = 0; i < num_result_sets_r; i++)
2617 rset_delete(result_sets_r[i]);
2621 /* make a new list of result for all children */
2622 *num_result_sets = num_result_sets_l + num_result_sets_r;
2623 *result_sets = nmem_malloc(stream, *num_result_sets *
2624 sizeof(**result_sets));
2625 memcpy(*result_sets, result_sets_l,
2626 num_result_sets_l * sizeof(**result_sets));
2627 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2628 num_result_sets_r * sizeof(**result_sets));
/* combine here unless the parent is the very same AND/OR operator, in
   which case the flat list is left for the parent to combine */
2630 if (!parent_op || parent_op->which != zop->which
2631 || (zop->which != Z_Operator_and &&
2632 zop->which != Z_Operator_or))
2634 /* parent node different from this one (or non-present) */
2635 /* we must combine result sets now */
2639 case Z_Operator_and:
2640 rset = rset_create_and(rset_nmem, kc,
2642 *num_result_sets, *result_sets);
2645 rset = rset_create_or(rset_nmem, kc,
2646 kc->scope, 0, /* termid */
2647 *num_result_sets, *result_sets);
2649 case Z_Operator_and_not:
2650 rset = rset_create_not(rset_nmem, kc,
2655 case Z_Operator_prox:
/* only a known proximity unit is accepted ... */
2656 if (zop->u.prox->which != Z_ProximityOperator_known)
2659 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* ... and that unit has to be "word" */
2663 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2665 zebra_setError_zint(zh,
2666 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2667 *zop->u.prox->u.known);
/* a missing exclusion flag is treated as 0 */
2672 rset = rset_create_prox(rset_nmem, kc,
2674 *num_result_sets, *result_sets,
2675 *zop->u.prox->ordered,
2676 (!zop->u.prox->exclusion ?
2677 0 : *zop->u.prox->exclusion),
2678 *zop->u.prox->relationType,
2679 *zop->u.prox->distance );
2683 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the operand list */
2686 *num_result_sets = 1;
2687 *result_sets = nmem_malloc(stream, *num_result_sets *
2688 sizeof(**result_sets));
2689 (*result_sets)[0] = rset;
2692 else if (zs->which == Z_RPNStructure_simple)
2697 if (zs->u.simple->which == Z_Operand_APT)
2699 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2700 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2701 attributeSet, hits_limit,
2702 stream, sort_sequence,
2703 num_bases, basenames, rset_nmem, &rset,
2705 if (res != ZEBRA_OK)
2708 else if (zs->u.simple->which == Z_Operand_resultSetId)
2710 yaz_log(YLOG_DEBUG, "rpn_search_ref");
/* reuse a previously stored result set by name */
2711 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2715 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2716 zs->u.simple->u.resultSetId);
2723 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a leaf always yields a list with exactly one set */
2726 *num_result_sets = 1;
2727 *result_sets = nmem_malloc(stream, *num_result_sets *
2728 sizeof(**result_sets));
2729 (*result_sets)[0] = rset;
2733 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2744 * c-file-style: "Stroustrup"
2745 * indent-tabs-mode: nil
2747 * vim: shiftwidth=4 tabstop=8 expandtab