Happy new year
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1994-2009 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #include <stdio.h>
21 #include <assert.h>
22 #ifdef WIN32
23 #include <io.h>
24 #endif
25 #if HAVE_UNISTD_H
26 #include <unistd.h>
27 #endif
28 #include <ctype.h>
29
30 #include <yaz/diagbib1.h>
31 #include "index.h"
32 #include <zebra_xpath.h>
33 #include <attrfind.h>
34 #include <charmap.h>
35 #include <rset.h>
36
37 static int log_level_set = 0;
38 static int log_level_rpn = 0;
39
40 #define TERMSET_DISABLE 1
41
42 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
43 {
44     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
45     const char **out = zebra_maps_input(p->zm, from, len, 0);
46 #if 0
47     if (out && *out)
48     {
49         const char *outp = *out;
50         yaz_log(YLOG_LOG, "---");
51         while (*outp)
52         {
53             yaz_log(YLOG_LOG, "%02X", *outp);
54             outp++;
55         }
56     }
57 #endif
58     return out;
59 }
60
61 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
62                           struct rpn_char_map_info *map_info)
63 {
64     map_info->zm = zm;
65     if (zebra_maps_is_icu(zm))
66         dict_grep_cmap(reg->dict, 0, 0);
67     else
68         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
69 }
70
#define TERM_COUNT

/* State shared with add_isam_p() while matching dictionary entries
   are being collected for one term. */
struct grep_info {
#ifdef TERM_COUNT
    int *term_no;            /* int array grown in step with isam_p_buf */
#endif
    ISAM_P *isam_p_buf;      /* collected ISAM positions (grown on demand) */
    int isam_p_size;         /* number of entries allocated in isam_p_buf */
    int isam_p_indx;         /* number of entries stored so far */
    int trunc_max;           /* collection stops at trunc_max - 1 entries */
    ZebraHandle zh;          /* handle used for term untrans / explain lookup */
    const char *index_type;  /* passed to zebra_term_untrans() */
    ZebraSet termset;        /* if non-null, every matched term is added to it */
};
85
86 static int add_isam_p(const char *name, const char *info,
87                       struct grep_info *p)
88 {
89     if (!log_level_set)
90     {
91         log_level_rpn = yaz_log_module_level("rpn");
92         log_level_set = 1;
93     }
94     /* we may have to stop this madness.. NOTE: -1 so that if
95        truncmax == trunxlimit we do *not* generate result sets */
96     if (p->isam_p_indx >= p->trunc_max - 1)
97         return 1;
98
99     if (p->isam_p_indx == p->isam_p_size)
100     {
101         ISAM_P *new_isam_p_buf;
102 #ifdef TERM_COUNT        
103         int *new_term_no;        
104 #endif
105         p->isam_p_size = 2*p->isam_p_size + 100;
106         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
107                                             p->isam_p_size);
108         if (p->isam_p_buf)
109         {
110             memcpy(new_isam_p_buf, p->isam_p_buf,
111                    p->isam_p_indx * sizeof(*p->isam_p_buf));
112             xfree(p->isam_p_buf);
113         }
114         p->isam_p_buf = new_isam_p_buf;
115
116 #ifdef TERM_COUNT
117         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
118         if (p->term_no)
119         {
120             memcpy(new_term_no, p->isam_p_buf,
121                    p->isam_p_indx * sizeof(*p->term_no));
122             xfree(p->term_no);
123         }
124         p->term_no = new_term_no;
125 #endif
126     }
127     assert(*info == sizeof(*p->isam_p_buf));
128     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
129
130     if (p->termset)
131     {
132         const char *db;
133         char term_tmp[IT_MAX_WORD];
134         int ord = 0;
135         const char *index_name;
136         int len = key_SU_decode(&ord, (const unsigned char *) name);
137         
138         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
139         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
140         zebraExplain_lookup_ord(p->zh->reg->zei,
141                                 ord, 0 /* index_type */, &db, &index_name);
142         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
143         
144         resultSetAddTerm(p->zh, p->termset, name[len], db,
145                          index_name, term_tmp);
146     }
147     (p->isam_p_indx)++;
148     return 0;
149 }
150
/* Callback adapter: p is the struct grep_info that the matching entry
   is recorded into; the return value of add_isam_p() is passed on. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *collect = (struct grep_info *) p;

    return add_isam_p(name, info, collect);
}
155
156 static int term_pre(zebra_map_t zm, const char **src,
157                     const char *ct1, const char *ct2, int first)
158 {
159     const char *s1, *s0 = *src;
160     const char **map;
161
162     /* skip white space */
163     while (*s0)
164     {
165         if (ct1 && strchr(ct1, *s0))
166             break;
167         if (ct2 && strchr(ct2, *s0))
168             break;
169         s1 = s0;
170         map = zebra_maps_input(zm, &s1, strlen(s1), first);
171         if (**map != *CHR_SPACE)
172             break;
173         s0 = s1;
174     }
175     *src = s0;
176     return *s0;
177 }
178
179
/* Write a printable dump of in_buf[0..in_size) to out_buf.
   Every byte becomes "HH:c  " (two hex digits, a colon, the character
   itself or '?' when it is outside 32..126, two spaces).  Once the
   output grows beyond out_size-20 characters ".." is appended and the
   dump stops.  out_size must be larger than 20. */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* length of the text written so far */
    int idx;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (idx = 0; idx < in_size; idx++)
    {
        unsigned byte = (unsigned char) in_buf[idx];
        int shown = (byte >= 32 && byte <= 126) ? (int) byte : '?';

        used += (size_t) sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
205
206 #define REGEX_CHARS " []()|.*+?!\"$"
207
208 static void add_non_space(const char *start, const char *end,
209                           WRBUF term_dict,
210                           WRBUF display_term,
211                           const char **map, int q_map_match)
212 {
213     size_t sz = end - start;
214
215     wrbuf_write(display_term, start, sz);
216     if (!q_map_match)
217     {
218         while (start < end)
219         {
220             if (strchr(REGEX_CHARS, *start))
221                 wrbuf_putc(term_dict, '\\');
222             wrbuf_putc(term_dict, *start);
223             start++;
224         }
225     }
226     else
227     {
228         char tmpbuf[80];
229         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
230         
231         wrbuf_puts(term_dict, map[0]);
232     }
233 }
234
235
236 static int term_100_icu(zebra_map_t zm,
237                         const char **src, WRBUF term_dict, int space_split,
238                         WRBUF display_term,
239                         int right_trunc)
240 {
241     int i;
242     const char *res_buf = 0;
243     size_t res_len = 0;
244     const char *display_buf;
245     size_t display_len;
246     if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
247                                  &display_buf, &display_len))
248     {
249         *src += strlen(*src);
250         return 0;
251     }
252     wrbuf_write(display_term, display_buf, display_len);
253     if (right_trunc)
254     {
255         /* ICU sort keys seem to be of the form
256            basechars \x01 accents \x01 length
257            For now we'll just right truncate from basechars . This 
258            may give false hits due to accents not being used.
259         */
260         i = res_len;
261         while (--i >= 0 && res_buf[i] != '\x01')
262             ;
263         if (i > 0)
264         {
265             while (--i >= 0 && res_buf[i] != '\x01')
266                 ;
267         }
268         if (i == 0)
269         {  /* did not find base chars at all. Throw error */
270             return -1;
271         }
272         res_len = i; /* reduce res_len */
273     }
274     for (i = 0; i < res_len; i++)
275     {
276         if (strchr(REGEX_CHARS "\\", res_buf[i]))
277             wrbuf_putc(term_dict, '\\');
278         if (res_buf[i] < 32)
279             wrbuf_putc(term_dict, 1);
280             
281         wrbuf_putc(term_dict, res_buf[i]);
282     }
283     if (right_trunc)
284         wrbuf_puts(term_dict, ".*");
285     return 1;
286 }
287
288 /* term_100: handle term, where trunc = none(no operators at all) */
289 static int term_100(zebra_map_t zm,
290                     const char **src, WRBUF term_dict, int space_split,
291                     WRBUF display_term)
292 {
293     const char *s0;
294     const char **map;
295     int i = 0;
296
297     const char *space_start = 0;
298     const char *space_end = 0;
299
300     if (!term_pre(zm, src, NULL, NULL, !space_split))
301         return 0;
302     s0 = *src;
303     while (*s0)
304     {
305         const char *s1 = s0;
306         int q_map_match = 0;
307         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
308         if (space_split)
309         {
310             if (**map == *CHR_SPACE)
311                 break;
312         }
313         else  /* complete subfield only. */
314         {
315             if (**map == *CHR_SPACE)
316             {   /* save space mapping for later  .. */
317                 space_start = s1;
318                 space_end = s0;
319                 continue;
320             }
321             else if (space_start)
322             {   /* reload last space */
323                 while (space_start < space_end)
324                 {
325                     if (strchr(REGEX_CHARS, *space_start))
326                         wrbuf_putc(term_dict, '\\');
327                     wrbuf_putc(display_term, *space_start);
328                     wrbuf_putc(term_dict, *space_start);
329                     space_start++;
330                                
331                 }
332                 /* and reset */
333                 space_start = space_end = 0;
334             }
335         }
336         i++;
337
338         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
339     }
340     *src = s0;
341     return i;
342 }
343
344 /* term_101: handle term, where trunc = Process # */
345 static int term_101(zebra_map_t zm,
346                     const char **src, WRBUF term_dict, int space_split,
347                     WRBUF display_term)
348 {
349     const char *s0;
350     const char **map;
351     int i = 0;
352
353     if (!term_pre(zm, src, "#", "#", !space_split))
354         return 0;
355     s0 = *src;
356     while (*s0)
357     {
358         if (*s0 == '#')
359         {
360             i++;
361             wrbuf_puts(term_dict, ".*");
362             wrbuf_putc(display_term, *s0);
363             s0++;
364         }
365         else
366         {
367             const char *s1 = s0;
368             int q_map_match = 0;
369             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
370             if (space_split && **map == *CHR_SPACE)
371                 break;
372
373             i++;
374             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
375         }
376     }
377     *src = s0;
378     return i;
379 }
380
381 /* term_103: handle term, where trunc = re-2 (regular expressions) */
382 static int term_103(zebra_map_t zm, const char **src,
383                     WRBUF term_dict, int *errors, int space_split,
384                     WRBUF display_term)
385 {
386     int i = 0;
387     const char *s0;
388     const char **map;
389
390     if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
391         return 0;
392     s0 = *src;
393     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
394         isdigit(((const unsigned char *)s0)[1]))
395     {
396         *errors = s0[1] - '0';
397         s0 += 3;
398         if (*errors > 3)
399             *errors = 3;
400     }
401     while (*s0)
402     {
403         if (strchr("^\\()[].*+?|-", *s0))
404         {
405             wrbuf_putc(display_term, *s0);
406             wrbuf_putc(term_dict, *s0);
407             s0++;
408             i++;
409         }
410         else
411         {
412             const char *s1 = s0;
413             int q_map_match = 0;
414             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
415             if (space_split && **map == *CHR_SPACE)
416                 break;
417
418             i++;
419             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
420         }
421     }
422     *src = s0;
423     
424     return i;
425 }
426
427 /* term_103: handle term, where trunc = re-1 (regular expressions) */
428 static int term_102(zebra_map_t zm, const char **src,
429                     WRBUF term_dict, int space_split, WRBUF display_term)
430 {
431     return term_103(zm, src, term_dict, NULL, space_split, display_term);
432 }
433
434
435 /* term_104: handle term, process # and ! */
436 static int term_104(zebra_map_t zm, const char **src, 
437                     WRBUF term_dict, int space_split, WRBUF display_term)
438 {
439     const char *s0;
440     const char **map;
441     int i = 0;
442
443     if (!term_pre(zm, src, "?*#", "?*#", !space_split))
444         return 0;
445     s0 = *src;
446     while (*s0)
447     {
448         if (*s0 == '?')
449         {
450             i++;
451             wrbuf_putc(display_term, *s0);
452             s0++;
453             if (*s0 >= '0' && *s0 <= '9')
454             {
455                 int limit = 0;
456                 while (*s0 >= '0' && *s0 <= '9')
457                 {
458                     limit = limit * 10 + (*s0 - '0');
459                     wrbuf_putc(display_term, *s0);
460                     s0++;
461                 }
462                 if (limit > 20)
463                     limit = 20;
464                 while (--limit >= 0)
465                 {
466                     wrbuf_puts(term_dict, ".?");
467                 }
468             }
469             else
470             {
471                 wrbuf_puts(term_dict, ".*");
472             }
473         }
474         else if (*s0 == '*')
475         {
476             i++;
477             wrbuf_puts(term_dict, ".*");
478             wrbuf_putc(display_term, *s0);
479             s0++;
480         }
481         else if (*s0 == '#')
482         {
483             i++;
484             wrbuf_puts(term_dict, ".");
485             wrbuf_putc(display_term, *s0);
486             s0++;
487         }
488         else
489         {
490             const char *s1 = s0;
491             int q_map_match = 0;
492             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
493             if (space_split && **map == *CHR_SPACE)
494                 break;
495
496             i++;
497             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
498         }
499     }
500     *src = s0;
501     return i;
502 }
503
504 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
505 static int term_105(zebra_map_t zm, const char **src, 
506                     WRBUF term_dict, int space_split,
507                     WRBUF display_term, int right_truncate)
508 {
509     const char *s0;
510     const char **map;
511     int i = 0;
512
513     if (!term_pre(zm, src, "*!", "*!", !space_split))
514         return 0;
515     s0 = *src;
516     while (*s0)
517     {
518         if (*s0 == '*')
519         {
520             i++;
521             wrbuf_puts(term_dict, ".*");
522             wrbuf_putc(display_term, *s0);
523             s0++;
524         }
525         else if (*s0 == '!')
526         {
527             i++;
528             wrbuf_putc(term_dict, '.');
529             wrbuf_putc(display_term, *s0);
530             s0++;
531         }
532         else
533         {
534             const char *s1 = s0;
535             int q_map_match = 0;
536             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
537             if (space_split && **map == *CHR_SPACE)
538                 break;
539
540             i++;
541             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
542         }
543     }
544     if (right_truncate)
545         wrbuf_puts(term_dict, ".*");
546     *src = s0;
547     return i;
548 }
549
550
551 /* gen_regular_rel - generate regular expression from relation
552  *  val:     border value (inclusive)
553  *  islt:    1 if <=; 0 if >=.
554  */
555 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
556 {
557     char dst_buf[20*5*20]; /* assuming enough for expansion */
558     char *dst = dst_buf;
559     int dst_p;
560     int w, d, i;
561     int pos = 0;
562     char numstr[20];
563
564     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
565     if (val >= 0)
566     {
567         if (islt)
568             strcpy(dst, "(-[0-9]+|(");
569         else
570             strcpy(dst, "((");
571     } 
572     else
573     {
574         if (!islt)
575         {
576             strcpy(dst, "([0-9]+|-(");
577             islt = 1;
578         }
579         else
580         {
581             strcpy(dst, "(-(");
582             islt = 0;
583         }
584         val = -val;
585     }
586     dst_p = strlen(dst);
587     sprintf(numstr, "%d", val);
588     for (w = strlen(numstr); --w >= 0; pos++)
589     {
590         d = numstr[w];
591         if (pos > 0)
592         {
593             if (islt)
594             {
595                 if (d == '0')
596                     continue;
597                 d--;
598             } 
599             else
600             {
601                 if (d == '9')
602                     continue;
603                 d++;
604             }
605         }
606         
607         strcpy(dst + dst_p, numstr);
608         dst_p = strlen(dst) - pos - 1;
609
610         if (islt)
611         {
612             if (d != '0')
613             {
614                 dst[dst_p++] = '[';
615                 dst[dst_p++] = '0';
616                 dst[dst_p++] = '-';
617                 dst[dst_p++] = d;
618                 dst[dst_p++] = ']';
619             }
620             else
621                 dst[dst_p++] = d;
622         }
623         else
624         {
625             if (d != '9')
626             { 
627                 dst[dst_p++] = '[';
628                 dst[dst_p++] = d;
629                 dst[dst_p++] = '-';
630                 dst[dst_p++] = '9';
631                 dst[dst_p++] = ']';
632             }
633             else
634                 dst[dst_p++] = d;
635         }
636         for (i = 0; i<pos; i++)
637         {
638             dst[dst_p++] = '[';
639             dst[dst_p++] = '0';
640             dst[dst_p++] = '-';
641             dst[dst_p++] = '9';
642             dst[dst_p++] = ']';
643         }
644         dst[dst_p++] = '|';
645     }
646     dst[dst_p] = '\0';
647     if (islt)
648     {
649         /* match everything less than 10^(pos-1) */
650         strcat(dst, "0*");
651         for (i = 1; i<pos; i++)
652             strcat(dst, "[0-9]?");
653     }
654     else
655     {
656         /* match everything greater than 10^pos */
657         for (i = 0; i <= pos; i++)
658             strcat(dst, "[0-9]");
659         strcat(dst, "[0-9]*");
660     }
661     strcat(dst, "))");
662     wrbuf_puts(term_dict, dst);
663 }
664
665 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
666 {
667     const char *src = wrbuf_cstr(wsrc);
668     if (src[*indx] == '\\')
669     {
670         wrbuf_putc(term_p, src[*indx]);
671         (*indx)++;
672     }
673     wrbuf_putc(term_p, src[*indx]);
674     (*indx)++;
675 }
676
677 /*
678  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
679  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
680  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
681  *              ([^-a].*|a[^-b].*|ab[c-].*)
682  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
683  *              ([^a-].*|a[^b-].*|ab[^c-].*)
684  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
685  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
686  */
687 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
688                            const char **term_sub, WRBUF term_dict,
689                            const Odr_oid *attributeSet,
690                            zebra_map_t zm, int space_split, 
691                            WRBUF display_term,
692                            int *error_code)
693 {
694     AttrType relation;
695     int relation_value;
696     int i;
697     WRBUF term_component = wrbuf_alloc();
698
699     attr_init_APT(&relation, zapt, 2);
700     relation_value = attr_find(&relation, NULL);
701
702     *error_code = 0;
703     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
704     switch (relation_value)
705     {
706     case 1:
707         if (!term_100(zm, term_sub, term_component, space_split, display_term))
708         {
709             wrbuf_destroy(term_component);
710             return 0;
711         }
712         yaz_log(log_level_rpn, "Relation <");
713         
714         wrbuf_putc(term_dict, '(');
715         for (i = 0; i < wrbuf_len(term_component); )
716         {
717             int j = 0;
718             
719             if (i)
720                 wrbuf_putc(term_dict, '|');
721             while (j < i)
722                 string_rel_add_char(term_dict, term_component, &j);
723
724             wrbuf_putc(term_dict, '[');
725
726             wrbuf_putc(term_dict, '^');
727             
728             wrbuf_putc(term_dict, 1);
729             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
730             
731             string_rel_add_char(term_dict, term_component, &i);
732             wrbuf_putc(term_dict, '-');
733             
734             wrbuf_putc(term_dict, ']');
735             wrbuf_putc(term_dict, '.');
736             wrbuf_putc(term_dict, '*');
737         }
738         wrbuf_putc(term_dict, ')');
739         break;
740     case 2:
741         if (!term_100(zm, term_sub, term_component, space_split, display_term))
742         {
743             wrbuf_destroy(term_component);
744             return 0;
745         }
746         yaz_log(log_level_rpn, "Relation <=");
747
748         wrbuf_putc(term_dict, '(');
749         for (i = 0; i < wrbuf_len(term_component); )
750         {
751             int j = 0;
752
753             while (j < i)
754                 string_rel_add_char(term_dict, term_component, &j);
755             wrbuf_putc(term_dict, '[');
756
757             wrbuf_putc(term_dict, '^');
758
759             wrbuf_putc(term_dict, 1);
760             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
761
762             string_rel_add_char(term_dict, term_component, &i);
763             wrbuf_putc(term_dict, '-');
764
765             wrbuf_putc(term_dict, ']');
766             wrbuf_putc(term_dict, '.');
767             wrbuf_putc(term_dict, '*');
768
769             wrbuf_putc(term_dict, '|');
770         }
771         for (i = 0; i < wrbuf_len(term_component); )
772             string_rel_add_char(term_dict, term_component, &i);
773         wrbuf_putc(term_dict, ')');
774         break;
775     case 5:
776         if (!term_100(zm, term_sub, term_component, space_split, display_term))
777         {
778             wrbuf_destroy(term_component);
779             return 0;
780         }
781         yaz_log(log_level_rpn, "Relation >");
782
783         wrbuf_putc(term_dict, '(');
784         for (i = 0; i < wrbuf_len(term_component); )
785         {
786             int j = 0;
787
788             while (j < i)
789                 string_rel_add_char(term_dict, term_component, &j);
790             wrbuf_putc(term_dict, '[');
791             
792             wrbuf_putc(term_dict, '^');
793             wrbuf_putc(term_dict, '-');
794             string_rel_add_char(term_dict, term_component, &i);
795
796             wrbuf_putc(term_dict, ']');
797             wrbuf_putc(term_dict, '.');
798             wrbuf_putc(term_dict, '*');
799
800             wrbuf_putc(term_dict, '|');
801         }
802         for (i = 0; i < wrbuf_len(term_component); )
803             string_rel_add_char(term_dict, term_component, &i);
804         wrbuf_putc(term_dict, '.');
805         wrbuf_putc(term_dict, '+');
806         wrbuf_putc(term_dict, ')');
807         break;
808     case 4:
809         if (!term_100(zm, term_sub, term_component, space_split, display_term))
810         {
811             wrbuf_destroy(term_component);
812             return 0;
813         }
814         yaz_log(log_level_rpn, "Relation >=");
815
816         wrbuf_putc(term_dict, '(');
817         for (i = 0; i < wrbuf_len(term_component); )
818         {
819             int j = 0;
820
821             if (i)
822                 wrbuf_putc(term_dict, '|');
823             while (j < i)
824                 string_rel_add_char(term_dict, term_component, &j);
825             wrbuf_putc(term_dict, '[');
826
827             if (i < wrbuf_len(term_component)-1)
828             {
829                 wrbuf_putc(term_dict, '^');
830                 wrbuf_putc(term_dict, '-');
831                 string_rel_add_char(term_dict, term_component, &i);
832             }
833             else
834             {
835                 string_rel_add_char(term_dict, term_component, &i);
836                 wrbuf_putc(term_dict, '-');
837             }
838             wrbuf_putc(term_dict, ']');
839             wrbuf_putc(term_dict, '.');
840             wrbuf_putc(term_dict, '*');
841         }
842         wrbuf_putc(term_dict, ')');
843         break;
844     case 3:
845     case 102:
846     case -1:
847         if (!**term_sub)
848             return 1;
849         yaz_log(log_level_rpn, "Relation =");
850         if (!term_100(zm, term_sub, term_component, space_split, display_term))
851         {
852             wrbuf_destroy(term_component);
853             return 0;
854         }
855         wrbuf_puts(term_dict, "(");
856         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
857         wrbuf_puts(term_dict, ")");
858         break;
859     case 103:
860         yaz_log(log_level_rpn, "Relation always matches");
861         /* skip to end of term (we don't care what it is) */
862         while (**term_sub != '\0')
863             (*term_sub)++;
864         break;
865     default:
866         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
867         wrbuf_destroy(term_component);
868         return 0;
869     }
870     wrbuf_destroy(term_component);
871     return 1;
872 }
873
874 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
875                              const char **term_sub, 
876                              WRBUF term_dict,
877                              const Odr_oid *attributeSet, NMEM stream,
878                              struct grep_info *grep_info,
879                              const char *index_type, int complete_flag,
880                              WRBUF display_term,
881                              const char *xpath_use,
882                              struct ord_list **ol,
883                              zebra_map_t zm);
884
885 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
886                                 Z_AttributesPlusTerm *zapt,
887                                 zint *hits_limit_value,
888                                 const char **term_ref_id_str,
889                                 NMEM nmem)
890 {
891     AttrType term_ref_id_attr;
892     AttrType hits_limit_attr;
893     int term_ref_id_int;
894  
895     attr_init_APT(&hits_limit_attr, zapt, 11);
896     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
897
898     attr_init_APT(&term_ref_id_attr, zapt, 10);
899     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
900     if (term_ref_id_int >= 0)
901     {
902         char *res = nmem_malloc(nmem, 20);
903         sprintf(res, "%d", term_ref_id_int);
904         *term_ref_id_str = res;
905     }
906
907     /* no limit given ? */
908     if (*hits_limit_value == -1)
909     {
910         if (*term_ref_id_str)
911         {
912             /* use global if term_ref is present */
913             *hits_limit_value = zh->approx_limit;
914         }
915         else
916         {
917             /* no counting if term_ref is not present */
918             *hits_limit_value = 0;
919         }
920     }
921     else if (*hits_limit_value == 0)
922     {
923         /* 0 is the same as global limit */
924         *hits_limit_value = zh->approx_limit;
925     }
926     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
927             *term_ref_id_str ? *term_ref_id_str : "none",
928             *hits_limit_value);
929     return ZEBRA_OK;
930 }
931
932 /** \brief search for term (which may be truncated)
933  */
934 static ZEBRA_RES search_term(ZebraHandle zh,
935                              Z_AttributesPlusTerm *zapt,
936                              const char **term_sub, 
937                              const Odr_oid *attributeSet, NMEM stream,
938                              struct grep_info *grep_info,
939                              const char *index_type, int complete_flag,
940                              const char *rank_type, 
941                              const char *xpath_use,
942                              NMEM rset_nmem,
943                              RSET *rset,
944                              struct rset_key_control *kc,
945                              zebra_map_t zm)
946 {
947     ZEBRA_RES res;
948     struct ord_list *ol;
949     zint hits_limit_value;
950     const char *term_ref_id_str = 0;
951     WRBUF term_dict = wrbuf_alloc();
952     WRBUF display_term = wrbuf_alloc();
953     *rset = 0;
954     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
955                           stream);
956     grep_info->isam_p_indx = 0;
957     res = string_term(zh, zapt, term_sub, term_dict,
958                       attributeSet, stream, grep_info,
959                       index_type, complete_flag,
960                       display_term, xpath_use, &ol, zm);
961     wrbuf_destroy(term_dict);
962     if (res == ZEBRA_OK && *term_sub)
963     {
964         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
965         *rset = rset_trunc(zh, grep_info->isam_p_buf,
966                            grep_info->isam_p_indx, wrbuf_buf(display_term),
967                            wrbuf_len(display_term), rank_type, 
968                            1 /* preserve pos */,
969                            zapt->term->which, rset_nmem,
970                            kc, kc->scope, ol, index_type, hits_limit_value,
971                            term_ref_id_str);
972         if (!*rset)
973             res = ZEBRA_FAIL;
974     }
975     wrbuf_destroy(display_term);
976     return res;
977 }
978
979 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
980                              const char **term_sub, 
981                              WRBUF term_dict,
982                              const Odr_oid *attributeSet, NMEM stream,
983                              struct grep_info *grep_info,
984                              const char *index_type, int complete_flag,
985                              WRBUF display_term,
986                              const char *xpath_use,
987                              struct ord_list **ol,
988                              zebra_map_t zm)
989 {
990     int r;
991     AttrType truncation;
992     int truncation_value;
993     const char *termp;
994     struct rpn_char_map_info rcmi;
995
996     int space_split = complete_flag ? 0 : 1;
997     int ord = -1;
998     int regex_range = 0;
999     int max_pos, prefix_len = 0;
1000     int relation_error;
1001     char ord_buf[32];
1002     int ord_len, i;
1003
1004     *ol = ord_list_create(stream);
1005
1006     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1007     attr_init_APT(&truncation, zapt, 5);
1008     truncation_value = attr_find(&truncation, NULL);
1009     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1010
1011     termp = *term_sub; /* start of term for each database */
1012     
1013     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1014                           attributeSet, &ord) != ZEBRA_OK)
1015     {
1016         *term_sub = 0;
1017         return ZEBRA_FAIL;
1018     }
1019     
1020     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1021     
1022     *ol = ord_list_append(stream, *ol, ord);
1023     ord_len = key_SU_encode(ord, ord_buf);
1024     
1025     wrbuf_putc(term_dict, '(');
1026     
1027     for (i = 0; i<ord_len; i++)
1028     {
1029         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1030         wrbuf_putc(term_dict, ord_buf[i]);
1031     }
1032     wrbuf_putc(term_dict, ')');
1033     
1034     prefix_len = wrbuf_len(term_dict);
1035
1036     if (zebra_maps_is_icu(zm))
1037     {
1038         int relation_value;
1039         AttrType relation;
1040         
1041         attr_init_APT(&relation, zapt, 2);
1042         relation_value = attr_find(&relation, NULL);
1043         if (relation_value == 103) /* always matches */
1044             termp += strlen(termp); /* move to end of term */
1045         else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1046         {
1047             /* ICU case */
1048             switch (truncation_value)
1049             {
1050             case -1:         /* not specified */
1051             case 100:        /* do not truncate */
1052                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1053                 {
1054                     *term_sub = 0;
1055                     return ZEBRA_OK;
1056                 }
1057                 break;
1058             case 1:          /* right truncation */
1059                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1060                 {
1061                     *term_sub = 0;
1062                     return ZEBRA_OK;
1063                 }
1064                 break;
1065             default:
1066                 zebra_setError_zint(zh,
1067                                     YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1068                                     truncation_value);
1069                 return ZEBRA_FAIL;
1070             }
1071         }
1072         else
1073         {
1074             zebra_setError_zint(zh,
1075                                 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1076                                 relation_value);
1077             return ZEBRA_FAIL;
1078         }
1079     }
1080     else
1081     {
1082         /* non-ICU case. using string.chr and friends */
1083         switch (truncation_value)
1084         {
1085         case -1:         /* not specified */
1086         case 100:        /* do not truncate */
1087             if (!string_relation(zh, zapt, &termp, term_dict,
1088                                  attributeSet,
1089                                  zm, space_split, display_term,
1090                                  &relation_error))
1091             {
1092                 if (relation_error)
1093                 {
1094                     zebra_setError(zh, relation_error, 0);
1095                     return ZEBRA_FAIL;
1096                 }
1097                 *term_sub = 0;
1098                 return ZEBRA_OK;
1099             }
1100             break;
1101         case 1:          /* right truncation */
1102             wrbuf_putc(term_dict, '(');
1103             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1104             {
1105                 *term_sub = 0;
1106                 return ZEBRA_OK;
1107             }
1108             wrbuf_puts(term_dict, ".*)");
1109             break;
1110         case 2:          /* left truncation */
1111             wrbuf_puts(term_dict, "(.*");
1112             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1113             {
1114                 *term_sub = 0;
1115                 return ZEBRA_OK;
1116             }
1117             wrbuf_putc(term_dict, ')');
1118             break;
1119         case 3:          /* left&right truncation */
1120             wrbuf_puts(term_dict, "(.*");
1121             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1122             {
1123                 *term_sub = 0;
1124                 return ZEBRA_OK;
1125             }
1126             wrbuf_puts(term_dict, ".*)");
1127             break;
1128         case 101:        /* process # in term */
1129             wrbuf_putc(term_dict, '(');
1130             if (!term_101(zm, &termp, term_dict, space_split, display_term))
1131             {
1132                 *term_sub = 0;
1133                 return ZEBRA_OK;
1134             }
1135             wrbuf_puts(term_dict, ")");
1136             break;
1137         case 102:        /* Regexp-1 */
1138             wrbuf_putc(term_dict, '(');
1139             if (!term_102(zm, &termp, term_dict, space_split, display_term))
1140             {
1141                 *term_sub = 0;
1142                 return ZEBRA_OK;
1143             }
1144             wrbuf_putc(term_dict, ')');
1145             break;
1146         case 103:       /* Regexp-2 */
1147             regex_range = 1;
1148             wrbuf_putc(term_dict, '(');
1149             if (!term_103(zm, &termp, term_dict, &regex_range,
1150                           space_split, display_term))
1151             {
1152                 *term_sub = 0;
1153                 return ZEBRA_OK;
1154             }
1155             wrbuf_putc(term_dict, ')');
1156             break;
1157         case 104:        /* process # and ! in term */
1158             wrbuf_putc(term_dict, '(');
1159             if (!term_104(zm, &termp, term_dict, space_split, display_term))
1160             {
1161                 *term_sub = 0;
1162                 return ZEBRA_OK;
1163             }
1164             wrbuf_putc(term_dict, ')');
1165             break;
1166         case 105:        /* process * and ! in term */
1167             wrbuf_putc(term_dict, '(');
1168             if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1169             {
1170                 *term_sub = 0;
1171                 return ZEBRA_OK;
1172             }
1173             wrbuf_putc(term_dict, ')');
1174             break;
1175         case 106:        /* process * and ! in term */
1176             wrbuf_putc(term_dict, '(');
1177             if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1178             {
1179                 *term_sub = 0;
1180                 return ZEBRA_OK;
1181             }
1182             wrbuf_putc(term_dict, ')');
1183             break;
1184         default:
1185             zebra_setError_zint(zh,
1186                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1187                                 truncation_value);
1188             return ZEBRA_FAIL;
1189         }
1190     }
1191     if (1)
1192     {
1193         char buf[1000];
1194         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1195         esc_str(buf, sizeof(buf), input, strlen(input));
1196     }
1197     {
1198         WRBUF pr_wr = wrbuf_alloc();
1199
1200         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1201         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1202         wrbuf_destroy(pr_wr);
1203     }
1204     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1205                          grep_info, &max_pos, 
1206                          ord_len /* number of "exact" chars */,
1207                          grep_handle);
1208     if (r == 1)
1209         zebra_set_partial_result(zh);
1210     else if (r)
1211         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1212     *term_sub = termp;
1213     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1214     return ZEBRA_OK;
1215 }
1216
1217
1218
1219 static void grep_info_delete(struct grep_info *grep_info)
1220 {
1221 #ifdef TERM_COUNT
1222     xfree(grep_info->term_no);
1223 #endif
1224     xfree(grep_info->isam_p_buf);
1225 }
1226
1227 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1228                                    Z_AttributesPlusTerm *zapt,
1229                                    struct grep_info *grep_info,
1230                                    const char *index_type)
1231 {
1232 #ifdef TERM_COUNT
1233     grep_info->term_no = 0;
1234 #endif
1235     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1236     grep_info->isam_p_size = 0;
1237     grep_info->isam_p_buf = NULL;
1238     grep_info->zh = zh;
1239     grep_info->index_type = index_type;
1240     grep_info->termset = 0;
1241     if (zapt)
1242     {
1243         AttrType truncmax;
1244         int truncmax_value;
1245
1246         attr_init_APT(&truncmax, zapt, 13);
1247         truncmax_value = attr_find(&truncmax, NULL);
1248         if (truncmax_value != -1)
1249             grep_info->trunc_max = truncmax_value;
1250     }
1251     if (zapt)
1252     {
1253         AttrType termset;
1254         int termset_value_numeric;
1255         const char *termset_value_string;
1256
1257         attr_init_APT(&termset, zapt, 8);
1258         termset_value_numeric =
1259             attr_find_ex(&termset, NULL, &termset_value_string);
1260         if (termset_value_numeric != -1)
1261         {
1262 #if TERMSET_DISABLE
1263             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1264             return ZEBRA_FAIL;
1265 #else
1266             char resname[32];
1267             const char *termset_name = 0;
1268             if (termset_value_numeric != -2)
1269             {
1270                 
1271                 sprintf(resname, "%d", termset_value_numeric);
1272                 termset_name = resname;
1273             }
1274             else
1275                 termset_name = termset_value_string;
1276             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1277             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1278             if (!grep_info->termset)
1279             {
1280                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1281                 return ZEBRA_FAIL;
1282             }
1283 #endif
1284         }
1285     }
1286     return ZEBRA_OK;
1287 }
1288
1289 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1290                                      Z_AttributesPlusTerm *zapt,
1291                                      const char *termz,
1292                                      const Odr_oid *attributeSet,
1293                                      NMEM stream,
1294                                      const char *index_type, int complete_flag,
1295                                      const char *rank_type,
1296                                      const char *xpath_use,
1297                                      NMEM rset_nmem,
1298                                      RSET **result_sets, int *num_result_sets,
1299                                      struct rset_key_control *kc,
1300                                      zebra_map_t zm)
1301 {
1302     struct grep_info grep_info;
1303     const char *termp = termz;
1304     int alloc_sets = 0;
1305     
1306     *num_result_sets = 0;
1307     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1308         return ZEBRA_FAIL;
1309     while (1)
1310     { 
1311         ZEBRA_RES res;
1312
1313         if (alloc_sets == *num_result_sets)
1314         {
1315             int add = 10;
1316             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1317                                               sizeof(*rnew));
1318             if (alloc_sets)
1319                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1320             alloc_sets = alloc_sets + add;
1321             *result_sets = rnew;
1322         }
1323         res = search_term(zh, zapt, &termp, attributeSet,
1324                           stream, &grep_info,
1325                           index_type, complete_flag,
1326                           rank_type,
1327                           xpath_use, rset_nmem,
1328                           &(*result_sets)[*num_result_sets],
1329                           kc, zm);
1330         if (res != ZEBRA_OK)
1331         {
1332             int i;
1333             for (i = 0; i < *num_result_sets; i++)
1334                 rset_delete((*result_sets)[i]);
1335             grep_info_delete(&grep_info);
1336             return res;
1337         }
1338         if ((*result_sets)[*num_result_sets] == 0)
1339             break;
1340         (*num_result_sets)++;
1341
1342         if (!*termp)
1343             break;
1344     }
1345     grep_info_delete(&grep_info);
1346     return ZEBRA_OK;
1347 }
1348                                
1349 /**
1350    \brief Create result set(s) for list of terms
1351    \param zh Zebra Handle
1352    \param zapt Attributes Plust Term (RPN leaf)
1353    \param termz term as used in query but converted to UTF-8
1354    \param attributeSet default attribute set
1355    \param stream memory for result
1356    \param index_type register type ("w", "p",..)
1357    \param complete_flag whether it's phrases or not
1358    \param rank_type term flags for ranking
1359    \param xpath_use use attribute for X-Path (-1 for no X-path)
1360    \param rset_nmem memory for result sets
1361    \param result_sets output result set for each term in list (output)
1362    \param num_result_sets number of output result sets
1363    \param kc rset key control to be used for created result sets
1364 */
1365 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1366                                    Z_AttributesPlusTerm *zapt,
1367                                    const char *termz,
1368                                    const Odr_oid *attributeSet,
1369                                    NMEM stream,
1370                                    const char *index_type, int complete_flag,
1371                                    const char *rank_type,
1372                                    const char *xpath_use,
1373                                    NMEM rset_nmem,
1374                                    RSET **result_sets, int *num_result_sets,
1375                                    struct rset_key_control *kc)
1376 {
1377     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1378     if (zebra_maps_is_icu(zm))
1379         zebra_map_tokenize_start(zm, termz, strlen(termz));
1380     return search_terms_chrmap(zh, zapt, termz, attributeSet,
1381                                stream, index_type, complete_flag,
1382                                rank_type, xpath_use,
1383                                rset_nmem, result_sets, num_result_sets,
1384                                kc, zm);
1385 }
1386
1387
1388 /** \brief limit a search by position - returns result set
1389  */
1390 static ZEBRA_RES search_position(ZebraHandle zh,
1391                                  Z_AttributesPlusTerm *zapt,
1392                                  const Odr_oid *attributeSet,
1393                                  const char *index_type,
1394                                  NMEM rset_nmem,
1395                                  RSET *rset,
1396                                  struct rset_key_control *kc)
1397 {
1398     int position_value;
1399     AttrType position;
1400     int ord = -1;
1401     char ord_buf[32];
1402     char term_dict[100];
1403     int ord_len;
1404     char *val;
1405     ISAM_P isam_p;
1406     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1407     
1408     attr_init_APT(&position, zapt, 3);
1409     position_value = attr_find(&position, NULL);
1410     switch(position_value)
1411     {
1412     case 3:
1413     case -1:
1414         return ZEBRA_OK;
1415     case 1:
1416     case 2:
1417         break;
1418     default:
1419         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1420                             position_value);
1421         return ZEBRA_FAIL;
1422     }
1423
1424
1425     if (!zebra_maps_is_first_in_field(zm))
1426     {
1427         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1428                             position_value);
1429         return ZEBRA_FAIL;
1430     }
1431
1432     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1433                           attributeSet, &ord) != ZEBRA_OK)
1434     {
1435         return ZEBRA_FAIL;
1436     }
1437     ord_len = key_SU_encode(ord, ord_buf);
1438     memcpy(term_dict, ord_buf, ord_len);
1439     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1440     val = dict_lookup(zh->reg->dict, term_dict);
1441     if (val)
1442     {
1443         assert(*val == sizeof(ISAM_P));
1444         memcpy(&isam_p, val+1, sizeof(isam_p));
1445
1446         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, 
1447                                        isam_p, 0);
1448     }
1449     return ZEBRA_OK;
1450 }
1451
1452 /** \brief returns result set for phrase search
1453  */
1454 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1455                                        Z_AttributesPlusTerm *zapt,
1456                                        const char *termz_org,
1457                                        const Odr_oid *attributeSet,
1458                                        NMEM stream,
1459                                        const char *index_type,
1460                                        int complete_flag,
1461                                        const char *rank_type,
1462                                        const char *xpath_use,
1463                                        NMEM rset_nmem,
1464                                        RSET *rset,
1465                                        struct rset_key_control *kc)
1466 {
1467     RSET *result_sets = 0;
1468     int num_result_sets = 0;
1469     ZEBRA_RES res =
1470         search_terms_list(zh, zapt, termz_org, attributeSet,
1471                           stream, index_type, complete_flag,
1472                           rank_type, xpath_use,
1473                           rset_nmem,
1474                           &result_sets, &num_result_sets, kc);
1475     
1476     if (res != ZEBRA_OK)
1477         return res;
1478
1479     if (num_result_sets > 0)
1480     {
1481         RSET first_set = 0;
1482         res = search_position(zh, zapt, attributeSet, 
1483                               index_type,
1484                               rset_nmem, &first_set,
1485                               kc);
1486         if (res != ZEBRA_OK)
1487         {
1488             int i;
1489             for (i = 0; i<num_result_sets; i++)
1490                 rset_delete(result_sets[i]);
1491             return res;
1492         }
1493         if (first_set)
1494         {
1495             RSET *nsets = nmem_malloc(stream,
1496                                       sizeof(RSET) * (num_result_sets+1));
1497             nsets[0] = first_set;
1498             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1499             result_sets = nsets;
1500             num_result_sets++;
1501         }
1502     }
1503     if (num_result_sets == 0)
1504         *rset = rset_create_null(rset_nmem, kc, 0); 
1505     else if (num_result_sets == 1)
1506         *rset = result_sets[0];
1507     else
1508         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1509                                  num_result_sets, result_sets,
1510                                  1 /* ordered */, 0 /* exclusion */,
1511                                  3 /* relation */, 1 /* distance */);
1512     if (!*rset)
1513         return ZEBRA_FAIL;
1514     return ZEBRA_OK;
1515 }
1516
1517 /** \brief returns result set for or-list search
1518  */
1519 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1520                                         Z_AttributesPlusTerm *zapt,
1521                                         const char *termz_org,
1522                                         const Odr_oid *attributeSet,
1523                                         NMEM stream,
1524                                         const char *index_type, 
1525                                         int complete_flag,
1526                                         const char *rank_type,
1527                                         const char *xpath_use,
1528                                         NMEM rset_nmem,
1529                                         RSET *rset,
1530                                         struct rset_key_control *kc)
1531 {
1532     RSET *result_sets = 0;
1533     int num_result_sets = 0;
1534     int i;
1535     ZEBRA_RES res =
1536         search_terms_list(zh, zapt, termz_org, attributeSet,
1537                           stream, index_type, complete_flag,
1538                           rank_type, xpath_use,
1539                           rset_nmem,
1540                           &result_sets, &num_result_sets, kc);
1541     if (res != ZEBRA_OK)
1542         return res;
1543
1544     for (i = 0; i<num_result_sets; i++)
1545     {
1546         RSET first_set = 0;
1547         res = search_position(zh, zapt, attributeSet, 
1548                               index_type,
1549                               rset_nmem, &first_set,
1550                               kc);
1551         if (res != ZEBRA_OK)
1552         {
1553             for (i = 0; i<num_result_sets; i++)
1554                 rset_delete(result_sets[i]);
1555             return res;
1556         }
1557
1558         if (first_set)
1559         {
1560             RSET tmp_set[2];
1561
1562             tmp_set[0] = first_set;
1563             tmp_set[1] = result_sets[i];
1564             
1565             result_sets[i] = rset_create_prox(
1566                 rset_nmem, kc, kc->scope,
1567                 2, tmp_set,
1568                 1 /* ordered */, 0 /* exclusion */,
1569                 3 /* relation */, 1 /* distance */);
1570         }
1571     }
1572     if (num_result_sets == 0)
1573         *rset = rset_create_null(rset_nmem, kc, 0); 
1574     else if (num_result_sets == 1)
1575         *rset = result_sets[0];
1576     else
1577         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1578                                num_result_sets, result_sets);
1579     if (!*rset)
1580         return ZEBRA_FAIL;
1581     return ZEBRA_OK;
1582 }
1583
1584 /** \brief returns result set for and-list search
1585  */
1586 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1587                                          Z_AttributesPlusTerm *zapt,
1588                                          const char *termz_org,
1589                                          const Odr_oid *attributeSet,
1590                                          NMEM stream,
1591                                          const char *index_type, 
1592                                          int complete_flag,
1593                                          const char *rank_type, 
1594                                          const char *xpath_use,
1595                                          NMEM rset_nmem,
1596                                          RSET *rset,
1597                                          struct rset_key_control *kc)
1598 {
1599     RSET *result_sets = 0;
1600     int num_result_sets = 0;
1601     int i;
1602     ZEBRA_RES res =
1603         search_terms_list(zh, zapt, termz_org, attributeSet,
1604                           stream, index_type, complete_flag,
1605                           rank_type, xpath_use,
1606                           rset_nmem,
1607                           &result_sets, &num_result_sets,
1608                           kc);
1609     if (res != ZEBRA_OK)
1610         return res;
1611     for (i = 0; i<num_result_sets; i++)
1612     {
1613         RSET first_set = 0;
1614         res = search_position(zh, zapt, attributeSet, 
1615                               index_type,
1616                               rset_nmem, &first_set,
1617                               kc);
1618         if (res != ZEBRA_OK)
1619         {
1620             for (i = 0; i<num_result_sets; i++)
1621                 rset_delete(result_sets[i]);
1622             return res;
1623         }
1624
1625         if (first_set)
1626         {
1627             RSET tmp_set[2];
1628
1629             tmp_set[0] = first_set;
1630             tmp_set[1] = result_sets[i];
1631             
1632             result_sets[i] = rset_create_prox(
1633                 rset_nmem, kc, kc->scope,
1634                 2, tmp_set,
1635                 1 /* ordered */, 0 /* exclusion */,
1636                 3 /* relation */, 1 /* distance */);
1637         }
1638     }
1639
1640
1641     if (num_result_sets == 0)
1642         *rset = rset_create_null(rset_nmem, kc, 0); 
1643     else if (num_result_sets == 1)
1644         *rset = result_sets[0];
1645     else
1646         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1647                                 num_result_sets, result_sets);
1648     if (!*rset)
1649         return ZEBRA_FAIL;
1650     return ZEBRA_OK;
1651 }
1652
1653 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1654                             const char **term_sub,
1655                             WRBUF term_dict,
1656                             const Odr_oid *attributeSet,
1657                             struct grep_info *grep_info,
1658                             int *max_pos,
1659                             zebra_map_t zm,
1660                             WRBUF display_term,
1661                             int *error_code)
1662 {
1663     AttrType relation;
1664     int relation_value;
1665     int term_value;
1666     int r;
1667     WRBUF term_num = wrbuf_alloc();
1668
1669     *error_code = 0;
1670     attr_init_APT(&relation, zapt, 2);
1671     relation_value = attr_find(&relation, NULL);
1672
1673     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1674
1675     switch (relation_value)
1676     {
1677     case 1:
1678         yaz_log(log_level_rpn, "Relation <");
1679         if (!term_100(zm, term_sub, term_num, 1, display_term))
1680         { 
1681             wrbuf_destroy(term_num);
1682             return 0;
1683         }
1684         term_value = atoi(wrbuf_cstr(term_num));
1685         gen_regular_rel(term_dict, term_value-1, 1);
1686         break;
1687     case 2:
1688         yaz_log(log_level_rpn, "Relation <=");
1689         if (!term_100(zm, term_sub, term_num, 1, display_term))
1690         {
1691             wrbuf_destroy(term_num);
1692             return 0;
1693         }
1694         term_value = atoi(wrbuf_cstr(term_num));
1695         gen_regular_rel(term_dict, term_value, 1);
1696         break;
1697     case 4:
1698         yaz_log(log_level_rpn, "Relation >=");
1699         if (!term_100(zm, term_sub, term_num, 1, display_term))
1700         {
1701             wrbuf_destroy(term_num);
1702             return 0;
1703         }
1704         term_value = atoi(wrbuf_cstr(term_num));
1705         gen_regular_rel(term_dict, term_value, 0);
1706         break;
1707     case 5:
1708         yaz_log(log_level_rpn, "Relation >");
1709         if (!term_100(zm, term_sub, term_num, 1, display_term))
1710         {
1711             wrbuf_destroy(term_num);
1712             return 0;
1713         }
1714         term_value = atoi(wrbuf_cstr(term_num));
1715         gen_regular_rel(term_dict, term_value+1, 0);
1716         break;
1717     case -1:
1718     case 3:
1719         yaz_log(log_level_rpn, "Relation =");
1720         if (!term_100(zm, term_sub, term_num, 1, display_term))
1721         {
1722             wrbuf_destroy(term_num);
1723             return 0; 
1724         }
1725         term_value = atoi(wrbuf_cstr(term_num));
1726         wrbuf_printf(term_dict, "(0*%d)", term_value);
1727         break;
1728     case 103:
1729         /* term_tmp untouched.. */
1730         while (**term_sub != '\0')
1731             (*term_sub)++;
1732         break;
1733     default:
1734         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1735         wrbuf_destroy(term_num); 
1736         return 0;
1737     }
1738     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1739                          0, grep_info, max_pos, 0, grep_handle);
1740
1741     if (r == 1)
1742         zebra_set_partial_result(zh);
1743     else if (r)
1744         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1745     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1746     wrbuf_destroy(term_num);
1747     return 1;
1748 }
1749
1750 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1751                               const char **term_sub, 
1752                               WRBUF term_dict,
1753                               const Odr_oid *attributeSet, NMEM stream,
1754                               struct grep_info *grep_info,
1755                               const char *index_type, int complete_flag,
1756                               WRBUF display_term,
1757                               const char *xpath_use,
1758                               struct ord_list **ol)
1759 {
1760     const char *termp;
1761     struct rpn_char_map_info rcmi;
1762     int max_pos;
1763     int relation_error = 0;
1764     int ord, ord_len, i;
1765     char ord_buf[32];
1766     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1767     
1768     *ol = ord_list_create(stream);
1769
1770     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1771
1772     termp = *term_sub;
1773     
1774     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1775                           attributeSet, &ord) != ZEBRA_OK)
1776     {
1777         return ZEBRA_FAIL;
1778     }
1779     
1780     wrbuf_rewind(term_dict);
1781     
1782     *ol = ord_list_append(stream, *ol, ord);
1783     
1784     ord_len = key_SU_encode(ord, ord_buf);
1785     
1786     wrbuf_putc(term_dict, '(');
1787     for (i = 0; i < ord_len; i++)
1788     {
1789         wrbuf_putc(term_dict, 1);
1790         wrbuf_putc(term_dict, ord_buf[i]);
1791     }
1792     wrbuf_putc(term_dict, ')');
1793     
1794     if (!numeric_relation(zh, zapt, &termp, term_dict,
1795                           attributeSet, grep_info, &max_pos, zm,
1796                           display_term, &relation_error))
1797     {
1798         if (relation_error)
1799         {
1800             zebra_setError(zh, relation_error, 0);
1801             return ZEBRA_FAIL;
1802         }
1803         *term_sub = 0;
1804         return ZEBRA_OK;
1805     }
1806     *term_sub = termp;
1807     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1808     return ZEBRA_OK;
1809 }
1810
1811                                  
1812 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1813                                         Z_AttributesPlusTerm *zapt,
1814                                         const char *termz,
1815                                         const Odr_oid *attributeSet,
1816                                         NMEM stream,
1817                                         const char *index_type, 
1818                                         int complete_flag,
1819                                         const char *rank_type, 
1820                                         const char *xpath_use,
1821                                         NMEM rset_nmem,
1822                                         RSET *rset,
1823                                         struct rset_key_control *kc)
1824 {
1825     const char *termp = termz;
1826     RSET *result_sets = 0;
1827     int num_result_sets = 0;
1828     ZEBRA_RES res;
1829     struct grep_info grep_info;
1830     int alloc_sets = 0;
1831     zint hits_limit_value;
1832     const char *term_ref_id_str = 0;
1833
1834     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1835                           stream);
1836
1837     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1838     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1839         return ZEBRA_FAIL;
1840     while (1)
1841     { 
1842         struct ord_list *ol;
1843         WRBUF term_dict = wrbuf_alloc();
1844         WRBUF display_term = wrbuf_alloc();
1845         if (alloc_sets == num_result_sets)
1846         {
1847             int add = 10;
1848             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1849                                               sizeof(*rnew));
1850             if (alloc_sets)
1851                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1852             alloc_sets = alloc_sets + add;
1853             result_sets = rnew;
1854         }
1855         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1856         grep_info.isam_p_indx = 0;
1857         res = numeric_term(zh, zapt, &termp, term_dict,
1858                            attributeSet, stream, &grep_info,
1859                            index_type, complete_flag,
1860                            display_term, xpath_use, &ol);
1861         wrbuf_destroy(term_dict);
1862         if (res == ZEBRA_FAIL || termp == 0)
1863         {
1864             wrbuf_destroy(display_term);
1865             break;
1866         }
1867         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1868         result_sets[num_result_sets] =
1869             rset_trunc(zh, grep_info.isam_p_buf,
1870                        grep_info.isam_p_indx, wrbuf_buf(display_term),
1871                        wrbuf_len(display_term), rank_type,
1872                        0 /* preserve position */,
1873                        zapt->term->which, rset_nmem, 
1874                        kc, kc->scope, ol, index_type,
1875                        hits_limit_value,
1876                        term_ref_id_str);
1877         wrbuf_destroy(display_term);
1878         if (!result_sets[num_result_sets])
1879             break;
1880         num_result_sets++;
1881         if (!*termp)
1882             break;
1883     }
1884     grep_info_delete(&grep_info);
1885
1886     if (res != ZEBRA_OK)
1887         return res;
1888     if (num_result_sets == 0)
1889         *rset = rset_create_null(rset_nmem, kc, 0);
1890     else if (num_result_sets == 1)
1891         *rset = result_sets[0];
1892     else
1893         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1894                                 num_result_sets, result_sets);
1895     if (!*rset)
1896         return ZEBRA_FAIL;
1897     return ZEBRA_OK;
1898 }
1899
1900 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1901                                       Z_AttributesPlusTerm *zapt,
1902                                       const char *termz,
1903                                       const Odr_oid *attributeSet,
1904                                       NMEM stream,
1905                                       const char *rank_type, NMEM rset_nmem,
1906                                       RSET *rset,
1907                                       struct rset_key_control *kc)
1908 {
1909     Record rec;
1910     zint sysno = atozint(termz);
1911     
1912     if (sysno <= 0)
1913         sysno = 0;
1914     rec = rec_get(zh->reg->records, sysno);
1915     if (!rec)
1916         sysno = 0;
1917
1918     rec_free(&rec);
1919
1920     if (sysno <= 0)
1921     {
1922         *rset = rset_create_null(rset_nmem, kc, 0);
1923     }
1924     else
1925     {
1926         RSFD rsfd;
1927         struct it_key key;
1928         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1929                                  res_get(zh->res, "setTmpDir"), 0);
1930         rsfd = rset_open(*rset, RSETF_WRITE);
1931         
1932         key.mem[0] = sysno;
1933         key.mem[1] = 1;
1934         key.len = 2;
1935         rset_write(rsfd, &key);
1936         rset_close(rsfd);
1937     }
1938     return ZEBRA_OK;
1939 }
1940
1941 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1942                                const Odr_oid *attributeSet, NMEM stream,
1943                                Z_SortKeySpecList *sort_sequence,
1944                                const char *rank_type,
1945                                NMEM rset_nmem,
1946                                RSET *rset,
1947                                struct rset_key_control *kc)
1948 {
1949     int i;
1950     int sort_relation_value;
1951     AttrType sort_relation_type;
1952     Z_SortKeySpec *sks;
1953     Z_SortKey *sk;
1954     char termz[20];
1955     
1956     attr_init_APT(&sort_relation_type, zapt, 7);
1957     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1958
1959     if (!sort_sequence->specs)
1960     {
1961         sort_sequence->num_specs = 10;
1962         sort_sequence->specs = (Z_SortKeySpec **)
1963             nmem_malloc(stream, sort_sequence->num_specs *
1964                         sizeof(*sort_sequence->specs));
1965         for (i = 0; i<sort_sequence->num_specs; i++)
1966             sort_sequence->specs[i] = 0;
1967     }
1968     if (zapt->term->which != Z_Term_general)
1969         i = 0;
1970     else
1971         i = atoi_n((char *) zapt->term->u.general->buf,
1972                    zapt->term->u.general->len);
1973     if (i >= sort_sequence->num_specs)
1974         i = 0;
1975     sprintf(termz, "%d", i);
1976
1977     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1978     sks->sortElement = (Z_SortElement *)
1979         nmem_malloc(stream, sizeof(*sks->sortElement));
1980     sks->sortElement->which = Z_SortElement_generic;
1981     sk = sks->sortElement->u.generic = (Z_SortKey *)
1982         nmem_malloc(stream, sizeof(*sk));
1983     sk->which = Z_SortKey_sortAttributes;
1984     sk->u.sortAttributes = (Z_SortAttributes *)
1985         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1986
1987     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1988     sk->u.sortAttributes->list = zapt->attributes;
1989
1990     sks->sortRelation = (int *)
1991         nmem_malloc(stream, sizeof(*sks->sortRelation));
1992     if (sort_relation_value == 1)
1993         *sks->sortRelation = Z_SortKeySpec_ascending;
1994     else if (sort_relation_value == 2)
1995         *sks->sortRelation = Z_SortKeySpec_descending;
1996     else 
1997         *sks->sortRelation = Z_SortKeySpec_ascending;
1998
1999     sks->caseSensitivity = (int *)
2000         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2001     *sks->caseSensitivity = 0;
2002
2003     sks->which = Z_SortKeySpec_null;
2004     sks->u.null = odr_nullval ();
2005     sort_sequence->specs[i] = sks;
2006     *rset = rset_create_null(rset_nmem, kc, 0);
2007     return ZEBRA_OK;
2008 }
2009
2010
2011 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2012                            const Odr_oid *attributeSet,
2013                            struct xpath_location_step *xpath, int max,
2014                            NMEM mem)
2015 {
2016     const Odr_oid *curAttributeSet = attributeSet;
2017     AttrType use;
2018     const char *use_string = 0;
2019     
2020     attr_init_APT(&use, zapt, 1);
2021     attr_find_ex(&use, &curAttributeSet, &use_string);
2022
2023     if (!use_string || *use_string != '/')
2024         return -1;
2025
2026     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2027 }
2028  
2029                
2030
2031 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2032                         const char *index_type, const char *term, 
2033                         const char *xpath_use,
2034                         NMEM rset_nmem,
2035                         struct rset_key_control *kc)
2036 {
2037     struct grep_info grep_info;
2038     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2039                                            zinfo_index_category_index,
2040                                            index_type, xpath_use);
2041     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2042         return rset_create_null(rset_nmem, kc, 0);
2043     
2044     if (ord < 0)
2045         return rset_create_null(rset_nmem, kc, 0);
2046     else
2047     {
2048         int i, r, max_pos;
2049         char ord_buf[32];
2050         RSET rset;
2051         WRBUF term_dict = wrbuf_alloc();
2052         int ord_len = key_SU_encode(ord, ord_buf);
2053         int term_type = Z_Term_characterString;
2054         const char *flags = "void";
2055
2056         wrbuf_putc(term_dict, '(');
2057         for (i = 0; i<ord_len; i++)
2058         {
2059             wrbuf_putc(term_dict, 1);
2060             wrbuf_putc(term_dict, ord_buf[i]);
2061         }
2062         wrbuf_putc(term_dict, ')');
2063         wrbuf_puts(term_dict, term);
2064         
2065         grep_info.isam_p_indx = 0;
2066         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2067                              &grep_info, &max_pos, 0, grep_handle);
2068         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2069                 grep_info.isam_p_indx);
2070         rset = rset_trunc(zh, grep_info.isam_p_buf,
2071                           grep_info.isam_p_indx, term, strlen(term),
2072                           flags, 1, term_type, rset_nmem,
2073                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2074                           0 /* term_ref_id_str */);
2075         grep_info_delete(&grep_info);
2076         wrbuf_destroy(term_dict);
2077         return rset;
2078     }
2079 }
2080
2081 static
2082 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2083                            NMEM stream, const char *rank_type, RSET rset,
2084                            int xpath_len, struct xpath_location_step *xpath,
2085                            NMEM rset_nmem,
2086                            RSET *rset_out,
2087                            struct rset_key_control *kc)
2088 {
2089     int i;
2090     int always_matches = rset ? 0 : 1;
2091
2092     if (xpath_len < 0)
2093     {
2094         *rset_out = rset;
2095         return ZEBRA_OK;
2096     }
2097
2098     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2099     for (i = 0; i<xpath_len; i++)
2100     {
2101         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2102
2103     }
2104
2105     /*
2106     //a    ->    a/.*
2107     //a/b  ->    b/a/.*
2108     /a     ->    a/
2109     /a/b   ->    b/a/
2110
2111     /      ->    none
2112
2113     a[@attr = value]/b[@other = othervalue]
2114
2115     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2116     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2117     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2118     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2119     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2120     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2121       
2122     */
2123
2124     dict_grep_cmap(zh->reg->dict, 0, 0);
2125     
2126     {
2127         int level = xpath_len;
2128         int first_path = 1;
2129         
2130         while (--level >= 0)
2131         {
2132             WRBUF xpath_rev = wrbuf_alloc();
2133             int i;
2134             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2135
2136             for (i = level; i >= 1; --i)
2137             {
2138                 const char *cp = xpath[i].part;
2139                 if (*cp)
2140                 {
2141                     for (; *cp; cp++)
2142                     {
2143                         if (*cp == '*')
2144                             wrbuf_puts(xpath_rev, "[^/]*");
2145                         else if (*cp == ' ')
2146                             wrbuf_puts(xpath_rev, "\001 ");
2147                         else
2148                             wrbuf_putc(xpath_rev, *cp);
2149
2150                         /* wrbuf_putc does not null-terminate , but
2151                            wrbuf_puts below ensures it does.. so xpath_rev
2152                            is OK iff length is > 0 */
2153                     }
2154                     wrbuf_puts(xpath_rev, "/");
2155                 }
2156                 else if (i == 1)  /* // case */
2157                     wrbuf_puts(xpath_rev, ".*");
2158             }
2159             if (xpath[level].predicate &&
2160                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2161                 xpath[level].predicate->u.relation.name[0])
2162             {
2163                 WRBUF wbuf = wrbuf_alloc();
2164                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2165                 if (xpath[level].predicate->u.relation.value)
2166                 {
2167                     const char *cp = xpath[level].predicate->u.relation.value;
2168                     wrbuf_putc(wbuf, '=');
2169                     
2170                     while (*cp)
2171                     {
2172                         if (strchr(REGEX_CHARS, *cp))
2173                             wrbuf_putc(wbuf, '\\');
2174                         wrbuf_putc(wbuf, *cp);
2175                         cp++;
2176                     }
2177                 }
2178                 rset_attr = xpath_trunc(
2179                     zh, stream, "0", wrbuf_cstr(wbuf), 
2180                     ZEBRA_XPATH_ATTR_NAME, 
2181                     rset_nmem, kc);
2182                 wrbuf_destroy(wbuf);
2183             } 
2184             else 
2185             {
2186                 if (!first_path)
2187                 {
2188                     wrbuf_destroy(xpath_rev);
2189                     continue;
2190                 }
2191             }
2192             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2193                     wrbuf_cstr(xpath_rev));
2194             if (wrbuf_len(xpath_rev))
2195             {
2196                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2197                                              wrbuf_cstr(xpath_rev),
2198                                              ZEBRA_XPATH_ELM_BEGIN, 
2199                                              rset_nmem, kc);
2200                 if (always_matches)
2201                     rset = rset_start_tag;
2202                 else
2203                 {
2204                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2205                                                wrbuf_cstr(xpath_rev),
2206                                                ZEBRA_XPATH_ELM_END, 
2207                                                rset_nmem, kc);
2208                     
2209                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2210                                                rset_start_tag, rset,
2211                                                rset_end_tag, rset_attr);
2212                 }
2213             }
2214             wrbuf_destroy(xpath_rev);
2215             first_path = 0;
2216         }
2217     }
2218     *rset_out = rset;
2219     return ZEBRA_OK;
2220 }
2221
2222 #define MAX_XPATH_STEPS 10
2223
2224 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2225                                      Z_AttributesPlusTerm *zapt,
2226                                      const Odr_oid *attributeSet, NMEM stream,
2227                                      Z_SortKeySpecList *sort_sequence,
2228                                      NMEM rset_nmem,
2229                                      RSET *rset,
2230                                      struct rset_key_control *kc);
2231
2232 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2233                                 const Odr_oid *attributeSet, NMEM stream,
2234                                 Z_SortKeySpecList *sort_sequence,
2235                                 int num_bases, const char **basenames, 
2236                                 NMEM rset_nmem,
2237                                 RSET *rset,
2238                                 struct rset_key_control *kc)
2239 {
2240     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2241     ZEBRA_RES res = ZEBRA_OK;
2242     int i;
2243     for (i = 0; i < num_bases; i++)
2244     {
2245
2246         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2247         {
2248             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2249                            basenames[i]);
2250             res = ZEBRA_FAIL;
2251             break;
2252         }
2253         res = rpn_search_database(zh, zapt, attributeSet, stream,
2254                                   sort_sequence,
2255                                   rset_nmem, rsets+i, kc);
2256         if (res != ZEBRA_OK)
2257             break;
2258     }
2259     if (res != ZEBRA_OK)
2260     {   /* must clean up the already created sets */
2261         while (--i >= 0)
2262             rset_delete(rsets[i]);
2263         *rset = 0;
2264     }
2265     else 
2266     {
2267         if (num_bases == 1)
2268             *rset = rsets[0];
2269         else if (num_bases == 0)
2270             *rset = rset_create_null(rset_nmem, kc, 0); 
2271         else
2272             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2273                                    num_bases, rsets);
2274     }
2275     return res;
2276 }
2277
2278 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2279                                      Z_AttributesPlusTerm *zapt,
2280                                      const Odr_oid *attributeSet, NMEM stream,
2281                                      Z_SortKeySpecList *sort_sequence,
2282                                      NMEM rset_nmem,
2283                                      RSET *rset,
2284                                      struct rset_key_control *kc)
2285 {
2286     ZEBRA_RES res = ZEBRA_OK;
2287     const char *index_type;
2288     char *search_type = NULL;
2289     char rank_type[128];
2290     int complete_flag;
2291     int sort_flag;
2292     char termz[IT_MAX_WORD+1];
2293     int xpath_len;
2294     const char *xpath_use = 0;
2295     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2296
2297     if (!log_level_set)
2298     {
2299         log_level_rpn = yaz_log_module_level("rpn");
2300         log_level_set = 1;
2301     }
2302     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2303                     rank_type, &complete_flag, &sort_flag);
2304     
2305     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2306     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2307     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2308     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2309
2310     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2311         return ZEBRA_FAIL;
2312
2313     if (sort_flag)
2314         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2315                              rank_type, rset_nmem, rset, kc);
2316     /* consider if an X-Path query is used */
2317     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2318                                 xpath, MAX_XPATH_STEPS, stream);
2319     if (xpath_len >= 0)
2320     {
2321         if (xpath[xpath_len-1].part[0] == '@') 
2322             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2323         else
2324             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2325
2326         if (1)
2327         {
2328             AttrType relation;
2329             int relation_value;
2330
2331             attr_init_APT(&relation, zapt, 2);
2332             relation_value = attr_find(&relation, NULL);
2333
2334             if (relation_value == 103) /* alwaysmatches */
2335             {
2336                 *rset = 0; /* signal no "term" set */
2337                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2338                                         xpath_len, xpath, rset_nmem, rset, kc);
2339             }
2340         }
2341     }
2342
2343     /* search using one of the various search type strategies
2344        termz is our UTF-8 search term
2345        attributeSet is top-level default attribute set 
2346        stream is ODR for search
2347        reg_id is the register type
2348        complete_flag is 1 for complete subfield, 0 for incomplete
2349        xpath_use is use-attribute to be used for X-Path search, 0 for none
2350     */
2351     if (!strcmp(search_type, "phrase"))
2352     {
2353         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2354                                     index_type, complete_flag, rank_type,
2355                                     xpath_use,
2356                                     rset_nmem,
2357                                     rset, kc);
2358     }
2359     else if (!strcmp(search_type, "and-list"))
2360     {
2361         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2362                                       index_type, complete_flag, rank_type,
2363                                       xpath_use,
2364                                       rset_nmem,
2365                                       rset, kc);
2366     }
2367     else if (!strcmp(search_type, "or-list"))
2368     {
2369         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2370                                      index_type, complete_flag, rank_type,
2371                                      xpath_use,
2372                                      rset_nmem,
2373                                      rset, kc);
2374     }
2375     else if (!strcmp(search_type, "local"))
2376     {
2377         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2378                                    rank_type, rset_nmem, rset, kc);
2379     }
2380     else if (!strcmp(search_type, "numeric"))
2381     {
2382         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2383                                      index_type, complete_flag, rank_type,
2384                                      xpath_use,
2385                                      rset_nmem,
2386                                      rset, kc);
2387     }
2388     else
2389     {
2390         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2391         res = ZEBRA_FAIL;
2392     }
2393     if (res != ZEBRA_OK)
2394         return res;
2395     if (!*rset)
2396         return ZEBRA_FAIL;
2397     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2398                             xpath_len, xpath, rset_nmem, rset, kc);
2399 }
2400
2401 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2402                                       const Odr_oid *attributeSet, 
2403                                       NMEM stream, NMEM rset_nmem,
2404                                       Z_SortKeySpecList *sort_sequence,
2405                                       int num_bases, const char **basenames,
2406                                       RSET **result_sets, int *num_result_sets,
2407                                       Z_Operator *parent_op,
2408                                       struct rset_key_control *kc);
2409
2410 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2411                                    zint *approx_limit)
2412 {
2413     ZEBRA_RES res = ZEBRA_OK;
2414     if (zs->which == Z_RPNStructure_complex)
2415     {
2416         if (res == ZEBRA_OK)
2417             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2418                                            approx_limit);
2419         if (res == ZEBRA_OK)
2420             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2421                                            approx_limit);
2422     }
2423     else if (zs->which == Z_RPNStructure_simple)
2424     {
2425         if (zs->u.simple->which == Z_Operand_APT)
2426         {
2427             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2428             AttrType global_hits_limit_attr;
2429             int l;
2430             
2431             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2432             
2433             l = attr_find(&global_hits_limit_attr, NULL);
2434             if (l != -1)
2435                 *approx_limit = l;
2436         }
2437     }
2438     return res;
2439 }
2440
2441 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2442                          const Odr_oid *attributeSet, 
2443                          NMEM stream, NMEM rset_nmem,
2444                          Z_SortKeySpecList *sort_sequence,
2445                          int num_bases, const char **basenames,
2446                          RSET *result_set)
2447 {
2448     RSET *result_sets = 0;
2449     int num_result_sets = 0;
2450     ZEBRA_RES res;
2451     struct rset_key_control *kc = zebra_key_control_create(zh);
2452
2453     res = rpn_search_structure(zh, zs, attributeSet,
2454                                stream, rset_nmem,
2455                                sort_sequence, 
2456                                num_bases, basenames,
2457                                &result_sets, &num_result_sets,
2458                                0 /* no parent op */,
2459                                kc);
2460     if (res != ZEBRA_OK)
2461     {
2462         int i;
2463         for (i = 0; i<num_result_sets; i++)
2464             rset_delete(result_sets[i]);
2465         *result_set = 0;
2466     }
2467     else
2468     {
2469         assert(num_result_sets == 1);
2470         assert(result_sets);
2471         assert(*result_sets);
2472         *result_set = *result_sets;
2473     }
2474     (*kc->dec)(kc);
2475     return res;
2476 }
2477
2478 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2479                                const Odr_oid *attributeSet, 
2480                                NMEM stream, NMEM rset_nmem,
2481                                Z_SortKeySpecList *sort_sequence,
2482                                int num_bases, const char **basenames,
2483                                RSET **result_sets, int *num_result_sets,
2484                                Z_Operator *parent_op,
2485                                struct rset_key_control *kc)
2486 {
2487     *num_result_sets = 0;
2488     if (zs->which == Z_RPNStructure_complex)
2489     {
2490         ZEBRA_RES res;
2491         Z_Operator *zop = zs->u.complex->roperator;
2492         RSET *result_sets_l = 0;
2493         int num_result_sets_l = 0;
2494         RSET *result_sets_r = 0;
2495         int num_result_sets_r = 0;
2496
2497         res = rpn_search_structure(zh, zs->u.complex->s1,
2498                                    attributeSet, stream, rset_nmem,
2499                                    sort_sequence,
2500                                    num_bases, basenames,
2501                                    &result_sets_l, &num_result_sets_l,
2502                                    zop, kc);
2503         if (res != ZEBRA_OK)
2504         {
2505             int i;
2506             for (i = 0; i<num_result_sets_l; i++)
2507                 rset_delete(result_sets_l[i]);
2508             return res;
2509         }
2510         res = rpn_search_structure(zh, zs->u.complex->s2,
2511                                    attributeSet, stream, rset_nmem,
2512                                    sort_sequence,
2513                                    num_bases, basenames,
2514                                    &result_sets_r, &num_result_sets_r,
2515                                    zop, kc);
2516         if (res != ZEBRA_OK)
2517         {
2518             int i;
2519             for (i = 0; i<num_result_sets_l; i++)
2520                 rset_delete(result_sets_l[i]);
2521             for (i = 0; i<num_result_sets_r; i++)
2522                 rset_delete(result_sets_r[i]);
2523             return res;
2524         }
2525
2526         /* make a new list of result for all children */
2527         *num_result_sets = num_result_sets_l + num_result_sets_r;
2528         *result_sets = nmem_malloc(stream, *num_result_sets * 
2529                                    sizeof(**result_sets));
2530         memcpy(*result_sets, result_sets_l, 
2531                num_result_sets_l * sizeof(**result_sets));
2532         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2533                num_result_sets_r * sizeof(**result_sets));
2534
2535         if (!parent_op || parent_op->which != zop->which
2536             || (zop->which != Z_Operator_and &&
2537                 zop->which != Z_Operator_or))
2538         {
2539             /* parent node different from this one (or non-present) */
2540             /* we must combine result sets now */
2541             RSET rset;
2542             switch (zop->which)
2543             {
2544             case Z_Operator_and:
2545                 rset = rset_create_and(rset_nmem, kc,
2546                                        kc->scope,
2547                                        *num_result_sets, *result_sets);
2548                 break;
2549             case Z_Operator_or:
2550                 rset = rset_create_or(rset_nmem, kc,
2551                                       kc->scope, 0, /* termid */
2552                                       *num_result_sets, *result_sets);
2553                 break;
2554             case Z_Operator_and_not:
2555                 rset = rset_create_not(rset_nmem, kc,
2556                                        kc->scope,
2557                                        (*result_sets)[0],
2558                                        (*result_sets)[1]);
2559                 break;
2560             case Z_Operator_prox:
2561                 if (zop->u.prox->which != Z_ProximityOperator_known)
2562                 {
2563                     zebra_setError(zh, 
2564                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2565                                    0);
2566                     return ZEBRA_FAIL;
2567                 }
2568                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2569                 {
2570                     zebra_setError_zint(zh,
2571                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2572                                         *zop->u.prox->u.known);
2573                     return ZEBRA_FAIL;
2574                 }
2575                 else
2576                 {
2577                     rset = rset_create_prox(rset_nmem, kc,
2578                                             kc->scope,
2579                                             *num_result_sets, *result_sets, 
2580                                             *zop->u.prox->ordered,
2581                                             (!zop->u.prox->exclusion ? 
2582                                              0 : *zop->u.prox->exclusion),
2583                                             *zop->u.prox->relationType,
2584                                             *zop->u.prox->distance );
2585                 }
2586                 break;
2587             default:
2588                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2589                 return ZEBRA_FAIL;
2590             }
2591             *num_result_sets = 1;
2592             *result_sets = nmem_malloc(stream, *num_result_sets * 
2593                                        sizeof(**result_sets));
2594             (*result_sets)[0] = rset;
2595         }
2596     }
2597     else if (zs->which == Z_RPNStructure_simple)
2598     {
2599         RSET rset;
2600         ZEBRA_RES res;
2601
2602         if (zs->u.simple->which == Z_Operand_APT)
2603         {
2604             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2605             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2606                                  attributeSet, stream, sort_sequence,
2607                                  num_bases, basenames, rset_nmem, &rset,
2608                                  kc);
2609             if (res != ZEBRA_OK)
2610                 return res;
2611         }
2612         else if (zs->u.simple->which == Z_Operand_resultSetId)
2613         {
2614             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2615             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2616             if (!rset)
2617             {
2618                 zebra_setError(zh, 
2619                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2620                                zs->u.simple->u.resultSetId);
2621                 return ZEBRA_FAIL;
2622             }
2623             rset_dup(rset);
2624         }
2625         else
2626         {
2627             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2628             return ZEBRA_FAIL;
2629         }
2630         *num_result_sets = 1;
2631         *result_sets = nmem_malloc(stream, *num_result_sets * 
2632                                    sizeof(**result_sets));
2633         (*result_sets)[0] = rset;
2634     }
2635     else
2636     {
2637         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2638         return ZEBRA_FAIL;
2639     }
2640     return ZEBRA_OK;
2641 }
2642
2643
2644
2645 /*
2646  * Local variables:
2647  * c-basic-offset: 4
2648  * indent-tabs-mode: nil
2649  * End:
2650  * vim: shiftwidth=4 tabstop=8 expandtab
2651  */
2652