f52150c4e22ba49f5817918a383441aa0a003755
[idzebra-moved-to-github.git] / index / zsets.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1994-2010 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20
21 #include <stdio.h>
22 #include <assert.h>
23 #ifdef WIN32
24 #include <io.h>
25 #else
26 #include <unistd.h>
27 #endif
28
29 #include "index.h"
30 #include "rank.h"
31 #include <yaz/diagbib1.h>
32 #include <rset.h>
33
/* Upper bound on the number of sort criteria handled by one sort:
   resultSetSortSingle sizes its per-criterion arrays with it and clamps
   the number of criteria to it. */
#define ZSET_SORT_MAX_LEVEL 10
35
/* One term recorded on a result set by resultSetAddTerm(). */
struct zebra_set_term_entry {
    int reg_type;       /* reg_type value handed to resultSetAddTerm() */
    char *db;           /* copy of the database name (in the set's nmem) */
    char *index_name;   /* copy of the index name (in the set's nmem) */
    char *term;         /* copy of the term; 0 for slots never filled */
};
42
/* A named result set. Sets are chained through `next' on the list headed
   by zh->sets. */
struct zebra_set {
    char *name;           /* set name, xstrdup'ed by resultSetAdd */
    RSET rset;            /* search result; 0 when absent or invalidated */
    NMEM nmem;            /* holds basenames, term entries, copied rpn/sortSpec */
    NMEM rset_nmem; /* for creating the rsets in */
    zint hits;            /* hit count; resultSetAddTerm also uses it as the
                             number of terms added so far */
    int num_bases;        /* number of entries in basenames */
    const char **basenames; /* database names the set was searched in */
    Z_RPNQuery *rpn;      /* query, kept so resultSetGet can redo the search */
    Z_SortKeySpecList *sortSpec; /* sort spec re-applied after a re-search */
    struct zset_sort_info *sort_info; /* sorted/ranked head of the set */
    struct zebra_set_term_entry *term_entries; /* terms (0 if none added) */
    int term_entries_max; /* capacity of term_entries */
    struct zebra_set *next; /* next set on the handle's list */
    int locked;           /* non-zero: resultSetAdd refuses to overwrite */
    int estimated_hit_count; /* non-zero: reported as an estimate by
                                resultSetAddRPN */

    zint cache_position;  /* last position */
    RSFD cache_rfd;       /* rfd (NULL if not existing) */
    zint cache_psysno;    /* sysno for last position */
    zint approx_limit;    /* limit before we do approx */
};
65
/* One record in the sorted/ranked head of a result set. */
struct zset_sort_entry {
    zint sysno;   /* record system number */
    int score;    /* rank score; resultSetInsertSort stores -1 here */
};
70
/* Fixed-capacity ordered list of the best entries of a result set. */
struct zset_sort_info {
    int max_entries;   /* capacity; taken from the "sortmax" resource (min 2) */
    int num_entries;   /* number of entries currently in use */
    struct zset_sort_entry *all_entries;  /* backing storage, max_entries long */
    struct zset_sort_entry **entries;     /* pointers into all_entries, kept
                                             in sort order */
};
77
/* YAZ log levels used in this file; resolved once by loglevels(). */
static int log_level_set=0;          /* non-zero once the levels are resolved */
static int log_level_sort=0;         /* module "sorting" */
static int log_level_searchhits=0;   /* module "searchhits" */
static int log_level_searchterms=0;  /* module "searchterms" */
static int log_level_resultsets=0;   /* module "resultsets" */
83
84 static void loglevels(void)
85 {
86     if (log_level_set)
87         return;
88     log_level_sort = yaz_log_module_level("sorting");
89     log_level_searchhits = yaz_log_module_level("searchhits");
90     log_level_searchterms = yaz_log_module_level("searchterms");
91     log_level_resultsets = yaz_log_module_level("resultsets");
92     log_level_set = 1;
93 }
94
95
96 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
97                                  Z_RPNQuery *rpn, ZebraSet sset)
98 {
99     RSET rset = 0;
100     Z_SortKeySpecList *sort_sequence;
101     int sort_status, i;
102     ZEBRA_RES res = ZEBRA_OK;
103
104     sort_sequence = (Z_SortKeySpecList *)
105         nmem_malloc(nmem, sizeof(*sort_sequence));
106     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
107     sort_sequence->specs = (Z_SortKeySpec **)
108         nmem_malloc(nmem, sort_sequence->num_specs *
109                     sizeof(*sort_sequence->specs));
110     for (i = 0; i<sort_sequence->num_specs; i++)
111         sort_sequence->specs[i] = 0;
112     
113     rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
114
115     res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
116                          sset->approx_limit,
117                          nmem, rset_nmem,
118                          sort_sequence,
119                          sset->num_bases, sset->basenames,
120                          &rset);
121     if (res != ZEBRA_OK)
122     {
123         sset->rset = 0;
124         return res;
125     }
126     for (i = 0; sort_sequence->specs[i]; i++)
127         ;
128     sort_sequence->num_specs = i;
129     rset_set_hits_limit(rset, sset->approx_limit);
130
131     if (!i)
132     {
133         res = resultSetRank(zh, sset, rset, rset_nmem);
134     }
135     else
136     {
137         res = resultSetSortSingle(zh, nmem, sset, rset,
138                                   sort_sequence, &sort_status);
139     }
140     sset->rset = rset;
141     return res;
142 }
143
144
145 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
146                           int num_bases, char **basenames,
147                           const char *setname,
148                           zint *hits, int *estimated_hit_count)
149 {
150     ZebraSet zebraSet;
151     int i;
152     ZEBRA_RES res;
153
154     *hits = 0;
155     *estimated_hit_count = 0;
156
157     zebraSet = resultSetAdd(zh, setname, 1);
158     if (!zebraSet)
159         return ZEBRA_FAIL;
160     zebraSet->locked = 1;
161     zebraSet->rpn = 0;
162     zebraSet->nmem = m;
163     zebraSet->rset_nmem = nmem_create(); 
164
165     zebraSet->num_bases = num_bases;
166     zebraSet->basenames = 
167         nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
168     for (i = 0; i<num_bases; i++)
169         zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
170
171     res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
172                           rpn, zebraSet);
173     *hits = zebraSet->hits;
174     if (zebraSet->estimated_hit_count)
175         *estimated_hit_count = 1;
176
177     if (zebraSet->rset)
178         zebraSet->rpn = rpn;
179     zebraSet->locked = 0;
180     if (!zebraSet->rset)
181         return ZEBRA_FAIL;
182     return res;
183 }
184
185 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
186                       const char *db, const char *index_name, 
187                       const char *term)
188 {
189     assert(zh); /* compiler shut up */
190     if (!s->nmem)
191         s->nmem = nmem_create();
192     if (!s->term_entries)
193     {
194         int i;
195         s->term_entries_max = 1000;
196         s->term_entries =
197             nmem_malloc(s->nmem, s->term_entries_max * 
198                         sizeof(*s->term_entries));
199         for (i = 0; i < s->term_entries_max; i++)
200             s->term_entries[i].term = 0;
201     }
202     if (s->hits < s->term_entries_max)
203     {
204         s->term_entries[s->hits].reg_type = reg_type;
205         s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
206         s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
207         s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
208     }
209     (s->hits)++;
210 }
211
212 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
213 {
214     ZebraSet s;
215     int i;
216
217     for (s = zh->sets; s; s = s->next)
218         if (!strcmp(s->name, name))
219             break;
220     
221     if (!log_level_set)
222         loglevels();
223     if (s)
224     {
225         yaz_log(log_level_resultsets, "updating result set %s", name);
226         if (!ov || s->locked)
227             return NULL;
228         if (s->rset)
229         {
230             if (s->cache_rfd)
231                 rset_close(s->cache_rfd);
232             rset_delete(s->rset);
233         }
234         if (s->rset_nmem)
235             nmem_destroy(s->rset_nmem);
236         if (s->nmem)
237             nmem_destroy(s->nmem);
238     }
239     else
240     {
241         const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
242
243         yaz_log(log_level_resultsets, "adding result set %s", name);
244         s = (ZebraSet) xmalloc(sizeof(*s));
245         s->next = zh->sets;
246         zh->sets = s;
247         s->name = xstrdup(name);
248
249         s->sort_info = (struct zset_sort_info *)
250             xmalloc(sizeof(*s->sort_info));
251         s->sort_info->max_entries = atoi(sort_max_str);
252         if (s->sort_info->max_entries < 2)
253             s->sort_info->max_entries = 2;
254
255         s->sort_info->entries = (struct zset_sort_entry **)
256             xmalloc(sizeof(*s->sort_info->entries) *
257                     s->sort_info->max_entries);
258         s->sort_info->all_entries = (struct zset_sort_entry *)
259             xmalloc(sizeof(*s->sort_info->all_entries) *
260                     s->sort_info->max_entries);
261         for (i = 0; i < s->sort_info->max_entries; i++)
262             s->sort_info->entries[i] = s->sort_info->all_entries + i;
263     }
264     s->locked = 0;
265     s->term_entries = 0;
266     s->hits = 0;
267     s->rset = 0;
268     s->rset_nmem = 0;
269     s->nmem = 0;
270     s->rpn = 0;
271     s->sortSpec = 0;
272     s->cache_position = 0;
273     s->cache_rfd = 0;
274     s->approx_limit = zh->approx_limit;
275     s->estimated_hit_count = 0;
276     return s;
277 }
278
279 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
280 {
281     ZebraSet s;
282
283     for (s = zh->sets; s; s = s->next)
284         if (!strcmp(s->name, name))
285         {
286             if (!s->term_entries && !s->rset && s->rpn)
287             {
288                 NMEM nmem = nmem_create();
289                 yaz_log(log_level_resultsets, "research %s", name);
290                 if (!s->rset_nmem)
291                     s->rset_nmem = nmem_create();
292                 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
293                 if (s->rset && s->sortSpec)
294                 {
295                     int sort_status;
296                     yaz_log(log_level_resultsets, "resort %s", name);
297                     resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
298                                         &sort_status);
299                 }
300                 nmem_destroy(nmem);
301             }
302             return s;
303         }
304     return NULL;
305 }
306
307 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
308                                 const char ***basenames, int *num_bases)
309 {
310     ZebraSet sset = resultSetGet(zh, setname);
311     if (!sset)
312         return ZEBRA_FAIL;
313     *basenames = sset->basenames;
314     *num_bases = sset->num_bases;
315     return ZEBRA_OK;
316
317 }
318
319 void resultSetInvalidate(ZebraHandle zh)
320 {
321     ZebraSet s = zh->sets;
322     
323     yaz_log(log_level_resultsets, "invalidating result sets");
324     for (; s; s = s->next)
325     {
326         if (s->rset)
327         {
328             if (s->cache_rfd)
329                 rset_close(s->cache_rfd);
330             rset_delete(s->rset);
331         }
332         s->rset = 0;
333         s->cache_rfd = 0;
334         s->cache_position = 0;
335         if (s->rset_nmem)
336             nmem_destroy(s->rset_nmem);
337         s->rset_nmem=0;
338     }
339 }
340
341 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
342 {
343     ZebraSet * ss = &zh->sets;
344     int i;
345     
346     if (statuses)
347         for (i = 0; i<num; i++)
348             statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
349     while (*ss)
350     {
351         int i = -1;
352         ZebraSet s = *ss;
353         if (num >= 0)
354         {
355             for (i = 0; i<num; i++)
356                 if (!strcmp(s->name, names[i]))
357                 {
358                     if (statuses)
359                         statuses[i] = Z_DeleteStatus_success;
360                     i = -1;
361                     break;
362                 }
363         }
364         if (i < 0)
365         {
366             *ss = s->next;
367             
368             xfree(s->sort_info->all_entries);
369             xfree(s->sort_info->entries);
370             xfree(s->sort_info);
371             
372             if (s->nmem)
373                 nmem_destroy(s->nmem);
374             if (s->rset)
375             {
376                 if (s->cache_rfd)
377                     rset_close(s->cache_rfd);
378                 rset_delete(s->rset);
379             }
380             if (s->rset_nmem)
381                 nmem_destroy(s->rset_nmem);
382             xfree(s->name);
383             xfree(s);
384         }
385         else
386             ss = &s->next;
387     }
388 }
389
390 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
391                                                  const char *name, 
392                                                  zint start, int num)
393 {
394     zint pos_small[10];
395     zint *pos = pos_small;
396     ZebraMetaRecord *mr;
397     int i;
398
399     if (num > 10000 || num <= 0)
400         return 0;
401
402     if (num > 10)
403         pos = xmalloc(sizeof(*pos) * num);
404     
405     for (i = 0; i<num; i++)
406         pos[i] = start+i;
407
408     mr = zebra_meta_records_create(zh, name, num, pos);
409     
410     if (num > 10)
411         xfree(pos);
412     return mr;
413 }
414
415 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name, 
416                                            int num, zint *positions)
417 {
418     ZebraSet sset;
419     ZebraMetaRecord *sr = 0;
420     RSET rset;
421     int i;
422     struct zset_sort_info *sort_info;
423     size_t sysno_mem_index = 0;
424
425     if (zh->m_staticrank)
426         sysno_mem_index = 1;
427
428     if (!log_level_set)
429         loglevels();
430     if (!(sset = resultSetGet(zh, name)))
431         return NULL;
432     if (!(rset = sset->rset))
433     {
434         if (!sset->term_entries)
435             return 0;
436         sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
437         for (i = 0; i<num; i++)
438         {
439             sr[i].sysno = 0;
440             sr[i].score = -1;
441             sr[i].term = 0;
442             sr[i].db = 0;
443
444             if (positions[i] <= sset->term_entries_max)
445             {
446                 sr[i].term = sset->term_entries[positions[i]-1].term;
447                 sr[i].db = sset->term_entries[positions[i]-1].db;
448             }
449         }
450     }
451     else
452     {
453         sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
454         for (i = 0; i<num; i++)
455         {
456             sr[i].sysno = 0;
457             sr[i].score = -1;
458             sr[i].term = 0;
459             sr[i].db = 0;
460         }
461         sort_info = sset->sort_info;
462         if (sort_info)
463         {
464             zint position;
465             
466             for (i = 0; i<num; i++)
467             {
468                 position = positions[i];
469                 if (position > 0 && position <= sort_info->num_entries)
470                 {
471                     yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
472                             " (sorted)", position);
473                     sr[i].sysno = sort_info->entries[position-1]->sysno;
474                     sr[i].score = sort_info->entries[position-1]->score;
475                 }
476             }
477         }
478         /* did we really get all entries using sort ? */
479         for (i = 0; i<num; i++)
480         {
481             if (!sr[i].sysno)
482                 break;
483         }
484         if (i < num) /* nope, get the rest, unsorted - sorry */
485         {
486             zint position = 0;
487             int num_i = 0;
488             zint psysno = 0;
489             RSFD rfd;
490             struct it_key key;
491             
492             if (sort_info)
493                 position = sort_info->num_entries;
494             while (num_i < num && positions[num_i] <= position)
495                 num_i++;
496             
497             if (sset->cache_rfd &&
498                 num_i < num && positions[num_i] > sset->cache_position)
499             {
500                 position = sset->cache_position;
501                 rfd = sset->cache_rfd;
502                 psysno = sset->cache_psysno;
503             }
504             else
505             {
506                 if (sset->cache_rfd)
507                     rset_close(sset->cache_rfd);
508                 rfd = rset_open(rset, RSETF_READ);
509             }
510             while (num_i < num && rset_read(rfd, &key, 0))
511             {
512                 zint this_sys = key.mem[sysno_mem_index];
513                 if (this_sys != psysno)
514                 {
515                     psysno = this_sys;
516                     if (sort_info)
517                     {
518                         /* determine we alreay have this in our set */
519                         for (i = sort_info->num_entries; --i >= 0; )
520                             if (psysno == sort_info->entries[i]->sysno)
521                                 break;
522                         if (i >= 0)
523                             continue;
524                     }
525                     position++;
526                     assert(num_i < num);
527                     if (position == positions[num_i])
528                     {
529                         sr[num_i].sysno = psysno;
530                         yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
531                         sr[num_i].score = -1;
532                         num_i++;
533                     }
534                 }
535             }
536             sset->cache_position = position;
537             sset->cache_psysno = psysno;
538             sset->cache_rfd = rfd;
539         }
540     }
541     return sr;
542 }
543
544 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
545                                 int num)
546 {
547     assert(zh); /* compiler shut up about unused arg */
548     xfree(records);
549 }
550
/* One sort criterion as used by resultSetSortSingle/resultSetInsertSort. */
struct sortKeyInfo {
    int relation;    /* 'A' for ascending, 'D' for descending */
    int *ord; /* array of ord for each database searched (-1: none) */
    int *numerical; /* array of flags, one for each database searched:
                       non-zero means compare the keys as numbers */
    const char *index_type; /* passed to zebra_term_untrans for numeric
                               comparison */
};
557
/*
 * Insert record `sysno' into the sorted head (sset->sort_info) of a
 * result set according to the given sort criteria.
 *
 * database_no   index into each criterion's ord[]/numerical[] arrays
 * criteria      the sort criteria, num_criteria of them
 * cmp_buf       per criterion, the keys of the entries already in the
 *               list: entry k's key is the SORT_IDX_ENTRYSIZE bytes at
 *               cmp_buf[c] + k * SORT_IDX_ENTRYSIZE
 * tmp_cmp_buf   per criterion, scratch space of SORT_IDX_ENTRYSIZE bytes
 *               that receives the key of the new record
 *
 * When the list is full, the last entry is dropped to make room. When the
 * record would sort after a full list, nothing is inserted. The inserted
 * entry gets score -1.
 */
void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
                         int database_no,
                         struct sortKeyInfo *criteria, int num_criteria,
                         zint sysno,
                         char *cmp_buf[], char *tmp_cmp_buf[])
{
    struct zset_sort_entry *new_entry = NULL;
    struct zset_sort_info *sort_info = sset->sort_info;
    int i, j;
    WRBUF w = wrbuf_alloc();

    /* Phase 1: fetch the sort key of this record for every criterion */
    zebra_sort_sysno(zh->reg->sort_index, sysno);
    for (i = 0; i<num_criteria; i++)
    {
        char *this_entry_buf = tmp_cmp_buf[i];
        memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);

        if (criteria[i].ord[database_no] != -1)
        {
            yaz_log(log_level_sort, "pre zebra_sort_type ord is %d",
                    criteria[i].ord[database_no]);
            zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]);
            wrbuf_rewind(w);
            if (zebra_sort_read(zh->reg->sort_index, 0, w))
            {
                /* The buffer is walked as a run of NUL-terminated values.
                   The first value is always taken; a later one replaces
                   it when it compares lower (ascending sort) or higher
                   (descending sort). Values are truncated to
                   SORT_IDX_ENTRYSIZE-1 bytes when copied. */
                int off = 0;
                while (off != wrbuf_len(w))
                {
                    size_t l = strlen(wrbuf_buf(w)+off);
                    assert(off < wrbuf_len(w));

                    if (l >= SORT_IDX_ENTRYSIZE)
                        l = SORT_IDX_ENTRYSIZE-1;
                    if ( (off == 0)
                         || (criteria[i].relation == 'A'
                             && strcmp(wrbuf_buf(w)+off, this_entry_buf) < 0)
                         || (criteria[i].relation == 'D'
                             && strcmp(wrbuf_buf(w)+off, this_entry_buf) > 0)
                        )
                    {
                        memcpy(this_entry_buf, wrbuf_buf(w)+off, l);
                        this_entry_buf[l] = '\0';
                    }
                    /* advance past this value and its terminating NUL */
                    off += 1 + strlen(wrbuf_buf(w)+off);
                }
            }
        }
        else
        {
            yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
        }
    }
    wrbuf_destroy(w);
    /* Phase 2: scan the existing entries from last to first to find the
       entry the new record must follow */
    i = sort_info->num_entries;
    while (--i >= 0)
    {
        int rel = 0;
        for (j = 0; j<num_criteria; j++)
        {
            char *this_entry_buf = tmp_cmp_buf[j];
            char *other_entry_buf = 
                cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
            if (criteria[j].numerical[database_no])
            {
                /* numeric criterion: convert both keys back with
                   zebra_term_untrans and compare them as doubles */
                char this_entry_org[1024];
                char other_entry_org[1024];
                double diff;
                const char *index_type = criteria[j].index_type;
                zebra_term_untrans(zh, index_type, this_entry_org,
                                   this_entry_buf);
                zebra_term_untrans(zh, index_type, other_entry_org,
                                   other_entry_buf);
                diff = atof(this_entry_org) - atof(other_entry_org);

                if (diff > 0.0)
                    rel = 1;
                else if (diff < 0.0)
                    rel = -1;
                else
                    rel = 0;
            }
            else
            {
                rel = memcmp(this_entry_buf, other_entry_buf,
                             SORT_IDX_ENTRYSIZE);
            }
            /* when the compare is equal, continue to next criteria, 
               else break out */
            if (rel)
                break;
        }
        /* equal on every criterion: insert right after entry i */
        if (!rel)
            break;
        /* here j is the first criterion on which the keys differ */
        if (criteria[j].relation == 'A')
        {
            if (rel > 0)
                break;
        }
        else if (criteria[j].relation == 'D')
        {
            if (rel < 0)
                break;
        }
    }
    /* i is now the index at which the new entry belongs */
    ++i;
    yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
    j = sort_info->max_entries;
    if (i == j){
        yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
        return;
    }

    /* Phase 3: pick the slot to reuse - the last one when the list is
       full, otherwise the first unused one (and grow the list) */
    if (sort_info->num_entries == j)
        --j;
    else
        j = (sort_info->num_entries)++;
    new_entry = sort_info->entries[j];
    /* move up all higher entries (to make room) */
    while (j != i)
    {
        int k;
        for (k = 0; k<num_criteria; k++)
        {
            char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
            char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
            memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
        }
        sort_info->entries[j] = sort_info->entries[j-1];
        --j;
    }
    /* and insert the new entry at the correct place */
    sort_info->entries[i] = new_entry;
    assert(new_entry);
    /* and add this to the compare buffer; i is reused as the loop
       counter here, j still holds the insert position */
    for (i = 0; i<num_criteria; i++)
    {
        char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
        char *this_entry_buf = tmp_cmp_buf[i];
        memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
    }
    new_entry->sysno = sysno;
    new_entry->score = -1;
}
704
705 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
706                          zint sysno, int score, int relation)
707 {
708     struct zset_sort_entry *new_entry = NULL;
709     int i, j;
710     assert(zh); /* compiler shut up about unused arg */
711
712     i = sort_info->num_entries;
713     while (--i >= 0)
714     {
715         int rel = 0;
716
717         rel = score - sort_info->entries[i]->score;
718
719         if (relation == 'D')
720         {
721             if (rel >= 0)
722                 break;
723         }
724         else if (relation == 'A')
725         {
726             if (rel <= 0)
727                 break;
728         }
729     }
730     ++i;
731     j = sort_info->max_entries;
732     if (i == j)
733         return;
734
735     if (sort_info->num_entries == j)
736         --j;
737     else
738         j = (sort_info->num_entries)++;
739     
740     new_entry = sort_info->entries[j];
741     while (j != i)
742     {
743         sort_info->entries[j] = sort_info->entries[j-1];
744         --j;
745     }
746     sort_info->entries[i] = new_entry;
747     assert(new_entry);
748     new_entry->sysno = sysno;
749     new_entry->score = score;
750 }
751
752 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
753 {
754     Z_RPNQuery *dst = 0;
755     ODR encode = odr_createmem(ODR_ENCODE);
756     ODR decode = odr_createmem(ODR_DECODE);
757
758     if (z_RPNQuery(encode, &src, 0, 0))
759     {
760         int len;
761         char *buf = odr_getbuf(encode, &len, 0);
762
763         if (buf)
764         {
765             odr_setbuf(decode, buf, len, 0);
766             z_RPNQuery(decode, &dst, 0, 0);
767         }
768     }
769     nmem_transfer(nmem, decode->mem);
770     odr_destroy(encode);
771     odr_destroy(decode);
772     return dst;
773 }
774
775 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
776 {
777     Z_SortKeySpecList *dst = 0;
778     ODR encode = odr_createmem(ODR_ENCODE);
779     ODR decode = odr_createmem(ODR_DECODE);
780
781     if (z_SortKeySpecList(encode, &src, 0, 0))
782     {
783         int len;
784         char *buf = odr_getbuf(encode, &len, 0);
785
786         if (buf)
787         {
788             odr_setbuf(decode, buf, len, 0);
789             z_SortKeySpecList(decode, &dst, 0, 0);
790         }
791     }
792     nmem_transfer(nmem, decode->mem);
793     odr_destroy(encode);
794     odr_destroy(decode);
795     return dst;
796 }
797
798 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
799                         ZebraSet rset)
800 {
801     ZebraSet nset;
802     int i;
803
804     nset = resultSetAdd(zh, setname, 1);
805     if (!nset)
806         return 0;
807
808     nset->nmem = nmem_create();
809
810     nset->num_bases = rset->num_bases;
811     nset->basenames = 
812         nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
813     for (i = 0; i<rset->num_bases; i++)
814         nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
815
816     if (rset->rset)
817         nset->rset = rset_dup(rset->rset);
818     if (rset->rpn)
819         nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
820     return nset;
821 }
822
823 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
824                         int num_input_setnames, const char **input_setnames,
825                         const char *output_setname,
826                         Z_SortKeySpecList *sort_sequence, int *sort_status)
827 {
828     ZebraSet sset;
829     RSET rset;
830
831     if (num_input_setnames == 0)
832     {
833         zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
834         return ZEBRA_FAIL;
835     }
836     if (num_input_setnames > 1)
837     {
838         zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
839         return ZEBRA_FAIL;
840     }
841     if (!log_level_set)
842         loglevels();
843     yaz_log(log_level_sort, "result set sort input=%s output=%s",
844             *input_setnames, output_setname);
845     sset = resultSetGet(zh, input_setnames[0]);
846     if (!sset)
847     {
848         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
849                        input_setnames[0]);
850         return ZEBRA_FAIL;
851     }
852     if (!(rset = sset->rset))
853     {
854         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
855                        input_setnames[0]);
856         return ZEBRA_FAIL;
857     }
858     if (strcmp(output_setname, input_setnames[0]))
859         sset = resultSetClone(zh, output_setname, sset);
860     sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
861     return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
862                                sort_status);
863 }
864
865 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
866                               ZebraSet sset, RSET rset,
867                               Z_SortKeySpecList *sort_sequence,
868                               int *sort_status)
869 {
870     int i;
871     int ib;
872     int n = 0;
873     zint kno = 0;
874     zint psysno = 0;
875     struct it_key key;
876     struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
877     char *cmp_buf[ZSET_SORT_MAX_LEVEL];
878     char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
879     int num_criteria;
880     RSFD rfd;
881     TERMID termid;
882     TERMID *terms;
883     int numTerms = 0;
884     size_t sysno_mem_index = 0;
885     
886     int numbases = zh->num_basenames;
887     yaz_log(log_level_sort, "searching %d databases",numbases);
888
889     if (zh->m_staticrank)
890         sysno_mem_index = 1;
891
892     assert(nmem); /* compiler shut up about unused param */
893     sset->sort_info->num_entries = 0;
894
895     rset_getterms(rset, 0, 0, &n);
896     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
897     rset_getterms(rset, terms, n, &numTerms);
898
899     sset->hits = 0;
900     num_criteria = sort_sequence->num_specs;
901     if (num_criteria > ZSET_SORT_MAX_LEVEL)
902         num_criteria = ZSET_SORT_MAX_LEVEL;
903     /* set up the search criteria */
904     for (i = 0; i < num_criteria; i++)
905     {
906         Z_SortKeySpec *sks = sort_sequence->specs[i];
907         Z_SortKey *sk;
908         ZEBRA_RES res;
909         
910         sort_criteria[i].ord = (int *)
911             nmem_malloc(nmem, sizeof(int)*numbases);
912         sort_criteria[i].numerical = (int *)
913             nmem_malloc(nmem, sizeof(int)*numbases);
914         
915         /* initialize ord and numerical for each database */
916         for (ib = 0; ib < numbases; ib++)
917         {
918             sort_criteria[i].ord[ib] = -1;
919             sort_criteria[i].numerical[ib] = 0;
920         }
921
922         if (sks->which == Z_SortKeySpec_missingValueData)
923         {
924             zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
925             return ZEBRA_FAIL;
926         }
927         if (*sks->sortRelation == Z_SortKeySpec_ascending)
928             sort_criteria[i].relation = 'A';
929         else if (*sks->sortRelation == Z_SortKeySpec_descending)
930             sort_criteria[i].relation = 'D';
931         else
932         {
933             zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
934             return ZEBRA_FAIL;
935         }
936         if (sks->sortElement->which == Z_SortElement_databaseSpecific)
937         {
938             zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
939             return ZEBRA_FAIL;
940         }
941         else if (sks->sortElement->which != Z_SortElement_generic)
942         {
943             zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
944             return ZEBRA_FAIL;
945         }       
946         sk = sks->sortElement->u.generic;
947         switch (sk->which)
948         {
949         case Z_SortKey_sortField:
950             yaz_log(log_level_sort, "key %d is of type sortField", i+1);
951             for (ib = 0; ib < numbases; ib++)
952             {
953                 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
954                 sort_criteria[i].numerical[ib] = 0;
955                 sort_criteria[i].ord[ib] = 
956                     zebraExplain_lookup_attr_str(zh->reg->zei,
957                                                  zinfo_index_category_sort,
958                                                  0, sk->u.sortField);
959                 if (sks->which != Z_SortKeySpec_null
960                     && sort_criteria[i].ord[ib] == -1)
961                 {
962                     zebra_setError(zh,
963                                    YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
964                     return ZEBRA_FAIL;
965                 }
966             }
967             break;
968         case Z_SortKey_elementSpec:
969             yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
970             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
971             return ZEBRA_FAIL;
972         case Z_SortKey_sortAttributes:
973             yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
974             /* for every database we searched, get the sort index file
975                id (ord) and its numerical indication and store them in
976                the sort_criteria */
977             for (ib = 0; ib < numbases; ib++)
978             {
979                 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
980                 res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
981                                          &sort_criteria[i].ord[ib],
982                                          &sort_criteria[i].numerical[ib]);
983             }
984             
985             if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
986                 return ZEBRA_FAIL;
987             break;
988         }
989         /* right now we look up the index type based on the first database
990            if the index_type's can differ between the indexes of different
991            databases (which i guess they can?) then we have to store the
992            index types for each database, just like the ord and numerical */
993         if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
994                                     &sort_criteria[i].index_type,
995                                     0, 0))
996         {
997             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
998             return ZEBRA_FAIL;
999         }
1000     }
1001     /* allocate space for each cmpare buf + one extra for tmp comparison */
1002     /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
1003        all other result entries to compare against. This is slowly filled when records are processed.
1004        tmp_cmp_buf is an array with a value of the current record for each criteria
1005     */
1006     for (i = 0; i<num_criteria; i++)
1007     {
1008         cmp_buf[i] = xmalloc(sset->sort_info->max_entries
1009                              * SORT_IDX_ENTRYSIZE);
1010         tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
1011     }
1012     rfd = rset_open(rset, RSETF_READ);
1013     while (rset_read(rfd, &key, &termid))
1014     {
1015         zint this_sys = key.mem[sysno_mem_index];
1016         if (log_level_searchhits)
1017             key_logdump_txt(log_level_searchhits, &key, termid->name);
1018         kno++;
1019         if (this_sys != psysno)
1020         {
1021             int database_no = 0;
1022             if ((sset->hits & 255) == 0 && zh->break_handler_func)
1023             {
1024                 if (zh->break_handler_func(zh->break_handler_data))
1025                 {
1026                     rset_set_hits_limit(rset, 0);
1027                     break;
1028                 }
1029             }
1030             (sset->hits)++;
1031             psysno = this_sys;
1032
1033             /* determine database from the term, but only bother if more than
1034                one database is in use*/
1035             if (numbases > 1 && termid->ol)
1036             {
1037                 const char *this_db = 0;
1038                 if (zebraExplain_lookup_ord(zh->reg->zei, termid->ol->ord,  0, &this_db, 0)
1039                     == 0 && this_db)
1040                 {
1041                     for (ib = 0; ib < numbases; ib++)
1042                         if (!strcmp(this_db, zh->basenames[ib]))
1043                             database_no = ib;
1044                 }
1045             }
1046 #if 0
1047             yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " database_no=%d", this_sys,
1048                 database_no);
1049             ord_list_print(termid->ol);
1050 #endif
1051             resultSetInsertSort(zh, sset, database_no,
1052                                 sort_criteria, num_criteria, psysno, cmp_buf,
1053                                 tmp_cmp_buf);
1054         }
1055     }
1056     rset_close(rfd);
1057
1058     /* free the compare buffers */
1059     for (i = 0; i<num_criteria; i++)
1060     {
1061         xfree(cmp_buf[i]);
1062         xfree(tmp_cmp_buf[i]);
1063     }
1064
1065     yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1066             kno, sset->hits);   
1067     for (i = 0; i < numTerms; i++)
1068         yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1069                 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1070     *sort_status = Z_SortResponse_success;
1071     return ZEBRA_OK;
1072 }
1073
1074 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1075 {
1076     ZebraSet s;
1077
1078     if ((s = resultSetGet(zh, resultSetId)))
1079         return s->rset;
1080     return NULL;
1081 }
1082
1083 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1084                         RSET rset, NMEM nmem)
1085 {
1086     struct it_key key;
1087     TERMID termid;
1088     TERMID *terms;
1089     zint kno = 0;
1090     int numTerms = 0;
1091     int n = 0;
1092     int i;
1093     ZebraRankClass rank_class;
1094     struct zset_sort_info *sort_info;
1095     const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
1096     size_t sysno_mem_index = 0;
1097
1098     if (zh->m_staticrank)
1099         sysno_mem_index = 1;
1100
1101     if (!log_level_set)
1102         loglevels();
1103     sort_info = zebraSet->sort_info;
1104     sort_info->num_entries = 0;
1105     zebraSet->hits = 0;
1106     zebraSet->estimated_hit_count = 0;
1107     rset_getterms(rset, 0, 0, &n);
1108     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1109     rset_getterms(rset, terms, n, &numTerms);
1110
1111     rank_class = zebraRankLookup(zh, rank_handler_name);
1112     if (!rank_class)
1113     {
1114         yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1115         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1116         return ZEBRA_FAIL;
1117     }
1118     else
1119     {
1120         RSFD rfd = rset_open(rset, RSETF_READ);
1121         struct rank_control *rc = rank_class->control;
1122         int score;
1123         zint count = 0;
1124         void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1125                                      nmem, terms, numTerms);
1126         zint psysno = 0;  /* previous doc id / sys no */
1127         zint pstaticrank = 0; /* previous static rank */
1128         int stop_flag = 0;
1129         while (rset_read(rfd, &key, &termid))
1130         {
1131             zint this_sys = key.mem[sysno_mem_index];
1132
1133             zint seqno = key.mem[key.len-1];
1134             kno++;
1135             if (log_level_searchhits)
1136                 key_logdump_txt(log_level_searchhits, &key, termid->name);
1137             if (this_sys != psysno) 
1138             {   /* new record .. */
1139                 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1140                 {
1141                     if (zh->break_handler_func(zh->break_handler_data))
1142                     {
1143                         yaz_log(YLOG_LOG, "Aborted search");
1144                         stop_flag = 1;
1145                     }
1146                 }
1147                 if (rfd->counted_items > rset->hits_limit)
1148                     stop_flag = 1;
1149                 if (stop_flag)
1150                 {
1151                     zebraSet->estimated_hit_count = 1;
1152                     break;
1153                 }
1154                 if (psysno)
1155                 {   /* only if we did have a previous record */
1156                     score = (*rc->calc)(handle, psysno, pstaticrank,
1157                                         &stop_flag);
1158                     /* insert the hit. A=Ascending */
1159                     resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1160                     count++;
1161                 }
1162                 psysno = this_sys;
1163                 if (zh->m_staticrank)
1164                     pstaticrank = key.mem[0];
1165             }
1166             (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1167         }
1168         /* no more items */
1169         if (psysno)
1170         {   /* we had - at least - one record */
1171             score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1172             /* insert the hit. A=Ascending */
1173             resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1174             count++;
1175         }
1176         (*rc->end)(zh->reg, handle);
1177         rset_close(rfd);
1178     }
1179     zebraSet->hits = rset->hits_count;
1180
1181     yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1182             ZINT_FORMAT " sysnos, rank",  kno, zebraSet->hits);
1183     for (i = 0; i < numTerms; i++)
1184     {
1185         yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1186                 ZINT_FORMAT,
1187                 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1188     }
1189     return ZEBRA_OK;
1190 }
1191
1192 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1193 {
1194     ZebraRankClass p = zh->reg->rank_classes;
1195     while (p && strcmp(p->control->name, name))
1196         p = p->next;
1197     if (p && !p->init_flag)
1198     {
1199         if (p->control->create)
1200             p->class_handle = (*p->control->create)(zh);
1201         p->init_flag = 1;
1202     }
1203     return p;
1204 }
1205
1206 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1207 {
1208     ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1209     p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1210     memcpy(p->control, ctrl, sizeof(*p->control));
1211     p->control->name = xstrdup(ctrl->name);
1212     p->init_flag = 0;
1213     p->next = reg->rank_classes;
1214     reg->rank_classes = p;
1215 }
1216
1217 void zebraRankDestroy(struct zebra_register *reg)
1218 {
1219     ZebraRankClass p = reg->rank_classes;
1220     while (p)
1221     {
1222         ZebraRankClass p_next = p->next;
1223         if (p->init_flag && p->control->destroy)
1224             (*p->control->destroy)(reg, p->class_handle);
1225         xfree(p->control->name);
1226         xfree(p->control);
1227         xfree(p);
1228         p = p_next;
1229     }
1230     reg->rank_classes = NULL;
1231 }
1232
1233 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1234                                  zint *hits_array, int *approx_array)
1235 {
1236     int no = 0;
1237     int i;
1238     for (i = 0; i<rset->no_children; i++)
1239         no += trav_rset_for_termids(rset->children[i],
1240                                     (termid_array ? termid_array + no : 0),
1241                                     (hits_array ? hits_array + no : 0),
1242                                     (approx_array ? approx_array + no : 0));
1243     if (rset->term)
1244     {
1245         if (termid_array)
1246             termid_array[no] = rset->term;
1247         if (hits_array)
1248             hits_array[no] = rset->hits_count;
1249         if (approx_array)
1250             approx_array[no] = rset->hits_approx;
1251 #if 0
1252         yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1253                 " count=" ZINT_FORMAT,
1254                 rset, rset->term->name, rset->hits_limit, rset->hits_count);
1255 #endif
1256         no++;
1257     }
1258     return no;
1259 }
1260
1261 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1262                                    int *num_terms)
1263 {
1264     ZebraSet sset = resultSetGet(zh, setname);
1265     *num_terms = 0;
1266     if (sset)
1267     {
1268         *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1269         return ZEBRA_OK;
1270     }
1271     return ZEBRA_FAIL;
1272 }
1273
1274 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1275                                      int no, zint *count, int *approx,
1276                                      char *termbuf, size_t *termlen,
1277                                      const char **term_ref_id)
1278 {
1279     ZebraSet sset = resultSetGet(zh, setname);
1280     if (sset)
1281     {
1282         int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1283         if (no >= 0 && no < num_terms)
1284         {
1285             TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1286             zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1287             int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1288             
1289             trav_rset_for_termids(sset->rset, term_array,
1290                                   hits_array, approx_array);
1291
1292             if (count)
1293                 *count = hits_array[no];
1294             if (approx)
1295                 *approx = approx_array[no];
1296             if (termbuf)
1297             {
1298                 char *inbuf = term_array[no]->name;
1299                 size_t inleft = strlen(inbuf);
1300                 size_t outleft = *termlen - 1;
1301
1302                 if (zh->iconv_from_utf8 != 0)
1303                 {
1304                     char *outbuf = termbuf;
1305                     size_t ret;
1306                     
1307                     ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1308                                     &outbuf, &outleft);
1309                     if (ret == (size_t)(-1))
1310                         *termlen = 0;
1311                     else
1312                     {
1313                         yaz_iconv(zh->iconv_from_utf8, 0, 0, 
1314                                   &outbuf, &outleft);
1315                         *termlen = outbuf - termbuf;
1316                     }
1317                 }
1318                 else
1319                 {
1320                     if (inleft > outleft)
1321                         inleft = outleft;
1322                     *termlen = inleft;
1323                     memcpy(termbuf, inbuf, *termlen);
1324                 }
1325                 termbuf[*termlen] = '\0';
1326             }
1327             if (term_ref_id)
1328                 *term_ref_id = term_array[no]->ref_id;
1329
1330             xfree(term_array);
1331             xfree(hits_array);
1332             xfree(approx_array);
1333             return ZEBRA_OK;
1334         }
1335     }
1336     return ZEBRA_FAIL;
1337 }
1338
1339 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1340                                     zint sysno, zebra_snippets *snippets)
1341 {
1342     ZebraSet sset = resultSetGet(zh, setname);
1343     yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1344             setname, sysno);
1345     if (!sset)
1346         return ZEBRA_FAIL;
1347     else
1348     {
1349         struct rset_key_control *kc = zebra_key_control_create(zh);
1350         NMEM nmem = nmem_create();
1351         struct it_key key;
1352         RSET rsets[2], rset_comb;
1353         RSET rset_temp = rset_create_temp(nmem, kc, kc->scope, 
1354                                           res_get(zh->res, "setTmpDir"),0 );
1355         
1356         TERMID termid;
1357         RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1358         
1359         key.mem[0] = sysno;
1360         key.mem[1] = 0;
1361         key.mem[2] = 0;
1362         key.mem[3] = 0;
1363         key.len = 2;
1364         rset_write(rsfd, &key);
1365         rset_close(rsfd);
1366
1367         rsets[0] = rset_temp;
1368         rsets[1] = rset_dup(sset->rset);
1369         
1370         rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1371
1372         rsfd = rset_open(rset_comb, RSETF_READ);
1373
1374         while (rset_read(rsfd, &key, &termid))
1375         {
1376             if (termid)
1377             {
1378                 struct ord_list *ol;
1379                 for (ol = termid->ol; ol; ol = ol->next)
1380                 {
1381                     zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1382                                           ol->ord, termid->name);
1383                 }
1384             }
1385         }
1386         rset_close(rsfd);
1387         
1388         rset_delete(rset_comb);
1389         nmem_destroy(nmem);
1390         kc->dec(kc);
1391     }
1392     return ZEBRA_OK;
1393 }
1394
1395 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh, 
1396                                       const char **basenames, int num_bases,
1397                                       zint recid,
1398                                       zint *sysnos, int *no_sysnos)
1399 {
1400     ZEBRA_RES res = ZEBRA_OK;
1401     int sysnos_offset = 0;
1402     int i;
1403     
1404     if (!zh->reg->isamb || !zh->m_segment_indexing)
1405     {
1406         if (sysnos_offset < *no_sysnos)
1407             *sysnos = recid;
1408         sysnos_offset++;
1409     }
1410     else
1411     {
1412         for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1413         {
1414             const char *database = basenames[i];
1415             if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1416             {
1417                 const char *index_type = "w";
1418                 const char *use_string = "_ALLRECORDS";
1419                 int ord;
1420                 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1421                 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1422                                                    index_type, use_string);
1423                 if (ord != -1)
1424                 {
1425                     char ord_buf[32];
1426                     int ord_len = key_SU_encode(ord, ord_buf);
1427                     char *info;
1428                 
1429                     ord_buf[ord_len] = '\0';
1430                 
1431                     info = dict_lookup(zh->reg->dict, ord_buf);
1432                     if (info)
1433                     {
1434                         if (*info != sizeof(ISAM_P))
1435                         {
1436                             res = ZEBRA_FAIL;
1437                         }
1438                         else
1439                         {
1440                             ISAM_P isam_p;
1441                             ISAMB_PP pt;
1442                             struct it_key key_until, key_found;
1443                             int i = 0;
1444                             int r;
1445                         
1446                             memcpy(&isam_p, info+1, sizeof(ISAM_P));
1447                         
1448                             pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
1449                             if (!pt)
1450                                 res = ZEBRA_FAIL;
1451                             else
1452                             {
1453                                 key_until.mem[i++] = recid;
1454                                 key_until.mem[i++] = 0;  /* section_id */
1455                                 if (zh->m_segment_indexing)
1456                                     key_until.mem[i++] = 0; /* segment */
1457                                 key_until.mem[i++] = 0;
1458                                 key_until.len = i;
1459                             
1460                                 r = isamb_pp_forward(pt, &key_found, &key_until);
1461                                 while (r && key_found.mem[0] == recid)
1462                                 {
1463                                     if (sysnos_offset < *no_sysnos)
1464                                         sysnos[sysnos_offset++] = 
1465                                             key_found.mem[key_found.len-1];
1466                                     r = isamb_pp_read(pt, &key_found);
1467                                 }
1468                                 isamb_pp_close(pt);
1469                             }
1470                         }
1471                     }
1472                 }
1473             }
1474         }
1475     }
1476     *no_sysnos = sysnos_offset;
1477     return res;
1478 }
1479
1480 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh, 
1481                                       const char *setname,
1482                                       zint recid,
1483                                       zint *sysnos, int *no_sysnos)
1484 {
1485     const char **basenames;
1486     int num_bases;
1487     ZEBRA_RES res;
1488
1489     res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1490     if (res != ZEBRA_OK)
1491         return ZEBRA_FAIL;
1492
1493     return zebra_recid_to_sysno(zh, basenames, num_bases,
1494                                 recid, sysnos, no_sysnos);
1495 }
1496
1497 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1498                      zint approx_limit)
1499 {
1500     zint psysno = 0;
1501     struct it_key key;
1502     RSFD rfd;
1503
1504     yaz_log(YLOG_DEBUG, "count_set");
1505
1506     rset->hits_limit = approx_limit;
1507
1508     *count = 0;
1509     rfd = rset_open(rset, RSETF_READ);
1510     while (rset_read(rfd, &key,0 /* never mind terms */))
1511     {
1512         if (key.mem[0] != psysno)
1513         {
1514             psysno = key.mem[0];
1515             if (rfd->counted_items >= rset->hits_limit)
1516                 break;
1517         }
1518     }
1519     rset_close(rfd);
1520     *count = rset->hits_count;
1521 }
1522                    
1523
1524 /*
1525  * Local variables:
1526  * c-basic-offset: 4
1527  * c-file-style: "Stroustrup"
1528  * indent-tabs-mode: nil
1529  * End:
1530  * vim: shiftwidth=4 tabstop=8 expandtab
1531  */
1532