1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2010 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 #include <yaz/diagbib1.h>
34 #define ZSET_SORT_MAX_LEVEL 10
36 struct zebra_set_term_entry {
47 NMEM rset_nmem; /* for creating the rsets in */
50 const char **basenames; /* database names, num_bases entries (copied in resultSetAddRPN) */
52 Z_SortKeySpecList *sortSpec; /* saved sort spec; resultSetGet uses it to re-sort */
53 struct zset_sort_info *sort_info; /* sorted/ranked entries, allocated in resultSetAdd */
54 struct zebra_set_term_entry *term_entries; /* term table, created on first use in resultSetAddTerm */
56 struct zebra_set *next; /* next set in the zh->sets list */
58 int estimated_hit_count; /* set to 1 in resultSetRank; reported by resultSetAddRPN */
60 zint cache_position; /* last position */
61 RSFD cache_rfd; /* rfd (NULL if not existing) */
62 zint cache_psysno; /* sysno for last position */
63 zint approx_limit; /* limit before we do approx */
/* zset_sort_entry: one sorted/ranked hit (this file reads and writes its
   sysno and score members).
   zset_sort_info: the table of such hits kept per result set. */
66 struct zset_sort_entry {
71 struct zset_sort_info {
74 struct zset_sort_entry *all_entries; /* backing array, max_entries long (resultSetAdd) */
75 struct zset_sort_entry **entries; /* ordered view; entries[i] starts out as all_entries + i */
/* File-scope log levels.  log_level_sort, log_level_searchhits,
   log_level_searchterms and log_level_resultsets are assigned in
   loglevels() from yaz_log_module_level(). */
78 static int log_level_set=0;
79 static int log_level_sort=0;
80 static int log_level_searchhits=0;
81 static int log_level_searchterms=0;
82 static int log_level_resultsets=0;
/* Resolve the yaz log levels of the "sorting", "searchhits", "searchterms"
   and "resultsets" modules and store them in the file-scope variables. */
84 static void loglevels(void)
88 log_level_sort = yaz_log_module_level("sorting");
89 log_level_searchhits = yaz_log_module_level("searchhits");
90 log_level_searchterms = yaz_log_module_level("searchterms");
91 log_level_resultsets = yaz_log_module_level("resultsets");
/* Execute the RPN query `rpn` for result set `sset`.
   A sort sequence with 10 empty spec slots is allocated on `nmem`, the
   approximation limit is fetched with rpn_get_top_approx_limit, and
   rpn_search_top performs the search over sset's databases.  The number of
   specs is then counted up to the first NULL slot, the hit limit is applied
   to the rset, and the rset is handed to resultSetRank or
   resultSetSortSingle.  Returns a ZEBRA_RES status. */
96 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
97 Z_RPNQuery *rpn, ZebraSet sset)
100 Z_SortKeySpecList *sort_sequence;
102 ZEBRA_RES res = ZEBRA_OK;
104 sort_sequence = (Z_SortKeySpecList *)
105 nmem_malloc(nmem, sizeof(*sort_sequence));
106 sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
107 sort_sequence->specs = (Z_SortKeySpec **)
108 nmem_malloc(nmem, sort_sequence->num_specs *
109 sizeof(*sort_sequence->specs));
110 for (i = 0; i<sort_sequence->num_specs; i++)
111 sort_sequence->specs[i] = 0;
113 rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
115 res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
119 sset->num_bases, sset->basenames,
/* NOTE(review): this scan stops only at a NULL slot and has no bound of its
   own; it relies on at least one of the 10 slots staying NULL - confirm. */
126 for (i = 0; sort_sequence->specs[i]; i++)
128 sort_sequence->num_specs = i;
129 rset_set_hits_limit(rset, sset->approx_limit);
133 res = resultSetRank(zh, sset, rset, rset_nmem);
137 res = resultSetSortSingle(zh, nmem, sset, rset,
138 sort_sequence, &sort_status);
/* Create (or overwrite) the result set `setname` and run the RPN query.
   The set is marked locked while the search runs and unlocked afterwards.
   The database names are duplicated into the set's nmem.  On return *hits
   holds the set's hit count and *estimated_hit_count is 1 when the set
   flagged its count as estimated, else 0. */
145 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
146 int num_bases, char **basenames,
148 zint *hits, int *estimated_hit_count)
155 *estimated_hit_count = 0;
157 zebraSet = resultSetAdd(zh, setname, 1);
160 zebraSet->locked = 1;
163 zebraSet->rset_nmem = nmem_create();
165 zebraSet->num_bases = num_bases;
166 zebraSet->basenames =
167 nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
168 for (i = 0; i<num_bases; i++)
169 zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
171 res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
173 *hits = zebraSet->hits;
174 if (zebraSet->estimated_hit_count)
175 *estimated_hit_count = 1;
179 zebraSet->locked = 0;
/* Record one term (register type, database, index name, term string) in
   result set `s`, using s->hits as the slot index.
   The term table is created on first use with room for 1000 entries, each
   with term initialised to 0.  A term is only stored while s->hits is below
   that capacity.  Strings are duplicated into s->nmem. */
185 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
186 const char *db, const char *index_name,
189 assert(zh); /* compiler shut up */
191 s->nmem = nmem_create();
192 if (!s->term_entries)
195 s->term_entries_max = 1000;
197 nmem_malloc(s->nmem, s->term_entries_max *
198 sizeof(*s->term_entries));
199 for (i = 0; i < s->term_entries_max; i++)
200 s->term_entries[i].term = 0;
202 if (s->hits < s->term_entries_max)
204 s->term_entries[s->hits].reg_type = reg_type;
205 s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
206 s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
207 s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
/* Find the result set called `name` in zh->sets, or create it.
   Existing set ("updating"): guarded by the `!ov || s->locked` test; its
   cached rfd, rset and both nmems are released so it can be reused.
   New set ("adding"): the set and its sort_info are xmalloc'ed.  The sort
   table size comes from the "sortmax" resource (default "1000") and is
   raised to 2 if smaller.  entries[] is initialised to point at the
   corresponding all_entries[] element. */
212 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
217 for (s = zh->sets; s; s = s->next)
218 if (!strcmp(s->name, name))
225 yaz_log(log_level_resultsets, "updating result set %s", name);
226 if (!ov || s->locked)
231 rset_close(s->cache_rfd);
232 rset_delete(s->rset);
235 nmem_destroy(s->rset_nmem);
237 nmem_destroy(s->nmem);
241 const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
243 yaz_log(log_level_resultsets, "adding result set %s", name);
244 s = (ZebraSet) xmalloc(sizeof(*s));
247 s->name = xstrdup(name);
249 s->sort_info = (struct zset_sort_info *)
250 xmalloc(sizeof(*s->sort_info));
251 s->sort_info->max_entries = atoi(sort_max_str);
252 if (s->sort_info->max_entries < 2)
253 s->sort_info->max_entries = 2;
255 s->sort_info->entries = (struct zset_sort_entry **)
256 xmalloc(sizeof(*s->sort_info->entries) *
257 s->sort_info->max_entries);
258 s->sort_info->all_entries = (struct zset_sort_entry *)
259 xmalloc(sizeof(*s->sort_info->all_entries) *
260 s->sort_info->max_entries);
261 for (i = 0; i < s->sort_info->max_entries; i++)
262 s->sort_info->entries[i] = s->sort_info->all_entries + i;
272 s->cache_position = 0;
274 s->approx_limit = zh->approx_limit;
275 s->estimated_hit_count = 0;
/* Look up the result set called `name` in zh->sets.
   If the set has neither term entries nor an rset but still has its RPN
   query, the search is re-run ("research") using a fresh nmem.  If that
   yields an rset and the set has a saved sortSpec, it is sorted again
   ("resort"). */
279 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
283 for (s = zh->sets; s; s = s->next)
284 if (!strcmp(s->name, name))
286 if (!s->term_entries && !s->rset && s->rpn)
288 NMEM nmem = nmem_create();
289 yaz_log(log_level_resultsets, "research %s", name);
291 s->rset_nmem = nmem_create();
292 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
293 if (s->rset && s->sortSpec)
296 yaz_log(log_level_resultsets, "resort %s", name);
297 resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
/* Hand out the database names stored in result set `setname`:
   *basenames receives the set's own array (not a copy) and *num_bases its
   length.  The set is located with resultSetGet. */
307 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
308 const char ***basenames, int *num_bases)
310 ZebraSet sset = resultSetGet(zh, setname);
313 *basenames = sset->basenames;
314 *num_bases = sset->num_bases;
/* Walk every result set of `zh` and drop its search state: the cached rfd
   is closed, the rset deleted, cache_position reset to 0 and rset_nmem
   destroyed.  The sets themselves stay in the list. */
319 void resultSetInvalidate(ZebraHandle zh)
321 ZebraSet s = zh->sets;
323 yaz_log(log_level_resultsets, "invalidating result sets");
324 for (; s; s = s->next)
329 rset_close(s->cache_rfd);
330 rset_delete(s->rset);
334 s->cache_position = 0;
336 nmem_destroy(s->rset_nmem);
/* Delete result sets by name.
   statuses[0..num-1] is first preset to
   Z_DeleteStatus_resultSetDidNotExist.  Each set whose name equals
   names[i] gets statuses[i] = Z_DeleteStatus_success.  For a set being
   removed, the sort_info arrays are xfree'd, both nmems destroyed, the
   cached rfd closed and the rset deleted. */
341 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
343 ZebraSet * ss = &zh->sets;
347 for (i = 0; i<num; i++)
348 statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
355 for (i = 0; i<num; i++)
356 if (!strcmp(s->name, names[i]))
359 statuses[i] = Z_DeleteStatus_success;
368 xfree(s->sort_info->all_entries);
369 xfree(s->sort_info->entries);
373 nmem_destroy(s->nmem);
377 rset_close(s->cache_rfd);
378 rset_delete(s->rset);
381 nmem_destroy(s->rset_nmem);
/* Convenience wrapper around zebra_meta_records_create for a run of `num`
   positions.  A position array is filled in a loop and passed on; it is
   either the local pos_small buffer or an array of `num` entries obtained
   with xmalloc.  num <= 0 and num > 10000 are special-cased by the range
   test. */
390 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
395 zint *pos = pos_small;
399 if (num > 10000 || num <= 0)
403 pos = xmalloc(sizeof(*pos) * num);
405 for (i = 0; i<num; i++)
408 mr = zebra_meta_records_create(zh, name, num, pos);
/* Build an xmalloc'ed array of `num` meta records for the given positions
   of result set `name`.  Positions are 1-based: position p maps to index
   p-1.
   - Term sets (sset->term_entries present): term and db are copied from
     the term table.
   - Record sets: positions up to sort_info->num_entries are answered from
     the sorted entries (sysno and score).  Any others are read
     sequentially from the rset, skipping sysnos already present in the
     sorted entries, and get score -1.  Reading resumes from the cached
     rfd/position when the next wanted position lies beyond
     sset->cache_position.  The cache is updated afterwards.
   The sysno is taken from key.mem[sysno_mem_index]. */
415 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name,
416 int num, zint *positions)
419 ZebraMetaRecord *sr = 0;
422 struct zset_sort_info *sort_info;
423 size_t sysno_mem_index = 0;
425 if (zh->m_staticrank)
430 if (!(sset = resultSetGet(zh, name)))
432 if (!(rset = sset->rset))
434 if (!sset->term_entries)
436 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
437 for (i = 0; i<num; i++)
/* NOTE(review): only the upper bound is tested on the next line; confirm
   positions[i] >= 1 is guaranteed before the [positions[i]-1] accesses. */
444 if (positions[i] <= sset->term_entries_max)
446 sr[i].term = sset->term_entries[positions[i]-1].term;
447 sr[i].db = sset->term_entries[positions[i]-1].db;
453 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
454 for (i = 0; i<num; i++)
461 sort_info = sset->sort_info;
466 for (i = 0; i<num; i++)
468 position = positions[i];
469 if (position > 0 && position <= sort_info->num_entries)
471 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
472 " (sorted)", position);
473 sr[i].sysno = sort_info->entries[position-1]->sysno;
474 sr[i].score = sort_info->entries[position-1]->score;
478 /* did we really get all entries using sort ? */
479 for (i = 0; i<num; i++)
484 if (i < num) /* nope, get the rest, unsorted - sorry */
493 position = sort_info->num_entries;
494 while (num_i < num && positions[num_i] <= position)
497 if (sset->cache_rfd &&
498 num_i < num && positions[num_i] > sset->cache_position)
500 position = sset->cache_position;
501 rfd = sset->cache_rfd;
502 psysno = sset->cache_psysno;
507 rset_close(sset->cache_rfd);
508 rfd = rset_open(rset, RSETF_READ);
510 while (num_i < num && rset_read(rfd, &key, 0))
512 zint this_sys = key.mem[sysno_mem_index];
513 if (this_sys != psysno)
518 /* determine whether we already have this in our set */
519 for (i = sort_info->num_entries; --i >= 0; )
520 if (psysno == sort_info->entries[i]->sysno)
527 if (position == positions[num_i])
529 sr[num_i].sysno = psysno;
530 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
531 sr[num_i].score = -1;
536 sset->cache_position = position;
537 sset->cache_psysno = psysno;
538 sset->cache_rfd = rfd;
/* Counterpart of zebra_meta_records_create for the `records` array.
   `zh` is not used apart from the assert.
   NOTE(review): presumably releases `records` - confirm against the full
   body. */
544 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
547 assert(zh); /* compiler shut up about unused arg */
/* Members describing one sort criterion (used as struct sortKeyInfo by
   resultSetInsertSort and resultSetSortSingle). */
553 int *ord; /* array of ord for each database searched */
554 int *numerical; /* array of numerical flags for each database searched */
555 const char *index_type; /* index type passed to zebra_term_untrans for numeric compares */
/* Insert record `sysno` into sset->sort_info according to the sort
   criteria.
   Step 1: for each criterion, zero tmp_cmp_buf[i] and, if the criterion has
   an ord for `database_no` (not -1), read the record's sort entries from
   the sort index.  Of several entries the lowest is kept for relation 'A'
   and the highest for 'D'; each is truncated to SORT_IDX_ENTRYSIZE-1 bytes.
   Step 2: starting from i = num_entries, compare the new keys against the
   keys already held in cmp_buf.  Criteria flagged numerical are compared
   via zebra_term_untrans + atof; the others with memcmp.
   Step 3: take a slot, move the entries and their compare keys up by one to
   make room, then store the new keys in cmp_buf and the entry with
   score -1.
   cmp_buf[k] holds max_entries keys of SORT_IDX_ENTRYSIZE bytes each;
   tmp_cmp_buf[k] holds one key. */
558 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
560 struct sortKeyInfo *criteria, int num_criteria,
562 char *cmp_buf[], char *tmp_cmp_buf[])
564 struct zset_sort_entry *new_entry = NULL;
565 struct zset_sort_info *sort_info = sset->sort_info;
567 WRBUF w = wrbuf_alloc();
569 zebra_sort_sysno(zh->reg->sort_index, sysno);
570 for (i = 0; i<num_criteria; i++)
572 char *this_entry_buf = tmp_cmp_buf[i];
573 memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
575 if (criteria[i].ord[database_no] != -1)
577 yaz_log(log_level_sort, "pre zebra_sort_type ord is %d",
578 criteria[i].ord[database_no]);
579 zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]);
581 if (zebra_sort_read(zh->reg->sort_index, 0, w))
583 /* consider each sort entry and take lowest/highest one
584 of the one as sorting key depending on whether sort is
585 ascending/descending */
587 while (off != wrbuf_len(w))
589 size_t l = strlen(wrbuf_buf(w)+off);
590 assert(off < wrbuf_len(w));
592 if (l >= SORT_IDX_ENTRYSIZE)
593 l = SORT_IDX_ENTRYSIZE-1;
595 || (criteria[i].relation == 'A'
596 && strcmp(wrbuf_buf(w)+off, this_entry_buf) < 0)
597 || (criteria[i].relation == 'D'
598 && strcmp(wrbuf_buf(w)+off, this_entry_buf) > 0)
601 memcpy(this_entry_buf, wrbuf_buf(w)+off, l);
602 this_entry_buf[l] = '\0';
604 off += 1 + strlen(wrbuf_buf(w)+off);
610 yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
614 i = sort_info->num_entries;
618 for (j = 0; j<num_criteria; j++)
620 char *this_entry_buf = tmp_cmp_buf[j];
621 char *other_entry_buf =
622 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
623 if (criteria[j].numerical[database_no])
625 char this_entry_org[1024];
626 char other_entry_org[1024];
628 const char *index_type = criteria[j].index_type;
629 zebra_term_untrans(zh, index_type, this_entry_org,
631 zebra_term_untrans(zh, index_type, other_entry_org,
633 diff = atof(this_entry_org) - atof(other_entry_org);
644 rel = memcmp(this_entry_buf, other_entry_buf,
647 /* when the compare is equal, continue to next criteria,
654 if (criteria[j].relation == 'A')
659 else if (criteria[j].relation == 'D')
666 yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
667 j = sort_info->max_entries;
669 yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
673 if (sort_info->num_entries == j)
676 j = (sort_info->num_entries)++;
677 new_entry = sort_info->entries[j];
678 /* move up all higher entries (to make room) */
682 for (k = 0; k<num_criteria; k++)
684 char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
685 char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
686 memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
688 sort_info->entries[j] = sort_info->entries[j-1];
691 /* and insert the new entry at the correct place */
692 sort_info->entries[i] = new_entry;
694 /* and add this to the compare buffer */
695 for (i = 0; i<num_criteria; i++)
697 char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
698 char *this_entry_buf = tmp_cmp_buf[i];
699 memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
701 new_entry->sysno = sysno;
702 new_entry->score = -1;
/* Ordered insert of (sysno, score) into sort_info->entries.
   The scan starts at i = num_entries and compares `score` with the scores
   of existing entries; `relation` ('A' is tested explicitly) selects the
   ordering.  A slot is then taken from entries[], the entries above the
   insert position are shifted up by one, and the new entry is filled in.
   `zh` is not used apart from the assert. */
705 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
706 zint sysno, int score, int relation)
708 struct zset_sort_entry *new_entry = NULL;
710 assert(zh); /* compiler shut up about unused arg */
712 i = sort_info->num_entries;
717 rel = score - sort_info->entries[i]->score;
724 else if (relation == 'A')
731 j = sort_info->max_entries;
735 if (sort_info->num_entries == j)
738 j = (sort_info->num_entries)++;
740 new_entry = sort_info->entries[j];
743 sort_info->entries[j] = sort_info->entries[j-1];
746 sort_info->entries[i] = new_entry;
748 new_entry->sysno = sysno;
749 new_entry->score = score;
/* Copy an RPN query by round-tripping it through ODR: `src` is encoded
   into a memory buffer, that buffer is decoded again, and the decoder's
   memory is transferred to `nmem`. */
752 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
755 ODR encode = odr_createmem(ODR_ENCODE);
756 ODR decode = odr_createmem(ODR_DECODE);
758 if (z_RPNQuery(encode, &src, 0, 0))
761 char *buf = odr_getbuf(encode, &len, 0);
765 odr_setbuf(decode, buf, len, 0);
766 z_RPNQuery(decode, &dst, 0, 0);
769 nmem_transfer(nmem, decode->mem);
/* Copy a sort key spec list by ODR encode followed by decode (same scheme
   as copy_RPNQuery).  `dst` starts as 0 and is only filled by the decode
   step, which runs when encoding succeeded.  The decoder's memory is
   transferred to `nmem`. */
775 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
777 Z_SortKeySpecList *dst = 0;
778 ODR encode = odr_createmem(ODR_ENCODE);
779 ODR decode = odr_createmem(ODR_DECODE);
781 if (z_SortKeySpecList(encode, &src, 0, 0))
784 char *buf = odr_getbuf(encode, &len, 0);
788 odr_setbuf(decode, buf, len, 0);
789 z_SortKeySpecList(decode, &dst, 0, 0);
792 nmem_transfer(nmem, decode->mem);
/* Create the result set `setname` (overwrite allowed) as a copy of `rset`.
   The database names are duplicated into the new set's nmem, the
   underlying RSET is duplicated with rset_dup, and the RPN query is copied
   with copy_RPNQuery. */
798 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
804 nset = resultSetAdd(zh, setname, 1);
808 nset->nmem = nmem_create();
810 nset->num_bases = rset->num_bases;
812 nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
813 for (i = 0; i<rset->num_bases; i++)
814 nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
817 nset->rset = rset_dup(rset->rset);
819 nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
/* Sort a result set into `output_setname`.
   Input handling:
   - no input set: error YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT
   - more than one: error YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS
   - a set that is unknown or has no rset: error
     YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST
   If the output name differs from the input name the set is cloned first.
   The sort spec is copied onto the set (so resultSetGet can re-sort later)
   and the work is delegated to resultSetSortSingle. */
823 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
824 int num_input_setnames, const char **input_setnames,
825 const char *output_setname,
826 Z_SortKeySpecList *sort_sequence, int *sort_status)
831 if (num_input_setnames == 0)
833 zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
836 if (num_input_setnames > 1)
838 zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
843 yaz_log(log_level_sort, "result set sort input=%s output=%s",
844 *input_setnames, output_setname);
845 sset = resultSetGet(zh, input_setnames[0]);
848 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
852 if (!(rset = sset->rset))
854 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
858 if (strcmp(output_setname, input_setnames[0]))
859 sset = resultSetClone(zh, output_setname, sset);
860 sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
861 return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
/* Sort the records of `rset` into sset->sort_info as described by
   `sort_sequence`.
   - At most ZSET_SORT_MAX_LEVEL criteria are used; extra specs are ignored.
   - For every criterion, ord[] and numerical[] arrays with one slot per
     database (zh->num_basenames) are allocated on `nmem`, initialised to
     -1 and 0, and resolved per database through zebraExplain (sortField)
     or zebra_sort_get_ord (sortAttributes).
   - Unsupported input is reported through zebra_setError: missing-value
     data, a relation other than ascending/descending, database-specific or
     non-generic sort elements, and elementSpec keys.
   - The index type of a criterion is looked up from the ord of the first
     database only.
   - The rset is then read.  On each change of sysno the break handler is
     polled when (sset->hits & 255) == 0, the database number is derived
     from the term's ord list when more than one database is searched, and
     resultSetInsertSort is called.
   - Compare buffers are xmalloc'ed per criterion and freed at the end.
   *sort_status is set to Z_SortResponse_success on completion. */
865 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
866 ZebraSet sset, RSET rset,
867 Z_SortKeySpecList *sort_sequence,
876 struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
877 char *cmp_buf[ZSET_SORT_MAX_LEVEL];
878 char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
884 size_t sysno_mem_index = 0;
886 int numbases = zh->num_basenames;
887 yaz_log(log_level_sort, "searching %d databases",numbases);
889 if (zh->m_staticrank)
892 assert(nmem); /* compiler shut up about unused param */
893 sset->sort_info->num_entries = 0;
895 rset_getterms(rset, 0, 0, &n);
896 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
897 rset_getterms(rset, terms, n, &numTerms);
900 num_criteria = sort_sequence->num_specs;
901 if (num_criteria > ZSET_SORT_MAX_LEVEL)
902 num_criteria = ZSET_SORT_MAX_LEVEL;
903 /* set up the search criteria */
904 for (i = 0; i < num_criteria; i++)
906 Z_SortKeySpec *sks = sort_sequence->specs[i];
910 sort_criteria[i].ord = (int *)
911 nmem_malloc(nmem, sizeof(int)*numbases);
912 sort_criteria[i].numerical = (int *)
913 nmem_malloc(nmem, sizeof(int)*numbases);
915 /* initialize ord and numerical for each database */
916 for (ib = 0; ib < numbases; ib++)
918 sort_criteria[i].ord[ib] = -1;
919 sort_criteria[i].numerical[ib] = 0;
922 if (sks->which == Z_SortKeySpec_missingValueData)
924 zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
927 if (*sks->sortRelation == Z_SortKeySpec_ascending)
928 sort_criteria[i].relation = 'A';
929 else if (*sks->sortRelation == Z_SortKeySpec_descending)
930 sort_criteria[i].relation = 'D';
933 zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
936 if (sks->sortElement->which == Z_SortElement_databaseSpecific)
938 zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
941 else if (sks->sortElement->which != Z_SortElement_generic)
943 zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
946 sk = sks->sortElement->u.generic;
949 case Z_SortKey_sortField:
950 yaz_log(log_level_sort, "key %d is of type sortField", i+1);
951 for (ib = 0; ib < numbases; ib++)
953 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
954 sort_criteria[i].numerical[ib] = 0;
955 sort_criteria[i].ord[ib] =
956 zebraExplain_lookup_attr_str(zh->reg->zei,
957 zinfo_index_category_sort,
959 if (sks->which != Z_SortKeySpec_null
960 && sort_criteria[i].ord[ib] == -1)
963 YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
968 case Z_SortKey_elementSpec:
969 yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
970 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
972 case Z_SortKey_sortAttributes:
973 yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
974 /* for every database we searched, get the sort index file
975 id (ord) and its numerical indication and store them in
977 for (ib = 0; ib < numbases; ib++)
979 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
980 res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
981 &sort_criteria[i].ord[ib],
982 &sort_criteria[i].numerical[ib]);
985 if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
989 /* right now we look up the index type based on the first database
990 if the index_type's can differ between the indexes of different
991 databases (which i guess they can?) then we have to store the
992 index types for each database, just like the ord and numerical */
993 if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
994 &sort_criteria[i].index_type,
997 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
1001 /* allocate space for each compare buf + one extra for tmp comparison */
1002 /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
1003 all other result entries to compare against. This is slowly filled when records are processed.
1004 tmp_cmp_buf is an array with a value of the current record for each criteria
1006 for (i = 0; i<num_criteria; i++)
1008 cmp_buf[i] = xmalloc(sset->sort_info->max_entries
1009 * SORT_IDX_ENTRYSIZE);
1010 tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
1012 rfd = rset_open(rset, RSETF_READ);
1013 while (rset_read(rfd, &key, &termid))
1015 zint this_sys = key.mem[sysno_mem_index];
1016 if (log_level_searchhits)
1017 key_logdump_txt(log_level_searchhits, &key, termid->name);
1019 if (this_sys != psysno)
1021 int database_no = 0;
1022 if ((sset->hits & 255) == 0 && zh->break_handler_func)
1024 if (zh->break_handler_func(zh->break_handler_data))
1026 rset_set_hits_limit(rset, 0);
1033 /* determine database from the term, but only bother if more than
1034 one database is in use*/
1035 if (numbases > 1 && termid->ol)
1037 const char *this_db = 0;
1038 if (zebraExplain_lookup_ord(zh->reg->zei, termid->ol->ord, 0, &this_db, 0)
1041 for (ib = 0; ib < numbases; ib++)
1042 if (!strcmp(this_db, zh->basenames[ib]))
1047 yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " database_no=%d", this_sys,
1049 ord_list_print(termid->ol);
1051 resultSetInsertSort(zh, sset, database_no,
1052 sort_criteria, num_criteria, psysno, cmp_buf,
1058 /* free the compare buffers */
1059 for (i = 0; i<num_criteria; i++)
1062 xfree(tmp_cmp_buf[i]);
1065 yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1067 for (i = 0; i < numTerms; i++)
1068 yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1069 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1070 *sort_status = Z_SortResponse_success;
/* Resolve a result set id to an RSET; the set is located with
   resultSetGet. */
1074 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1078 if ((s = resultSetGet(zh, resultSetId)))
/* Rank the records of `rset` into zebraSet->sort_info.
   The rank handler is named by the "rank" resource (default "rank-1") and
   found with zebraRankLookup.  An unknown handler is logged as a warning
   and reported as YAZ_BIB1_UNSUPP_SEARCH.
   The terms of the rset are collected on `nmem` and passed to the
   handler's begin callback.  While reading keys:
   - a change of sysno marks a new record.  The previous record, if any, is
     scored with the calc callback and inserted with resultSetInsertRank;
     the break handler is polled when (counted_items & 255) == 0;
   - every key is fed to the add callback with its sequence number
     (key.mem[key.len-1]).
   After the loop the last record is scored and inserted, and the end
   callback runs.  With static rank enabled the sysno is read from
   key.mem[1] and the static rank from key.mem[0].
   zebraSet->hits is taken from rset->hits_count. */
1083 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1084 RSET rset, NMEM nmem)
1093 ZebraRankClass rank_class;
1094 struct zset_sort_info *sort_info;
1095 const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
1096 size_t sysno_mem_index = 0;
1098 if (zh->m_staticrank)
1099 sysno_mem_index = 1;
1103 sort_info = zebraSet->sort_info;
1104 sort_info->num_entries = 0;
1106 zebraSet->estimated_hit_count = 0;
1107 rset_getterms(rset, 0, 0, &n);
1108 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1109 rset_getterms(rset, terms, n, &numTerms);
1111 rank_class = zebraRankLookup(zh, rank_handler_name);
1114 yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1115 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1120 RSFD rfd = rset_open(rset, RSETF_READ);
1121 struct rank_control *rc = rank_class->control;
1124 void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1125 nmem, terms, numTerms);
1126 zint psysno = 0; /* previous doc id / sys no */
1127 zint pstaticrank = 0; /* previous static rank */
1129 while (rset_read(rfd, &key, &termid))
1131 zint this_sys = key.mem[sysno_mem_index];
1133 zint seqno = key.mem[key.len-1];
1135 if (log_level_searchhits)
1136 key_logdump_txt(log_level_searchhits, &key, termid->name);
1137 if (this_sys != psysno)
1138 { /* new record .. */
1139 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1141 if (zh->break_handler_func(zh->break_handler_data))
1143 yaz_log(YLOG_LOG, "Aborted search");
1147 if (rfd->counted_items > rset->hits_limit)
1151 zebraSet->estimated_hit_count = 1;
1155 { /* only if we did have a previous record */
1156 score = (*rc->calc)(handle, psysno, pstaticrank,
1158 /* insert the hit. A=Ascending */
1159 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1163 if (zh->m_staticrank)
1164 pstaticrank = key.mem[0];
1166 (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1170 { /* we had - at least - one record */
1171 score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1172 /* insert the hit. A=Ascending */
1173 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1176 (*rc->end)(zh->reg, handle);
1179 zebraSet->hits = rset->hits_count;
1181 yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1182 ZINT_FORMAT " sysnos, rank", kno, zebraSet->hits);
1183 for (i = 0; i < numTerms; i++)
1185 yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1187 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
/* Search zh->reg->rank_classes for the class whose control name equals
   `name`.  If a class is found and its init_flag is still clear, the
   class's create callback (when present) is run and its result stored in
   class_handle. */
1192 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1194 ZebraRankClass p = zh->reg->rank_classes;
1195 while (p && strcmp(p->control->name, name))
1197 if (p && !p->init_flag)
1199 if (p->control->create)
1200 p->class_handle = (*p->control->create)(zh);
/* Register a rank handler with `reg`.  The control structure is copied
   into freshly xmalloc'ed storage and its name duplicated with xstrdup, so
   the caller's `ctrl` is not retained.  The new class is linked in at the
   head of reg->rank_classes. */
1206 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1208 ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1209 p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1210 memcpy(p->control, ctrl, sizeof(*p->control));
1211 p->control->name = xstrdup(ctrl->name);
1213 p->next = reg->rank_classes;
1214 reg->rank_classes = p;
/* Tear down the rank classes of `reg`.  For each class the destroy
   callback is invoked if the class was initialised and provides one, and
   the duplicated control name is freed.  The list head is set to NULL at
   the end. */
1217 void zebraRankDestroy(struct zebra_register *reg)
1219 ZebraRankClass p = reg->rank_classes;
1222 ZebraRankClass p_next = p->next;
1223 if (p->init_flag && p->control->destroy)
1224 (*p->control->destroy)(reg, p->class_handle);
1225 xfree(p->control->name);
1230 reg->rank_classes = NULL;
/* Recursively traverse `rset` and its children, gathering per-term data.
   Each output array is optional: a NULL array is passed down as NULL to
   the children.  Otherwise slot `no` receives the rset's term, hits_count
   or hits_approx respectively.  Children write at offset `no`, the running
   count so far.  The function's result is used by callers as the number of
   terms. */
1233 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1234 zint *hits_array, int *approx_array)
1238 for (i = 0; i<rset->no_children; i++)
1239 no += trav_rset_for_termids(rset->children[i],
1240 (termid_array ? termid_array + no : 0),
1241 (hits_array ? hits_array + no : 0),
1242 (approx_array ? approx_array + no : 0));
1246 termid_array[no] = rset->term;
1248 hits_array[no] = rset->hits_count;
1250 approx_array[no] = rset->hits_approx;
1252 yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1253 " count=" ZINT_FORMAT,
1254 rset, rset->term->name, rset->hits_limit, rset->hits_count);
/* Store the number of terms of result set `setname` in *num_terms, counted
   with trav_rset_for_termids without output arrays. */
1261 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1264 ZebraSet sset = resultSetGet(zh, setname);
1268 *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
/* Report details of term number `no` of result set `setname`.
   `no` is 0-based and must be below the number of terms in the set.
   Outputs: *count (hit count), *approx (approximation flag), the term text
   in termbuf, and *term_ref_id.
   Term text: on entry *termlen is the size of termbuf, and one byte is
   reserved for the terminating NUL.  If zh->iconv_from_utf8 is set the
   term is converted with yaz_iconv and *termlen becomes the number of
   bytes produced.  Otherwise the bytes are copied with memcpy.  termbuf is
   NUL terminated.
   Temporary arrays sized by the number of terms are xmalloc'ed for the
   traversal. */
1274 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1275 int no, zint *count, int *approx,
1276 char *termbuf, size_t *termlen,
1277 const char **term_ref_id)
1279 ZebraSet sset = resultSetGet(zh, setname);
1282 int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1283 if (no >= 0 && no < num_terms)
1285 TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1286 zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1287 int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1289 trav_rset_for_termids(sset->rset, term_array,
1290 hits_array, approx_array);
1293 *count = hits_array[no];
1295 *approx = approx_array[no];
1298 char *inbuf = term_array[no]->name;
1299 size_t inleft = strlen(inbuf);
1300 size_t outleft = *termlen - 1;
1302 if (zh->iconv_from_utf8 != 0)
1304 char *outbuf = termbuf;
1307 ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1309 if (ret == (size_t)(-1))
1313 yaz_iconv(zh->iconv_from_utf8, 0, 0,
1315 *termlen = outbuf - termbuf;
1320 if (inleft > outleft)
1323 memcpy(termbuf, inbuf, *termlen);
1325 termbuf[*termlen] = '\0';
1328 *term_ref_id = term_array[no]->ref_id;
1332 xfree(approx_array);
/* Collect the term hits of one record within result set `setname` and
   append them to `snippets`.
   A temporary rset (stored under the "setTmpDir" resource) is written with
   a key and combined by AND with a duplicate of the set's rset.  Every key
   read from the combination is appended once per ord in its term's ord
   list, using key.mem[key.len-1] as the sequence number.  A private nmem
   and key control are created for the temporary rsets. */
1339 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1340 zint sysno, zebra_snippets *snippets)
1342 ZebraSet sset = resultSetGet(zh, setname);
1343 yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1349 struct rset_key_control *kc = zebra_key_control_create(zh);
1350 NMEM nmem = nmem_create();
1352 RSET rsets[2], rset_comb;
1353 RSET rset_temp = rset_create_temp(nmem, kc, kc->scope,
1354 res_get(zh->res, "setTmpDir"),0 );
1357 RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1364 rset_write(rsfd, &key);
1367 rsets[0] = rset_temp;
1368 rsets[1] = rset_dup(sset->rset);
1370 rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1372 rsfd = rset_open(rset_comb, RSETF_READ);
1374 while (rset_read(rsfd, &key, &termid))
1378 struct ord_list *ol;
1379 for (ol = termid->ol; ol; ol = ol->next)
1381 zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1382 ol->ord, termid->name);
1388 rset_delete(rset_comb);
/* Translate record id `recid` into system numbers.
   On entry *no_sysnos is the capacity of `sysnos`; on return it holds the
   number of entries written.  Entries beyond the capacity are not stored.
   Without an isamb register or without segment indexing a short path is
   taken.  Otherwise, for each database:
   - the "_ALLRECORDS" index (type "w", alwaysmatches category) is looked
     up,
   - its ord is encoded and searched in the dictionary,
   - the ISAM-B list is opened and forwarded to the first key for `recid`,
   - key_found.mem[key_found.len-1] is collected for every key whose mem[0]
     equals `recid`.
   The database loop stops as soon as `res` is no longer ZEBRA_OK. */
1395 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh,
1396 const char **basenames, int num_bases,
1398 zint *sysnos, int *no_sysnos)
1400 ZEBRA_RES res = ZEBRA_OK;
1401 int sysnos_offset = 0;
1404 if (!zh->reg->isamb || !zh->m_segment_indexing)
1406 if (sysnos_offset < *no_sysnos)
1412 for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1414 const char *database = basenames[i];
1415 if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1417 const char *index_type = "w";
1418 const char *use_string = "_ALLRECORDS";
1420 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1421 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1422 index_type, use_string);
1426 int ord_len = key_SU_encode(ord, ord_buf);
1429 ord_buf[ord_len] = '\0';
1431 info = dict_lookup(zh->reg->dict, ord_buf);
1434 if (*info != sizeof(ISAM_P))
1442 struct it_key key_until, key_found;
1446 memcpy(&isam_p, info+1, sizeof(ISAM_P));
1448 pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
1453 key_until.mem[i++] = recid;
1454 key_until.mem[i++] = 0; /* section_id */
1455 if (zh->m_segment_indexing)
1456 key_until.mem[i++] = 0; /* segment */
1457 key_until.mem[i++] = 0;
1460 r = isamb_pp_forward(pt, &key_found, &key_until);
1461 while (r && key_found.mem[0] == recid)
1463 if (sysnos_offset < *no_sysnos)
1464 sysnos[sysnos_offset++] =
1465 key_found.mem[key_found.len-1];
1466 r = isamb_pp_read(pt, &key_found);
1476 *no_sysnos = sysnos_offset;
/* Public entry point: fetch the database names of result set `setname`
   with resultSetGetBaseNames and delegate to zebra_recid_to_sysno.
   `sysnos` and `no_sysnos` are passed through unchanged. */
1480 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh,
1481 const char *setname,
1483 zint *sysnos, int *no_sysnos)
1485 const char **basenames;
1489 res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1490 if (res != ZEBRA_OK)
1493 return zebra_recid_to_sysno(zh, basenames, num_bases,
1494 recid, sysnos, no_sysnos);
/* Count the records of `rset` by reading it through.
   rset->hits_limit is set from approx_limit before reading.  A new record
   is recognised when key.mem[0] differs from the previous one, and
   counted_items is checked against the hit limit on each new record.
   *count is taken from rset->hits_count afterwards. */
1497 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1504 yaz_log(YLOG_DEBUG, "count_set");
1506 rset->hits_limit = approx_limit;
1509 rfd = rset_open(rset, RSETF_READ);
1510 while (rset_read(rfd, &key,0 /* never mind terms */))
1512 if (key.mem[0] != psysno)
1514 psysno = key.mem[0];
1515 if (rfd->counted_items >= rset->hits_limit)
1520 *count = rset->hits_count;
1527 * c-file-style: "Stroustrup"
1528 * indent-tabs-mode: nil
1530 * vim: shiftwidth=4 tabstop=8 expandtab