1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2011 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 #include <yaz/diagbib1.h>
34 #define ZSET_SORT_MAX_LEVEL 10
36 struct zebra_set_term_entry {
47 NMEM rset_nmem; /* for creating the rsets in */
/* database names this set was searched against (copied in resultSetAddRPN) */
50 const char **basenames;
/* sort specification last applied by resultSetSort; resultSetGet re-sorts with it */
52 Z_SortKeySpecList *sortSpec;
/* bounded table of sorted/ranked entries, allocated in resultSetAdd */
53 struct zset_sort_info *sort_info;
/* term table, allocated on first use in resultSetAddTerm */
54 struct zebra_set_term_entry *term_entries;
/* next set in the list headed by zh->sets */
56 struct zebra_set *next;
/* set to 1 by resultSetRank when reading stopped at the hits limit */
58 int estimated_hit_count;
60 zint cache_position; /* last position */
61 RSFD cache_rfd; /* rfd (NULL if not existing) */
62 zint cache_psysno; /* sysno for last position */
63 zint approx_limit; /* limit before we do approx */
/* one sorted/ranked hit; the code in this file reads and writes its sysno and score */
66 struct zset_sort_entry {
/* sort table: entries[] holds pointers into the all_entries[] storage */
71 struct zset_sort_info {
74 struct zset_sort_entry *all_entries;
75 struct zset_sort_entry **entries;
/* YAZ log-level masks for the logging modules used in this file.
   log_level_set is presumably the "already initialised" flag - TODO confirm. */
78 static int log_level_set=0;
79 static int log_level_sort=0;
80 static int log_level_searchhits=0;
81 static int log_level_searchterms=0;
82 static int log_level_resultsets=0;
/* Resolve the per-module log levels by name through yaz_log_module_level(). */
84 static void loglevels(void)
88 log_level_sort = yaz_log_module_level("sorting");
89 log_level_searchhits = yaz_log_module_level("searchhits");
90 log_level_searchterms = yaz_log_module_level("searchterms");
91 log_level_resultsets = yaz_log_module_level("resultsets");
/* Execute the RPN query for result set `sset`.
   A scratch sort-spec list is allocated from `nmem`, the query is run through
   rpn_search_top(), and the resulting rset is then passed to resultSetRank()
   and/or resultSetSortSingle().  The conditions selecting between those two
   calls are not visible in this excerpt.  Returns a ZEBRA_RES status. */
96 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
97 Z_RPNQuery *rpn, ZebraSet sset)
100 Z_SortKeySpecList *sort_sequence;
102 ZEBRA_RES res = ZEBRA_OK;
104 sort_sequence = (Z_SortKeySpecList *)
105 nmem_malloc(nmem, sizeof(*sort_sequence));
/* NOTE(review): 10 equals ZSET_SORT_MAX_LEVEL; presumably the macro is meant - confirm */
106 sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
107 sort_sequence->specs = (Z_SortKeySpec **)
108 nmem_malloc(nmem, sort_sequence->num_specs *
109 sizeof(*sort_sequence->specs));
/* start with every slot empty so the used slots can be counted afterwards */
110 for (i = 0; i<sort_sequence->num_specs; i++)
111 sort_sequence->specs[i] = 0;
113 rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
115 res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
119 sset->num_bases, sset->basenames,
/* count the leading non-NULL specs.
   NOTE(review): the scan has no upper bound; if all slots were filled it
   would read past the end of specs[] - confirm rpn_search_top leaves a
   NULL terminator. */
126 for (i = 0; sort_sequence->specs[i]; i++)
128 sort_sequence->num_specs = i;
129 rset_set_hits_limit(rset, sset->approx_limit);
133 res = resultSetRank(zh, sset, rset, rset_nmem);
137 res = resultSetSortSingle(zh, nmem, sset, rset,
138 sort_sequence, &sort_status);
/* Create (or overwrite) the named result set and run the RPN query into it.
   The set is flagged as locked while the search runs.  On output *hits
   receives the set's hit count and *estimated_hit_count is 1 when that
   count is an estimate, 0 otherwise. */
145 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
146 int num_bases, char **basenames,
148 zint *hits, int *estimated_hit_count)
155 *estimated_hit_count = 0;
/* third argument 1 = allow overwriting an existing set of that name */
157 zebraSet = resultSetAdd(zh, setname, 1);
160 zebraSet->locked = 1;
163 zebraSet->rset_nmem = nmem_create();
/* keep private copies of the database names in the set's own nmem */
165 zebraSet->num_bases = num_bases;
166 zebraSet->basenames =
167 nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
168 for (i = 0; i<num_bases; i++)
169 zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
171 res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
173 *hits = zebraSet->hits;
174 if (zebraSet->estimated_hit_count)
175 *estimated_hit_count = 1;
179 zebraSet->locked = 0;
/* Append one term entry (register type, database, index name, term) to set `s`.
   The term table is allocated on first use with room for 1000 entries;
   the entry is stored at index s->hits only while s->hits is below that
   capacity.  Strings are duplicated into s->nmem. */
185 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
186 const char *db, const char *index_name,
189 assert(zh); /* compiler shut up */
/* NOTE(review): the guard around this nmem_create() is not visible here - confirm */
191 s->nmem = nmem_create();
192 if (!s->term_entries)
195 s->term_entries_max = 1000;
197 nmem_malloc(s->nmem, s->term_entries_max *
198 sizeof(*s->term_entries));
/* mark every slot unused */
199 for (i = 0; i < s->term_entries_max; i++)
200 s->term_entries[i].term = 0;
202 if (s->hits < s->term_entries_max)
204 s->term_entries[s->hits].reg_type = reg_type;
205 s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
206 s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
207 s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
/* Look up result set `name` on the handle, creating it if it does not exist.
   For an existing set the overwrite flag `ov` and the set's `locked` flag are
   tested before its rset, cached rfd and nmems are released (what is returned
   when that test refuses the update is not visible in this excerpt).
   A new set gets a sort table sized by the "sortmax" resource. */
212 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
217 for (s = zh->sets; s; s = s->next)
218 if (!strcmp(s->name, name))
225 yaz_log(log_level_resultsets, "updating result set %s", name);
226 if (!ov || s->locked)
/* drop the old contents of the set being updated */
231 rset_close(s->cache_rfd);
232 rset_delete(s->rset);
235 nmem_destroy(s->rset_nmem);
237 nmem_destroy(s->nmem);
241 const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
243 yaz_log(log_level_resultsets, "adding result set %s", name);
244 s = (ZebraSet) xmalloc(sizeof(*s));
247 s->name = xstrdup(name);
249 s->sort_info = (struct zset_sort_info *)
250 xmalloc(sizeof(*s->sort_info));
/* NOTE(review): atoi() reports no errors; a non-numeric value becomes 0
   and is then raised to the minimum of 2 below */
251 s->sort_info->max_entries = atoi(sort_max_str);
252 if (s->sort_info->max_entries < 2)
253 s->sort_info->max_entries = 2;
255 s->sort_info->entries = (struct zset_sort_entry **)
256 xmalloc(sizeof(*s->sort_info->entries) *
257 s->sort_info->max_entries);
258 s->sort_info->all_entries = (struct zset_sort_entry *)
259 xmalloc(sizeof(*s->sort_info->all_entries) *
260 s->sort_info->max_entries);
/* entries[i] initially points at all_entries[i]; the insert routines
   later permute the pointers rather than the storage */
261 for (i = 0; i < s->sort_info->max_entries; i++)
262 s->sort_info->entries[i] = s->sort_info->all_entries + i;
272 s->cache_position = 0;
274 s->approx_limit = zh->approx_limit;
275 s->estimated_hit_count = 0;
/* Find the result set called `name` on the handle.
   If the set has no term entries and no rset but still holds its RPN query,
   the search is re-run ("research"), and re-sorted when a sort spec was
   stored with the set. */
279 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
283 for (s = zh->sets; s; s = s->next)
284 if (!strcmp(s->name, name))
286 if (!s->term_entries && !s->rset && s->rpn)
/* NOTE(review): the release of this temporary nmem is not visible in this excerpt - confirm */
288 NMEM nmem = nmem_create();
289 yaz_log(log_level_resultsets, "research %s", name);
291 s->rset_nmem = nmem_create();
292 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
293 if (s->rset && s->sortSpec)
296 yaz_log(log_level_resultsets, "resort %s", name);
297 resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
/* Report the database names stored with result set `setname`.
   *basenames and *num_bases point at / copy the set's own fields; the
   array is not duplicated. */
307 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
308 const char ***basenames, int *num_bases)
310 ZebraSet sset = resultSetGet(zh, setname);
313 *basenames = sset->basenames;
314 *num_bases = sset->num_bases;
/* Walk every result set on the handle and release its rset, cached read
   handle and rset nmem, resetting the cached position.  The set records
   themselves stay on the list. */
319 void resultSetInvalidate(ZebraHandle zh)
321 ZebraSet s = zh->sets;
323 yaz_log(log_level_resultsets, "invalidating result sets");
324 for (; s; s = s->next)
329 rset_close(s->cache_rfd);
330 rset_delete(s->rset);
334 s->cache_position = 0;
336 nmem_destroy(s->rset_nmem);
/* Delete result sets from the handle.
   statuses[] (one per entry of names[]) is first filled with
   Z_DeleteStatus_resultSetDidNotExist and switched to
   Z_DeleteStatus_success for each name that matches a set.
   How `num` selects "all sets" versus "named sets" is not visible here. */
341 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
/* pointer to the link being examined, so a set can be unlinked in place (presumably) */
343 ZebraSet * ss = &zh->sets;
347 for (i = 0; i<num; i++)
348 statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
355 for (i = 0; i<num; i++)
356 if (!strcmp(s->name, names[i]))
359 statuses[i] = Z_DeleteStatus_success;
/* free everything owned by the set */
368 xfree(s->sort_info->all_entries);
369 xfree(s->sort_info->entries);
373 nmem_destroy(s->nmem);
377 rset_close(s->cache_rfd);
378 rset_delete(s->rset);
381 nmem_destroy(s->rset_nmem);
/* Build a positions array of `num` entries and hand it to
   zebra_meta_records_create().  `num` is range-checked against 1..10000.
   The array starts out as pos_small and is replaced by an xmalloc'ed
   buffer (the condition for switching is not visible in this excerpt). */
390 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
395 zint *pos = pos_small;
399 if (num > 10000 || num <= 0)
403 pos = xmalloc(sizeof(*pos) * num);
405 for (i = 0; i<num; i++)
408 mr = zebra_meta_records_create(zh, name, num, pos);
/* Build an array of `num` meta records for the 1-based `positions` in
   result set `name`.
   Positions covered by the set's sort table are answered from it; the
   remaining ones are resolved by reading the rset sequentially, resuming
   from the read handle cached on the set when possible.  The array is
   allocated with xmalloc. */
415 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name,
416 int num, zint *positions)
419 ZebraMetaRecord *sr = 0;
422 struct zset_sort_info *sort_info;
/* index into key.mem[] of the sysno; resultSetRank sets it to 1 under
   static rank - presumably the same happens here, TODO confirm */
423 size_t sysno_mem_index = 0;
425 if (zh->m_staticrank)
430 if (!(sset = resultSetGet(zh, name)))
432 if (!(rset = sset->rset))
434 if (!sset->term_entries)
436 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
437 for (i = 0; i<num; i++)
/* NOTE(review): only the upper bound is checked on this line; a position
   <= 0 would index term_entries[] below zero unless guarded elsewhere */
444 if (positions[i] <= sset->term_entries_max)
446 sr[i].term = sset->term_entries[positions[i]-1].term;
447 sr[i].db = sset->term_entries[positions[i]-1].db;
453 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
454 for (i = 0; i<num; i++)
461 sort_info = sset->sort_info;
/* first pass: serve positions that fall inside the sort table */
466 for (i = 0; i<num; i++)
468 position = positions[i];
469 if (position > 0 && position <= sort_info->num_entries)
471 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
472 " (sorted)", position);
473 sr[i].sysno = sort_info->entries[position-1]->sysno;
474 sr[i].score = sort_info->entries[position-1]->score;
478 /* did we really get all entries using sort ? */
479 for (i = 0; i<num; i++)
484 if (i < num) /* nope, get the rest, unsorted - sorry */
493 position = sort_info->num_entries;
/* skip the requested positions the sort table already answered */
494 while (num_i < num && positions[num_i] <= position)
/* resume from the cached read handle when the next wanted position lies
   beyond the cached one; otherwise the rset is reopened below */
497 if (sset->cache_rfd &&
498 num_i < num && positions[num_i] > sset->cache_position)
500 position = sset->cache_position;
501 rfd = sset->cache_rfd;
502 psysno = sset->cache_psysno;
507 rset_close(sset->cache_rfd);
508 rfd = rset_open(rset, RSETF_READ);
510 while (num_i < num && rset_read(rfd, &key, 0))
512 zint this_sys = key.mem[sysno_mem_index];
513 if (this_sys != psysno)
518 /* determine whether we already have this in our set */
519 for (i = sort_info->num_entries; --i >= 0; )
520 if (psysno == sort_info->entries[i]->sysno)
527 if (position == positions[num_i])
529 sr[num_i].sysno = psysno;
530 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
/* unsorted hits carry no score */
531 sr[num_i].score = -1;
/* remember where reading stopped so a later call can continue from here */
536 sset->cache_position = position;
537 sset->cache_psysno = psysno;
538 sset->cache_rfd = rfd;
/* Release a records array obtained from zebra_meta_records_create();
   `zh` is only asserted.  The release itself is not visible in this excerpt. */
544 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
547 assert(zh); /* compiler shut up about unused arg */
553 int *ord; /* array of ord for each database searched */
554 int *numerical; /* array of numeric-compare flags for each database searched */
/* index type of the sort index; looked up from the first database's ord
   in resultSetSortSingle */
555 const char *index_type;
/* Insert record `sysno` into the sort table of `sset`.
   For each criterion the record's key is read from the sort index into
   tmp_cmp_buf[i]; the insertion point is then found by comparing against the
   keys of the entries already in the table (cmp_buf), later entries are
   shifted one slot and the new entry is stored with score -1.
   cmp_buf[k] holds one SORT_IDX_ENTRYSIZE slot per table entry. */
558 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
560 struct sortKeyInfo *criteria, int num_criteria,
562 char *cmp_buf[], char *tmp_cmp_buf[])
564 struct zset_sort_entry *new_entry = NULL;
565 struct zset_sort_info *sort_info = sset->sort_info;
/* NOTE(review): the matching release of this WRBUF is not visible in this excerpt - confirm */
567 WRBUF w = wrbuf_alloc();
569 zebra_sort_sysno(zh->reg->sort_index, sysno);
570 for (i = 0; i<num_criteria; i++)
572 char *this_entry_buf = tmp_cmp_buf[i];
573 memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
/* ord -1 means no sort index for this criterion in this database */
575 if (criteria[i].ord[database_no] != -1)
577 yaz_log(log_level_sort, "pre zebra_sort_type ord is %d",
578 criteria[i].ord[database_no]);
579 zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]);
581 if (zebra_sort_read(zh->reg->sort_index, 0, w))
583 /* consider each sort entry and take lowest/highest one
584 of the one as sorting key depending on whether sort is
585 ascending/descending */
/* w holds consecutive NUL-terminated strings; off walks from one to the next */
587 while (off != wrbuf_len(w))
589 size_t l = strlen(wrbuf_buf(w)+off);
590 assert(off < wrbuf_len(w));
/* clamp so the copy plus terminator fits one entry slot */
592 if (l >= SORT_IDX_ENTRYSIZE)
593 l = SORT_IDX_ENTRYSIZE-1;
595 || (criteria[i].relation == 'A'
596 && strcmp(wrbuf_buf(w)+off, this_entry_buf) < 0)
597 || (criteria[i].relation == 'D'
598 && strcmp(wrbuf_buf(w)+off, this_entry_buf) > 0)
601 memcpy(this_entry_buf, wrbuf_buf(w)+off, l);
602 this_entry_buf[l] = '\0';
604 off += 1 + strlen(wrbuf_buf(w)+off);
610 yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
/* search for the insertion point, starting from the end of the table */
614 i = sort_info->num_entries;
618 for (j = 0; j<num_criteria; j++)
620 char *this_entry_buf = tmp_cmp_buf[j];
621 char *other_entry_buf =
622 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
623 if (criteria[j].numerical[database_no])
625 char this_entry_org[1024];
626 char other_entry_org[1024];
628 const char *index_type = criteria[j].index_type;
/* numeric criteria: map both keys back with zebra_term_untrans and compare as numbers */
629 zebra_term_untrans(zh, index_type, this_entry_org,
631 zebra_term_untrans(zh, index_type, other_entry_org,
633 diff = atof(this_entry_org) - atof(other_entry_org);
644 rel = memcmp(this_entry_buf, other_entry_buf,
647 /* when the compare is equal, continue to next criteria,
654 if (criteria[j].relation == 'A')
659 else if (criteria[j].relation == 'D')
666 yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
667 j = sort_info->max_entries;
669 yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
673 if (sort_info->num_entries == j)
676 j = (sort_info->num_entries)++;
677 new_entry = sort_info->entries[j];
678 /* move up all higher entries (to make room) */
682 for (k = 0; k<num_criteria; k++)
684 char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
685 char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
686 memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
688 sort_info->entries[j] = sort_info->entries[j-1];
691 /* and insert the new entry at the correct place */
692 sort_info->entries[i] = new_entry;
694 /* and add this to the compare buffer */
695 for (i = 0; i<num_criteria; i++)
697 char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
698 char *this_entry_buf = tmp_cmp_buf[i];
699 memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
701 new_entry->sysno = sysno;
/* sorted (as opposed to ranked) entries carry no score */
702 new_entry->score = -1;
/* Insert a ranked hit (sysno, score) into `sort_info`.
   The insertion point is found by comparing `score` with the scores of the
   existing entries, starting from the end of the table; later entries are
   shifted one slot and the new entry is written in place.  `relation`
   ('A' is tested below) selects the ordering. */
705 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
706 zint sysno, int score, int relation)
708 struct zset_sort_entry *new_entry = NULL;
710 assert(zh); /* compiler shut up about unused arg */
712 i = sort_info->num_entries;
717 rel = score - sort_info->entries[i]->score;
724 else if (relation == 'A')
731 j = sort_info->max_entries;
735 if (sort_info->num_entries == j)
/* use the next free slot and grow the entry count */
738 j = (sort_info->num_entries)++;
740 new_entry = sort_info->entries[j];
/* shift entry pointers up to make room */
743 sort_info->entries[j] = sort_info->entries[j-1];
746 sort_info->entries[i] = new_entry;
748 new_entry->sysno = sysno;
749 new_entry->score = score;
/* Deep-copy an RPN query by encoding it with one ODR stream and decoding the
   bytes with another.  The decoder's memory is then transferred into `nmem`. */
752 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
755 ODR encode = odr_createmem(ODR_ENCODE);
756 ODR decode = odr_createmem(ODR_DECODE);
758 if (z_RPNQuery(encode, &src, 0, 0))
761 char *buf = odr_getbuf(encode, &len, 0);
765 odr_setbuf(decode, buf, len, 0);
766 z_RPNQuery(decode, &dst, 0, 0);
769 nmem_transfer(nmem, decode->mem);
/* Deep-copy a sort key spec list by encoding it with one ODR stream and
   decoding the bytes with another.  The decoder's memory is then
   transferred into `nmem`.  `dst` stays 0 if encoding fails. */
775 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
777 Z_SortKeySpecList *dst = 0;
778 ODR encode = odr_createmem(ODR_ENCODE);
779 ODR decode = odr_createmem(ODR_DECODE);
781 if (z_SortKeySpecList(encode, &src, 0, 0))
784 char *buf = odr_getbuf(encode, &len, 0);
788 odr_setbuf(decode, buf, len, 0);
789 z_SortKeySpecList(decode, &dst, 0, 0);
792 nmem_transfer(nmem, decode->mem);
/* Create (or overwrite) result set `setname` as a copy of `rset`:
   the database names are duplicated, the underlying rset is shared through
   rset_dup() and the RPN query is deep-copied into the new set's nmem. */
798 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
804 nset = resultSetAdd(zh, setname, 1);
808 nset->nmem = nmem_create();
810 nset->num_bases = rset->num_bases;
812 nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
813 for (i = 0; i<rset->num_bases; i++)
814 nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
817 nset->rset = rset_dup(rset->rset);
819 nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
/* Sort one input result set into `output_setname`.
   Exactly one input set is supported: zero or more than one input name, or
   an input set that is missing or has no rset, sets a Bib-1 diagnostic.
   When the output name differs from the input name the set is cloned
   first; the sort spec is stored on the set (for later re-sorting) before
   the work is handed to resultSetSortSingle(). */
823 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
824 int num_input_setnames, const char **input_setnames,
825 const char *output_setname,
826 Z_SortKeySpecList *sort_sequence, int *sort_status)
831 if (num_input_setnames == 0)
833 zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
836 if (num_input_setnames > 1)
838 zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
843 yaz_log(log_level_sort, "result set sort input=%s output=%s",
844 *input_setnames, output_setname);
845 sset = resultSetGet(zh, input_setnames[0]);
848 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
852 if (!(rset = sset->rset))
854 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
/* different output name: sort a clone and leave the input set untouched */
858 if (strcmp(output_setname, input_setnames[0]))
859 sset = resultSetClone(zh, output_setname, sset);
860 sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
861 return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
/* Sort the records of `rset` into the sort table of `sset` according to
   `sort_sequence`.
   At most ZSET_SORT_MAX_LEVEL criteria are used.  For every criterion the
   sort-index ord and numeric flag are resolved per database; unsupported
   specs set a Bib-1 diagnostic.  The rset is then read once and every new
   sysno is passed to resultSetInsertSort().  On completion *sort_status is
   set to Z_SortResponse_success. */
865 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
866 ZebraSet sset, RSET rset,
867 Z_SortKeySpecList *sort_sequence,
876 struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
877 char *cmp_buf[ZSET_SORT_MAX_LEVEL];
878 char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
884 size_t sysno_mem_index = 0;
/* NOTE(review): uses the handle's current database list rather than
   sset->num_bases / sset->basenames - confirm the two always agree */
886 int numbases = zh->num_basenames;
887 yaz_log(log_level_sort, "searching %d databases",numbases);
889 if (zh->m_staticrank)
892 assert(nmem); /* compiler shut up about unused param */
893 sset->sort_info->num_entries = 0;
/* first call obtains the term count, second call fills the array */
895 rset_getterms(rset, 0, 0, &n);
896 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
897 rset_getterms(rset, terms, n, &numTerms);
900 num_criteria = sort_sequence->num_specs;
901 if (num_criteria > ZSET_SORT_MAX_LEVEL)
902 num_criteria = ZSET_SORT_MAX_LEVEL;
903 /* set up the search criteria */
904 for (i = 0; i < num_criteria; i++)
906 Z_SortKeySpec *sks = sort_sequence->specs[i];
909 sort_criteria[i].ord = (int *)
910 nmem_malloc(nmem, sizeof(int)*numbases);
911 sort_criteria[i].numerical = (int *)
912 nmem_malloc(nmem, sizeof(int)*numbases);
914 /* initialize ord and numerical for each database */
915 for (ib = 0; ib < numbases; ib++)
917 sort_criteria[i].ord[ib] = -1;
918 sort_criteria[i].numerical[ib] = 0;
921 if (sks->which == Z_SortKeySpec_missingValueData)
923 zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
/* 'A' / 'D' are the relation codes tested by resultSetInsertSort */
926 if (*sks->sortRelation == Z_SortKeySpec_ascending)
927 sort_criteria[i].relation = 'A';
928 else if (*sks->sortRelation == Z_SortKeySpec_descending)
929 sort_criteria[i].relation = 'D';
932 zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
935 if (sks->sortElement->which == Z_SortElement_databaseSpecific)
937 zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
940 else if (sks->sortElement->which != Z_SortElement_generic)
942 zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
945 sk = sks->sortElement->u.generic;
948 case Z_SortKey_sortField:
949 yaz_log(log_level_sort, "key %d is of type sortField", i+1);
950 for (ib = 0; ib < numbases; ib++)
952 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
953 sort_criteria[i].numerical[ib] = 0;
954 sort_criteria[i].ord[ib] =
955 zebraExplain_lookup_attr_str(zh->reg->zei,
956 zinfo_index_category_sort,
958 if (sks->which != Z_SortKeySpec_null
959 && sort_criteria[i].ord[ib] == -1)
962 YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
967 case Z_SortKey_elementSpec:
968 yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
969 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
971 case Z_SortKey_sortAttributes:
972 yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
973 /* for every database we searched, get the sort index file
974 id (ord) and its numerical indication and store them in
976 for (ib = 0; ib < numbases; ib++)
978 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
979 if (zebra_sort_get_ord(zh, sk->u.sortAttributes,
980 &sort_criteria[i].ord[ib],
981 &sort_criteria[i].numerical[ib]) !=
982 ZEBRA_OK && sks->which != Z_SortKeySpec_null)
987 /* right now we look up the index type based on the first database
988 if the index_type's can differ between the indexes of different
989 databases (which i guess they can?) then we have to store the
990 index types for each database, just like the ord and numerical */
991 if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
992 &sort_criteria[i].index_type,
995 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
999 /* allocate space for each compare buf + one extra for tmp comparison */
1000 /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
1001 all other result entries to compare against. This is slowly filled when records are processed.
1002 tmp_cmp_buf is an array with a value of the current record for each criteria
1004 for (i = 0; i<num_criteria; i++)
1006 cmp_buf[i] = xmalloc(sset->sort_info->max_entries
1007 * SORT_IDX_ENTRYSIZE);
1008 tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
1010 rfd = rset_open(rset, RSETF_READ);
1011 while (rset_read(rfd, &key, &termid))
1013 zint this_sys = key.mem[sysno_mem_index];
1014 if (log_level_searchhits)
1015 key_logdump_txt(log_level_searchhits, &key, termid->name);
1017 if (this_sys != psysno)
1019 int database_no = 0;
1020 if ((sset->hits & 255) == 0 && zh->break_handler_func)
1022 if (zh->break_handler_func(zh->break_handler_data))
1024 rset_set_hits_limit(rset, 0);
1031 /* determine database from the term, but only bother if more than
1032 one database is in use*/
1033 if (numbases > 1 && termid->ol)
1035 const char *this_db = 0;
1036 if (zebraExplain_lookup_ord(zh->reg->zei, termid->ol->ord, 0, &this_db, 0)
/* map the database name back to its index in zh->basenames */
1039 for (ib = 0; ib < numbases; ib++)
1040 if (!strcmp(this_db, zh->basenames[ib]))
1045 yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " database_no=%d", this_sys,
1047 ord_list_print(termid->ol);
1049 resultSetInsertSort(zh, sset, database_no,
1050 sort_criteria, num_criteria, psysno, cmp_buf,
1056 /* free the compare buffers */
1057 for (i = 0; i<num_criteria; i++)
1060 xfree(tmp_cmp_buf[i]);
1063 yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1065 for (i = 0; i < numTerms; i++)
1066 yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1067 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1068 *sort_status = Z_SortResponse_success;
/* Look up result set `resultSetId` through resultSetGet() and return an RSET
   for it; what is returned in each case is not visible in this excerpt. */
1072 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1076 if ((s = resultSetGet(zh, resultSetId)))
/* Rank the records of `rset` into the sort table of `zebraSet`.
   The rank handler named by the "rank" resource (default "rank-1") is looked
   up; if it is missing, a warning is logged and a Bib-1 diagnostic set.
   Otherwise the rset is read key by key: each key is fed to the handler's
   add(), and whenever the sysno changes the previous record's score is
   computed with calc() and inserted through resultSetInsertRank().
   Finally the set's hit count is taken from rset->hits_count. */
1081 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1082 RSET rset, NMEM nmem)
1091 ZebraRankClass rank_class;
1092 struct zset_sort_info *sort_info;
1093 const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
1094 size_t sysno_mem_index = 0;
/* with static rank, key.mem[0] holds the static rank and the sysno moves to mem[1] */
1096 if (zh->m_staticrank)
1097 sysno_mem_index = 1;
1101 sort_info = zebraSet->sort_info;
1102 sort_info->num_entries = 0;
1104 zebraSet->estimated_hit_count = 0;
/* first call obtains the term count, second call fills the array */
1105 rset_getterms(rset, 0, 0, &n);
1106 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1107 rset_getterms(rset, terms, n, &numTerms);
1109 rank_class = zebraRankLookup(zh, rank_handler_name);
1112 yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1113 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1118 RSFD rfd = rset_open(rset, RSETF_READ);
1119 struct rank_control *rc = rank_class->control;
1122 void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1123 nmem, terms, numTerms);
1124 zint psysno = 0; /* previous doc id / sys no */
1125 zint pstaticrank = 0; /* previous static rank */
1127 while (rset_read(rfd, &key, &termid))
1129 zint this_sys = key.mem[sysno_mem_index];
/* the last component of the key is used as the sequence number */
1131 zint seqno = key.mem[key.len-1];
1133 if (log_level_searchhits)
1134 key_logdump_txt(log_level_searchhits, &key, termid->name);
1135 if (this_sys != psysno)
1136 { /* new record .. */
/* poll the break handler once every 256 counted items */
1137 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1139 if (zh->break_handler_func(zh->break_handler_data))
1141 yaz_log(YLOG_LOG, "Aborted search");
/* past the hits limit: the hit count is flagged as an estimate */
1145 if (rfd->counted_items > rset->hits_limit)
1149 zebraSet->estimated_hit_count = 1;
1153 { /* only if we did have a previous record */
1154 score = (*rc->calc)(handle, psysno, pstaticrank,
1156 /* insert the hit. A=Ascending */
1157 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1161 if (zh->m_staticrank)
1162 pstaticrank = key.mem[0];
1164 (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1168 { /* we had - at least - one record */
1169 score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1170 /* insert the hit. A=Ascending */
1171 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1174 (*rc->end)(zh->reg, handle);
1177 zebraSet->hits = rset->hits_count;
1179 yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1180 ZINT_FORMAT " sysnos, rank", kno, zebraSet->hits);
1181 for (i = 0; i < numTerms; i++)
1183 yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1185 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
/* Find the rank class whose control name equals `name` in the register's
   list.  On first use of a class (init_flag not yet set) its optional
   create() hook is called to obtain the class handle. */
1190 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1192 ZebraRankClass p = zh->reg->rank_classes;
1193 while (p && strcmp(p->control->name, name))
1195 if (p && !p->init_flag)
1197 if (p->control->create)
1198 p->class_handle = (*p->control->create)(zh);
/* Register a rank handler: a private copy of `ctrl` (including a duplicate
   of its name) is pushed onto the front of reg->rank_classes. */
1204 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1206 ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1207 p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1208 memcpy(p->control, ctrl, sizeof(*p->control));
/* the memcpy above copied only the pointer; give the copy its own string */
1209 p->control->name = xstrdup(ctrl->name);
1211 p->next = reg->rank_classes;
1212 reg->rank_classes = p;
/* Tear down every registered rank class: the destroy() hook is called for
   classes that were initialised, the duplicated name is freed, and the
   register's list is reset to NULL. */
1215 void zebraRankDestroy(struct zebra_register *reg)
1217 ZebraRankClass p = reg->rank_classes;
/* save the link before the node is released */
1220 ZebraRankClass p_next = p->next;
1221 if (p->init_flag && p->control->destroy)
1222 (*p->control->destroy)(reg, p->class_handle);
1223 xfree(p->control->name);
1228 reg->rank_classes = NULL;
/* Recursively walk the rset tree and collect per-term information.
   Each of the three output arrays may be NULL (0), in which case it is
   skipped; with all three NULL the function only counts.  Children write
   at offset `no`, the running count.  Returns the number of entries. */
1231 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1232 zint *hits_array, int *approx_array)
1236 for (i = 0; i<rset->no_children; i++)
1237 no += trav_rset_for_termids(rset->children[i],
1238 (termid_array ? termid_array + no : 0),
1239 (hits_array ? hits_array + no : 0),
1240 (approx_array ? approx_array + no : 0));
/* record this node's own term, hit count and approximation flag */
1244 termid_array[no] = rset->term;
1246 hits_array[no] = rset->hits_count;
1248 approx_array[no] = rset->hits_approx;
1250 yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1251 " count=" ZINT_FORMAT,
1252 rset, rset->term->name, rset->hits_limit, rset->hits_count);
/* Store in *num_terms the number of terms in result set `setname`,
   obtained by a count-only traversal of its rset tree. */
1259 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1262 ZebraSet sset = resultSetGet(zh, setname);
1266 *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
/* Return information about term number `no` (0-based) of result set
   `setname`: hit count, approximation flag, the term text copied into
   `termbuf` (converted with zh->iconv_from_utf8 when configured, and
   NUL-terminated) with its length in *termlen, and the term's reference id.
   `no` must lie in [0, number of terms). */
1272 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1273 int no, zint *count, int *approx,
1274 char *termbuf, size_t *termlen,
1275 const char **term_ref_id)
1277 ZebraSet sset = resultSetGet(zh, setname);
/* first traversal only counts, so the arrays can be sized */
1280 int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1281 if (no >= 0 && no < num_terms)
1283 TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1284 zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1285 int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1287 trav_rset_for_termids(sset->rset, term_array,
1288 hits_array, approx_array);
1291 *count = hits_array[no];
1293 *approx = approx_array[no];
1296 char *inbuf = term_array[no]->name;
1297 size_t inleft = strlen(inbuf);
/* one byte is reserved for the terminating NUL.
   NOTE(review): wraps around if *termlen is 0 - confirm callers never pass 0 */
1298 size_t outleft = *termlen - 1;
1300 if (zh->iconv_from_utf8 != 0)
1302 char *outbuf = termbuf;
1305 ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1307 if (ret == (size_t)(-1))
1311 yaz_iconv(zh->iconv_from_utf8, 0, 0,
1313 *termlen = outbuf - termbuf;
1318 if (inleft > outleft)
1321 memcpy(termbuf, inbuf, *termlen);
1323 termbuf[*termlen] = '\0';
1326 *term_ref_id = term_array[no]->ref_id;
1330 xfree(approx_array);
/* Collect snippet hits for record `sysno` within result set `setname`.
   A temporary rset containing a key for the record is AND-combined with a
   duplicate of the set's rset; every key read from the combination is
   appended to `snippets` once per ord of the matching term. */
1337 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1338 zint sysno, zebra_snippets *snippets)
1340 ZebraSet sset = resultSetGet(zh, setname);
1341 yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1347 struct rset_key_control *kc = zebra_key_control_create(zh);
1348 NMEM nmem = nmem_create();
1350 RSET rsets[2], rset_comb;
1351 RSET rset_temp = rset_create_temp(nmem, kc, kc->scope,
1352 res_get(zh->res, "setTmpDir"),0 );
1355 RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1362 rset_write(rsfd, &key);
1365 rsets[0] = rset_temp;
1366 rsets[1] = rset_dup(sset->rset);
1368 rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1370 rsfd = rset_open(rset_comb, RSETF_READ);
1372 while (rset_read(rsfd, &key, &termid))
1376 struct ord_list *ol;
1377 for (ol = termid->ol; ol; ol = ol->next)
/* the last key component is passed as the position of the hit */
1379 zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1380 ol->ord, termid->name);
1386 rset_delete(rset_comb);
/* Translate record id `recid` into the sysnos stored for it.
   On entry *no_sysnos is the capacity of sysnos[]; on return it is the
   number of entries written.  When there is no ISAMB register or segment
   indexing is off, a separate short path is taken (its body is not visible
   in this excerpt).  Otherwise the "_ALLRECORDS" always-matches index of
   each database is scanned for keys whose first component equals recid. */
1393 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh,
1394 const char **basenames, int num_bases,
1396 zint *sysnos, int *no_sysnos)
1398 ZEBRA_RES res = ZEBRA_OK;
1399 int sysnos_offset = 0;
1402 if (!zh->reg->isamb || !zh->m_segment_indexing)
1404 if (sysnos_offset < *no_sysnos)
1410 for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1412 const char *database = basenames[i];
1413 if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1415 const char *index_type = "w";
1416 const char *use_string = "_ALLRECORDS";
1418 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1419 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1420 index_type, use_string);
/* the encoded ord, NUL-terminated, is the dictionary key */
1424 int ord_len = key_SU_encode(ord, ord_buf);
1427 ord_buf[ord_len] = '\0';
1429 info = dict_lookup(zh->reg->dict, ord_buf);
/* the first info byte is compared with the size of the ISAM_P stored after it */
1432 if (*info != sizeof(ISAM_P))
1440 struct it_key key_until, key_found;
1444 memcpy(&isam_p, info+1, sizeof(ISAM_P));
1446 pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
/* build the lower-bound key (recid, 0, ...).
   NOTE(review): `i` here is presumably a block-local index declared on a
   line missing from this excerpt, not the database loop counter - confirm */
1451 key_until.mem[i++] = recid;
1452 key_until.mem[i++] = 0; /* section_id */
1453 if (zh->m_segment_indexing)
1454 key_until.mem[i++] = 0; /* segment */
1455 key_until.mem[i++] = 0;
1458 r = isamb_pp_forward(pt, &key_found, &key_until);
1459 while (r && key_found.mem[0] == recid)
/* keys beyond the caller's capacity are read but not stored */
1461 if (sysnos_offset < *no_sysnos)
1462 sysnos[sysnos_offset++] =
1463 key_found.mem[key_found.len-1];
1464 r = isamb_pp_read(pt, &key_found);
1474 *no_sysnos = sysnos_offset;
/* Public wrapper: fetch the database names of result set `setname` and
   translate `recid` into sysnos for those databases.  A failure to get the
   base names is tested before the translation is attempted. */
1478 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh,
1479 const char *setname,
1481 zint *sysnos, int *no_sysnos)
1483 const char **basenames;
1487 res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1488 if (res != ZEBRA_OK)
1491 return zebra_recid_to_sysno(zh, basenames, num_bases,
1492 recid, sysnos, no_sysnos);
/* Count the hits of `rset` by reading it through, with the rset's hits limit
   set to `approx_limit`.  *count receives rset->hits_count afterwards. */
1495 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1502 yaz_log(YLOG_DEBUG, "count_set");
1504 rset->hits_limit = approx_limit;
1507 rfd = rset_open(rset, RSETF_READ);
1508 while (rset_read(rfd, &key,0 /* never mind terms */))
/* NOTE(review): uses key.mem[0] directly, whereas resultSetRank uses
   mem[1] as the sysno under static rank - confirm this is intended */
1510 if (key.mem[0] != psysno)
1512 psysno = key.mem[0];
/* the limit is tested once per new record */
1513 if (rfd->counted_items >= rset->hits_limit)
1518 *count = rset->hits_count;
1525 * c-file-style: "Stroustrup"
1526 * indent-tabs-mode: nil
1528 * vim: shiftwidth=4 tabstop=8 expandtab