1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2009 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later version.
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 #include <yaz/diagbib1.h>
34 #define ZSET_SORT_MAX_LEVEL 10
36 struct zebra_set_term_entry {
47 NMEM rset_nmem; /* for creating the rsets in */
50 const char **basenames;
52 Z_SortKeySpecList *sortSpec;
53 struct zset_sort_info *sort_info;
54 struct zebra_set_term_entry *term_entries;
56 struct zebra_set *next;
58 int estimated_hit_count;
60 zint cache_position; /* last position */
61 RSFD cache_rfd; /* rfd (NULL if not existing) */
62 zint cache_psysno; /* sysno for last position */
63 zint approx_limit; /* limit before we do approx */
66 struct zset_sort_entry {
/* Sorted view of a result set. */
71 struct zset_sort_info {
/* Backing storage for the entries; resultSetAdd() allocates max_entries
   elements. */
74 struct zset_sort_entry *all_entries;
/* Pointers into all_entries; the insert functions reorder these pointers to
   keep the entries in sort order. */
75 struct zset_sort_entry **entries;
/* Log-level masks for this module, all starting at 0.  log_level_sort,
   log_level_searchhits, log_level_searchterms and log_level_resultsets are
   filled in by loglevels() from yaz_log_module_level(). */
78 static int log_level_set=0;
79 static int log_level_sort=0;
80 static int log_level_searchhits=0;
81 static int log_level_searchterms=0;
82 static int log_level_resultsets=0;
/* Look up the log-level masks of the "sorting", "searchhits", "searchterms"
   and "resultsets" log modules and cache them in the file-scope
   log_level_* variables. */
84 static void loglevels(void)
88 log_level_sort = yaz_log_module_level("sorting");
89 log_level_searchhits = yaz_log_module_level("searchhits");
90 log_level_searchterms = yaz_log_module_level("searchterms");
91 log_level_resultsets = yaz_log_module_level("resultsets");
/* Execute the RPN query rpn for the result set sset.
   Allocates a sort-spec list in nmem, fetches the approximation limit of
   the query into sset->approx_limit, runs rpn_search_top() over the set's
   databases and then passes rset to resultSetRank() and/or
   resultSetSortSingle().
   NOTE(review): confirm which condition selects ranking versus sorting. */
96 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
97 Z_RPNQuery *rpn, ZebraSet sset)
100 Z_SortKeySpecList *sort_sequence;
102 ZEBRA_RES res = ZEBRA_OK;
/* Build a sort-spec list whose slots all start out as null pointers. */
104 sort_sequence = (Z_SortKeySpecList *)
105 nmem_malloc(nmem, sizeof(*sort_sequence));
/* NOTE(review): 10 equals ZSET_SORT_MAX_LEVEL as defined in this file;
   presumably the two are meant to agree -- confirm. */
106 sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
107 sort_sequence->specs = (Z_SortKeySpec **)
108 nmem_malloc(nmem, sort_sequence->num_specs *
109 sizeof(*sort_sequence->specs));
110 for (i = 0; i<sort_sequence->num_specs; i++)
111 sort_sequence->specs[i] = 0;
113 rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
115 res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
118 sset->num_bases, sset->basenames,
/* Count the leading non-null slots and use that as the real number of
   specs.
   NOTE(review): the loop condition has no upper bound on i; if all
   allocated slots were non-null it would read past the array -- confirm
   that at least one slot always stays null. */
125 for (i = 0; sort_sequence->specs[i]; i++)
127 sort_sequence->num_specs = i;
128 rset->hits_limit = sset->approx_limit;
131 res = resultSetRank(zh, sset, rset, rset_nmem);
135 res = resultSetSortSingle(zh, nmem, sset, rset,
136 sort_sequence, &sort_status);
/* Create or reuse the result set setname for an RPN search.
   The set is marked locked while the search runs, the database names are
   duplicated into the set's own nmem, and the query is executed through
   resultSetSearch().  The hit count is reported through *hits, and
   *estimated_hit_count is 1 when the set's estimated_hit_count is non-zero
   and 0 otherwise. */
143 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
144 int num_bases, char **basenames,
146 zint *hits, int *estimated_hit_count)
153 *estimated_hit_count = 0;
/* The third argument is the ov parameter of resultSetAdd(). */
155 zebraSet = resultSetAdd(zh, setname, 1);
/* resultSetAdd() tests this flag before it reuses an existing set. */
158 zebraSet->locked = 1;
161 zebraSet->rset_nmem = nmem_create();
/* Keep private copies of the database names in the set's nmem. */
163 zebraSet->num_bases = num_bases;
164 zebraSet->basenames =
165 nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
166 for (i = 0; i<num_bases; i++)
167 zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
169 res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
171 *hits = zebraSet->hits;
172 if (zebraSet->estimated_hit_count)
173 *estimated_hit_count = 1;
177 zebraSet->locked = 0;
/* Record one term in the term-entry table of result set s.
   When s->term_entries is unset the table is created in s->nmem with room
   for 1000 entries.  An entry is written at index s->hits, and only while
   s->hits is below term_entries_max.  The db, index_name and term strings
   are duplicated into s->nmem. */
183 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
184 const char *db, const char *index_name,
187 assert(zh); /* compiler shut up */
/* NOTE(review): confirm this only runs when s->nmem is not already set;
   an unconditional call would replace a pool that is still in use. */
189 s->nmem = nmem_create();
190 if (!s->term_entries)
193 s->term_entries_max = 1000;
195 nmem_malloc(s->nmem, s->term_entries_max *
196 sizeof(*s->term_entries));
/* A null term marks a slot as unused. */
197 for (i = 0; i < s->term_entries_max; i++)
198 s->term_entries[i].term = 0;
/* Terms beyond the table capacity are not stored. */
200 if (s->hits < s->term_entries_max)
202 s->term_entries[s->hits].reg_type = reg_type;
203 s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
204 s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
205 s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
/* Find the result set called name on zh, or create it.
   For an existing set the code tests !ov || s->locked (ov is presumably an
   "overwrite allowed" flag -- confirm) and otherwise releases the cached
   read handle, the rset and both memory pools so the set can be refilled.
   A new set gets its name duplicated and a sort_info table sized by the
   "sortmax" resource (default "1000", at least 2 entries). */
210 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
215 for (s = zh->sets; s; s = s->next)
216 if (!strcmp(s->name, name))
223 yaz_log(log_level_resultsets, "updating result set %s", name);
224 if (!ov || s->locked)
/* Release everything the previous evaluation of this set owned. */
229 rset_close(s->cache_rfd);
230 rset_delete(s->rset);
233 nmem_destroy(s->rset_nmem);
235 nmem_destroy(s->nmem);
239 const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
241 yaz_log(log_level_resultsets, "adding result set %s", name);
242 s = (ZebraSet) xmalloc(sizeof(*s));
245 s->name = xstrdup(name);
247 s->sort_info = (struct zset_sort_info *)
248 xmalloc(sizeof(*s->sort_info));
/* atoi() yields 0 for non-numeric text; the clamp below turns any value
   under 2 into 2. */
249 s->sort_info->max_entries = atoi(sort_max_str);
250 if (s->sort_info->max_entries < 2)
251 s->sort_info->max_entries = 2;
253 s->sort_info->entries = (struct zset_sort_entry **)
254 xmalloc(sizeof(*s->sort_info->entries) *
255 s->sort_info->max_entries);
256 s->sort_info->all_entries = (struct zset_sort_entry *)
257 xmalloc(sizeof(*s->sort_info->all_entries) *
258 s->sort_info->max_entries);
/* Initially entries[i] points at all_entries[i]. */
259 for (i = 0; i < s->sort_info->max_entries; i++)
260 s->sort_info->entries[i] = s->sort_info->all_entries + i;
270 s->cache_position = 0;
/* The per-set limit starts from the handle-wide setting. */
272 s->approx_limit = zh->approx_limit;
273 s->estimated_hit_count = 0;
/* Look up the result set called name on zh, re-evaluating it when needed.
   A set that has no term entries and no rset but still holds an RPN query
   is searched again ("research"); if that yields an rset and the set has a
   sortSpec, it is sorted again ("resort"). */
277 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
281 for (s = zh->sets; s; s = s->next)
282 if (!strcmp(s->name, name))
284 if (!s->term_entries && !s->rset && s->rpn)
/* Scratch pool for the re-run.
   NOTE(review): confirm it is destroyed once the search is done. */
286 NMEM nmem = nmem_create();
287 yaz_log(log_level_resultsets, "research %s", name);
289 s->rset_nmem = nmem_create();
290 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
291 if (s->rset && s->sortSpec)
294 yaz_log(log_level_resultsets, "resort %s", name);
295 resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
/* Report the database names of result set setname.
   The set is fetched with resultSetGet(); *basenames is set to the set's
   own array (no copy is made) and *num_bases to its length. */
305 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
306 const char ***basenames, int *num_bases)
308 ZebraSet sset = resultSetGet(zh, setname);
311 *basenames = sset->basenames;
312 *num_bases = sset->num_bases;
/* Drop the evaluated state of every result set on zh: the cached read
   handle is closed, the rset deleted, the cached position reset to 0 and
   the rset memory pool destroyed.  The set objects themselves stay on the
   list. */
317 void resultSetInvalidate(ZebraHandle zh)
319 ZebraSet s = zh->sets;
321 yaz_log(log_level_resultsets, "invalidating result sets");
322 for (; s; s = s->next)
327 rset_close(s->cache_rfd);
328 rset_delete(s->rset);
332 s->cache_position = 0;
334 nmem_destroy(s->rset_nmem);
/* Delete result sets by name and report one status per name.
   Every statuses[i] starts as Z_DeleteStatus_resultSetDidNotExist and
   becomes Z_DeleteStatus_success when a set called names[i] is found.
   The sort tables, memory pools, cached read handle and rset of a deleted
   set are released. */
339 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
/* Pointer to the link that refers to the current set. */
341 ZebraSet * ss = &zh->sets;
345 for (i = 0; i<num; i++)
346 statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
353 for (i = 0; i<num; i++)
354 if (!strcmp(s->name, names[i]))
357 statuses[i] = Z_DeleteStatus_success;
366 xfree(s->sort_info->all_entries);
367 xfree(s->sort_info->entries);
371 nmem_destroy(s->nmem);
375 rset_close(s->cache_rfd);
376 rset_delete(s->rset);
379 nmem_destroy(s->rset_nmem);
/* Fetch meta records for num positions of result set name by filling a
   positions array and calling zebra_meta_records_create().
   Requests with num <= 0 or num > 10000 take a separate branch
   (NOTE(review): presumably rejected -- confirm). */
388 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
/* Starts out pointing at pos_small. */
393 zint *pos = pos_small;
397 if (num > 10000 || num <= 0)
/* NOTE(review): presumably only used when pos_small is too small, and
   freed after the call below -- confirm. */
401 pos = xmalloc(sizeof(*pos) * num);
403 for (i = 0; i<num; i++)
406 mr = zebra_meta_records_create(zh, name, num, pos);
/* Build an array of num meta records for the 1-based positions of result
   set name.
   - Without an rset, records are filled from the set's term entries.
   - With an rset, positions that fall inside sort_info are answered from
     the sorted entries.  Remaining positions are found by reading the rset
     sequentially, checking each new sysno against the sorted entries.  The
     read handle, position and sysno reached are cached on the set for a
     later call. */
413 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name,
414 int num, zint *positions)
417 ZebraMetaRecord *sr = 0;
420 struct zset_sort_info *sort_info;
/* Index into key.mem[] from which the sysno is read. */
421 size_t sysno_mem_index = 0;
423 if (zh->m_staticrank)
428 if (!(sset = resultSetGet(zh, name)))
430 if (!(rset = sset->rset))
432 if (!sset->term_entries)
434 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
435 for (i = 0; i<num; i++)
/* NOTE(review): only the upper bound is checked before indexing with
   positions[i]-1; confirm that positions[i] >= 1 is guaranteed. */
442 if (positions[i] <= sset->term_entries_max)
444 sr[i].term = sset->term_entries[positions[i]-1].term;
445 sr[i].db = sset->term_entries[positions[i]-1].db;
451 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
452 for (i = 0; i<num; i++)
459 sort_info = sset->sort_info;
/* First pass: positions covered by the sorted entries. */
464 for (i = 0; i<num; i++)
466 position = positions[i];
467 if (position > 0 && position <= sort_info->num_entries)
469 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
470 " (sorted)", position);
471 sr[i].sysno = sort_info->entries[position-1]->sysno;
472 sr[i].score = sort_info->entries[position-1]->score;
476 /* did we really get all entries using sort ? */
477 for (i = 0; i<num; i++)
482 if (i < num) /* nope, get the rest, unsorted - sorry */
/* Skip the requested positions that the sorted entries already covered. */
491 position = sort_info->num_entries;
492 while (num_i < num && positions[num_i] <= position)
/* Resume from the cached read handle when the next wanted position lies
   beyond the cached one. */
495 if (sset->cache_rfd &&
496 num_i < num && positions[num_i] > sset->cache_position)
498 position = sset->cache_position;
499 rfd = sset->cache_rfd;
500 psysno = sset->cache_psysno;
/* Otherwise discard the cached handle and read from the start. */
505 rset_close(sset->cache_rfd);
506 rfd = rset_open(rset, RSETF_READ);
508 while (num_i < num && rset_read(rfd, &key, 0))
510 zint this_sys = key.mem[sysno_mem_index];
/* A change of sysno marks the start of a new record. */
511 if (this_sys != psysno)
516 /* determine whether we already have this in our set */
517 for (i = sort_info->num_entries; --i >= 0; )
518 if (psysno == sort_info->entries[i]->sysno)
525 if (position == positions[num_i])
527 sr[num_i].sysno = psysno;
528 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
/* Records found by the sequential read carry no score. */
529 sr[num_i].score = -1;
/* Remember where reading stopped so a later call can continue from here. */
534 sset->cache_position = position;
535 sset->cache_psysno = psysno;
536 sset->cache_rfd = rfd;
/* Counterpart of zebra_meta_records_create() for the records array.
   NOTE(review): presumably frees records -- confirm. */
542 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
545 assert(zh); /* compiler shut up about unused arg */
551 int *ord; /* array of ord for each database searched */
552 int *numerical; /* array of numerical flags, one for each database searched */
/* Index type of the criterion; resultSetInsertSort() passes it to
   zebra_term_untrans() for numeric comparison. */
553 const char *index_type;
/* Insert record sysno into the sorted entries of sset->sort_info.
   For each criterion the record's sort key is read from the sort index
   into tmp_cmp_buf[i].  The keys are then compared against those already
   stored in cmp_buf to find the insert position, later entries are moved
   up one slot, and the new entry is stored with score -1.
   cmp_buf[k] holds one SORT_IDX_ENTRYSIZE-byte key per sorted entry for
   criterion k; tmp_cmp_buf[k] holds the key of the record being
   inserted. */
556 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
558 struct sortKeyInfo *criteria, int num_criteria,
560 char *cmp_buf[], char *tmp_cmp_buf[])
562 struct zset_sort_entry *new_entry = NULL;
563 struct zset_sort_info *sort_info = sset->sort_info;
565 WRBUF w = wrbuf_alloc();
/* Select this record in the sort index, then collect one key per
   criterion. */
567 zebra_sort_sysno(zh->reg->sort_index, sysno);
568 for (i = 0; i<num_criteria; i++)
570 char *this_entry_buf = tmp_cmp_buf[i];
571 memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
/* With an ord of -1 nothing is read from the sort index and the key stays
   all zero bytes. */
573 if (criteria[i].ord[database_no] != -1)
575 yaz_log(log_level_sort, "pre zebra_sort_type ord is %d",
576 criteria[i].ord[database_no]);
577 zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]);
579 if (zebra_sort_read(zh->reg->sort_index, 0, w))
581 /* consider each sort entry and take lowest/highest one
582 of the one as sorting key depending on whether sort is
583 ascending/descending */
/* w holds NUL-terminated values laid end to end; off steps from one value
   to the next. */
585 while (off != wrbuf_len(w))
587 size_t l = strlen(wrbuf_buf(w)+off);
588 assert(off < wrbuf_len(w));
/* Clamp the copy length so key plus terminator fits the buffer. */
590 if (l >= SORT_IDX_ENTRYSIZE)
591 l = SORT_IDX_ENTRYSIZE-1;
593 || (criteria[i].relation == 'A'
594 && strcmp(wrbuf_buf(w)+off, this_entry_buf) < 0)
595 || (criteria[i].relation == 'D'
596 && strcmp(wrbuf_buf(w)+off, this_entry_buf) > 0)
599 memcpy(this_entry_buf, wrbuf_buf(w)+off, l);
600 this_entry_buf[l] = '\0';
602 off += 1 + strlen(wrbuf_buf(w)+off);
608 yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
/* Find the insert position i, starting from the current number of
   entries. */
612 i = sort_info->num_entries;
616 for (j = 0; j<num_criteria; j++)
618 char *this_entry_buf = tmp_cmp_buf[j];
619 char *other_entry_buf =
620 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
/* Numeric criteria: both keys are converted with zebra_term_untrans() and
   compared as numbers parsed by atof(). */
621 if (criteria[j].numerical[database_no])
623 char this_entry_org[1024];
624 char other_entry_org[1024];
626 const char *index_type = criteria[j].index_type;
627 zebra_term_untrans(zh, index_type, this_entry_org,
629 zebra_term_untrans(zh, index_type, other_entry_org,
631 diff = atof(this_entry_org) - atof(other_entry_org);
/* Non-numeric criteria are compared bytewise. */
642 rel = memcmp(this_entry_buf, other_entry_buf,
645 /* when the compare is equal, continue to next criteria,
652 if (criteria[j].relation == 'A')
657 else if (criteria[j].relation == 'D')
664 yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
665 j = sort_info->max_entries;
667 yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
671 if (sort_info->num_entries == j)
674 j = (sort_info->num_entries)++;
675 new_entry = sort_info->entries[j];
676 /* move up all higher entries (to make room) */
/* The compare keys move together with their entries. */
680 for (k = 0; k<num_criteria; k++)
682 char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
683 char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
684 memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
686 sort_info->entries[j] = sort_info->entries[j-1];
689 /* and insert the new entry at the correct place */
690 sort_info->entries[i] = new_entry;
692 /* and add this to the compare buffer */
693 for (i = 0; i<num_criteria; i++)
695 char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
696 char *this_entry_buf = tmp_cmp_buf[i];
697 memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
699 new_entry->sysno = sysno;
/* Sorted (as opposed to ranked) entries carry no score. */
700 new_entry->score = -1;
/* Insert the pair (sysno, score) into sort_info->entries, ordered by score.
   The insert position i is found by comparing score with the scores of the
   existing entries; relation ('A' is tested explicitly) controls the
   direction.  Later entries are moved up one slot and the entry object
   taken from slot j is reused for the new record. */
703 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
704 zint sysno, int score, int relation)
706 struct zset_sort_entry *new_entry = NULL;
708 assert(zh); /* compiler shut up about unused arg */
710 i = sort_info->num_entries;
715 rel = score - sort_info->entries[i]->score;
722 else if (relation == 'A')
/* j first holds the table capacity, then the slot whose entry object is
   reused for the new record. */
729 j = sort_info->max_entries;
733 if (sort_info->num_entries == j)
736 j = (sort_info->num_entries)++;
738 new_entry = sort_info->entries[j];
741 sort_info->entries[j] = sort_info->entries[j-1];
744 sort_info->entries[i] = new_entry;
746 new_entry->sysno = sysno;
747 new_entry->score = score;
/* Make a deep copy of the RPN query src by encoding it with one ODR stream
   and decoding the buffer with another.  The memory of the decoded copy is
   moved into nmem with nmem_transfer(). */
750 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
753 ODR encode = odr_createmem(ODR_ENCODE);
754 ODR decode = odr_createmem(ODR_DECODE);
/* Decoding is attempted only when encoding succeeds. */
756 if (z_RPNQuery(encode, &src, 0, 0))
759 char *buf = odr_getbuf(encode, &len, 0);
763 odr_setbuf(decode, buf, len, 0);
764 z_RPNQuery(decode, &dst, 0, 0);
/* Hand ownership of the decoded data to the caller's pool. */
767 nmem_transfer(nmem, decode->mem);
/* Make a deep copy of the sort key specification list src by encoding it
   with one ODR stream and decoding the buffer with another.  The memory of
   the decoded copy is moved into nmem with nmem_transfer(). */
773 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
775 Z_SortKeySpecList *dst = 0;
776 ODR encode = odr_createmem(ODR_ENCODE);
777 ODR decode = odr_createmem(ODR_DECODE);
/* Decoding is attempted only when encoding succeeds. */
779 if (z_SortKeySpecList(encode, &src, 0, 0))
782 char *buf = odr_getbuf(encode, &len, 0);
786 odr_setbuf(decode, buf, len, 0);
787 z_SortKeySpecList(decode, &dst, 0, 0);
/* Hand ownership of the decoded data to the caller's pool. */
790 nmem_transfer(nmem, decode->mem);
/* Create the result set setname as a copy of rset (which, despite its name,
   is a ZebraSet here): the database names are duplicated into the new
   set's nmem, the underlying RSET is duplicated with rset_dup() and the
   RPN query is deep-copied. */
796 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
802 nset = resultSetAdd(zh, setname, 1);
806 nset->nmem = nmem_create();
808 nset->num_bases = rset->num_bases;
810 nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
811 for (i = 0; i<rset->num_bases; i++)
812 nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
815 nset->rset = rset_dup(rset->rset);
817 nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
/* Sort one input result set into output_setname.
   Exactly one input set name is supported: zero names and more than one
   name each raise a BIB-1 diagnostic.  A missing set, or a set without an
   rset, raises YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST.  When the
   output name differs from the input name the set is cloned first.  The
   sort specification is copied onto the set and the sort itself is done by
   resultSetSortSingle(), whose result is returned. */
821 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
822 int num_input_setnames, const char **input_setnames,
823 const char *output_setname,
824 Z_SortKeySpecList *sort_sequence, int *sort_status)
829 if (num_input_setnames == 0)
831 zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
834 if (num_input_setnames > 1)
836 zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
841 yaz_log(log_level_sort, "result set sort input=%s output=%s",
842 *input_setnames, output_setname);
843 sset = resultSetGet(zh, input_setnames[0]);
846 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
850 if (!(rset = sset->rset))
852 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
/* strcmp() is non-zero when the names differ: sort into a clone then.
   rset keeps referring to the input set's RSET. */
856 if (strcmp(output_setname, input_setnames[0]))
857 sset = resultSetClone(zh, output_setname, sset);
/* Keep a copy of the spec so resultSetGet() can sort again later. */
858 sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
859 return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
/* Sort the records of rset into sset->sort_info according to sort_sequence.
   At most ZSET_SORT_MAX_LEVEL criteria are used.  For each criterion the
   sort relation and, per database, the sort index ord and numeric flag are
   resolved; unsupported specifications raise a BIB-1 diagnostic through
   zebra_setError().  The rset is then read and records are passed to
   resultSetInsertSort() whenever the sysno changes.  *sort_status is set to
   Z_SortResponse_success on the path that reaches the end of the
   function. */
863 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
864 ZebraSet sset, RSET rset,
865 Z_SortKeySpecList *sort_sequence,
874 struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
875 char *cmp_buf[ZSET_SORT_MAX_LEVEL];
876 char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
/* Index into key.mem[] from which the sysno is read. */
882 size_t sysno_mem_index = 0;
/* NOTE(review): the database list comes from the handle (zh->num_basenames,
   zh->basenames), not from sset; confirm the two always agree when a
   stored set is sorted again. */
884 int numbases = zh->num_basenames;
885 yaz_log(log_level_sort, "searching %d databases",numbases);
887 if (zh->m_staticrank)
890 assert(nmem); /* compiler shut up about unused param */
/* Any previous sort result is discarded. */
891 sset->sort_info->num_entries = 0;
/* First call obtains the term count n, second call fills the array. */
893 rset_getterms(rset, 0, 0, &n);
894 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
895 rset_getterms(rset, terms, n, &numTerms);
/* Criteria beyond ZSET_SORT_MAX_LEVEL are ignored. */
898 num_criteria = sort_sequence->num_specs;
899 if (num_criteria > ZSET_SORT_MAX_LEVEL)
900 num_criteria = ZSET_SORT_MAX_LEVEL;
901 /* set up the search criteria */
902 for (i = 0; i < num_criteria; i++)
904 Z_SortKeySpec *sks = sort_sequence->specs[i];
908 sort_criteria[i].ord = (int *)
909 nmem_malloc(nmem, sizeof(int)*numbases);
910 sort_criteria[i].numerical = (int *)
911 nmem_malloc(nmem, sizeof(int)*numbases);
913 /* initialize ord and numerical for each database */
914 for (ib = 0; ib < numbases; ib++)
916 sort_criteria[i].ord[ib] = -1;
917 sort_criteria[i].numerical[ib] = 0;
920 if (sks->which == Z_SortKeySpec_missingValueData)
922 zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
/* Map the Z39.50 sort relation to the 'A'/'D' code used when inserting. */
925 if (*sks->sortRelation == Z_SortKeySpec_ascending)
926 sort_criteria[i].relation = 'A';
927 else if (*sks->sortRelation == Z_SortKeySpec_descending)
928 sort_criteria[i].relation = 'D';
931 zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
/* Only generic sort elements pass these checks. */
934 if (sks->sortElement->which == Z_SortElement_databaseSpecific)
936 zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
939 else if (sks->sortElement->which != Z_SortElement_generic)
941 zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
944 sk = sks->sortElement->u.generic;
947 case Z_SortKey_sortField:
948 yaz_log(log_level_sort, "key %d is of type sortField", i+1);
949 for (ib = 0; ib < numbases; ib++)
951 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
952 sort_criteria[i].numerical[ib] = 0;
953 sort_criteria[i].ord[ib] =
954 zebraExplain_lookup_attr_str(zh->reg->zei,
955 zinfo_index_category_sort,
957 if (sks->which != Z_SortKeySpec_null
958 && sort_criteria[i].ord[ib] == -1)
961 YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
966 case Z_SortKey_elementSpec:
967 yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
968 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
970 case Z_SortKey_sortAttributes:
971 yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
972 /* for every database we searched, get the sort index file
973 id (ord) and its numerical indication and store them in
975 for (ib = 0; ib < numbases; ib++)
977 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
978 res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
979 &sort_criteria[i].ord[ib],
980 &sort_criteria[i].numerical[ib]);
983 if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
987 /* right now we look up the index type based on the first database
988 if the index_type's can differ between the indexes of different
989 databases (which i guess they can?) then we have to store the
990 index types for each database, just like the ord and numerical */
991 if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
992 &sort_criteria[i].index_type,
995 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
999 /* allocate space for each compare buf + one extra for tmp comparison */
1000 /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
1001 all other result entries to compare against. This is slowly filled when records are processed.
1002 tmp_cmp_buf is an array with a value of the current record for each criteria
1004 for (i = 0; i<num_criteria; i++)
1006 cmp_buf[i] = xmalloc(sset->sort_info->max_entries
1007 * SORT_IDX_ENTRYSIZE);
1008 tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
1010 rfd = rset_open(rset, RSETF_READ);
1011 while (rset_read(rfd, &key, &termid))
1013 zint this_sys = key.mem[sysno_mem_index];
1014 if (log_level_searchhits)
1015 key_logdump_txt(log_level_searchhits, &key, termid->name);
1017 if (this_sys != psysno)
1019 int database_no = 0;
1020 if ((sset->hits & 255) == 0 && zh->break_handler_func)
1022 if (zh->break_handler_func(zh->break_handler_data))
1024 rset_set_hits_limit(rset, 0);
1031 /* determine database from the term, but only bother if more than
1032 one database is in use*/
1033 if (numbases > 1 && termid->ol)
1035 const char *this_db = 0;
1036 if (zebraExplain_lookup_ord(zh->reg->zei, termid->ol->ord, 0, &this_db, 0)
/* Find the index of this_db in the handle's database list. */
1039 for (ib = 0; ib < numbases; ib++)
1040 if (!strcmp(this_db, zh->basenames[ib]))
1045 yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " database_no=%d", this_sys,
1047 ord_list_print(termid->ol);
1049 resultSetInsertSort(zh, sset, database_no,
1050 sort_criteria, num_criteria, psysno, cmp_buf,
1056 /* free the compare buffers */
1057 for (i = 0; i<num_criteria; i++)
1060 xfree(tmp_cmp_buf[i]);
1063 yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
/* Log the hit count of every term of the query. */
1065 for (i = 0; i < numTerms; i++)
1066 yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1067 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1068 *sort_status = Z_SortResponse_success;
/* Look up the result set resultSetId through resultSetGet(), which may
   re-run its query.
   NOTE(review): presumably yields the set's RSET -- confirm. */
1072 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1076 if ((s = resultSetGet(zh, resultSetId)))
/* Rank the records of rset and store them, ordered by score, in
   zebraSet->sort_info.
   The rank handler is named by the "rank" resource (default "rank-1"); an
   unknown handler is logged and raises YAZ_BIB1_UNSUPP_SEARCH.  The rset
   is read key by key: every key is fed to the handler's add callback, and
   when the sysno changes the previous record's score is computed with the
   calc callback and inserted with resultSetInsertRank().  Afterwards
   zebraSet->hits is taken from rset->hits_count. */
1081 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1082 RSET rset, NMEM nmem)
1091 ZebraRankClass rank_class;
1092 struct zset_sort_info *sort_info;
1093 const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
/* Index into key.mem[] from which the sysno is read: 1 with static rank
   enabled, otherwise 0. */
1094 size_t sysno_mem_index = 0;
1096 if (zh->m_staticrank)
1097 sysno_mem_index = 1;
/* Any previous ranking result is discarded. */
1101 sort_info = zebraSet->sort_info;
1102 sort_info->num_entries = 0;
1104 zebraSet->estimated_hit_count = 0;
/* First call obtains the term count n, second call fills the array. */
1105 rset_getterms(rset, 0, 0, &n);
1106 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1107 rset_getterms(rset, terms, n, &numTerms);
1109 rank_class = zebraRankLookup(zh, rank_handler_name);
1112 yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1113 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1118 RSFD rfd = rset_open(rset, RSETF_READ);
1119 struct rank_control *rc = rank_class->control;
/* Per-search state of the rank handler, released by rc->end below. */
1122 void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1123 nmem, terms, numTerms);
1124 zint psysno = 0; /* previous doc id / sys no */
1125 zint pstaticrank = 0; /* previous static rank */
1127 while (rset_read(rfd, &key, &termid))
1129 zint this_sys = key.mem[sysno_mem_index];
/* The last element of the key is passed to the handler as the sequence
   number. */
1131 zint seqno = key.mem[key.len-1];
1133 if (log_level_searchhits)
1134 key_logdump_txt(log_level_searchhits, &key, termid->name);
1135 if (this_sys != psysno)
1136 { /* new record .. */
/* Call the break handler, if one is set, whenever the low 8 bits of
   counted_items are zero. */
1137 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1139 if (zh->break_handler_func(zh->break_handler_data))
1141 yaz_log(YLOG_LOG, "Aborted search");
1145 if (rfd->counted_items > rset->hits_limit)
1148 { /* only if we did have a previous record */
1149 score = (*rc->calc)(handle, psysno, pstaticrank,
1151 /* insert the hit. A=Ascending */
1152 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
/* From here on the hit count is flagged as an estimate. */
1157 zebraSet->estimated_hit_count = 1;
1158 rset_set_hits_limit(rset, 0);
1162 if (zh->m_staticrank)
1163 pstaticrank = key.mem[0];
1165 (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1169 { /* we had - at least - one record */
1170 score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1171 /* insert the hit. A=Ascending */
1172 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1175 (*rc->end)(zh->reg, handle);
1178 zebraSet->hits = rset->hits_count;
1180 yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1181 ZINT_FORMAT " sysnos, rank", kno, zebraSet->hits);
/* Log the hit count of every term of the query. */
1182 for (i = 0; i < numTerms; i++)
1184 yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1186 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
/* Find the rank class called name in zh->reg->rank_classes.
   A class found with init_flag still unset has its create callback run, if
   it has one, and the result stored as class_handle. */
1191 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1193 ZebraRankClass p = zh->reg->rank_classes;
/* Walk the list until the names match or the list ends. */
1194 while (p && strcmp(p->control->name, name))
1196 if (p && !p->init_flag)
1198 if (p->control->create)
1199 p->class_handle = (*p->control->create)(zh);
/* Register a rank handler on reg.
   The control structure ctrl is copied, including a private duplicate of
   its name, and the new class is linked in at the head of
   reg->rank_classes. */
1205 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1207 ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1208 p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1209 memcpy(p->control, ctrl, sizeof(*p->control));
/* Replace the copied name pointer with an owned duplicate. */
1210 p->control->name = xstrdup(ctrl->name);
1212 p->next = reg->rank_classes;
1213 reg->rank_classes = p;
/* Tear down the rank classes registered on reg.
   For a class with init_flag set and a destroy callback, the callback is
   run on its class_handle.  The duplicated name is freed and the list head
   is reset to NULL. */
1216 void zebraRankDestroy(struct zebra_register *reg)
1218 ZebraRankClass p = reg->rank_classes;
/* Read the successor before the current class is released. */
1221 ZebraRankClass p_next = p->next;
1222 if (p->init_flag && p->control->destroy)
1223 (*p->control->destroy)(reg, p->class_handle);
1224 xfree(p->control->name);
1229 reg->rank_classes = NULL;
/* Walk the rset tree and collect per-term information.
   Each child subtree is visited recursively, and the term, hits_count and
   hits_approx of an rset are written at index no of the output arrays.
   Any of the three arrays may be null; the recursion passes a null pointer
   on unchanged, so calling with all three null serves to count entries.
   The running count no is accumulated from the children's results. */
1232 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1233 zint *hits_array, int *approx_array)
/* Children write into the arrays starting at the current offset no. */
1237 for (i = 0; i<rset->no_children; i++)
1238 no += trav_rset_for_termids(rset->children[i],
1239 (termid_array ? termid_array + no : 0),
1240 (hits_array ? hits_array + no : 0),
1241 (approx_array ? approx_array + no : 0));
1245 termid_array[no] = rset->term;
1247 hits_array[no] = rset->hits_count;
1249 approx_array[no] = rset->hits_approx;
1251 yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1252 " count=" ZINT_FORMAT,
1253 rset, rset->term->name, rset->hits_limit, rset->hits_count);
/* Report the number of terms of result set setname through *num_terms, as
   counted by trav_rset_for_termids() with no output arrays. */
1260 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1263 ZebraSet sset = resultSetGet(zh, setname);
1267 *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
/* Report details of term number no (0-based) of result set setname.
   For a valid no the hit count goes to *count, the approximation flag to
   *approx and the reference id to *term_ref_id.  The term text is written
   to termbuf and NUL-terminated, and *termlen is updated to the length
   written.  On entry *termlen is the size of termbuf.  When
   zh->iconv_from_utf8 is set the text is converted with yaz_iconv(),
   otherwise it is copied with memcpy(). */
1273 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1274 int no, zint *count, int *approx,
1275 char *termbuf, size_t *termlen,
1276 const char **term_ref_id)
1278 ZebraSet sset = resultSetGet(zh, setname);
/* Count the terms first so the arrays below can be sized. */
1281 int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1282 if (no >= 0 && no < num_terms)
1284 TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1285 zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1286 int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1288 trav_rset_for_termids(sset->rset, term_array,
1289 hits_array, approx_array);
1292 *count = hits_array[no];
1294 *approx = approx_array[no];
1297 char *inbuf = term_array[no]->name;
1298 size_t inleft = strlen(inbuf);
/* One byte is reserved for the terminating NUL.
   NOTE(review): *termlen == 0 would wrap this unsigned subtraction;
   confirm that callers always pass a non-zero size. */
1299 size_t outleft = *termlen - 1;
1301 if (zh->iconv_from_utf8 != 0)
1303 char *outbuf = termbuf;
1306 ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1308 if (ret == (size_t)(-1))
1312 yaz_iconv(zh->iconv_from_utf8, 0, 0,
1314 *termlen = outbuf - termbuf;
1319 if (inleft > outleft)
1322 memcpy(termbuf, inbuf, *termlen);
1324 termbuf[*termlen] = '\0';
1327 *term_ref_id = term_array[no]->ref_id;
1331 xfree(approx_array);
/* Collect the hits of result set setname for one record into snippets.
   A temporary rset receives a key written for the record.  It is combined
   through an AND rset with a duplicate of the set's rset, and the
   combination is read.  For every key read and every entry of the term's
   ord list, a snippet is appended with the key's last element as position
   and the term name as text. */
1338 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1339 zint sysno, zebra_snippets *snippets)
1341 ZebraSet sset = resultSetGet(zh, setname);
1342 yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1348 struct rset_key_control *kc = zebra_key_control_create(zh);
/* All rsets created below are allocated from this pool. */
1349 NMEM nmem = nmem_create();
1351 RSET rsets[2], rset_comb;
1352 RSET rset_temp = rset_create_temp(nmem, kc, kc->scope,
1353 res_get(zh->res, "setTmpDir"),0 );
1356 RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1363 rset_write(rsfd, &key);
1366 rsets[0] = rset_temp;
1367 rsets[1] = rset_dup(sset->rset);
1369 rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1371 rsfd = rset_open(rset_comb, RSETF_READ);
1373 while (rset_read(rsfd, &key, &termid))
1377 struct ord_list *ol;
1378 for (ol = termid->ol; ol; ol = ol->next)
1380 zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1381 ol->ord, termid->name);
1387 rset_delete(rset_comb);
/* Translate the record id recid into sysnos for the given databases.
   On entry *no_sysnos is the capacity of sysnos; on exit it is the number
   of values stored.  Without an isamb index or without segment indexing a
   separate branch is taken.  Otherwise, for each database the "_ALLRECORDS"
   always-matches index (index type "w") is looked up in the dictionary and
   its ISAM list is scanned from recid; the last element of every key whose
   first element equals recid is stored. */
1394 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh,
1395 const char **basenames, int num_bases,
1397 zint *sysnos, int *no_sysnos)
1399 ZEBRA_RES res = ZEBRA_OK;
/* Number of sysnos stored so far. */
1400 int sysnos_offset = 0;
1403 if (!zh->reg->isamb || !zh->m_segment_indexing)
1405 if (sysnos_offset < *no_sysnos)
1411 for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1413 const char *database = basenames[i];
1414 if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1416 const char *index_type = "w";
1417 const char *use_string = "_ALLRECORDS";
1419 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1420 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1421 index_type, use_string);
/* The dictionary key is the encoded ord followed by a NUL. */
1425 int ord_len = key_SU_encode(ord, ord_buf);
1428 ord_buf[ord_len] = '\0';
1430 info = dict_lookup(zh->reg->dict, ord_buf);
/* The first byte of info is the length of the payload that follows. */
1433 if (*info != sizeof(ISAM_P))
1441 struct it_key key_until, key_found;
1445 memcpy(&isam_p, info+1, sizeof(ISAM_P));
1447 pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
/* Build the key to seek to: recid first, remaining elements zero. */
1452 key_until.mem[i++] = recid;
1453 key_until.mem[i++] = 0; /* section_id */
1454 if (zh->m_segment_indexing)
1455 key_until.mem[i++] = 0; /* segment */
1456 key_until.mem[i++] = 0;
1459 r = isamb_pp_forward(pt, &key_found, &key_until);
/* Collect results while the keys still belong to recid; values beyond
   the capacity of sysnos are dropped. */
1460 while (r && key_found.mem[0] == recid)
1462 if (sysnos_offset < *no_sysnos)
1463 sysnos[sysnos_offset++] =
1464 key_found.mem[key_found.len-1];
1465 r = isamb_pp_read(pt, &key_found);
1475 *no_sysnos = sysnos_offset;
/* Translate recid into sysnos using the databases of result set setname.
   The database names are fetched with resultSetGetBaseNames(); when that
   succeeds the result of zebra_recid_to_sysno() is returned. */
1479 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh,
1480 const char *setname,
1482 zint *sysnos, int *no_sysnos)
1484 const char **basenames;
1488 res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1489 if (res != ZEBRA_OK)
1492 return zebra_recid_to_sysno(zh, basenames, num_bases,
1493 recid, sysnos, no_sysnos);
/* Count the records of rset.
   The rset's hits_limit is set to approx_limit and the rset is read.  When
   the first key element changes, counted_items is checked against that
   limit.  The result reported through *count is rset->hits_count. */
1496 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1503 yaz_log(YLOG_DEBUG, "count_set");
1505 rset->hits_limit = approx_limit;
1508 rfd = rset_open(rset, RSETF_READ);
1509 while (rset_read(rfd, &key,0 /* never mind terms */))
/* A change of the first key element marks a new record. */
1511 if (key.mem[0] != psysno)
1513 psysno = key.mem[0];
1514 if (rfd->counted_items >= rset->hits_limit)
1519 *count = rset->hits_count;
1526 * c-file-style: "Stroustrup"
1527 * indent-tabs-mode: nil
1529 * vim: shiftwidth=4 tabstop=8 expandtab