1 /* This file is part of the Zebra server.
2 Copyright (C) 1995-2008 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 #include <yaz/diagbib1.h>
34 #define ZSET_SORT_MAX_LEVEL 10
36 struct zebra_set_term_entry {
47 NMEM rset_nmem; /* for creating the rsets in */
50 const char **basenames; /* names of the databases the set was searched in (num_bases items) */
52 Z_SortKeySpecList *sortSpec; /* sort specification stored by resultSetSort; reused by resultSetGet */
53 struct zset_sort_info *sort_info; /* sorted or ranked entries of the set */
54 struct zebra_set_term_entry *term_entries; /* table filled by resultSetAddTerm */
56 struct zebra_set *next; /* next set in the list headed by zh->sets */
58 int estimated_hit_count; /* set to 1 in resultSetRank; reported by resultSetAddRPN */
60 zint cache_position; /* last position */
61 RSFD cache_rfd; /* rfd (NULL if not existing) */
62 zint cache_psysno; /* sysno for last position */
63 zint approx_limit; /* limit before we do approx */
66 struct zset_sort_entry {
71 struct zset_sort_info {
74 struct zset_sort_entry *all_entries; /* storage for max_entries entries */
75 struct zset_sort_entry **entries; /* pointers into all_entries; reordered by the insert routines */
/* Log-level masks passed to yaz_log throughout this file.  Four of them are
   assigned in loglevels() from named yaz log modules.  The use of
   log_level_set is not on the visible lines; presumably it is a
   set-up-done flag -- TODO confirm. */
78 static int log_level_set=0;
79 static int log_level_sort=0;
80 static int log_level_searchhits=0;
81 static int log_level_searchterms=0;
82 static int log_level_resultsets=0;
/* Resolve the module log levels by name through yaz_log_module_level()
   and store them in the file-scope log_level_* variables. */
84 static void loglevels(void)
88 log_level_sort = yaz_log_module_level("sorting");
89 log_level_searchhits = yaz_log_module_level("searchhits");
90 log_level_searchterms = yaz_log_module_level("searchterms");
91 log_level_resultsets = yaz_log_module_level("resultsets");
/* Evaluate the RPN query rpn for result set sset.
   nmem receives the scratch sort-key list and is passed on to the sort
   step; rset_nmem is passed on to the ranking step.  The status starts as
   ZEBRA_OK and is overwritten by the search, rank and sort calls.  The
   conditions that choose between ranking and sorting are not on the
   visible lines. */
96 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
97 Z_RPNQuery *rpn, ZebraSet sset)
100 Z_SortKeySpecList *sort_sequence;
102 ZEBRA_RES res = ZEBRA_OK;
/* Allocate a spec list with every slot cleared.  Presumably the search
   call below fills in slots -- its full argument list is not visible. */
104 sort_sequence = (Z_SortKeySpecList *)
105 nmem_malloc(nmem, sizeof(*sort_sequence));
/* NOTE(review): 10 is also the value of ZSET_SORT_MAX_LEVEL; the macro
   could presumably replace the literal -- confirm. */
106 sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
107 sort_sequence->specs = (Z_SortKeySpec **)
108 nmem_malloc(nmem, sort_sequence->num_specs *
109 sizeof(*sort_sequence->specs));
110 for (i = 0; i<sort_sequence->num_specs; i++)
111 sort_sequence->specs[i] = 0;
113 rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
115 res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
118 sset->num_bases, sset->basenames,
/* Count the leading non-NULL specs and shrink num_specs to that count.
   NOTE(review): the scan has no upper bound; if all 10 slots were filled
   it would read specs[10], one past the allocation -- confirm this cannot
   happen. */
125 for (i = 0; sort_sequence->specs[i]; i++)
127 sort_sequence->num_specs = i;
128 rset->hits_limit = sset->approx_limit;
131 res = resultSetRank(zh, sset, rset, rset_nmem);
135 res = resultSetSortSingle(zh, nmem, sset, rset,
136 sort_sequence, &sort_status);
/* Create (or overwrite) the named result set and run the RPN query for it.
   The database names are duplicated into the memory of the set.
   On return *hits carries the hit count of the set and
   *estimated_hit_count is 1 when the set flags its count as an estimate
   (it is preset to 0). */
143 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
144 int num_bases, char **basenames,
146 zint *hits, int *estimated_hit_count)
153 *estimated_hit_count = 0;
/* Third argument 1: allow an existing set of that name to be replaced. */
155 zebraSet = resultSetAdd(zh, setname, 1);
/* Mark the set locked for the duration of the search; resultSetAdd
   checks this flag before replacing a set. */
158 zebraSet->locked = 1;
161 zebraSet->rset_nmem = nmem_create();
163 zebraSet->num_bases = num_bases;
164 zebraSet->basenames =
165 nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
166 for (i = 0; i<num_bases; i++)
167 zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
169 res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
171 *hits = zebraSet->hits;
172 if (zebraSet->estimated_hit_count)
173 *estimated_hit_count = 1;
177 zebraSet->locked = 0;
/* Record one term (register type, database, index name, term text) in the
   term_entries table of set s.  The strings are duplicated into s->nmem.
   The entry is written at index s->hits, and only while that index is
   below term_entries_max. */
183 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
184 const char *db, const char *index_name,
187 assert(zh); /* compiler shut up */
/* NOTE(review): no guard for this allocation is on the visible lines; an
   unconditional nmem_create would replace an existing s->nmem --
   confirm. */
189 s->nmem = nmem_create();
190 if (!s->term_entries)
/* First term for this set: allocate a fixed table of 1000 slots and mark
   every slot as unused (term == 0). */
193 s->term_entries_max = 1000;
195 nmem_malloc(s->nmem, s->term_entries_max *
196 sizeof(*s->term_entries));
197 for (i = 0; i < s->term_entries_max; i++)
198 s->term_entries[i].term = 0;
200 if (s->hits < s->term_entries_max)
202 s->term_entries[s->hits].reg_type = reg_type;
203 s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
204 s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
205 s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
/* Find or create the result set called name in the list zh->sets.
   An existing set is reused after its rset, cached read handle and memory
   pools are released, unless ov is zero or the set is locked (the outcome
   of that branch is not on the visible lines).  A new set gets a
   sort_info sized from the sortmax resource. */
210 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
215 for (s = zh->sets; s; s = s->next)
216 if (!strcmp(s->name, name))
223 yaz_log(log_level_resultsets, "updating result set %s", name);
224 if (!ov || s->locked)
229 rset_close(s->cache_rfd);
230 rset_delete(s->rset);
233 nmem_destroy(s->rset_nmem);
235 nmem_destroy(s->nmem);
/* New set: the sort buffer size comes from the sortmax resource, whose
   default is the string 1000. */
239 const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
241 yaz_log(log_level_resultsets, "adding result set %s", name);
242 s = (ZebraSet) xmalloc(sizeof(*s));
245 s->name = xstrdup(name);
247 s->sort_info = (struct zset_sort_info *)
248 xmalloc(sizeof(*s->sort_info));
/* NOTE(review): atoi reports no conversion errors; a non-numeric
   resource value yields 0 and is then raised to the minimum of 2. */
249 s->sort_info->max_entries = atoi(sort_max_str);
250 if (s->sort_info->max_entries < 2)
251 s->sort_info->max_entries = 2;
253 s->sort_info->entries = (struct zset_sort_entry **)
254 xmalloc(sizeof(*s->sort_info->entries) *
255 s->sort_info->max_entries);
256 s->sort_info->all_entries = (struct zset_sort_entry *)
257 xmalloc(sizeof(*s->sort_info->all_entries) *
258 s->sort_info->max_entries);
/* Point each slot of the pointer array at its own storage element. */
259 for (i = 0; i < s->sort_info->max_entries; i++)
260 s->sort_info->entries[i] = s->sort_info->all_entries + i;
270 s->cache_position = 0;
272 s->approx_limit = zh->approx_limit;
273 s->estimated_hit_count = 0;
/* Look up the result set called name in zh->sets.
   A set that has a stored query (rpn) but neither term entries nor an
   rset is searched again, and sorted again when it carries a sortSpec. */
277 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
281 for (s = zh->sets; s; s = s->next)
282 if (!strcmp(s->name, name))
284 if (!s->term_entries && !s->rset && s->rpn)
/* NOTE(review): the release of this scratch nmem is not on the visible
   lines -- confirm it is destroyed after use. */
286 NMEM nmem = nmem_create();
287 yaz_log(log_level_resultsets, "research %s", name);
289 s->rset_nmem = nmem_create();
/* The status returned by the search is not used on this line. */
290 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
291 if (s->rset && s->sortSpec)
294 yaz_log(log_level_resultsets, "resort %s", name);
295 resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
/* Report the database names and their count for the named result set.
   The returned array is the one owned by the set, not a copy.
   NOTE(review): a check for an unknown set name (sset == NULL) is not on
   the visible lines -- confirm. */
305 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
306 const char ***basenames, int *num_bases)
308 ZebraSet sset = resultSetGet(zh, setname);
311 *basenames = sset->basenames;
312 *num_bases = sset->num_bases;
/* Walk every result set of the handle and drop its evaluated state:
   close the cached read handle, delete the rset, reset the cache
   position and destroy rset_nmem.  The guards around these calls are not
   on the visible lines. */
317 void resultSetInvalidate(ZebraHandle zh)
319 ZebraSet s = zh->sets;
321 yaz_log(log_level_resultsets, "invalidating result sets");
322 for (; s; s = s->next)
327 rset_close(s->cache_rfd);
328 rset_delete(s->rset);
332 s->cache_position = 0;
334 nmem_destroy(s->rset_nmem);
/* Delete result sets by name.
   Every statuses[i] is first set to resultSetDidNotExist and changed to
   success when a set named names[i] is found.  A matching set has its
   sort buffers, memory pools, cached read handle and rset released. */
339 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
/* ss addresses the link that leads to the current set; presumably used
   to unlink it from the list -- the unlink is not on the visible
   lines. */
341 ZebraSet * ss = &zh->sets;
345 for (i = 0; i<num; i++)
346 statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
353 for (i = 0; i<num; i++)
354 if (!strcmp(s->name, names[i]))
357 statuses[i] = Z_DeleteStatus_success;
366 xfree(s->sort_info->all_entries);
367 xfree(s->sort_info->entries);
371 nmem_destroy(s->nmem);
375 rset_close(s->cache_rfd);
376 rset_delete(s->rset);
379 nmem_destroy(s->rset_nmem);
/* Convenience wrapper around zebra_meta_records_create: builds an array of
   num positions and passes it on.  Requests with num above 10000 or not
   positive are singled out by the range test below (the action taken is
   not on the visible lines). */
388 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
/* Start with the small local buffer; a heap buffer replaces it below
   (the condition for that is not visible).
   NOTE(review): confirm the heap buffer is freed after the call. */
393 zint *pos = pos_small;
397 if (num > 10000 || num <= 0)
401 pos = xmalloc(sizeof(*pos) * num);
403 for (i = 0; i<num; i++)
406 mr = zebra_meta_records_create(zh, name, num, pos);
/* Build an array of num ZebraMetaRecord items for the given 1-based
   positions in the result set called name.
   Sets without an rset are answered from the recorded term entries.
   Otherwise positions inside the sorted part come from sort_info and the
   remaining ones are read sequentially from the rset, with the read
   handle and position cached on the set for the next call. */
413 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name,
414 int num, zint *positions)
417 ZebraMetaRecord *sr = 0;
420 struct zset_sort_info *sort_info;
/* Index of the sysno inside key.mem; resultSetRank sets its own copy to
   1 under the same m_staticrank test, presumably the same happens
   here. */
421 size_t sysno_mem_index = 0;
423 if (zh->m_staticrank)
428 if (!(sset = resultSetGet(zh, name)))
430 if (!(rset = sset->rset))
432 if (!sset->term_entries)
434 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
435 for (i = 0; i<num; i++)
/* NOTE(review): only the upper bound is tested here; a position below 1
   would index before the start of term_entries -- confirm a lower-bound
   check exists. */
442 if (positions[i] <= sset->term_entries_max)
444 sr[i].term = sset->term_entries[positions[i]-1].term;
445 sr[i].db = sset->term_entries[positions[i]-1].db;
451 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
452 for (i = 0; i<num; i++)
459 sort_info = sset->sort_info;
/* First pass: serve every position that lies inside the sorted part. */
464 for (i = 0; i<num; i++)
466 position = positions[i];
467 if (position > 0 && position <= sort_info->num_entries)
469 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
470 " (sorted)", position);
471 sr[i].sysno = sort_info->entries[position-1]->sysno;
472 sr[i].score = sort_info->entries[position-1]->score;
476 /* did we really get all entries using sort ? */
477 for (i = 0; i<num; i++)
482 if (i < num) /* nope, get the rest, unsorted - sorry */
/* Skip the requested positions already covered by the sorted part. */
491 position = sort_info->num_entries;
492 while (num_i < num && positions[num_i] <= position)
/* Resume from the cached read handle when the next wanted position lies
   beyond the cached position. */
495 if (sset->cache_rfd &&
496 num_i < num && positions[num_i] > sset->cache_position)
498 position = sset->cache_position;
499 rfd = sset->cache_rfd;
500 psysno = sset->cache_psysno;
/* Otherwise drop the cached handle and read from a freshly opened one. */
505 rset_close(sset->cache_rfd);
506 rfd = rset_open(rset, RSETF_READ);
508 while (num_i < num && rset_read(rfd, &key, 0))
510 zint this_sys = key.mem[sysno_mem_index];
511 if (this_sys != psysno)
516 /* determine whether we already have this in our set */
517 for (i = sort_info->num_entries; --i >= 0; )
518 if (psysno == sort_info->entries[i]->sysno)
525 if (position == positions[num_i])
527 sr[num_i].sysno = psysno;
528 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
/* Records read in unsorted order carry no score. */
529 sr[num_i].score = -1;
/* Remember where the scan stopped so that a later call can continue. */
534 sset->cache_position = position;
535 sset->cache_psysno = psysno;
536 sset->cache_rfd = rfd;
/* Release an array obtained from zebra_meta_records_create.  zh is only
   asserted; the release itself is not on the visible lines. */
542 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
545 assert(zh); /* compiler shut up about unused arg */
551 int *ord; /* array of ord for each database searched */
552 int *numerical; /* per-database flag: nonzero selects numeric comparison of the key */
553 const char *index_type; /* index type looked up from ord[0]; used when untranslating numeric keys */
/* Insert record sysno into the sorted entries of sset.
   For each criterion the sort key of the record is read into
   tmp_cmp_buf[i].  The keys are compared with those stored for existing
   entries in cmp_buf (one SORT_IDX_ENTRYSIZE slot per entry) to find the
   insertion index i.  Higher entries are then moved up one slot and the
   new keys are stored.  The new entry gets score -1. */
556 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
558 struct sortKeyInfo *criteria, int num_criteria,
560 char *cmp_buf[], char *tmp_cmp_buf[])
562 struct zset_sort_entry *new_entry = NULL;
563 struct zset_sort_info *sort_info = sset->sort_info;
566 zebra_sort_sysno(zh->reg->sort_index, sysno);
/* Fetch the sort key of the record for each criterion.  The buffer is
   zeroed first, so a criterion whose ord is -1 leaves an all-zero key. */
567 for (i = 0; i<num_criteria; i++)
569 char *this_entry_buf = tmp_cmp_buf[i];
570 memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
572 if (criteria[i].ord[database_no] != -1)
574 yaz_log(log_level_sort, "pre zebra_sort_type ord is %d",
575 criteria[i].ord[database_no]);
576 zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]);
577 zebra_sort_read(zh->reg->sort_index, this_entry_buf);
581 yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
/* Search for the insertion index, starting from the end of the sorted
   entries. */
584 i = sort_info->num_entries;
588 for (j = 0; j<num_criteria; j++)
590 char *this_entry_buf = tmp_cmp_buf[j];
591 char *other_entry_buf =
592 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
593 if (criteria[j].numerical[database_no])
595 char this_entry_org[1024];
596 char other_entry_org[1024];
598 /* when searching multiple databases, we use the index
599 type of the first one. So if they differ between
600 databases, we have a problem here we could store the
601 index_type for each database, but if we didn't find the
602 record in any sort index, then we still don't know to
603 which database it belongs. */
604 const char *index_type = criteria[j].index_type;
605 zebra_term_untrans(zh, index_type, this_entry_org,
607 zebra_term_untrans(zh, index_type, other_entry_org,
/* Numeric criteria compare the untranslated key text as doubles. */
609 diff = atof(this_entry_org) - atof(other_entry_org);
620 rel = memcmp(this_entry_buf, other_entry_buf,
623 /* when the compare is equal, continue to next criteria,
630 if (criteria[j].relation == 'A')
635 else if (criteria[j].relation == 'D')
642 yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
643 j = sort_info->max_entries;
645 yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
649 if (sort_info->num_entries == j)
652 j = (sort_info->num_entries)++;
653 new_entry = sort_info->entries[j];
654 /* move up all higher entries (to make room) */
/* Both the compare-buffer slots and the entry pointers move together. */
658 for (k = 0; k<num_criteria; k++)
660 char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
661 char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
662 memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
664 sort_info->entries[j] = sort_info->entries[j-1];
667 /* and insert the new entry at the correct place */
668 sort_info->entries[i] = new_entry;
670 /* and add this to the compare buffer */
671 for (i = 0; i<num_criteria; i++)
673 char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
674 char *this_entry_buf = tmp_cmp_buf[i];
675 memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
677 new_entry->sysno = sysno;
678 new_entry->score = -1;
/* Insert a ranked hit (sysno, score) into sort_info, ordered by score.
   relation selects the ordering; the value 'A' is tested explicitly
   below.  The insertion index is searched from the end of the current
   entries and higher entries are moved up one slot to make room. */
681 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
682 zint sysno, int score, int relation)
684 struct zset_sort_entry *new_entry = NULL;
686 assert(zh); /* compiler shut up about unused arg */
688 i = sort_info->num_entries;
693 rel = score - sort_info->entries[i]->score;
700 else if (relation == 'A')
/* j is the slot that receives the entry record: the buffer capacity is
   compared with the current count and the count is post-incremented
   (the branch structure is not on the visible lines). */
707 j = sort_info->max_entries;
711 if (sort_info->num_entries == j)
714 j = (sort_info->num_entries)++;
716 new_entry = sort_info->entries[j];
719 sort_info->entries[j] = sort_info->entries[j-1];
722 sort_info->entries[i] = new_entry;
724 new_entry->sysno = sysno;
725 new_entry->score = score;
/* Deep-copy an RPN query: encode src into an ODR buffer, decode that
   buffer into a new structure, and move the memory of the decoder into
   nmem so that the copy lives as long as nmem does.
   NOTE(review): the release of the two ODR streams is not on the visible
   lines -- confirm both are destroyed. */
728 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
731 ODR encode = odr_createmem(ODR_ENCODE);
732 ODR decode = odr_createmem(ODR_DECODE);
734 if (z_RPNQuery(encode, &src, 0, 0))
737 char *buf = odr_getbuf(encode, &len, 0);
741 odr_setbuf(decode, buf, len, 0);
742 z_RPNQuery(decode, &dst, 0, 0);
745 nmem_transfer(nmem, decode->mem);
/* Deep-copy a sort key specification list with the same encode, decode
   and nmem_transfer sequence as copy_RPNQuery.  dst starts as 0, so it
   stays 0 when the encode step fails.
   NOTE(review): the release of the two ODR streams is not on the visible
   lines -- confirm both are destroyed. */
751 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
753 Z_SortKeySpecList *dst = 0;
754 ODR encode = odr_createmem(ODR_ENCODE);
755 ODR decode = odr_createmem(ODR_DECODE);
757 if (z_SortKeySpecList(encode, &src, 0, 0))
760 char *buf = odr_getbuf(encode, &len, 0);
764 odr_setbuf(decode, buf, len, 0);
765 z_SortKeySpecList(decode, &dst, 0, 0);
768 nmem_transfer(nmem, decode->mem);
/* Create the set called setname as a copy of the set rset (a ZebraSet,
   despite the name): the database names are duplicated into the memory
   of the new set, the underlying rset is duplicated with rset_dup and
   the stored query is deep-copied. */
774 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
/* Third argument 1: an existing set with that name may be replaced. */
780 nset = resultSetAdd(zh, setname, 1);
784 nset->nmem = nmem_create();
786 nset->num_bases = rset->num_bases;
788 nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
789 for (i = 0; i<rset->num_bases; i++)
790 nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
793 nset->rset = rset_dup(rset->rset);
795 nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
/* Sort one input result set into output_setname.
   Exactly one input set is supported: zero or more than one input name
   sets a BIB-1 error, as does an unknown set or a set without an rset.
   When the output name differs from the input name the set is cloned
   first.  The sort specification is copied onto the set and the actual
   sort is done by resultSetSortSingle. */
799 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
800 int num_input_setnames, const char **input_setnames,
801 const char *output_setname,
802 Z_SortKeySpecList *sort_sequence, int *sort_status)
807 if (num_input_setnames == 0)
809 zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
812 if (num_input_setnames > 1)
814 zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
819 yaz_log(log_level_sort, "result set sort input=%s output=%s",
820 *input_setnames, output_setname);
821 sset = resultSetGet(zh, input_setnames[0]);
824 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
828 if (!(rset = sset->rset))
830 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
/* strcmp is nonzero when the names differ: sort into a clone. */
834 if (strcmp(output_setname, input_setnames[0]))
835 sset = resultSetClone(zh, output_setname, sset);
/* Keep a private copy of the specification on the set; resultSetGet
   uses it to sort again after a re-search. */
836 sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
/* The rset passed here is the one fetched from the input set above. */
837 return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
/* Sort the records of rset into sset->sort_info according to
   sort_sequence.
   At most ZSET_SORT_MAX_LEVEL criteria are used.  For every criterion the
   sort index ordinal (ord) and the numeric flag are resolved per
   database.  The rset is then read once and each new sysno is passed to
   resultSetInsertSort.  *sort_status is set to Z_SortResponse_success on
   the final visible line.  Unsupported specifications set a BIB-1 error
   through zebra_setError. */
841 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
842 ZebraSet sset, RSET rset,
843 Z_SortKeySpecList *sort_sequence,
852 struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
853 char *cmp_buf[ZSET_SORT_MAX_LEVEL];
854 char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
860 size_t sysno_mem_index = 0;
/* NOTE(review): the database count and names are taken from the handle
   (zh->num_basenames, zh->basenames) and not from sset->num_bases and
   sset->basenames -- confirm this is intended when an older set is
   sorted again. */
862 int numbases = zh->num_basenames;
863 yaz_log(log_level_sort, "searching %d databases",numbases);
865 if (zh->m_staticrank)
868 assert(nmem); /* compiler shut up about unused param */
869 sset->sort_info->num_entries = 0;
/* Two calls: the first only obtains the term count n, the second fills
   the array allocated for it. */
871 rset_getterms(rset, 0, 0, &n);
872 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
873 rset_getterms(rset, terms, n, &numTerms);
/* Criteria beyond ZSET_SORT_MAX_LEVEL are ignored. */
876 num_criteria = sort_sequence->num_specs;
877 if (num_criteria > ZSET_SORT_MAX_LEVEL)
878 num_criteria = ZSET_SORT_MAX_LEVEL;
879 /* set up the search criteria */
880 for (i = 0; i < num_criteria; i++)
882 Z_SortKeySpec *sks = sort_sequence->specs[i];
886 sort_criteria[i].ord = (int *)
887 nmem_malloc(nmem, sizeof(int)*numbases);
888 sort_criteria[i].numerical = (int *)
889 nmem_malloc(nmem, sizeof(int)*numbases);
891 /* initialize ord and numerical for each database */
892 for (ib = 0; ib < numbases; ib++)
894 sort_criteria[i].ord[ib] = -1;
895 sort_criteria[i].numerical[ib] = 0;
898 if (sks->which == Z_SortKeySpec_missingValueData)
900 zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
/* Map the protocol relation onto the codes used by resultSetInsertSort. */
903 if (*sks->sortRelation == Z_SortKeySpec_ascending)
904 sort_criteria[i].relation = 'A';
905 else if (*sks->sortRelation == Z_SortKeySpec_descending)
906 sort_criteria[i].relation = 'D';
909 zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
/* Only generic sort elements are accepted. */
912 if (sks->sortElement->which == Z_SortElement_databaseSpecific)
914 zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
917 else if (sks->sortElement->which != Z_SortElement_generic)
919 zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
922 sk = sks->sortElement->u.generic;
925 case Z_SortKey_sortField:
926 yaz_log(log_level_sort, "key %d is of type sortField", i+1);
927 for (ib = 0; ib < numbases; ib++)
929 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
930 sort_criteria[i].numerical[ib] = 0;
931 sort_criteria[i].ord[ib] =
932 zebraExplain_lookup_attr_str(zh->reg->zei,
933 zinfo_index_category_sort,
935 if (sks->which != Z_SortKeySpec_null
936 && sort_criteria[i].ord[ib] == -1)
939 YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
944 case Z_SortKey_elementSpec:
945 yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
946 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
948 case Z_SortKey_sortAttributes:
949 yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
950 /* for every database we searched, get the sort index file
951 id (ord) and its numerical indication and store them in
953 for (ib = 0; ib < numbases; ib++)
955 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
956 res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
957 &sort_criteria[i].ord[ib],
958 &sort_criteria[i].numerical[ib]);
961 if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
965 /* right now we look up the index type based on the first database
966 if the index_type's can differ between the indexes of different
967 databases (which i guess they can?) then we have to store the
968 index types for each database, just like the ord and numerical */
969 if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
970 &sort_criteria[i].index_type,
973 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
977 /* allocate space for each compare buf + one extra for tmp comparison */
978 /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
979 all other result entries to compare against. This is slowly filled when records are processed.
980 tmp_cmp_buf is an array with a value of the current record for each criteria
982 for (i = 0; i<num_criteria; i++)
984 cmp_buf[i] = xmalloc(sset->sort_info->max_entries
985 * SORT_IDX_ENTRYSIZE);
986 tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
988 rfd = rset_open(rset, RSETF_READ);
989 while (rset_read(rfd, &key, &termid))
991 zint this_sys = key.mem[sysno_mem_index];
992 if (log_level_searchhits)
993 key_logdump_txt(log_level_searchhits, &key, termid->name);
995 if (this_sys != psysno)
998 if ((sset->hits & 255) == 0 && zh->break_handler_func)
1000 if (zh->break_handler_func(zh->break_handler_data))
1002 rset_set_hits_limit(rset, 0);
1009 /* determine database from the term, but only bother if more than
1010 one database is in use*/
/* NOTE(review): termid is dereferenced here and in the logging call
   above without a visible NULL test -- confirm rset_read always sets
   it. */
1011 if (numbases > 1 && termid->ol)
1013 const char *this_db = 0;
1014 if (zebraExplain_lookup_ord(zh->reg->zei, termid->ol->ord, 0, &this_db, 0)
1017 for (ib = 0; ib < numbases; ib++)
1018 if (!strcmp(this_db, zh->basenames[ib]))
/* NOTE(review): this per-record message uses YLOG_LOG and not
   log_level_sort; it looks like debug output -- confirm. */
1023 yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " database_no=%d", this_sys,
1026 ord_list_print(termid->ol);
1027 resultSetInsertSort(zh, sset, database_no,
1028 sort_criteria, num_criteria, psysno, cmp_buf,
1034 /* free the compare buffers */
1035 for (i = 0; i<num_criteria; i++)
1038 xfree(tmp_cmp_buf[i]);
1041 yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1043 for (i = 0; i < numTerms; i++)
1044 yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1045 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1046 *sort_status = Z_SortResponse_success;
/* Look up the result set called resultSetId through resultSetGet.  What
   is returned for a found set is not on the visible lines; presumably
   its rset -- TODO confirm. */
1050 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1054 if ((s = resultSetGet(zh, resultSetId)))
/* Rank the records of rset into zebraSet->sort_info.
   The rank handler is selected by the rank resource (default rank-1).
   The rset is read once: keys are handed to the add callback of the
   handler, and whenever the sysno changes the previous record is scored
   with the calc callback and inserted with resultSetInsertRank.
   An unknown handler name is logged and reported as a BIB-1 error. */
1059 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1060 RSET rset, NMEM nmem)
1069 ZebraRankClass rank_class;
1070 struct zset_sort_info *sort_info;
1071 const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
1072 size_t sysno_mem_index = 0;
/* With static ranking the sysno is read from key.mem[1]; key.mem[0] is
   read as the static rank further down. */
1074 if (zh->m_staticrank)
1075 sysno_mem_index = 1;
1079 sort_info = zebraSet->sort_info;
1080 sort_info->num_entries = 0;
1082 zebraSet->estimated_hit_count = 0;
/* Two calls: the first only obtains the term count n, the second fills
   the array allocated for it. */
1083 rset_getterms(rset, 0, 0, &n);
1084 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1085 rset_getterms(rset, terms, n, &numTerms);
1087 rank_class = zebraRankLookup(zh, rank_handler_name);
1090 yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1091 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1096 RSFD rfd = rset_open(rset, RSETF_READ);
1097 struct rank_control *rc = rank_class->control;
1100 void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1101 nmem, terms, numTerms);
1102 zint psysno = 0; /* previous doc id / sys no */
1103 zint pstaticrank = 0; /* previous static rank */
1105 while (rset_read(rfd, &key, &termid))
1107 zint this_sys = key.mem[sysno_mem_index];
/* The last element of the key is used as the sequence number. */
1109 zint seqno = key.mem[key.len-1];
1111 if (log_level_searchhits)
1112 key_logdump_txt(log_level_searchhits, &key, termid->name);
1113 if (this_sys != psysno)
1114 { /* new record .. */
/* Poll the break handler when the low 8 bits of counted_items are 0. */
1115 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1117 if (zh->break_handler_func(zh->break_handler_data))
1119 yaz_log(YLOG_LOG, "Aborted search");
1123 if (rfd->counted_items > rset->hits_limit)
1126 { /* only if we did have a previous record */
1127 score = (*rc->calc)(handle, psysno, pstaticrank,
1129 /* insert the hit. A=Ascending */
1130 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
/* Flag the hit count as an estimate and clear the hits limit of the
   rset.  The condition guarding these two lines is not visible;
   presumably the limit test above -- TODO confirm. */
1135 zebraSet->estimated_hit_count = 1;
1136 rset_set_hits_limit(rset, 0);
1140 if (zh->m_staticrank)
1141 pstaticrank = key.mem[0];
1143 (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1147 { /* we had - at least - one record */
1148 score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1149 /* insert the hit. A=Ascending */
1150 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1153 (*rc->end)(zh->reg, handle);
1156 zebraSet->hits = rset->hits_count;
1158 yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1159 ZINT_FORMAT " sysnos, rank", kno, zebraSet->hits);
1160 for (i = 0; i < numTerms; i++)
1162 yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1164 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
/* Find the rank class whose control name equals name in the list
   zh->reg->rank_classes.  A class found with init_flag still clear has
   its create callback invoked, when one is provided, and the result
   stored as class_handle. */
1169 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1171 ZebraRankClass p = zh->reg->rank_classes;
/* Advance until the names match or the list ends (the advance step is
   not on the visible lines). */
1172 while (p && strcmp(p->control->name, name))
1174 if (p && !p->init_flag)
1176 if (p->control->create)
1177 p->class_handle = (*p->control->create)(zh);
/* Register a rank handler: allocate a class record holding a private copy
   of ctrl, including a duplicate of its name, and push the record onto
   the head of reg->rank_classes. */
1183 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1185 ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1186 p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1187 memcpy(p->control, ctrl, sizeof(*p->control));
/* Replace the name pointer copied by memcpy with an owned duplicate. */
1188 p->control->name = xstrdup(ctrl->name);
1190 p->next = reg->rank_classes;
1191 reg->rank_classes = p;
/* Tear down the rank classes of a register: a class that was initialised
   and has a destroy callback gets that callback invoked, the duplicated
   name is freed, and the list head is cleared at the end.  The freeing
   of the control copy and of the class record is not on the visible
   lines. */
1194 void zebraRankDestroy(struct zebra_register *reg)
1196 ZebraRankClass p = reg->rank_classes;
/* Read the next pointer before the current node is released. */
1199 ZebraRankClass p_next = p->next;
1200 if (p->init_flag && p->control->destroy)
1201 (*p->control->destroy)(reg, p->class_handle);
1202 xfree(p->control->name);
1207 reg->rank_classes = NULL;
/* Walk an rset tree and collect term information.
   The children are visited recursively and their counts are summed in
   no.  For the node itself the term, hit count and approximation flag
   are written at index no into whichever output arrays were supplied.
   Calling with all three arrays NULL therefore only counts, as the
   callers in this file do. */
1210 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1211 zint *hits_array, int *approx_array)
1215 for (i = 0; i<rset->no_children; i++)
/* Each child writes after the slots filled so far; a NULL array stays
   NULL. */
1216 no += trav_rset_for_termids(rset->children[i],
1217 (termid_array ? termid_array + no : 0),
1218 (hits_array ? hits_array + no : 0),
1219 (approx_array ? approx_array + no : 0));
1223 termid_array[no] = rset->term;
1225 hits_array[no] = rset->hits_count;
1227 approx_array[no] = rset->hits_approx;
1229 yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1230 " count=" ZINT_FORMAT,
1231 rset, rset->term->name, rset->hits_limit, rset->hits_count);
/* Report the number of terms in the named result set by running
   trav_rset_for_termids in count-only mode (all arrays NULL). */
1238 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1241 ZebraSet sset = resultSetGet(zh, setname);
1245 *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
/* Return information about term number no (0-based) of the named result
   set: hit count, approximation flag, term text and reference id.
   The term text is copied into termbuf, converted with
   zh->iconv_from_utf8 when that converter is set, and terminated with a
   NUL byte.  *termlen is the buffer size on entry and the stored length
   on return. */
1251 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1252 int no, zint *count, int *approx,
1253 char *termbuf, size_t *termlen,
1254 const char **term_ref_id)
1256 ZebraSet sset = resultSetGet(zh, setname);
/* First traversal only counts the terms, so that no can be validated and
   the arrays can be sized. */
1259 int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1260 if (no >= 0 && no < num_terms)
1262 TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1263 zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1264 int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1266 trav_rset_for_termids(sset->rset, term_array,
1267 hits_array, approx_array);
1270 *count = hits_array[no];
1272 *approx = approx_array[no];
1275 char *inbuf = term_array[no]->name;
1276 size_t inleft = strlen(inbuf);
/* One byte is reserved for the terminating NUL.
   NOTE(review): size_t arithmetic; a *termlen of 0 wraps to a huge
   value -- confirm callers never pass 0. */
1277 size_t outleft = *termlen - 1;
1279 if (zh->iconv_from_utf8 != 0)
1281 char *outbuf = termbuf;
1284 ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1286 if (ret == (size_t)(-1))
/* Second yaz_iconv call with a null input, presumably to flush pending
   output -- see the yaz_iconv documentation. */
1290 yaz_iconv(zh->iconv_from_utf8, 0, 0,
1292 *termlen = outbuf - termbuf;
1297 if (inleft > outleft)
1300 memcpy(termbuf, inbuf, *termlen);
1302 termbuf[*termlen] = '\0';
1305 *term_ref_id = term_array[no]->ref_id;
1309 xfree(approx_array);
/* Collect the hits of one record (sysno) within the named result set.
   A temporary rset receives a key through rset_write and is combined by
   AND with a duplicate of the rset of the set.  Every key read from the
   combination is appended to snippets once per ord in the ord list of
   its term, together with the last element of the key and the term
   name. */
1316 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1317 zint sysno, zebra_snippets *snippets)
1319 ZebraSet sset = resultSetGet(zh, setname);
1320 yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1326 struct rset_key_control *kc = zebra_key_control_create(zh);
/* Local memory pool for the temporary rsets created below. */
1327 NMEM nmem = nmem_create();
1329 RSET rsets[2], rset_comb;
1330 RSET rset_temp = rset_create_temp(nmem, kc, kc->scope,
1331 res_get(zh->res, "setTmpDir"),0 );
1334 RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1341 rset_write(rsfd, &key);
1344 rsets[0] = rset_temp;
1345 rsets[1] = rset_dup(sset->rset);
1347 rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1349 rsfd = rset_open(rset_comb, RSETF_READ);
1351 while (rset_read(rsfd, &key, &termid))
1355 struct ord_list *ol;
1356 for (ol = termid->ol; ol; ol = ol->next)
1358 zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1359 ol->ord, termid->name);
1365 rset_delete(rset_comb);
/* Map a record id to the sysnos stored for it in the given databases.
   On entry *no_sysnos is the capacity of sysnos; on return it is the
   number of values stored.  For each database the _ALLRECORDS index of
   type w is looked up and its ISAMB list is scanned from recid onward,
   storing the last key element of every entry whose first element
   equals recid. */
1372 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh,
1373 const char **basenames, int num_bases,
1375 zint *sysnos, int *no_sysnos)
1377 ZEBRA_RES res = ZEBRA_OK;
1378 int sysnos_offset = 0;
/* Without ISAMB or without segment indexing the lookup below is not
   used; only a room test is visible for this branch, which presumably
   stores recid itself -- TODO confirm. */
1381 if (!zh->reg->isamb || !zh->m_segment_indexing)
1383 if (sysnos_offset < *no_sysnos)
1389 for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1391 const char *database = basenames[i];
1392 if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1394 const char *index_type = "w";
1395 const char *use_string = "_ALLRECORDS";
1397 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1398 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1399 index_type, use_string);
/* The dictionary key is the encoded ord, terminated with a NUL byte. */
1403 int ord_len = key_SU_encode(ord, ord_buf);
1406 ord_buf[ord_len] = '\0';
1408 info = dict_lookup(zh->reg->dict, ord_buf);
/* The first byte of the dictionary value is tested against the size of
   an ISAM_P, which is then copied from the bytes after it. */
1411 if (*info != sizeof(ISAM_P))
1419 struct it_key key_until, key_found;
1423 memcpy(&isam_p, info+1, sizeof(ISAM_P));
1425 pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
/* Build the key to forward to: recid followed by zero elements. */
1430 key_until.mem[i++] = recid;
1431 key_until.mem[i++] = 0; /* section_id */
1432 if (zh->m_segment_indexing)
1433 key_until.mem[i++] = 0; /* segment */
1434 key_until.mem[i++] = 0;
1437 r = isamb_pp_forward(pt, &key_found, &key_until);
1438 while (r && key_found.mem[0] == recid)
/* Values beyond the capacity of the caller are dropped. */
1440 if (sysnos_offset < *no_sysnos)
1441 sysnos[sysnos_offset++] =
1442 key_found.mem[key_found.len-1];
1443 r = isamb_pp_read(pt, &key_found);
1453 *no_sysnos = sysnos_offset;
/* Public entry point: fetch the database names of the named result set
   and map recid to sysnos in those databases with zebra_recid_to_sysno.
   A failure to get the database names is tested before the mapping
   call. */
1457 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh,
1458 const char *setname,
1460 zint *sysnos, int *no_sysnos)
1462 const char **basenames;
1466 res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1467 if (res != ZEBRA_OK)
1470 return zebra_recid_to_sysno(zh, basenames, num_bases,
1471 recid, sysnos, no_sysnos);
/* Count the hits of rset by reading through it.
   approx_limit is installed as the hits limit of the rset, and the limit
   is tested each time the first key element changes.  The result
   reported in *count is rset->hits_count. */
1474 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1481 yaz_log(YLOG_DEBUG, "count_set");
1483 rset->hits_limit = approx_limit;
1486 rfd = rset_open(rset, RSETF_READ);
1487 while (rset_read(rfd, &key,0 /* never mind terms */))
/* NOTE(review): this compares key.mem[0], whereas resultSetRank reads the
   sysno from key.mem[1] when m_staticrank is set -- confirm element 0 is
   intended here. */
1489 if (key.mem[0] != psysno)
1491 psysno = key.mem[0];
1492 if (rfd->counted_items >= rset->hits_limit)
1497 *count = rset->hits_count;
1504 * indent-tabs-mode: nil
1506 * vim: shiftwidth=4 tabstop=8 expandtab