1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2011 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 #include <yaz/diagbib1.h>
37 #define ZSET_SORT_MAX_LEVEL 10
39 struct zebra_set_term_entry {
50 NMEM rset_nmem; /* for creating the rsets in */
53 const char **basenames;
55 Z_SortKeySpecList *sortSpec;
56 struct zset_sort_info *sort_info;
57 struct zebra_set_term_entry *term_entries;
59 struct zebra_set *next;
61 int estimated_hit_count;
63 zint cache_position; /* last position */
64 RSFD cache_rfd; /* rfd (NULL if not existing) */
65 zint cache_psysno; /* sysno for last position */
66 zint approx_limit; /* limit before we do approx */
69 struct zset_sort_entry {
74 struct zset_sort_info {
77 struct zset_sort_entry *all_entries;
78 struct zset_sort_entry **entries;
/* Cached log levels for this module. All start out as 0; the sort,
   searchhits, searchterms and resultsets ones are resolved by name in
   loglevels(). */
81 static int log_level_set=0;
82 static int log_level_sort=0;
83 static int log_level_searchhits=0;
84 static int log_level_searchterms=0;
85 static int log_level_resultsets=0;
/* Resolve the YAZ log levels for the "sorting", "searchhits",
   "searchterms" and "resultsets" modules with yaz_log_module_level()
   and cache them in the file-scope log_level_* variables. */
87 static void loglevels(void)
91 log_level_sort = yaz_log_module_level("sorting");
92 log_level_searchhits = yaz_log_module_level("searchhits");
93 log_level_searchterms = yaz_log_module_level("searchterms");
94 log_level_resultsets = yaz_log_module_level("resultsets");
/* Run the RPN query rpn for result set sset.
   A scratch sort-spec list with room for 10 entries is allocated from
   nmem and cleared, the approximation limit is read into
   sset->approx_limit, and rpn_search_top() is run over the set's
   databases (sset->num_bases / sset->basenames). Afterwards num_specs
   is trimmed to the number of leading non-NULL specs, the hits limit is
   applied to the rset, and resultSetRank() / resultSetSortSingle() are
   called to order the hits.
   Returns the ZEBRA_RES of the last step executed. */
99 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
100 Z_RPNQuery *rpn, ZebraSet sset)
103 Z_SortKeySpecList *sort_sequence;
105 ZEBRA_RES res = ZEBRA_OK;
/* scratch sort-spec list; every slot starts out NULL */
107 sort_sequence = (Z_SortKeySpecList *)
108 nmem_malloc(nmem, sizeof(*sort_sequence));
109 sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
110 sort_sequence->specs = (Z_SortKeySpec **)
111 nmem_malloc(nmem, sort_sequence->num_specs *
112 sizeof(*sort_sequence->specs));
113 for (i = 0; i<sort_sequence->num_specs; i++)
114 sort_sequence->specs[i] = 0;
116 rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
118 res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
122 sset->num_bases, sset->basenames,
/* count the leading non-NULL specs.
   NOTE(review): the loop has no upper bound on i, so it relies on at
   least one of the 10 slots still being NULL - confirm. */
129 for (i = 0; sort_sequence->specs[i]; i++)
131 sort_sequence->num_specs = i;
132 rset_set_hits_limit(rset, sset->approx_limit);
136 res = resultSetRank(zh, sset, rset, rset_nmem);
140 res = resultSetSortSingle(zh, nmem, sset, rset,
141 sort_sequence, &sort_status);
/* Create (or overwrite) the result set called setname from an RPN query.
   *estimated_hit_count is cleared first. The set is obtained with
   resultSetAdd(zh, setname, 1) and marked locked while the search runs.
   The database names are copied into the set's own nmem before
   resultSetSearch() is called. The hit count is reported through *hits,
   and *estimated_hit_count is set to 1 when the set flags its count as
   an estimate. The lock is cleared again afterwards. */
148 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
149 int num_bases, char **basenames,
151 zint *hits, int *estimated_hit_count)
158 *estimated_hit_count = 0;
160 zebraSet = resultSetAdd(zh, setname, 1);
/* keep the set locked for the duration of the search */
163 zebraSet->locked = 1;
166 zebraSet->rset_nmem = nmem_create();
/* private copy of the database names, owned by the set's nmem */
168 zebraSet->num_bases = num_bases;
169 zebraSet->basenames =
170 nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
171 for (i = 0; i<num_bases; i++)
172 zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
174 res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
176 *hits = zebraSet->hits;
177 if (zebraSet->estimated_hit_count)
178 *estimated_hit_count = 1;
182 zebraSet->locked = 0;
/* Record a term (register type, database, index name and term string)
   as an entry of set s.
   The term_entries table is created on first use with a fixed capacity
   of 1000 slots, each with term set to 0. The new entry is stored at
   index s->hits, and only while s->hits is below term_entries_max.
   All strings are copied into s->nmem. */
188 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
189 const char *db, const char *index_name,
192 assert(zh); /* compiler shut up */
194 s->nmem = nmem_create();
195 if (!s->term_entries)
198 s->term_entries_max = 1000;
200 nmem_malloc(s->nmem, s->term_entries_max *
201 sizeof(*s->term_entries));
202 for (i = 0; i < s->term_entries_max; i++)
203 s->term_entries[i].term = 0;
/* store the entry only if there is still room in the table */
205 if (s->hits < s->term_entries_max)
207 s->term_entries[s->hits].reg_type = reg_type;
208 s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
209 s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
210 s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
/* Find the result set called name in zh->sets, or create it.
   For an existing set ("updating") the request is checked against the
   overwrite flag ov and the set's locked flag. The cached read handle,
   the rset and both nmem pools are released so the set can be refilled.
   NOTE(review): the outcome of the !ov || s->locked check is not shown
   here; presumably the call fails - confirm.
   For a new set ("adding") the structure and its sort_info are
   allocated. sort_info->max_entries comes from the "sortmax" resource
   (default "1000") and is raised to at least 2.
   In both cases cache_position, approx_limit (from zh) and
   estimated_hit_count are reset at the end. */
215 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
220 for (s = zh->sets; s; s = s->next)
221 if (!strcmp(s->name, name))
228 yaz_log(log_level_resultsets, "updating result set %s", name);
229 if (!ov || s->locked)
234 rset_close(s->cache_rfd);
235 rset_delete(s->rset);
238 nmem_destroy(s->rset_nmem);
240 nmem_destroy(s->nmem);
244 const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
246 yaz_log(log_level_resultsets, "adding result set %s", name);
247 s = (ZebraSet) xmalloc(sizeof(*s));
250 s->name = xstrdup(name);
252 s->sort_info = (struct zset_sort_info *)
253 xmalloc(sizeof(*s->sort_info));
254 s->sort_info->max_entries = atoi(sort_max_str);
255 if (s->sort_info->max_entries < 2)
256 s->sort_info->max_entries = 2;
/* entries[] is an array of pointers into the all_entries[] backing
   store; initially entries[i] points at all_entries[i] */
258 s->sort_info->entries = (struct zset_sort_entry **)
259 xmalloc(sizeof(*s->sort_info->entries) *
260 s->sort_info->max_entries);
261 s->sort_info->all_entries = (struct zset_sort_entry *)
262 xmalloc(sizeof(*s->sort_info->all_entries) *
263 s->sort_info->max_entries);
264 for (i = 0; i < s->sort_info->max_entries; i++)
265 s->sort_info->entries[i] = s->sort_info->all_entries + i;
275 s->cache_position = 0;
277 s->approx_limit = zh->approx_limit;
278 s->estimated_hit_count = 0;
/* Look up the result set called name in zh->sets.
   If the set found has no term entries and no rset but still has its
   RPN query stored, the search is run again ("research") with a fresh
   nmem and a fresh rset_nmem. If that yields an rset and a sort spec is
   stored on the set, the set is sorted again ("resort"). */
282 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
286 for (s = zh->sets; s; s = s->next)
287 if (!strcmp(s->name, name))
/* the rset is gone but the query is still known: rebuild on demand */
289 if (!s->term_entries && !s->rset && s->rpn)
291 NMEM nmem = nmem_create();
292 yaz_log(log_level_resultsets, "research %s", name);
294 s->rset_nmem = nmem_create();
295 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
296 if (s->rset && s->sortSpec)
299 yaz_log(log_level_resultsets, "resort %s", name);
300 resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
/* Report the database names stored on the result set called setname.
   The set is looked up with resultSetGet(). *basenames is pointed at
   the set's own array (no copy is made) and *num_bases receives its
   length. */
310 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
311 const char ***basenames, int *num_bases)
313 ZebraSet sset = resultSetGet(zh, setname);
316 *basenames = sset->basenames;
317 *num_bases = sset->num_bases;
/* Drop the cached search state of every result set in zh->sets.
   For each set the cached read handle is closed, the rset is deleted,
   cache_position is reset to 0 and rset_nmem is destroyed. The set
   entries themselves stay in the list. */
322 void resultSetInvalidate(ZebraHandle zh)
324 ZebraSet s = zh->sets;
326 yaz_log(log_level_resultsets, "invalidating result sets");
327 for (; s; s = s->next)
332 rset_close(s->cache_rfd);
333 rset_delete(s->rset);
337 s->cache_position = 0;
339 nmem_destroy(s->rset_nmem);
/* Delete result sets from zh->sets.
   statuses[0..num-1] are first preset to
   Z_DeleteStatus_resultSetDidNotExist. A set whose name equals names[i]
   gets statuses[i] = Z_DeleteStatus_success. A set being removed has
   its sort_info arrays freed, its nmem destroyed, its cached read
   handle closed, its rset deleted and its rset_nmem destroyed. */
344 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
346 ZebraSet * ss = &zh->sets;
350 for (i = 0; i<num; i++)
351 statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
358 for (i = 0; i<num; i++)
359 if (!strcmp(s->name, names[i]))
362 statuses[i] = Z_DeleteStatus_success;
371 xfree(s->sort_info->all_entries);
372 xfree(s->sort_info->entries);
376 nmem_destroy(s->nmem);
380 rset_close(s->cache_rfd);
381 rset_delete(s->rset);
384 nmem_destroy(s->rset_nmem);
/* Convenience wrapper around zebra_meta_records_create().
   num is first checked against the range 1..10000. A position array of
   num entries is then filled in, using the small local buffer
   pos_small or a block from xmalloc(), and passed on together with
   name. */
393 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
398 zint *pos = pos_small;
402 if (num > 10000 || num <= 0)
406 pos = xmalloc(sizeof(*pos) * num);
408 for (i = 0; i<num; i++)
411 mr = zebra_meta_records_create(zh, name, num, pos);
/* Build an xmalloc'ed array of num ZebraMetaRecord entries describing
   the records at the given 1-based positions of result set name.
   If the set has no rset, its term_entries table is consulted instead
   and term / db are taken from term_entries[positions[i]-1].
   Otherwise sysno and score are taken from the sorted list
   (sort_info->entries) for every position within num_entries.
   Positions beyond that are resolved by reading the rset sequentially
   (score -1), skipping sysnos that are already in the sorted list.
   The read handle, position and sysno of the last read are cached on
   the set, so a later call asking for higher positions can continue
   from there rather than reopening the rset. */
418 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name,
419 int num, zint *positions)
422 ZebraMetaRecord *sr = 0;
425 struct zset_sort_info *sort_info;
426 size_t sysno_mem_index = 0;
428 if (zh->m_staticrank)
433 if (!(sset = resultSetGet(zh, name)))
435 if (!(rset = sset->rset))
437 if (!sset->term_entries)
439 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
440 for (i = 0; i<num; i++)
/* NOTE(review): only the upper bound is checked here; a position
   below 1 would index before the start of term_entries - confirm
   that callers never pass one. */
447 if (positions[i] <= sset->term_entries_max)
449 sr[i].term = sset->term_entries[positions[i]-1].term;
450 sr[i].db = sset->term_entries[positions[i]-1].db;
456 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
457 for (i = 0; i<num; i++)
464 sort_info = sset->sort_info;
/* first pass: positions covered by the sorted list */
469 for (i = 0; i<num; i++)
471 position = positions[i];
472 if (position > 0 && position <= sort_info->num_entries)
474 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
475 " (sorted)", position);
476 sr[i].sysno = sort_info->entries[position-1]->sysno;
477 sr[i].score = sort_info->entries[position-1]->score;
481 /* did we really get all entries using sort ? */
482 for (i = 0; i<num; i++)
487 if (i < num) /* nope, get the rest, unsorted - sorry */
496 position = sort_info->num_entries;
/* skip the requested positions already served from the sorted list */
497 while (num_i < num && positions[num_i] <= position)
/* resume from the cached read handle when the next wanted position
   lies beyond the cached one */
500 if (sset->cache_rfd &&
501 num_i < num && positions[num_i] > sset->cache_position)
503 position = sset->cache_position;
504 rfd = sset->cache_rfd;
505 psysno = sset->cache_psysno;
510 rset_close(sset->cache_rfd);
511 rfd = rset_open(rset, RSETF_READ);
513 while (num_i < num && rset_read(rfd, &key, 0))
515 zint this_sys = key.mem[sysno_mem_index];
516 if (this_sys != psysno)
521 /* determine whether we already have this in our set */
522 for (i = sort_info->num_entries; --i >= 0; )
523 if (psysno == sort_info->entries[i]->sysno)
530 if (position == positions[num_i])
532 sr[num_i].sysno = psysno;
533 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
534 sr[num_i].score = -1;
/* remember where we stopped so a later call can continue from here */
539 sset->cache_position = position;
540 sset->cache_psysno = psysno;
541 sset->cache_rfd = rfd;
/* Dispose of a ZebraMetaRecord array (presumably one obtained from
   zebra_meta_records_create() - confirm). zh is only asserted. */
547 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
550 assert(zh); /* compiler shut up about unused arg */
556 int *ord; /* array of ord for each database searched */
557 int *numerical; /* array of numerical flags for each database searched */
558 const char *index_type;
/* Insert record sysno into the bounded, sorted list of sset->sort_info.
   For every criterion the record's sort key is read from the sort index
   selected by criteria[i].ord[database_no] into tmp_cmp_buf[i]; an ord
   of -1 means nothing is read. When the index holds several values for
   the record, the lowest one is kept for relation 'A' and the highest
   for 'D'.
   The key is then compared against the keys of the entries already in
   the list (cmp_buf[j] + i * SORT_IDX_ENTRYSIZE) to find the insert
   position i. The comparison is numeric, via atof() on the untranslated
   terms, when the criterion is flagged numerical for this database, and
   memcmp() otherwise.
   Entries above the insert position and their compare buffers are moved
   up one slot, and the new entry is stored with score -1. The list
   never grows beyond sort_info->max_entries. */
561 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
563 struct sortKeyInfo *criteria, int num_criteria,
565 char *cmp_buf[], char *tmp_cmp_buf[])
567 struct zset_sort_entry *new_entry = NULL;
568 struct zset_sort_info *sort_info = sset->sort_info;
570 WRBUF w = wrbuf_alloc();
/* position the sort index on this record */
572 zebra_sort_sysno(zh->reg->sort_index, sysno);
573 for (i = 0; i<num_criteria; i++)
575 char *this_entry_buf = tmp_cmp_buf[i];
576 memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
578 if (criteria[i].ord[database_no] != -1)
580 yaz_log(log_level_sort, "pre zebra_sort_type ord is %d",
581 criteria[i].ord[database_no]);
582 zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]);
584 if (zebra_sort_read(zh->reg->sort_index, 0, w))
586 /* consider each sort entry and take lowest/highest one
587 of the one as sorting key depending on whether sort is
588 ascending/descending */
/* w holds NUL-separated values; off walks from one value to the next */
590 while (off != wrbuf_len(w))
592 size_t l = strlen(wrbuf_buf(w)+off);
593 assert(off < wrbuf_len(w));
595 if (l >= SORT_IDX_ENTRYSIZE)
596 l = SORT_IDX_ENTRYSIZE-1;
598 || (criteria[i].relation == 'A'
599 && strcmp(wrbuf_buf(w)+off, this_entry_buf) < 0)
600 || (criteria[i].relation == 'D'
601 && strcmp(wrbuf_buf(w)+off, this_entry_buf) > 0)
604 memcpy(this_entry_buf, wrbuf_buf(w)+off, l);
605 this_entry_buf[l] = '\0';
607 off += 1 + strlen(wrbuf_buf(w)+off);
613 yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
/* search for the insert position, starting from the end of the list */
617 i = sort_info->num_entries;
621 for (j = 0; j<num_criteria; j++)
623 char *this_entry_buf = tmp_cmp_buf[j];
624 char *other_entry_buf =
625 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
626 if (criteria[j].numerical[database_no])
628 char this_entry_org[1024];
629 char other_entry_org[1024];
631 const char *index_type = criteria[j].index_type;
632 zebra_term_untrans(zh, index_type, this_entry_org,
634 zebra_term_untrans(zh, index_type, other_entry_org,
636 diff = atof(this_entry_org) - atof(other_entry_org);
647 rel = memcmp(this_entry_buf, other_entry_buf,
650 /* when the compare is equal, continue to next criteria,
657 if (criteria[j].relation == 'A')
662 else if (criteria[j].relation == 'D')
669 yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
670 j = sort_info->max_entries;
672 yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
676 if (sort_info->num_entries == j)
679 j = (sort_info->num_entries)++;
680 new_entry = sort_info->entries[j];
681 /* move up all higher entries (to make room) */
685 for (k = 0; k<num_criteria; k++)
687 char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
688 char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
689 memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
691 sort_info->entries[j] = sort_info->entries[j-1];
694 /* and insert the new entry at the correct place */
695 sort_info->entries[i] = new_entry;
697 /* and add this to the compare buffer */
698 for (i = 0; i<num_criteria; i++)
700 char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
701 char *this_entry_buf = tmp_cmp_buf[i];
702 memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
/* sorted entries carry no rank score */
704 new_entry->sysno = sysno;
705 new_entry->score = -1;
/* Insert (sysno, score) into the bounded list held by sort_info.
   The insert position i is found by comparing score with the score of
   existing entries, starting from the end of the list; the relation
   argument ('A' is tested explicitly) selects the ordering. At most
   sort_info->max_entries entries are kept. The new entry reuses a spare
   slot from entries[], entries above position i are moved up one slot,
   and the slot is filled with sysno and score. */
708 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
709 zint sysno, int score, int relation)
711 struct zset_sort_entry *new_entry = NULL;
713 assert(zh); /* compiler shut up about unused arg */
715 i = sort_info->num_entries;
720 rel = score - sort_info->entries[i]->score;
727 else if (relation == 'A')
734 j = sort_info->max_entries;
738 if (sort_info->num_entries == j)
741 j = (sort_info->num_entries)++;
743 new_entry = sort_info->entries[j];
746 sort_info->entries[j] = sort_info->entries[j-1];
749 sort_info->entries[i] = new_entry;
751 new_entry->sysno = sysno;
752 new_entry->score = score;
/* Deep-copy an RPN query: src is encoded with ODR into a buffer and
   decoded again from that buffer. The decoder's memory is then handed
   over to nmem with nmem_transfer(), so the copy is owned by nmem. */
755 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
758 ODR encode = odr_createmem(ODR_ENCODE);
759 ODR decode = odr_createmem(ODR_DECODE);
761 if (z_RPNQuery(encode, &src, 0, 0))
764 char *buf = odr_getbuf(encode, &len, 0);
768 odr_setbuf(decode, buf, len, 0);
769 z_RPNQuery(decode, &dst, 0, 0);
772 nmem_transfer(nmem, decode->mem);
/* Deep-copy a sort key specification list: src is encoded with ODR into
   a buffer and decoded again from that buffer. The decoder's memory is
   then handed over to nmem with nmem_transfer(). dst starts out as 0,
   so it stays 0 unless the decode step fills it in. */
778 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
780 Z_SortKeySpecList *dst = 0;
781 ODR encode = odr_createmem(ODR_ENCODE);
782 ODR decode = odr_createmem(ODR_DECODE);
784 if (z_SortKeySpecList(encode, &src, 0, 0))
787 char *buf = odr_getbuf(encode, &len, 0);
791 odr_setbuf(decode, buf, len, 0);
792 z_SortKeySpecList(decode, &dst, 0, 0);
795 nmem_transfer(nmem, decode->mem);
/* Create the result set setname (overwriting an existing one) as a
   clone of the set rset. The new set gets its own nmem, a private copy
   of the database names, a duplicate of the underlying RSET
   (rset_dup()) and a deep copy of the RPN query. */
801 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
807 nset = resultSetAdd(zh, setname, 1);
811 nset->nmem = nmem_create();
813 nset->num_bases = rset->num_bases;
815 nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
816 for (i = 0; i<rset->num_bases; i++)
817 nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
820 nset->rset = rset_dup(rset->rset);
822 nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
/* Sort a result set according to sort_sequence.
   Exactly one input set is supported. No input name sets
   YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, and more than one sets
   YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS. An unknown input set, or one
   without an rset, sets YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST.
   When the output name differs from the input name the input set is
   cloned under the output name first. The sort specification is copied
   into the set's nmem and kept in sset->sortSpec (resultSetGet() uses
   it to sort again after a re-search). The actual sorting is done by
   resultSetSortSingle(). */
826 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
827 int num_input_setnames, const char **input_setnames,
828 const char *output_setname,
829 Z_SortKeySpecList *sort_sequence, int *sort_status)
834 if (num_input_setnames == 0)
836 zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
839 if (num_input_setnames > 1)
841 zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
846 yaz_log(log_level_sort, "result set sort input=%s output=%s",
847 *input_setnames, output_setname);
848 sset = resultSetGet(zh, input_setnames[0]);
851 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
855 if (!(rset = sset->rset))
857 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
/* sorting into a different set name: work on a clone */
861 if (strcmp(output_setname, input_setnames[0]))
862 sset = resultSetClone(zh, output_setname, sset);
863 sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
864 return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
/* Sort the hits of rset into sset->sort_info according to sort_sequence.
   At most ZSET_SORT_MAX_LEVEL criteria are used. For each criterion the
   per-database ord / numerical arrays are allocated from nmem and
   initialised to -1 / 0, and the sort relation is mapped to 'A' or 'D'.
   The sort index is then located for every database in zh->basenames:
   sortField keys by attribute string lookup, and sortAttributes keys
   through zebra_sort_get_ord(). Unsupported or unresolvable
   specifications set a Bib-1 diagnostic with zebra_setError().
   After that the rset is read once. Each time the sysno changes, a
   record is handed to resultSetInsertSort(), together with the database
   number derived from the term's ord list when more than one database
   is being searched. The break handler, if set, is polled when
   (sset->hits & 255) is 0.
   *sort_status is set to Z_SortResponse_success at the end. */
868 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
869 ZebraSet sset, RSET rset,
870 Z_SortKeySpecList *sort_sequence,
879 struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
880 char *cmp_buf[ZSET_SORT_MAX_LEVEL];
881 char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
887 size_t sysno_mem_index = 0;
889 int numbases = zh->num_basenames;
890 yaz_log(log_level_sort, "searching %d databases",numbases);
892 if (zh->m_staticrank)
895 assert(nmem); /* compiler shut up about unused param */
/* start from an empty sorted list */
896 sset->sort_info->num_entries = 0;
/* first call only counts the terms, second call fetches them */
898 rset_getterms(rset, 0, 0, &n);
899 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
900 rset_getterms(rset, terms, n, &numTerms);
/* criteria beyond ZSET_SORT_MAX_LEVEL are ignored */
903 num_criteria = sort_sequence->num_specs;
904 if (num_criteria > ZSET_SORT_MAX_LEVEL)
905 num_criteria = ZSET_SORT_MAX_LEVEL;
906 /* set up the search criteria */
907 for (i = 0; i < num_criteria; i++)
909 Z_SortKeySpec *sks = sort_sequence->specs[i];
912 sort_criteria[i].ord = (int *)
913 nmem_malloc(nmem, sizeof(int)*numbases);
914 sort_criteria[i].numerical = (int *)
915 nmem_malloc(nmem, sizeof(int)*numbases);
917 /* initialize ord and numerical for each database */
918 for (ib = 0; ib < numbases; ib++)
920 sort_criteria[i].ord[ib] = -1;
921 sort_criteria[i].numerical[ib] = 0;
924 if (sks->which == Z_SortKeySpec_missingValueData)
926 zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
929 if (*sks->sortRelation == Z_SortKeySpec_ascending)
930 sort_criteria[i].relation = 'A';
931 else if (*sks->sortRelation == Z_SortKeySpec_descending)
932 sort_criteria[i].relation = 'D';
935 zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
938 if (sks->sortElement->which == Z_SortElement_databaseSpecific)
940 zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
943 else if (sks->sortElement->which != Z_SortElement_generic)
945 zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
948 sk = sks->sortElement->u.generic;
951 case Z_SortKey_sortField:
952 yaz_log(log_level_sort, "key %d is of type sortField", i+1);
953 for (ib = 0; ib < numbases; ib++)
955 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
956 sort_criteria[i].numerical[ib] = 0;
957 sort_criteria[i].ord[ib] =
958 zebraExplain_lookup_attr_str(zh->reg->zei,
959 zinfo_index_category_sort,
961 if (sks->which != Z_SortKeySpec_null
962 && sort_criteria[i].ord[ib] == -1)
965 YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
970 case Z_SortKey_elementSpec:
971 yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
972 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
974 case Z_SortKey_sortAttributes:
975 yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
976 /* for every database we searched, get the sort index file
977 id (ord) and its numerical indication and store them in
979 for (ib = 0; ib < numbases; ib++)
981 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
982 if (zebra_sort_get_ord(zh, sk->u.sortAttributes,
983 &sort_criteria[i].ord[ib],
984 &sort_criteria[i].numerical[ib]) !=
985 ZEBRA_OK && sks->which != Z_SortKeySpec_null)
990 /* right now we look up the index type based on the first database
991 if the index_type's can differ between the indexes of different
992 databases (which i guess they can?) then we have to store the
993 index types for each database, just like the ord and numerical */
994 if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
995 &sort_criteria[i].index_type,
998 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
1002 /* allocate space for each compare buf + one extra for tmp comparison */
1003 /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
1004 all other result entries to compare against. This is slowly filled when records are processed.
1005 tmp_cmp_buf is an array with a value of the current record for each criteria
1007 for (i = 0; i<num_criteria; i++)
1009 cmp_buf[i] = xmalloc(sset->sort_info->max_entries
1010 * SORT_IDX_ENTRYSIZE);
1011 tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
1013 rfd = rset_open(rset, RSETF_READ);
1014 while (rset_read(rfd, &key, &termid))
1016 zint this_sys = key.mem[sysno_mem_index];
1017 if (log_level_searchhits)
1018 key_logdump_txt(log_level_searchhits, &key, termid->name);
1020 if (this_sys != psysno)
1022 int database_no = 0;
1023 if ((sset->hits & 255) == 0 && zh->break_handler_func)
1025 if (zh->break_handler_func(zh->break_handler_data))
1027 rset_set_hits_limit(rset, 0);
1034 /* determine database from the term, but only bother if more than
1035 one database is in use*/
1036 if (numbases > 1 && termid->ol)
1038 const char *this_db = 0;
1039 if (zebraExplain_lookup_ord(zh->reg->zei, termid->ol->ord, 0, &this_db, 0)
1042 for (ib = 0; ib < numbases; ib++)
1043 if (!strcmp(this_db, zh->basenames[ib]))
1048 yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " database_no=%d", this_sys,
1050 ord_list_print(termid->ol);
1052 resultSetInsertSort(zh, sset, database_no,
1053 sort_criteria, num_criteria, psysno, cmp_buf,
1059 /* free the compare buffers */
1060 for (i = 0; i<num_criteria; i++)
1063 xfree(tmp_cmp_buf[i]);
1066 yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1068 for (i = 0; i < numTerms; i++)
1069 yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1070 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1071 *sort_status = Z_SortResponse_success;
/* Look up the result set resultSetId with resultSetGet() and hand back
   an RSET for it (presumably the set's rset, or NULL when the set does
   not exist - confirm). */
1075 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1079 if ((s = resultSetGet(zh, resultSetId)))
/* Rank the hits of rset into zebraSet->sort_info.
   The rank handler is named by the "rank" resource (default "rank-1")
   and looked up with zebraRankLookup(). If it cannot be found, a
   warning is logged and YAZ_BIB1_UNSUPP_SEARCH is set.
   The terms of the rset are collected and passed to the handler's begin
   hook. Every key read from the rset is fed to the add hook. Whenever
   the sysno changes, the score of the previous record is computed with
   the calc hook and inserted with resultSetInsertRank(); the same is
   done once more after the loop for the last record.
   The break handler, if set, is polled when rfd->counted_items is a
   multiple of 256. When counted_items exceeds rset->hits_limit the hit
   count is flagged as estimated.
   zebraSet->hits is taken from rset->hits_count at the end. With static
   rank enabled the sysno is key.mem[1] and key.mem[0] holds the static
   rank. */
1084 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1085 RSET rset, NMEM nmem)
1094 ZebraRankClass rank_class;
1095 struct zset_sort_info *sort_info;
1096 const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
1097 size_t sysno_mem_index = 0;
1099 if (zh->m_staticrank)
1100 sysno_mem_index = 1;
1104 sort_info = zebraSet->sort_info;
1105 sort_info->num_entries = 0;
1107 zebraSet->estimated_hit_count = 0;
/* first call only counts the terms, second call fetches them */
1108 rset_getterms(rset, 0, 0, &n);
1109 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1110 rset_getterms(rset, terms, n, &numTerms);
1112 rank_class = zebraRankLookup(zh, rank_handler_name);
1115 yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1116 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1121 RSFD rfd = rset_open(rset, RSETF_READ);
1122 struct rank_control *rc = rank_class->control;
1125 void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1126 nmem, terms, numTerms);
1127 zint psysno = 0; /* previous doc id / sys no */
1128 zint pstaticrank = 0; /* previous static rank */
1130 while (rset_read(rfd, &key, &termid))
1132 zint this_sys = key.mem[sysno_mem_index];
/* the last component of the key is used as the sequence number */
1134 zint seqno = key.mem[key.len-1];
1136 if (log_level_searchhits)
1137 key_logdump_txt(log_level_searchhits, &key, termid->name);
1138 if (this_sys != psysno)
1139 { /* new record .. */
1140 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1142 if (zh->break_handler_func(zh->break_handler_data))
1144 yaz_log(YLOG_LOG, "Aborted search");
1148 if (rfd->counted_items > rset->hits_limit)
1152 zebraSet->estimated_hit_count = 1;
1156 { /* only if we did have a previous record */
1157 score = (*rc->calc)(handle, psysno, pstaticrank,
1159 /* insert the hit. A=Ascending */
1160 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1164 if (zh->m_staticrank)
1165 pstaticrank = key.mem[0];
1167 (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1171 { /* we had - at least - one record */
1172 score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1173 /* insert the hit. A=Ascending */
1174 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1177 (*rc->end)(zh->reg, handle);
1180 zebraSet->hits = rset->hits_count;
1182 yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1183 ZINT_FORMAT " sysnos, rank", kno, zebraSet->hits);
1184 for (i = 0; i < numTerms; i++)
1186 yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1188 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
/* Find the rank class whose control->name equals name in
   zh->reg->rank_classes. A class found with init_flag still clear is
   initialised on this first use: its create hook, if any, is called and
   the result is kept in class_handle. */
1193 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1195 ZebraRankClass p = zh->reg->rank_classes;
1196 while (p && strcmp(p->control->name, name))
1198 if (p && !p->init_flag)
1200 if (p->control->create)
1201 p->class_handle = (*p->control->create)(zh);
/* Register a ranking implementation with the register reg.
   *ctrl and its name string are copied into heap memory, so the caller
   keeps ownership of ctrl. The new class is pushed on the front of
   reg->rank_classes. */
1207 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1209 ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1210 p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1211 memcpy(p->control, ctrl, sizeof(*p->control));
1212 p->control->name = xstrdup(ctrl->name);
1214 p->next = reg->rank_classes;
1215 reg->rank_classes = p;
/* Tear down every rank class registered on reg.
   For classes that were initialised (init_flag set) the destroy hook,
   if any, is called with the class handle. The copied name is freed and
   the list head is reset to NULL. */
1218 void zebraRankDestroy(struct zebra_register *reg)
1220 ZebraRankClass p = reg->rank_classes;
/* remember the successor before the current node is released */
1223 ZebraRankClass p_next = p->next;
1224 if (p->init_flag && p->control->destroy)
1225 (*p->control->destroy)(reg, p->class_handle);
1226 xfree(p->control->name);
1231 reg->rank_classes = NULL;
/* Recursively walk rset and its children, collecting per-term data.
   Each of termid_array, hits_array and approx_array may be NULL. When
   given, slot [no] receives the rset's term, hits_count and hits_approx
   respectively. Children write behind the entries already collected
   (array + no). The running count no is accumulated from the children's
   results; callers use the function's result as the number of terms. */
1234 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1235 zint *hits_array, int *approx_array)
1239 for (i = 0; i<rset->no_children; i++)
1240 no += trav_rset_for_termids(rset->children[i],
1241 (termid_array ? termid_array + no : 0),
1242 (hits_array ? hits_array + no : 0),
1243 (approx_array ? approx_array + no : 0));
1247 termid_array[no] = rset->term;
1249 hits_array[no] = rset->hits_count;
1251 approx_array[no] = rset->hits_approx;
1253 yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1254 " count=" ZINT_FORMAT,
1255 rset, rset->term->name, rset->hits_limit, rset->hits_count);
/* Report the number of terms in the result set setname through
   *num_terms. The count is obtained by traversing the set's rset with
   trav_rset_for_termids() without output arrays. */
1262 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1265 ZebraSet sset = resultSetGet(zh, setname);
1269 *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
/* Report information about term number no (0-based) of result set
   setname.
   The terms are counted first. For a valid no, temporary arrays are
   filled by trav_rset_for_termids(), and the hit count, the approximate
   flag, the term string and the term reference id are copied to the
   output parameters.
   The term string is converted with zh->iconv_from_utf8 when that is
   set, and copied with memcpy() otherwise. At most *termlen - 1 bytes
   are available for it; termbuf is NUL-terminated and *termlen is
   updated to the number of bytes stored. The temporary arrays are freed
   before returning. */
1275 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1276 int no, zint *count, int *approx,
1277 char *termbuf, size_t *termlen,
1278 const char **term_ref_id)
1280 ZebraSet sset = resultSetGet(zh, setname);
1283 int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1284 if (no >= 0 && no < num_terms)
1286 TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1287 zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1288 int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1290 trav_rset_for_termids(sset->rset, term_array,
1291 hits_array, approx_array);
1294 *count = hits_array[no];
1296 *approx = approx_array[no];
1299 char *inbuf = term_array[no]->name;
1300 size_t inleft = strlen(inbuf);
/* one byte is reserved for the terminating NUL.
   NOTE(review): *termlen == 0 would wrap around here (size_t) -
   confirm that callers always pass a non-empty buffer. */
1301 size_t outleft = *termlen - 1;
1303 if (zh->iconv_from_utf8 != 0)
1305 char *outbuf = termbuf;
1308 ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1310 if (ret == (size_t)(-1))
1314 yaz_iconv(zh->iconv_from_utf8, 0, 0,
1316 *termlen = outbuf - termbuf;
1321 if (inleft > outleft)
1324 memcpy(termbuf, inbuf, *termlen);
1326 termbuf[*termlen] = '\0';
1329 *term_ref_id = term_array[no]->ref_id;
1333 xfree(approx_array);
/* Collect the term hits of one record within result set setname into
   snippets.
   A key is written to a temporary rset. That rset is AND-combined with
   a duplicate of the set's rset, so reading the combination only yields
   keys common to both. For every key read, one snippet is appended per
   ord in the term's ord list, using the last key component as the
   sequence number and the term name as text. The combined rset is
   deleted afterwards. */
1340 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1341 zint sysno, zebra_snippets *snippets)
1343 ZebraSet sset = resultSetGet(zh, setname);
1344 yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1350 struct rset_key_control *kc = zebra_key_control_create(zh);
1351 NMEM nmem = nmem_create();
1353 RSET rsets[2], rset_comb;
1354 RSET rset_temp = rset_create_temp(nmem, kc, kc->scope,
1355 res_get(zh->res, "setTmpDir"),0 );
1358 RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1365 rset_write(rsfd, &key);
1368 rsets[0] = rset_temp;
1369 rsets[1] = rset_dup(sset->rset);
1371 rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1373 rsfd = rset_open(rset_comb, RSETF_READ);
1375 while (rset_read(rsfd, &key, &termid))
1379 struct ord_list *ol;
1380 for (ol = termid->ol; ol; ol = ol->next)
1382 zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1383 ol->ord, termid->name);
1389 rset_delete(rset_comb);
/* Map the record id recid to the system numbers stored for it in the
   given databases.
   On entry *no_sysnos is the capacity of sysnos[]; on return it is the
   number of entries stored.
   When the register has no ISAMB or segment indexing is off, a separate
   branch is taken. NOTE(review): its body is not shown; presumably
   recid itself is stored - confirm.
   Otherwise, for each database the "_ALLRECORDS" index (type "w",
   always-matches category) is looked up, its ord is encoded as a
   dictionary key, and the ISAMB list found in the dictionary is opened.
   The list is forwarded to the first key for recid, and the last
   component of every key whose first component equals recid is stored
   as a sysno, as long as there is room in sysnos[]. */
1396 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh,
1397 const char **basenames, int num_bases,
1399 zint *sysnos, int *no_sysnos)
1401 ZEBRA_RES res = ZEBRA_OK;
1402 int sysnos_offset = 0;
1405 if (!zh->reg->isamb || !zh->m_segment_indexing)
1407 if (sysnos_offset < *no_sysnos)
1413 for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1415 const char *database = basenames[i];
1416 if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1418 const char *index_type = "w";
1419 const char *use_string = "_ALLRECORDS";
1421 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1422 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1423 index_type, use_string);
/* the encoded ord, NUL-terminated, is the dictionary key */
1427 int ord_len = key_SU_encode(ord, ord_buf);
1430 ord_buf[ord_len] = '\0';
1432 info = dict_lookup(zh->reg->dict, ord_buf);
/* the first byte of the dictionary info is the payload length */
1435 if (*info != sizeof(ISAM_P))
1443 struct it_key key_until, key_found;
1447 memcpy(&isam_p, info+1, sizeof(ISAM_P));
1449 pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
1454 key_until.mem[i++] = recid;
1455 key_until.mem[i++] = 0; /* section_id */
1456 if (zh->m_segment_indexing)
1457 key_until.mem[i++] = 0; /* segment */
1458 key_until.mem[i++] = 0;
1461 r = isamb_pp_forward(pt, &key_found, &key_until);
1462 while (r && key_found.mem[0] == recid)
1464 if (sysnos_offset < *no_sysnos)
1465 sysnos[sysnos_offset++] =
1466 key_found.mem[key_found.len-1];
1467 r = isamb_pp_read(pt, &key_found);
1477 *no_sysnos = sysnos_offset;
/* Map recid to system numbers using the databases of result set
   setname. The database names are fetched with resultSetGetBaseNames();
   a result other than ZEBRA_OK from that call is checked before the
   lookup is delegated to zebra_recid_to_sysno(). */
1481 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh,
1482 const char *setname,
1484 zint *sysnos, int *no_sysnos)
1486 const char **basenames;
1490 res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1491 if (res != ZEBRA_OK)
1494 return zebra_recid_to_sysno(zh, basenames, num_bases,
1495 recid, sysnos, no_sysnos);
/* Count the hits of rset and report them through *count.
   rset->hits_limit is set to approx_limit, then the rset is read
   without collecting terms. Changes of key.mem[0] mark a new record,
   and on each new record rfd->counted_items is compared with the hits
   limit. *count is finally taken from rset->hits_count. */
1498 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1505 yaz_log(YLOG_DEBUG, "count_set");
1507 rset->hits_limit = approx_limit;
1510 rfd = rset_open(rset, RSETF_READ);
1511 while (rset_read(rfd, &key,0 /* never mind terms */))
1513 if (key.mem[0] != psysno)
1515 psysno = key.mem[0];
1516 if (rfd->counted_items >= rset->hits_limit)
1521 *count = rset->hits_count;
1528 * c-file-style: "Stroustrup"
1529 * indent-tabs-mode: nil
1531 * vim: shiftwidth=4 tabstop=8 expandtab