X-Git-Url: http://sru.miketaylor.org.uk/?a=blobdiff_plain;f=index%2Fzsets.c;h=f77258b66475da4bd51227507a7f6dcbc003e769;hb=77686142af94172d1887190ebd47aeb53f704057;hp=c6a03c25f63e559a45054115ef7e0bb6176be9b0;hpb=eb2b742588ce07fb4516bbca22c93b938b13e433;p=idzebra-moved-to-github.git diff --git a/index/zsets.c b/index/zsets.c index c6a03c2..f77258b 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,10 +1,26 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2000, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: zsets.c,v $ - * Revision 1.25 2000-03-15 15:00:31 adam + * Revision 1.30 2001-10-15 19:53:43 adam + * POSIX thread updates. First work on term sets. + * + * Revision 1.29 2001/01/22 10:42:56 adam + * Added numerical sort. + * + * Revision 1.28 2000/07/07 12:49:20 adam + * Optimized resultSetInsert{Rank,Sort}. + * + * Revision 1.27 2000/04/05 09:49:36 adam + * On Unix, zebra/z'mbol uses automake. + * + * Revision 1.26 2000/03/20 19:08:36 adam + * Added remote record import using Z39.50 extended services and Segment + * Requests. + * + * Revision 1.25 2000/03/15 15:00:31 adam * First work on threaded version. * * Revision 1.24 1999/11/04 15:00:45 adam @@ -106,6 +122,13 @@ #define SORT_IDX_ENTRYSIZE 64 #define ZSET_SORT_MAX_LEVEL 3 +struct zebra_set_term_entry { + int reg_type; + char *db; + int set; + int use; + char *term; +}; struct zebra_set { char *name; RSET rset; @@ -115,7 +138,10 @@ struct zebra_set { char **basenames; Z_RPNQuery *rpn; struct zset_sort_info *sort_info; + struct zebra_set_term_entry *term_entries; + int term_entries_max; struct zebra_set *next; + int locked; }; struct zset_sort_entry { @@ -144,6 +170,7 @@ ZebraSet resultSetAddRPN (ZebraHandle zh, ODR input, ODR output, zebraSet = resultSetAdd (zh, setname, 1); if (!zebraSet) return 0; + zebraSet->locked = 1; zebraSet->rpn = 0; zebraSet->num_bases = num_bases; zebraSet->basenames = basenames; @@ -156,9 +183,37 @@ ZebraSet resultSetAddRPN (ZebraHandle zh, ODR input, ODR output, zh->hits = zebraSet->hits; if (zebraSet->rset) zebraSet->rpn = rpn; + zebraSet->locked = 0; return zebraSet; } +void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type, + const char *db, int set, + int use, const char *term) +{ + if (!s->nmem) + s->nmem = nmem_create (); + if (!s->term_entries) + { + int i; + s->term_entries_max = 1000; + s->term_entries = + nmem_malloc (s->nmem, s->term_entries_max * + sizeof(*s->term_entries)); + for (i = 0; i < s->term_entries_max; i++) + s->term_entries[i].term = 0; + } + if (s->hits < s->term_entries_max) + { + s->term_entries[s->hits].reg_type = reg_type; + s->term_entries[s->hits].db = nmem_strdup (s->nmem, db); + s->term_entries[s->hits].set = set; + s->term_entries[s->hits].use = use; + s->term_entries[s->hits].term = nmem_strdup (s->nmem, term); + } + (s->hits)++; +} + ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) { ZebraSet s; @@ -170,7 +225,7 @@ ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) if (s) { logf (LOG_DEBUG, "updating result set %s", name); - if (!ov) + if (!ov || s->locked) return NULL; if (s->rset) rset_delete (s->rset); @@ -198,8 +253,12 @@ ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) for (i = 0; i < s->sort_info->max_entries; i++) s->sort_info->entries[i] = s->sort_info->all_entries + i; } + s->locked = 0; + s->term_entries = 0; + s->hits = 0; s->rset = 0; - s->nmem = 0; + s->nmem = 0; + s->rpn = 0; return s; } @@ -210,14 +269,13 @@ ZebraSet resultSetGet (ZebraHandle zh, const char *name) for (s = zh->sets; s; s = s->next) if (!strcmp (s->name, name)) { - if (!s->rset && s->rpn) + if (!s->term_entries && !s->rset && s->rpn) { NMEM nmem = nmem_create (); s->rset = rpn_search (zh, nmem, s->rpn, s->num_bases, s->basenames, s->name, s); nmem_destroy (nmem); - } return s; } @@ -259,7 +317,8 @@ void resultSetDestroy (ZebraHandle zh, int num, char **names,int *statuses) if (s->nmem) nmem_destroy (s->nmem); - rset_delete (s->rset); + if (s->rset) + rset_delete (s->rset); xfree (s->name); xfree (s); } @@ -272,7 +331,7 @@ ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, int num, int *positions) { ZebraSet sset; - ZebraPosSet sr; + ZebraPosSet sr = 0; RSET rset; int i; struct zset_sort_info *sort_info; @@ -280,75 +339,100 @@ ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, if (!(sset = resultSetGet (zh, name))) return NULL; if (!(rset = sset->rset)) - return NULL; - sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num); - for (i = 0; isort_info; - if (sort_info) { - int position; - + if (!sset->term_entries) + return 0; + sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num); for (i = 0; i 0 && position <= sort_info->num_entries) + int j; + struct zebra_set_term_entry *entry = sset->term_entries; + + sr[i].sysno = 0; + sr[i].score = -1; + sr[i].term = 0; + sr[i].db = 0; + + if (positions[i] <= sset->term_entries_max) { - logf (LOG_DEBUG, "got pos=%d (sorted)", position); - sr[i].sysno = sort_info->entries[position-1]->sysno; - sr[i].score = sort_info->entries[position-1]->score; + sr[i].term = sset->term_entries[positions[i]-1].term; + sr[i].db = sset->term_entries[positions[i]-1].db; } } } - /* did we really get all entries using sort ? */ - for (i = 0; isort_info; if (sort_info) - position = sort_info->num_entries; - while (num_i < num && positions[num_i] < position) - num_i++; - rfd = rset_open (rset, RSETF_READ); - while (num_i < num && rset_read (rset, rfd, &key, &term_index)) { - if (key.sysno != psysno) + int position; + + for (i = 0; i 0 && position <= sort_info->num_entries) { - /* determine we alreay have this in our set */ - for (i = sort_info->num_entries; --i >= 0; ) - if (psysno == sort_info->entries[i]->sysno) - break; - if (i >= 0) - continue; + logf (LOG_DEBUG, "got pos=%d (sorted)", position); + sr[i].sysno = sort_info->entries[position-1]->sysno; + sr[i].score = sort_info->entries[position-1]->score; } - position++; - assert (num_i < num); - if (position == positions[num_i]) + } + } + /* did we really get all entries using sort ? */ + for (i = 0; inum_entries; + while (num_i < num && positions[num_i] < position) + num_i++; + rfd = rset_open (rset, RSETF_READ); + while (num_i < num && rset_read (rset, rfd, &key, &term_index)) + { + if (key.sysno != psysno) { - sr[num_i].sysno = psysno; - logf (LOG_DEBUG, "got pos=%d (unsorted)", position); - sr[num_i].score = -1; - num_i++; + psysno = key.sysno; + if (sort_info) + { + /* determine we alreay have this in our set */ + for (i = sort_info->num_entries; --i >= 0; ) + if (psysno == sort_info->entries[i]->sysno) + break; + if (i >= 0) + continue; + } + position++; + assert (num_i < num); + if (position == positions[num_i]) + { + sr[num_i].sysno = psysno; + logf (LOG_DEBUG, "got pos=%d (unsorted)", position); + sr[num_i].score = -1; + num_i++; + } } } + rset_close (rset, rfd); } - rset_close (rset, rfd); } return sr; } @@ -358,13 +442,14 @@ void zebraPosSetDestroy (ZebraHandle zh, ZebraPosSet records, int num) xfree (records); } -struct sortKey { +struct sortKeyInfo { int relation; int attrUse; + int numerical; }; void resultSetInsertSort (ZebraHandle zh, ZebraSet sset, - struct sortKey *criteria, int num_criteria, + struct sortKeyInfo *criteria, int num_criteria, int sysno) { struct zset_sort_entry this_entry; @@ -384,8 +469,21 @@ void resultSetInsertSort (ZebraHandle zh, ZebraSet sset, int rel = 0; for (j = 0; jentries[i]->buf[j], + if (criteria[j].numerical) + { + double diff = atof(this_entry.buf[j]) - + atof(sort_info->entries[i]->buf[j]); + rel = 0; + if (diff > 0.0) + rel = 1; + else if (diff < 0.0) + rel = -1; + } + else + { + rel = memcmp (this_entry.buf[j], sort_info->entries[i]->buf[j], SORT_IDX_ENTRYSIZE); + } if (rel) break; } @@ -402,20 +500,23 @@ void resultSetInsertSort (ZebraHandle zh, ZebraSet sset, break; } } - j = sort_info->max_entries-1; + ++i; + j = sort_info->max_entries; if (i == j) return; - ++i; + + if (sort_info->num_entries == j) + --j; + else + j = (sort_info->num_entries)++; new_entry = sort_info->entries[j]; while (j != i) { sort_info->entries[j] = sort_info->entries[j-1]; --j; } - sort_info->entries[j] = new_entry; + sort_info->entries[i] = new_entry; assert (new_entry); - if (sort_info->num_entries != sort_info->max_entries) - (sort_info->num_entries)++; for (i = 0; ibuf[i], this_entry.buf[i], SORT_IDX_ENTRYSIZE); new_entry->sysno = sysno; @@ -446,20 +547,24 @@ void resultSetInsertRank (ZebraHandle zh, struct zset_sort_info *sort_info, break; } } - j = sort_info->max_entries-1; + ++i; + j = sort_info->max_entries; if (i == j) return; - ++i; + + if (sort_info->num_entries == j) + --j; + else + j = (sort_info->num_entries)++; + new_entry = sort_info->entries[j]; while (j != i) { sort_info->entries[j] = sort_info->entries[j-1]; --j; } - sort_info->entries[j] = new_entry; + sort_info->entries[i] = new_entry; assert (new_entry); - if (sort_info->num_entries != sort_info->max_entries) - (sort_info->num_entries)++; new_entry->sysno = sysno; new_entry->score = score; } @@ -512,7 +617,7 @@ void resultSetSortSingle (ZebraHandle zh, NMEM nmem, { int i, psysno = 0; struct it_key key; - struct sortKey sort_criteria[3]; + struct sortKeyInfo sort_criteria[3]; int num_criteria; int term_index; RSFD rfd; @@ -563,7 +668,8 @@ void resultSetSortSingle (ZebraHandle zh, NMEM nmem, logf (LOG_DEBUG, "Sort: key %d is of type sortAttributes", i+1); sort_criteria[i].attrUse = zebra_maps_sort (zh->service->zebra_maps, - sk->u.sortAttributes); + sk->u.sortAttributes, + &sort_criteria[i].numerical); logf (LOG_DEBUG, "use value = %d", sort_criteria[i].attrUse); if (sort_criteria[i].attrUse == -1) { @@ -591,7 +697,6 @@ void resultSetSortSingle (ZebraHandle zh, NMEM nmem, } rset_close (rset, rfd); - zh->errCode = 0; *sort_status = Z_SortStatus_success; logf (LOG_DEBUG, "resultSetSortSingle end"); }