1 /* $Id: rsmultiandor.c,v 1.19 2005-05-24 11:35:43 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 * This module implements the rsmulti_or and rsmulti_and result sets
27 * rsmultior is based on a heap, from which we find the next hit.
29 * rsmultiand is based on a simple array of rsets, and a linear
30 * search to find the record that exists in all of those rsets.
31 * To speed things up, the array is sorted so that the smallest
32 * rsets come first, they are most likely to have the hits furthest
33 * away, and thus forwarding to them makes the most sense.
42 #include <idzebra/util.h>
43 #include <idzebra/isamc.h>
46 static RSFD r_open_and (RSET ct, int flag);
47 static RSFD r_open_or (RSET ct, int flag);
48 static void r_close (RSFD rfd);
49 static void r_delete (RSET ct);
50 static int r_read_and (RSFD rfd, void *buf, TERMID *term);
51 static int r_read_or (RSFD rfd, void *buf, TERMID *term);
52 static int r_write (RSFD rfd, const void *buf);
53 static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
54 const void *untilbuf);
55 static int r_forward_or(RSFD rfd, void *buf, TERMID *term,
56 const void *untilbuf);
57 static void r_pos (RSFD rfd, double *current, double *total);
58 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
60 static const struct rset_control control_or =
73 static const struct rset_control control_and =
86 /* The heap structure:
87 * The rset contains a list or rsets we are ORing together
88 * The rfd contains a heap of heap-items, which contain
89 * a rfd opened to those rsets, and a buffer for one key.
90 * They also contain a ptr to the rset list in the rset
91 * itself, for practical reasons.
104 const struct rset_key_control *kctrl;
105 struct heap_item **heap; /* ptrs to the rfd */
107 typedef struct heap *HEAP;
110 struct rset_private {
117 struct heap_item *items; /* we alloc and free them here */
118 HEAP h; /* and move around here */
119 zint hits; /* returned so far */
120 int eof; /* seen the end of it */
121 int tailcount; /* how many items are tailing */
125 static int log_level = 0;
126 static int log_level_initialized = 0;
129 /* Heap functions ***********************/
132 static void heap_dump_item( HEAP h, int i, int level)
137 (void)rset_pos(h->heap[i]->rset,h->heap[i]->fd, &cur, &tot);
138 yaz_log(log_level," %d %*s i=%p buf=%p %0.1f/%0.1f",i, level, "",
139 &(h->heap[i]), h->heap[i]->buf, cur,tot );
140 heap_dump_item(h, 2*i, level+1);
141 heap_dump_item(h, 2*i+1, level+1);
143 static void heap_dump( HEAP h,char *msg) {
144 yaz_log(log_level, "heap dump: %s num=%d max=%d",msg, h->heapnum, h->heapmax);
145 heap_dump_item(h,1,1);
149 static void heap_swap (HEAP h, int x, int y)
151 struct heap_item *swap;
153 h->heap[x] = h->heap[y];
157 static int heap_cmp(HEAP h, int x, int y)
159 return (*h->kctrl->cmp)(h->heap[x]->buf,h->heap[y]->buf);
162 static int heap_empty(HEAP h)
164 return ( 0==h->heapnum );
167 static void heap_delete (HEAP h)
168 { /* deletes the first item in the heap, and balances the rest */
169 int cur = 1, child = 2;
170 h->heap[1] = 0; /* been deleted */
171 heap_swap (h, 1, h->heapnum--);
172 while (child <= h->heapnum) {
173 if (child < h->heapnum && heap_cmp(h,child,1+child)>0 )
175 if (heap_cmp(h,cur,child) > 0)
177 heap_swap (h, cur, child);
186 static void heap_balance (HEAP h)
187 { /* The heap root element has changed value (to bigger) */
188 /* swap downwards until the heap is ordered again */
189 int cur = 1, child = 2;
190 while (child <= h->heapnum) {
191 if (child < h->heapnum && heap_cmp(h,child,1+child)>0 )
193 if (heap_cmp(h,cur,child) > 0)
195 heap_swap (h, cur, child);
205 static void heap_insert (HEAP h, struct heap_item *hi)
209 cur = ++(h->heapnum);
210 assert(cur <= h->heapmax);
213 while (parent && (heap_cmp(h,parent,cur) > 0))
216 heap_swap (h, cur, parent);
224 HEAP heap_create (NMEM nmem, int size, const struct rset_key_control *kctrl)
226 HEAP h = (HEAP) nmem_malloc (nmem, sizeof(*h));
228 ++size; /* heap array starts at 1 */
232 h->heap = (struct heap_item**) nmem_malloc(nmem,size*sizeof(*h->heap));
233 h->heap[0]=0; /* not used */
237 static void heap_clear( HEAP h)
243 static void heap_destroy (HEAP h)
245 /* nothing to delete, all is nmem'd, and will go away in due time */
248 int compare_ands(const void *x, const void *y)
249 { /* used in qsort to get the multi-and args in optimal order */
250 /* that is, those with fewest occurrences first */
251 const struct heap_item *hx = x;
252 const struct heap_item *hy = y;
253 double cur, totx, toty;
254 rset_pos(hx->fd, &cur, &totx);
255 rset_pos(hy->fd, &cur, &toty);
256 if ( totx > toty +0.5 )
258 if ( totx < toty -0.5 )
260 return 0; /* return totx - toty, except for overflows and rounding */
263 /* Creating and deleting rsets ***********************/
265 static RSET rsmulti_andor_create(NMEM nmem,
266 struct rset_key_control *kcontrol,
267 int scope, TERMID termid,
268 int no_rsets, RSET* rsets,
269 const struct rset_control *ctrl)
271 RSET rnew = rset_create_base(ctrl, nmem, kcontrol, scope, termid,
273 struct rset_private *info;
274 if (!log_level_initialized)
276 log_level = yaz_log_module_level("rsmultiandor");
277 log_level_initialized = 1;
279 yaz_log(log_level, "rsmultiand_andor_create scope=%d", scope);
280 info = (struct rset_private *) nmem_malloc(rnew->nmem, sizeof(*info));
285 RSET rsmulti_or_create(NMEM nmem, struct rset_key_control *kcontrol,
286 int scope, TERMID termid, int no_rsets, RSET* rsets)
288 return rsmulti_andor_create(nmem, kcontrol, scope, termid,
289 no_rsets, rsets, &control_or);
292 RSET rsmulti_and_create(NMEM nmem, struct rset_key_control *kcontrol,
293 int scope, int no_rsets, RSET* rsets)
295 return rsmulti_andor_create(nmem, kcontrol, scope, 0,
296 no_rsets, rsets, &control_and);
299 static void r_delete (RSET ct)
303 /* Opening and closing fd's on them *********************/
305 static RSFD r_open_andor (RSET ct, int flag, int is_and)
308 struct rfd_private *p;
309 const struct rset_key_control *kctrl = ct->keycontrol;
312 if (flag & RSETF_WRITE)
314 yaz_log (YLOG_FATAL, "multiandor set type is read-only");
317 rfd = rfd_create_base(ct);
319 p = (struct rfd_private *)rfd->priv;
323 /* all other pointers shouls already be allocated, in right sizes! */
326 p = (struct rfd_private *) nmem_malloc (ct->nmem,sizeof(*p));
331 p->tailbits = nmem_malloc(ct->nmem, ct->no_children*sizeof(char) );
333 p->h = heap_create( ct->nmem, ct->no_children, kctrl);
334 p->items = (struct heap_item *)
335 nmem_malloc(ct->nmem, ct->no_children*sizeof(*p->items));
336 for (i = 0; i<ct->no_children; i++)
338 p->items[i].rset = ct->children[i];
339 p->items[i].buf = nmem_malloc(ct->nmem, kctrl->key_size);
347 { /* read the array and sort it */
348 for (i = 0; i<ct->no_children; i++){
349 p->items[i].fd = rset_open(ct->children[i], RSETF_READ);
350 if (!rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
354 qsort(p->items, ct->no_children, sizeof(p->items[0]), compare_ands);
356 { /* fill the heap for ORing */
357 for (i = 0; i<ct->no_children; i++){
358 p->items[i].fd = rset_open(ct->children[i],RSETF_READ);
359 if ( rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
360 heap_insert(p->h, &(p->items[i]));
366 static RSFD r_open_or (RSET ct, int flag)
368 return r_open_andor(ct, flag, 0);
371 static RSFD r_open_and (RSET ct, int flag)
373 return r_open_andor(ct, flag, 1);
377 static void r_close (RSFD rfd)
379 struct rfd_private *p=(struct rfd_private *)(rfd->priv);
384 for (i = 0; i<rfd->rset->no_children; i++)
386 rset_close(p->items[i].fd);
391 static int r_forward_or(RSFD rfd, void *buf,
392 TERMID *term, const void *untilbuf)
393 { /* while heap head behind untilbuf, forward it and rebalance heap */
394 struct rfd_private *p = rfd->priv;
395 const struct rset_key_control *kctrl = rfd->rset->keycontrol;
396 if (heap_empty(p->h))
398 while ( (*kctrl->cmp)(p->h->heap[1]->buf,untilbuf) < -rfd->rset->scope )
400 if (rset_forward(p->h->heap[1]->fd,p->h->heap[1]->buf,
401 &p->h->heap[1]->term, untilbuf))
406 if (heap_empty(p->h))
411 return r_read_or(rfd, buf, term);
415 static int r_read_or (RSFD rfd, void *buf, TERMID *term)
417 RSET rset = rfd->rset;
418 struct rfd_private *mrfd = rfd->priv;
419 const struct rset_key_control *kctrl = rset->keycontrol;
420 struct heap_item *it;
422 if (heap_empty(mrfd->h))
424 it = mrfd->h->heap[1];
425 memcpy(buf, it->buf, kctrl->key_size);
435 rdres = rset_read(it->fd, it->buf, &it->term);
437 heap_balance(mrfd->h);
439 heap_delete(mrfd->h);
444 static int r_read_and (RSFD rfd, void *buf, TERMID *term)
445 { /* Has to return all hits where each item points to the */
446 /* same sysno (scope), in order. Keep an extra key (hitkey) */
447 /* as long as all records do not point to hitkey, forward */
448 /* them, and update hitkey to be the highest seen so far. */
449 /* (if any item eof's, mark eof, and return 0 thereafter) */
450 /* Once a hit has been found, scan all items for the smallest */
451 /* value. Mark all as being in the tail. Read next from that */
452 /* item, and if not in the same record, clear its tail bit */
453 struct rfd_private *p = rfd->priv;
455 const struct rset_key_control *kctrl = ct->keycontrol;
461 { /* we are tailing, find lowest tail and return it */
463 while ((mintail<ct->no_children) && !p->tailbits[mintail])
464 mintail++; /* first tail */
465 for (i = mintail+1; i<ct->no_children; i++)
469 cmp=(*kctrl->cmp)(p->items[i].buf,p->items[mintail].buf);
474 /* return the lowest tail */
475 memcpy(buf, p->items[mintail].buf, kctrl->key_size);
477 *term = p->items[mintail].term;
478 if (!rset_read(p->items[mintail].fd, p->items[mintail].buf,
479 &p->items[mintail].term))
481 p->eof = 1; /* game over, once tails have been returned */
482 p->tailbits[mintail] = 0;
488 cmp = (*kctrl->cmp)(p->items[mintail].buf,buf);
489 if (cmp >= rfd->rset->scope){
490 p->tailbits[mintail] = 0;
496 /* not tailing, forward until all reocrds match, and set up */
497 /* as tails. the earlier 'if' will then return the hits */
499 return 0; /* nothing more to see */
500 i = 1; /* assume items[0] is highest up */
501 while (i<ct->no_children) {
502 cmp = (*kctrl->cmp)(p->items[0].buf, p->items[i].buf);
503 if (cmp <= -rfd->rset->scope) { /* [0] was behind, forward it */
504 if (!rset_forward(p->items[0].fd, p->items[0].buf,
505 &p->items[0].term, p->items[i].buf))
507 p->eof = 1; /* game over */
510 i = 0; /* start frowarding from scratch */
511 } else if (cmp>=rfd->rset->scope)
512 { /* [0] was ahead, forward i */
513 if (!rset_forward(p->items[i].fd, p->items[i].buf,
514 &p->items[i].term, p->items[0].buf))
516 p->eof = 1; /* game over */
522 /* if we get this far, all rsets are now within +- scope of [0] */
523 /* ergo, we have a hit. Mark them all as tailing, and let the */
524 /* upper 'if' return the hits in right order */
525 for (i = 0; i<ct->no_children; i++)
527 p->tailcount = ct->no_children;
532 static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
533 const void *untilbuf)
535 struct rfd_private *p = rfd->priv;
537 const struct rset_key_control *kctrl = ct->keycontrol;
542 for (i = 0; i<ct->no_children; i++)
544 cmp = (*kctrl->cmp)(p->items[i].buf,untilbuf);
545 if (cmp <= -rfd->rset->scope)
547 killtail = 1; /* we are moving to a different hit */
548 if (!rset_forward(p->items[i].fd, p->items[i].buf,
549 &p->items[i].term, untilbuf))
551 p->eof = 1; /* game over */
559 for (i = 0; i<ct->no_children; i++)
563 return r_read_and(rfd,buf,term);
566 static void r_pos (RSFD rfd, double *current, double *total)
569 struct rfd_private *mrfd =
570 (struct rfd_private *)(rfd->priv);
572 double scur = 0.0, stot = 0.0;
574 for (i = 0; i<ct->no_children; i++){
575 rset_pos(mrfd->items[i].fd, &cur, &tot);
576 yaz_log(log_level, "r_pos: %d %0.1f %0.1f", i, cur,tot);
580 if (stot < 1.0) { /* nothing there */
583 yaz_log(log_level, "r_pos: NULL %0.1f %0.1f", *current, *total);
587 *current = (double) (mrfd->hits);
588 *total = *current*stot/scur;
589 yaz_log(log_level, "r_pos: = %0.1f %0.1f", *current, *total);
593 static int r_write (RSFD rfd, const void *buf)
595 yaz_log (YLOG_FATAL, "multior set type is read-only");
599 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
602 rset_get_one_term(ct, terms, maxterms, curterm);
605 /* Special case: Some multi-ors have all terms pointing to the same
606 term. We do not want to duplicate those. Other multiors (and ands)
607 have different terms under them. Those we want.
609 int firstterm= *curterm;
612 for (i = 0; i<ct->no_children; i++)
614 rset_getterms(ct->children[i], terms, maxterms, curterm);
615 if ( ( *curterm > firstterm+1 ) &&
616 ( *curterm <= maxterms ) &&
617 ( terms[(*curterm)-1] == terms[firstterm] )
619 (*curterm)--; /* forget the term, seen that before */