1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2011 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 * \file rsmultiandor.c
23 * \brief This module implements the rsmulti_or and rsmulti_and result sets
25 * rsmultior is based on a heap, from which we find the next hit.
27 * rsmultiand is based on a simple array of rsets, and a linear
28 * search to find the record that exists in all of those rsets.
29 * To speed things up, the array is sorted so that the smallest
30 * rsets come first, they are most likely to have the hits furthest
31 * away, and thus forwarding to them makes the most sense.
43 #include <idzebra/util.h>
44 #include <idzebra/isamc.h>
47 static RSFD r_open_and (RSET ct, int flag);
48 static RSFD r_open_or (RSET ct, int flag);
49 static void r_close (RSFD rfd);
50 static void r_delete (RSET ct);
51 static int r_read_and (RSFD rfd, void *buf, TERMID *term);
52 static int r_read_or (RSFD rfd, void *buf, TERMID *term);
53 static int r_write (RSFD rfd, const void *buf);
54 static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
55 const void *untilbuf);
56 static int r_forward_or(RSFD rfd, void *buf, TERMID *term,
57 const void *untilbuf);
58 static void r_pos_and(RSFD rfd, double *current, double *total);
59 static void r_pos_or(RSFD rfd, double *current, double *total);
60 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
62 static const struct rset_control control_or =
75 static const struct rset_control control_and =
88 /* The heap structure:
89 * The rset contains a list or rsets we are ORing together
90 * The rfd contains a heap of heap-items, which contain
91 * a rfd opened to those rsets, and a buffer for one key.
92 * They also contain a ptr to the rset list in the rset
93 * itself, for practical reasons.
106 const struct rset_key_control *kctrl;
107 struct heap_item **heap; /* ptrs to the rfd */
109 typedef struct heap *HEAP;
112 struct rset_private {
119 struct heap_item *items; /* we alloc and free them here */
120 HEAP h; /* and move around here */
121 zint hits; /* returned so far */
122 int eof; /* seen the end of it */
123 int tailcount; /* how many items are tailing */
129 static int log_level = 0;
130 static int log_level_initialized = 0;
133 /* Heap functions ***********************/
136 static void heap_dump_item( HEAP h, int i, int level)
141 (void)rset_pos(h->heap[i]->rset,h->heap[i]->fd, &cur, &tot);
142 yaz_log(log_level," %d %*s i=%p buf=%p %0.1f/%0.1f",i, level, "",
143 &(h->heap[i]), h->heap[i]->buf, cur,tot );
144 heap_dump_item(h, 2*i, level+1);
145 heap_dump_item(h, 2*i+1, level+1);
147 static void heap_dump( HEAP h,char *msg) {
148 yaz_log(log_level, "heap dump: %s num=%d max=%d",msg, h->heapnum, h->heapmax);
149 heap_dump_item(h,1,1);
153 static void heap_swap (HEAP h, int x, int y)
155 struct heap_item *swap;
157 h->heap[x] = h->heap[y];
161 static int heap_cmp(HEAP h, int x, int y)
163 return (*h->kctrl->cmp)(h->heap[x]->buf,h->heap[y]->buf);
166 static int heap_empty(HEAP h)
168 return ( 0==h->heapnum );
171 /** \brief deletes the first item in the heap, and balances the rest
173 static void heap_delete (HEAP h)
175 int cur = 1, child = 2;
176 h->heap[1] = 0; /* been deleted */
177 heap_swap (h, 1, h->heapnum--);
178 while (child <= h->heapnum) {
179 if (child < h->heapnum && heap_cmp(h,child,1+child)>0 )
181 if (heap_cmp(h,cur,child) > 0)
183 heap_swap (h, cur, child);
192 /** \brief puts item into heap.
193 The heap root element has changed value (to bigger)
194 Swap downwards until the heap is ordered again
196 static void heap_balance (HEAP h)
198 int cur = 1, child = 2;
199 while (child <= h->heapnum) {
200 if (child < h->heapnum && heap_cmp(h,child,1+child)>0 )
202 if (heap_cmp(h,cur,child) > 0)
204 heap_swap (h, cur, child);
214 static void heap_insert (HEAP h, struct heap_item *hi)
218 cur = ++(h->heapnum);
219 assert(cur <= h->heapmax);
222 while (parent && (heap_cmp(h,parent,cur) > 0))
225 heap_swap (h, cur, parent);
233 HEAP heap_create (NMEM nmem, int size, const struct rset_key_control *kctrl)
235 HEAP h = (HEAP) nmem_malloc (nmem, sizeof(*h));
237 ++size; /* heap array starts at 1 */
241 h->heap = (struct heap_item**) nmem_malloc(nmem,size*sizeof(*h->heap));
242 h->heap[0]=0; /* not used */
246 static void heap_clear( HEAP h)
252 static void heap_destroy (HEAP h)
254 /* nothing to delete, all is nmem'd, and will go away in due time */
257 /** \brief compare and items for quicksort
258 used in qsort to get the multi-and args in optimal order
259 that is, those with fewest occurrences first
261 int compare_ands(const void *x, const void *y)
262 { const struct heap_item *hx = x;
263 const struct heap_item *hy = y;
264 double cur, totx, toty;
265 rset_pos(hx->fd, &cur, &totx);
266 rset_pos(hy->fd, &cur, &toty);
267 if ( totx > toty +0.5 )
269 if ( totx < toty -0.5 )
271 return 0; /* return totx - toty, except for overflows and rounding */
274 static RSET rsmulti_andor_create(NMEM nmem,
275 struct rset_key_control *kcontrol,
276 int scope, TERMID termid,
277 int no_rsets, RSET* rsets,
278 const struct rset_control *ctrl)
280 RSET rnew = rset_create_base(ctrl, nmem, kcontrol, scope, termid,
282 struct rset_private *info;
283 if (!log_level_initialized)
285 log_level = yaz_log_module_level("rsmultiandor");
286 log_level_initialized = 1;
288 yaz_log(log_level, "rsmultiand_andor_create scope=%d", scope);
289 info = (struct rset_private *) nmem_malloc(rnew->nmem, sizeof(*info));
294 RSET rset_create_or(NMEM nmem, struct rset_key_control *kcontrol,
295 int scope, TERMID termid, int no_rsets, RSET* rsets)
297 return rsmulti_andor_create(nmem, kcontrol, scope, termid,
298 no_rsets, rsets, &control_or);
301 RSET rset_create_and(NMEM nmem, struct rset_key_control *kcontrol,
302 int scope, int no_rsets, RSET* rsets)
304 return rsmulti_andor_create(nmem, kcontrol, scope, 0,
305 no_rsets, rsets, &control_and);
308 static void r_delete (RSET ct)
312 static RSFD r_open_andor (RSET ct, int flag, int is_and)
315 struct rfd_private *p;
316 const struct rset_key_control *kctrl = ct->keycontrol;
319 if (flag & RSETF_WRITE)
321 yaz_log (YLOG_FATAL, "multiandor set type is read-only");
324 rfd = rfd_create_base(ct);
326 p = (struct rfd_private *)rfd->priv;
330 /* all other pointers shouls already be allocated, in right sizes! */
334 p = (struct rfd_private *) nmem_malloc (ct->nmem,sizeof(*p));
339 p->tailbits = nmem_malloc(ct->nmem, ct->no_children*sizeof(char) );
341 p->h = heap_create( ct->nmem, ct->no_children, kctrl);
342 p->items = (struct heap_item *)
343 nmem_malloc(ct->nmem, ct->no_children*sizeof(*p->items));
344 for (i = 0; i<ct->no_children; i++)
346 p->items[i].rset = ct->children[i];
347 p->items[i].buf = nmem_malloc(ct->nmem, kctrl->key_size);
355 { /* read the array and sort it */
356 for (i = 0; i<ct->no_children; i++){
357 p->items[i].fd = rset_open(ct->children[i], RSETF_READ);
358 if (!rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
362 qsort(p->items, ct->no_children, sizeof(p->items[0]), compare_ands);
365 { /* fill the heap for ORing */
366 for (i = 0; i<ct->no_children; i++){
367 p->items[i].fd = rset_open(ct->children[i],RSETF_READ);
368 if ( rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
369 heap_insert(p->h, &(p->items[i]));
375 static RSFD r_open_or (RSET ct, int flag)
377 return r_open_andor(ct, flag, 0);
380 static RSFD r_open_and (RSET ct, int flag)
382 return r_open_andor(ct, flag, 1);
386 static void r_close (RSFD rfd)
388 struct rfd_private *p=(struct rfd_private *)(rfd->priv);
393 for (i = 0; i<rfd->rset->no_children; i++)
395 rset_close(p->items[i].fd);
398 static int r_forward_or(RSFD rfd, void *buf,
399 TERMID *term, const void *untilbuf)
400 { /* while heap head behind untilbuf, forward it and rebalance heap */
401 struct rfd_private *p = rfd->priv;
402 const struct rset_key_control *kctrl = rfd->rset->keycontrol;
403 if (heap_empty(p->h))
405 while ( (*kctrl->cmp)(p->h->heap[1]->buf,untilbuf) < -rfd->rset->scope )
407 if (rset_forward(p->h->heap[1]->fd,p->h->heap[1]->buf,
408 &p->h->heap[1]->term, untilbuf))
413 if (heap_empty(p->h))
418 return r_read_or(rfd, buf, term);
422 /** \brief reads one item key from an 'or' set
423 \param rfd set handle
424 \param buf resulting item buffer
425 \param term resulting term
427 \retval 1 item could be read
429 static int r_read_or (RSFD rfd, void *buf, TERMID *term)
431 RSET rset = rfd->rset;
432 struct rfd_private *mrfd = rfd->priv;
433 const struct rset_key_control *kctrl = rset->keycontrol;
434 struct heap_item *it;
436 if (heap_empty(mrfd->h))
438 it = mrfd->h->heap[1];
439 memcpy(buf, it->buf, kctrl->key_size);
448 rdres = rset_read(it->fd, it->buf, &it->term);
450 heap_balance(mrfd->h);
452 heap_delete(mrfd->h);
457 /** \brief reads one item key from an 'and' set
458 \param rfd set handle
459 \param buf resulting item buffer
460 \param term resulting term
462 \retval 1 item could be read
464 Has to return all hits where each item points to the
465 same sysno (scope), in order. Keep an extra key (hitkey)
466 as long as all records do not point to hitkey, forward
467 them, and update hitkey to be the highest seen so far.
468 (if any item eof's, mark eof, and return 0 thereafter)
469 Once a hit has been found, scan all items for the smallest
470 value. Mark all as being in the tail. Read next from that
471 item, and if not in the same record, clear its tail bit
473 static int r_read_and (RSFD rfd, void *buf, TERMID *term)
474 { struct rfd_private *p = rfd->priv;
476 const struct rset_key_control *kctrl = ct->keycontrol;
481 { /* we are tailing, find lowest tail and return it */
485 for (i = 0; i<ct->no_children; i++)
491 (p->items[i].buf, p->items[mintail].buf);
497 if (kctrl->get_segment)
498 { /* segments enabled */
499 zint segment = kctrl->get_segment(p->items[i].buf);
500 /* store segment if not stored already */
501 if (!p->segment && segment)
502 p->segment = segment;
504 /* skip rest entirely if segments don't match */
505 if (p->segment && segment && p->segment != segment)
510 /* return the lowest tail */
511 memcpy(buf, p->items[mintail].buf, kctrl->key_size);
513 *term = p->items[mintail].term;
514 if (!rset_read(p->items[mintail].fd, p->items[mintail].buf,
515 &p->items[mintail].term))
517 p->eof = 1; /* game over, once tails have been returned */
518 p->tailbits[mintail] = 0;
524 cmp = (*kctrl->cmp)(p->items[mintail].buf,buf);
525 if (cmp >= rfd->rset->scope)
527 p->tailbits[mintail] = 0;
532 continue; /* skip again.. eventually tailcount will be 0 */
533 if (p->tailcount == 0)
537 /* not tailing, forward until all records match, and set up */
538 /* as tails. the earlier 'if' will then return the hits */
540 return 0; /* nothing more to see */
541 i = 1; /* assume items[0] is highest up */
542 while (i < ct->no_children)
544 int cmp = (*kctrl->cmp)(p->items[0].buf, p->items[i].buf);
545 if (cmp <= -rfd->rset->scope) { /* [0] was behind, forward it */
546 if (!rset_forward(p->items[0].fd, p->items[0].buf,
547 &p->items[0].term, p->items[i].buf))
549 p->eof = 1; /* game over */
552 i = 0; /* start forwarding from scratch */
554 else if (cmp>=rfd->rset->scope)
555 { /* [0] was ahead, forward i */
556 if (!rset_forward(p->items[i].fd, p->items[i].buf,
557 &p->items[i].term, p->items[0].buf))
559 p->eof = 1; /* game over */
566 /* if we get this far, all rsets are now within +- scope of [0] */
567 /* ergo, we have a hit. Mark them all as tailing, and let the */
568 /* upper 'if' return the hits in right order */
569 for (i = 0; i < ct->no_children; i++)
571 p->tailcount = ct->no_children;
578 static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
579 const void *untilbuf)
581 struct rfd_private *p = rfd->priv;
583 const struct rset_key_control *kctrl = ct->keycontrol;
588 for (i = 0; i<ct->no_children; i++)
590 cmp = (*kctrl->cmp)(p->items[i].buf,untilbuf);
591 if (cmp <= -rfd->rset->scope)
593 killtail = 1; /* we are moving to a different hit */
594 if (!rset_forward(p->items[i].fd, p->items[i].buf,
595 &p->items[i].term, untilbuf))
597 p->eof = 1; /* game over */
605 for (i = 0; i<ct->no_children; i++)
609 return r_read_and(rfd,buf,term);
612 static void r_pos_x(RSFD rfd, double *current, double *total, int and_op)
615 struct rfd_private *mrfd =
616 (struct rfd_private *)(rfd->priv);
617 double ratio = and_op ? 0.0 : 1.0;
619 double sum_cur = 0.0;
620 double sum_tot = 0.0;
621 for (i = 0; i<ct->no_children; i++){
622 double nratio, cur, tot;
623 rset_pos(mrfd->items[i].fd, &cur, &tot);
624 yaz_log(log_level, "r_pos: %d %0.1f %0.1f", i, cur,tot);
638 ratio = sum_cur / sum_tot;
639 if (ratio == 0.0 || ratio == 1.0) { /* nothing there */
642 yaz_log(log_level, "r_pos: NULL %0.1f %0.1f", *current, *total);
646 *current = (double) (mrfd->hits);
647 *total = *current / ratio;
648 yaz_log(log_level, "r_pos: = %0.1f %0.1f", *current, *total);
652 static void r_pos_and(RSFD rfd, double *current, double *total)
654 r_pos_x(rfd, current, total, 1);
657 static void r_pos_or(RSFD rfd, double *current, double *total)
659 r_pos_x(rfd, current, total, 0);
662 static int r_write (RSFD rfd, const void *buf)
664 yaz_log (YLOG_FATAL, "multior set type is read-only");
668 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
671 rset_get_one_term(ct, terms, maxterms, curterm);
674 /* Special case: Some multi-ors have all terms pointing to the same
675 term. We do not want to duplicate those. Other multiors (and ands)
676 have different terms under them. Those we want.
678 int firstterm= *curterm;
681 for (i = 0; i<ct->no_children; i++)
683 rset_getterms(ct->children[i], terms, maxterms, curterm);
684 if ( ( *curterm > firstterm+1 ) &&
685 ( *curterm <= maxterms ) &&
686 ( terms[(*curterm)-1] == terms[firstterm] )
688 (*curterm)--; /* forget the term, seen that before */
697 * c-file-style: "Stroustrup"
698 * indent-tabs-mode: nil
700 * vim: shiftwidth=4 tabstop=8 expandtab