1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2010 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 * \file rsmultiandor.c
23 * \brief This module implements the rsmulti_or and rsmulti_and result sets
25 * rsmultior is based on a heap, from which we find the next hit.
27 * rsmultiand is based on a simple array of rsets, and a linear
28 * search to find the record that exists in all of those rsets.
29 * To speed things up, the array is sorted so that the smallest
30 * rsets come first, they are most likely to have the hits furthest
31 * away, and thus forwarding to them makes the most sense.
40 #include <idzebra/util.h>
41 #include <idzebra/isamc.h>
44 static RSFD r_open_and (RSET ct, int flag);
45 static RSFD r_open_or (RSET ct, int flag);
46 static void r_close (RSFD rfd);
47 static void r_delete (RSET ct);
48 static int r_read_and (RSFD rfd, void *buf, TERMID *term);
49 static int r_read_or (RSFD rfd, void *buf, TERMID *term);
50 static int r_write (RSFD rfd, const void *buf);
51 static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
52 const void *untilbuf);
53 static int r_forward_or(RSFD rfd, void *buf, TERMID *term,
54 const void *untilbuf);
55 static void r_pos_and(RSFD rfd, double *current, double *total);
56 static void r_pos_or(RSFD rfd, double *current, double *total);
57 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
59 static const struct rset_control control_or =
72 static const struct rset_control control_and =
85 /* The heap structure:
86 * The rset contains a list or rsets we are ORing together
87 * The rfd contains a heap of heap-items, which contain
88 * a rfd opened to those rsets, and a buffer for one key.
89 * They also contain a ptr to the rset list in the rset
90 * itself, for practical reasons.
103 const struct rset_key_control *kctrl;
104 struct heap_item **heap; /* ptrs to the rfd */
106 typedef struct heap *HEAP;
109 struct rset_private {
116 struct heap_item *items; /* we alloc and free them here */
117 HEAP h; /* and move around here */
118 zint hits; /* returned so far */
119 int eof; /* seen the end of it */
120 int tailcount; /* how many items are tailing */
126 static int log_level = 0;
127 static int log_level_initialized = 0;
130 /* Heap functions ***********************/
133 static void heap_dump_item( HEAP h, int i, int level)
138 (void)rset_pos(h->heap[i]->rset,h->heap[i]->fd, &cur, &tot);
139 yaz_log(log_level," %d %*s i=%p buf=%p %0.1f/%0.1f",i, level, "",
140 &(h->heap[i]), h->heap[i]->buf, cur,tot );
141 heap_dump_item(h, 2*i, level+1);
142 heap_dump_item(h, 2*i+1, level+1);
144 static void heap_dump( HEAP h,char *msg) {
145 yaz_log(log_level, "heap dump: %s num=%d max=%d",msg, h->heapnum, h->heapmax);
146 heap_dump_item(h,1,1);
150 static void heap_swap (HEAP h, int x, int y)
152 struct heap_item *swap;
154 h->heap[x] = h->heap[y];
158 static int heap_cmp(HEAP h, int x, int y)
160 return (*h->kctrl->cmp)(h->heap[x]->buf,h->heap[y]->buf);
163 static int heap_empty(HEAP h)
165 return ( 0==h->heapnum );
168 /** \brief deletes the first item in the heap, and balances the rest
170 static void heap_delete (HEAP h)
172 int cur = 1, child = 2;
173 h->heap[1] = 0; /* been deleted */
174 heap_swap (h, 1, h->heapnum--);
175 while (child <= h->heapnum) {
176 if (child < h->heapnum && heap_cmp(h,child,1+child)>0 )
178 if (heap_cmp(h,cur,child) > 0)
180 heap_swap (h, cur, child);
189 /** \brief puts item into heap.
190 The heap root element has changed value (to bigger)
191 Swap downwards until the heap is ordered again
193 static void heap_balance (HEAP h)
195 int cur = 1, child = 2;
196 while (child <= h->heapnum) {
197 if (child < h->heapnum && heap_cmp(h,child,1+child)>0 )
199 if (heap_cmp(h,cur,child) > 0)
201 heap_swap (h, cur, child);
211 static void heap_insert (HEAP h, struct heap_item *hi)
215 cur = ++(h->heapnum);
216 assert(cur <= h->heapmax);
219 while (parent && (heap_cmp(h,parent,cur) > 0))
222 heap_swap (h, cur, parent);
230 HEAP heap_create (NMEM nmem, int size, const struct rset_key_control *kctrl)
232 HEAP h = (HEAP) nmem_malloc (nmem, sizeof(*h));
234 ++size; /* heap array starts at 1 */
238 h->heap = (struct heap_item**) nmem_malloc(nmem,size*sizeof(*h->heap));
239 h->heap[0]=0; /* not used */
243 static void heap_clear( HEAP h)
249 static void heap_destroy (HEAP h)
251 /* nothing to delete, all is nmem'd, and will go away in due time */
254 /** \brief compare and items for quicksort
255 used in qsort to get the multi-and args in optimal order
256 that is, those with fewest occurrences first
258 int compare_ands(const void *x, const void *y)
259 { const struct heap_item *hx = x;
260 const struct heap_item *hy = y;
261 double cur, totx, toty;
262 rset_pos(hx->fd, &cur, &totx);
263 rset_pos(hy->fd, &cur, &toty);
264 if ( totx > toty +0.5 )
266 if ( totx < toty -0.5 )
268 return 0; /* return totx - toty, except for overflows and rounding */
271 static RSET rsmulti_andor_create(NMEM nmem,
272 struct rset_key_control *kcontrol,
273 int scope, TERMID termid,
274 int no_rsets, RSET* rsets,
275 const struct rset_control *ctrl)
277 RSET rnew = rset_create_base(ctrl, nmem, kcontrol, scope, termid,
279 struct rset_private *info;
280 if (!log_level_initialized)
282 log_level = yaz_log_module_level("rsmultiandor");
283 log_level_initialized = 1;
285 yaz_log(log_level, "rsmultiand_andor_create scope=%d", scope);
286 info = (struct rset_private *) nmem_malloc(rnew->nmem, sizeof(*info));
291 RSET rset_create_or(NMEM nmem, struct rset_key_control *kcontrol,
292 int scope, TERMID termid, int no_rsets, RSET* rsets)
294 return rsmulti_andor_create(nmem, kcontrol, scope, termid,
295 no_rsets, rsets, &control_or);
298 RSET rset_create_and(NMEM nmem, struct rset_key_control *kcontrol,
299 int scope, int no_rsets, RSET* rsets)
301 return rsmulti_andor_create(nmem, kcontrol, scope, 0,
302 no_rsets, rsets, &control_and);
305 static void r_delete (RSET ct)
309 static RSFD r_open_andor (RSET ct, int flag, int is_and)
312 struct rfd_private *p;
313 const struct rset_key_control *kctrl = ct->keycontrol;
316 if (flag & RSETF_WRITE)
318 yaz_log (YLOG_FATAL, "multiandor set type is read-only");
321 rfd = rfd_create_base(ct);
323 p = (struct rfd_private *)rfd->priv;
327 /* all other pointers shouls already be allocated, in right sizes! */
331 p = (struct rfd_private *) nmem_malloc (ct->nmem,sizeof(*p));
336 p->tailbits = nmem_malloc(ct->nmem, ct->no_children*sizeof(char) );
338 p->h = heap_create( ct->nmem, ct->no_children, kctrl);
339 p->items = (struct heap_item *)
340 nmem_malloc(ct->nmem, ct->no_children*sizeof(*p->items));
341 for (i = 0; i<ct->no_children; i++)
343 p->items[i].rset = ct->children[i];
344 p->items[i].buf = nmem_malloc(ct->nmem, kctrl->key_size);
352 { /* read the array and sort it */
353 for (i = 0; i<ct->no_children; i++){
354 p->items[i].fd = rset_open(ct->children[i], RSETF_READ);
355 if (!rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
359 qsort(p->items, ct->no_children, sizeof(p->items[0]), compare_ands);
362 { /* fill the heap for ORing */
363 for (i = 0; i<ct->no_children; i++){
364 p->items[i].fd = rset_open(ct->children[i],RSETF_READ);
365 if ( rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
366 heap_insert(p->h, &(p->items[i]));
372 static RSFD r_open_or (RSET ct, int flag)
374 return r_open_andor(ct, flag, 0);
377 static RSFD r_open_and (RSET ct, int flag)
379 return r_open_andor(ct, flag, 1);
383 static void r_close (RSFD rfd)
385 struct rfd_private *p=(struct rfd_private *)(rfd->priv);
390 for (i = 0; i<rfd->rset->no_children; i++)
392 rset_close(p->items[i].fd);
395 static int r_forward_or(RSFD rfd, void *buf,
396 TERMID *term, const void *untilbuf)
397 { /* while heap head behind untilbuf, forward it and rebalance heap */
398 struct rfd_private *p = rfd->priv;
399 const struct rset_key_control *kctrl = rfd->rset->keycontrol;
400 if (heap_empty(p->h))
402 while ( (*kctrl->cmp)(p->h->heap[1]->buf,untilbuf) < -rfd->rset->scope )
404 if (rset_forward(p->h->heap[1]->fd,p->h->heap[1]->buf,
405 &p->h->heap[1]->term, untilbuf))
410 if (heap_empty(p->h))
415 return r_read_or(rfd, buf, term);
419 /** \brief reads one item key from an 'or' set
420 \param rfd set handle
421 \param buf resulting item buffer
422 \param term resulting term
424 \retval 1 item could be read
426 static int r_read_or (RSFD rfd, void *buf, TERMID *term)
428 RSET rset = rfd->rset;
429 struct rfd_private *mrfd = rfd->priv;
430 const struct rset_key_control *kctrl = rset->keycontrol;
431 struct heap_item *it;
433 if (heap_empty(mrfd->h))
435 it = mrfd->h->heap[1];
436 memcpy(buf, it->buf, kctrl->key_size);
445 rdres = rset_read(it->fd, it->buf, &it->term);
447 heap_balance(mrfd->h);
449 heap_delete(mrfd->h);
454 /** \brief reads one item key from an 'and' set
455 \param rfd set handle
456 \param buf resulting item buffer
457 \param term resulting term
459 \retval 1 item could be read
461 Has to return all hits where each item points to the
462 same sysno (scope), in order. Keep an extra key (hitkey)
463 as long as all records do not point to hitkey, forward
464 them, and update hitkey to be the highest seen so far.
465 (if any item eof's, mark eof, and return 0 thereafter)
466 Once a hit has been found, scan all items for the smallest
467 value. Mark all as being in the tail. Read next from that
468 item, and if not in the same record, clear its tail bit
470 static int r_read_and (RSFD rfd, void *buf, TERMID *term)
471 { struct rfd_private *p = rfd->priv;
473 const struct rset_key_control *kctrl = ct->keycontrol;
478 { /* we are tailing, find lowest tail and return it */
482 for (i = 0; i<ct->no_children; i++)
488 (p->items[i].buf, p->items[mintail].buf);
494 if (kctrl->get_segment)
495 { /* segments enabled */
496 zint segment = kctrl->get_segment(p->items[i].buf);
497 /* store segment if not stored already */
498 if (!p->segment && segment)
499 p->segment = segment;
501 /* skip rest entirely if segments don't match */
502 if (p->segment && segment && p->segment != segment)
507 /* return the lowest tail */
508 memcpy(buf, p->items[mintail].buf, kctrl->key_size);
510 *term = p->items[mintail].term;
511 if (!rset_read(p->items[mintail].fd, p->items[mintail].buf,
512 &p->items[mintail].term))
514 p->eof = 1; /* game over, once tails have been returned */
515 p->tailbits[mintail] = 0;
521 cmp = (*kctrl->cmp)(p->items[mintail].buf,buf);
522 if (cmp >= rfd->rset->scope)
524 p->tailbits[mintail] = 0;
529 continue; /* skip again.. eventually tailcount will be 0 */
530 if (p->tailcount == 0)
534 /* not tailing, forward until all records match, and set up */
535 /* as tails. the earlier 'if' will then return the hits */
537 return 0; /* nothing more to see */
538 i = 1; /* assume items[0] is highest up */
539 while (i < ct->no_children)
541 int cmp = (*kctrl->cmp)(p->items[0].buf, p->items[i].buf);
542 if (cmp <= -rfd->rset->scope) { /* [0] was behind, forward it */
543 if (!rset_forward(p->items[0].fd, p->items[0].buf,
544 &p->items[0].term, p->items[i].buf))
546 p->eof = 1; /* game over */
549 i = 0; /* start forwarding from scratch */
551 else if (cmp>=rfd->rset->scope)
552 { /* [0] was ahead, forward i */
553 if (!rset_forward(p->items[i].fd, p->items[i].buf,
554 &p->items[i].term, p->items[0].buf))
556 p->eof = 1; /* game over */
563 /* if we get this far, all rsets are now within +- scope of [0] */
564 /* ergo, we have a hit. Mark them all as tailing, and let the */
565 /* upper 'if' return the hits in right order */
566 for (i = 0; i < ct->no_children; i++)
568 p->tailcount = ct->no_children;
575 static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
576 const void *untilbuf)
578 struct rfd_private *p = rfd->priv;
580 const struct rset_key_control *kctrl = ct->keycontrol;
585 for (i = 0; i<ct->no_children; i++)
587 cmp = (*kctrl->cmp)(p->items[i].buf,untilbuf);
588 if (cmp <= -rfd->rset->scope)
590 killtail = 1; /* we are moving to a different hit */
591 if (!rset_forward(p->items[i].fd, p->items[i].buf,
592 &p->items[i].term, untilbuf))
594 p->eof = 1; /* game over */
602 for (i = 0; i<ct->no_children; i++)
606 return r_read_and(rfd,buf,term);
609 static void r_pos_x(RSFD rfd, double *current, double *total, int and_op)
612 struct rfd_private *mrfd =
613 (struct rfd_private *)(rfd->priv);
614 double ratio = and_op ? 0.0 : 1.0;
616 double sum_cur = 0.0;
617 double sum_tot = 0.0;
618 for (i = 0; i<ct->no_children; i++){
619 double nratio, cur, tot;
620 rset_pos(mrfd->items[i].fd, &cur, &tot);
621 yaz_log(log_level, "r_pos: %d %0.1f %0.1f", i, cur,tot);
635 ratio = sum_cur / sum_tot;
636 if (ratio == 0.0 || ratio == 1.0) { /* nothing there */
639 yaz_log(log_level, "r_pos: NULL %0.1f %0.1f", *current, *total);
643 *current = (double) (mrfd->hits);
644 *total = *current / ratio;
645 yaz_log(log_level, "r_pos: = %0.1f %0.1f", *current, *total);
649 static void r_pos_and(RSFD rfd, double *current, double *total)
651 r_pos_x(rfd, current, total, 1);
654 static void r_pos_or(RSFD rfd, double *current, double *total)
656 r_pos_x(rfd, current, total, 0);
659 static int r_write (RSFD rfd, const void *buf)
661 yaz_log (YLOG_FATAL, "multior set type is read-only");
665 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
668 rset_get_one_term(ct, terms, maxterms, curterm);
671 /* Special case: Some multi-ors have all terms pointing to the same
672 term. We do not want to duplicate those. Other multiors (and ands)
673 have different terms under them. Those we want.
675 int firstterm= *curterm;
678 for (i = 0; i<ct->no_children; i++)
680 rset_getterms(ct->children[i], terms, maxterms, curterm);
681 if ( ( *curterm > firstterm+1 ) &&
682 ( *curterm <= maxterms ) &&
683 ( terms[(*curterm)-1] == terms[firstterm] )
685 (*curterm)--; /* forget the term, seen that before */
694 * c-file-style: "Stroustrup"
695 * indent-tabs-mode: nil
697 * vim: shiftwidth=4 tabstop=8 expandtab