X-Git-Url: http://sru.miketaylor.org.uk/?a=blobdiff_plain;ds=sidebyside;f=rset%2Frsmultior.c;h=77d2aca909f6d9b037d9da37297f3f04f6b66831;hb=b4470de12aa07f21f5394c19af0d21b196087225;hp=4d9520470baad4ee3f860220aba6fe9b9df8a2d0;hpb=7887042844aef0c6f3d2b711e315abe91c26d211;p=idzebra-moved-to-github.git diff --git a/rset/rsmultior.c b/rset/rsmultior.c index 4d95204..77d2aca 100644 --- a/rset/rsmultior.c +++ b/rset/rsmultior.c @@ -1,4 +1,4 @@ -/* $Id: rsmultior.c,v 1.1 2004-08-16 16:17:49 heikki Exp $ +/* $Id: rsmultior.c,v 1.5 2004-08-23 12:38:53 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -37,8 +37,12 @@ static RSFD r_open (RSET ct, int flag); static void r_close (RSFD rfd); static void r_delete (RSET ct); static void r_rewind (RSFD rfd); -static int r_read (RSFD rfd, void *buf, int *term_index); +static int r_read (RSFD rfd, void *buf); static int r_write (RSFD rfd, const void *buf); +static int r_forward(RSET ct, RSFD rfd, void *buf, + int (*cmpfunc)(const void *p1, const void *p2), + const void *untilbuf); +static void r_pos (RSFD rfd, double *current, double *total); static const struct rset_control control = { @@ -48,63 +52,123 @@ static const struct rset_control control = r_close, r_delete, r_rewind, - rset_default_forward, /* FIXME */ - rset_default_pos, /* FIXME */ + r_forward, + r_pos, r_read, r_write, }; const struct rset_control *rset_kind_multior = &control; +/* The heap structure: + * The rset contains a list or rsets we are ORing together + * The rfd contains a heap of heap-items, which contain + * a rfd opened to those rsets, and a buffer for one key. + * They also contain a ptr to the rset list in the rset + * itself, for practical reasons. + */ + +struct heap_item { + RSFD fd; + void *buf; + RSET rset; +}; + +struct heap { + int heapnum; + int heapmax; + int keysize; + int (*cmp)(const void *p1, const void *p2); + struct heap_item **heap; /* ptrs to the rfd */ +}; +typedef struct heap *HEAP; + + struct rset_multior_info { int key_size; int no_rec; int (*cmp)(const void *p1, const void *p2); - ISAMC isc; - ISAMC_P *isam_positions; - - int no_isam_positions; - int no_save_positions; - struct rset_mor_rfd *rfd_list; + int no_rsets; + RSET *rsets; + struct rset_multior_rfd *rfd_list; }; struct rset_multior_rfd { int flag; - int position; - int position_max; -/* ISAMC_PP *ispt; */ - struct rset_mor_rfd *next; - struct rset_mor_info *info; - struct trunc_info *ti; - zint *countp; - char *pbuf; + struct heap_item *items; /* we alloc and free them here */ + HEAP h; + struct rset_multior_rfd *next; + struct rset_multior_info *info; + zint hits; /* returned so far */ + char *prevvalue; /* to see if we are in another record */ + /* FIXME - is this really needed? */ }; #if 0 -static void heap_swap (struct trunc_info *ti, int i1, int i2) +static void heap_dump_item( HEAP h, int i, int level) { + double cur,tot; + if (i>h->heapnum) + return; + (void)rset_pos(h->heap[i]->rset,h->heap[i]->fd, &cur, &tot); + logf(LOG_LOG," %d %*s i=%p buf=%p %0.1f/%0.1f",i, level, "", + &(h->heap[i]), h->heap[i]->buf, cur,tot ); + heap_dump_item(h, 2*i, level+1); + heap_dump_item(h, 2*i+1, level+1); +} +static void heap_dump( HEAP h,char *msg) { + logf(LOG_LOG, "heap dump: %s num=%d max=%d",msg, h->heapnum, h->heapmax); + heap_dump_item(h,1,1); +} +#endif + +static void heap_swap (HEAP h, int x, int y) { - int swap; + struct heap_item *swap; + swap = h->heap[x]; + h->heap[x]=h->heap[y]; + h->heap[y]=swap; +} - swap = ti->ptr[i1]; - ti->ptr[i1] = ti->ptr[i2]; - ti->ptr[i2] = swap; +static int heap_cmp(HEAP h, int x, int y) +{ + return (*h->cmp)(h->heap[x]->buf,h->heap[y]->buf); } -static void heap_delete (struct trunc_info *ti) +static int heap_empty(HEAP h) { + return ( 0==h->heapnum ); +} + +static void heap_delete (HEAP h) +{ /* deletes the first item in the heap, and balances the rest */ int cur = 1, child = 2; + h->heap[1]=0; /* been deleted */ + heap_swap (h, 1, h->heapnum--); + while (child <= h->heapnum) { + if (child < h->heapnum && heap_cmp(h,child,1+child)>0 ) + child++; + if (heap_cmp(h,cur,child) > 0) + { + heap_swap (h, cur, child); + cur = child; + child = 2*cur; + } + else + break; + } +} - heap_swap (ti, 1, ti->heapnum--); - while (child <= ti->heapnum) { - if (child < ti->heapnum && - (*ti->cmp)(ti->heap[ti->ptr[child]], - ti->heap[ti->ptr[1+child]]) > 0) +static void heap_balance (HEAP h) +{ /* The heap root element has changed value (to bigger) */ + /* swap downwards until the heap is ordered again */ + int cur = 1, child = 2; + while (child <= h->heapnum) { + if (child < h->heapnum && heap_cmp(h,child,1+child)>0 ) child++; - if ((*ti->cmp)(ti->heap[ti->ptr[cur]], - ti->heap[ti->ptr[child]]) > 0) + if (heap_cmp(h,cur,child) > 0) { - heap_swap (ti, cur, child); + heap_swap (h, cur, child); cur = child; child = 2*cur; } @@ -113,147 +177,99 @@ static void heap_delete (struct trunc_info *ti) } } -static void heap_insert (struct trunc_info *ti, const char *buf, int indx) + +static void heap_insert (HEAP h, struct heap_item *hi) { int cur, parent; - cur = ++(ti->heapnum); - memcpy (ti->heap[ti->ptr[cur]], buf, ti->keysize); - ti->indx[ti->ptr[cur]] = indx; + cur = ++(h->heapnum); + assert(cur <= h->heapmax); + h->heap[cur]=hi; parent = cur/2; - while (parent && (*ti->cmp)(ti->heap[ti->ptr[parent]], - ti->heap[ti->ptr[cur]]) > 0) + while (parent && (heap_cmp(h,parent,cur) > 0)) { - heap_swap (ti, cur, parent); + assert(parent>0); + heap_swap (h, cur, parent); cur = parent; parent = cur/2; } } + static -struct trunc_info *heap_init (int size, int key_size, - int (*cmp)(const void *p1, const void *p2)) +HEAP heap_create (int size, int key_size, + int (*cmp)(const void *p1, const void *p2)) { - struct trunc_info *ti = (struct trunc_info *) xmalloc (sizeof(*ti)); - int i; - - ++size; - ti->heapnum = 0; - ti->keysize = key_size; - ti->cmp = cmp; - ti->indx = (int *) xmalloc (size * sizeof(*ti->indx)); - ti->heap = (char **) xmalloc (size * sizeof(*ti->heap)); - ti->ptr = (int *) xmalloc (size * sizeof(*ti->ptr)); - ti->swapbuf = (char *) xmalloc (ti->keysize); - ti->tmpbuf = (char *) xmalloc (ti->keysize); - ti->buf = (char *) xmalloc (size * ti->keysize); - for (i = size; --i >= 0; ) - { - ti->ptr[i] = i; - ti->heap[i] = ti->buf + ti->keysize * i; - } - return ti; + HEAP h = (HEAP) xmalloc (sizeof(*h)); + + ++size; /* heap array starts at 1 */ + h->heapnum = 0; + h->heapmax = size; + h->keysize = key_size; + h->cmp = cmp; + h->heap = (struct heap_item**) xmalloc((size)*sizeof(*h->heap)); + h->heap[0]=0; /* not used */ + return h; } -static void heap_close (struct trunc_info *ti) +static void heap_destroy (HEAP h) { - xfree (ti->buf); - xfree (ti->ptr); - xfree (ti->indx); - xfree (ti->heap); - xfree (ti->swapbuf); - xfree (ti->tmpbuf); - xfree (ti); + xfree (h->heap); /* safe, they all point to the rfd */ + xfree (h); } -#endif + static void *r_create (RSET ct, const struct rset_control *sel, void *parms) { - return 0; -/* rset_multior_parms *r_parms = (rset_multior_parms *) parms; - struct rset_mor_info *info; - - ct->flags |= RSET_FLAG_VOLATILE; - info = (struct rset_mor_info *) xmalloc (sizeof(*info)); + struct rset_multior_info *info; + info = (struct rset_multior_info *) xmalloc (sizeof(*info)); info->key_size = r_parms->key_size; assert (info->key_size > 1); info->cmp = r_parms->cmp; - - info->no_save_positions = r_parms->no_save_positions; - - info->isc = r_parms->isc; - info->no_isam_positions = r_parms->no_isam_positions; - info->isam_positions = (ISAMC_P *) - xmalloc (sizeof(*info->isam_positions) * info->no_isam_positions); - memcpy (info->isam_positions, r_parms->isam_positions, - sizeof(*info->isam_positions) * info->no_isam_positions); - info->rfd_list = NULL; - - ct->no_rset_terms = 1; - ct->rset_terms = (RSET_TERM *) xmalloc (sizeof(*ct->rset_terms)); - ct->rset_terms[0] = r_parms->rset_term; + info->no_rsets= r_parms->no_rsets; + info->rsets=r_parms->rsets; /* now we own it! */ + info->rfd_list=0; return info; - */ } static RSFD r_open (RSET ct, int flag) { - return 0; - /* - struct rset_mor_rfd *rfd; - struct rset_mor_info *info = (struct rset_mor_info *) ct->buf; + struct rset_multior_rfd *rfd; + struct rset_multior_info *info = (struct rset_multior_info *) ct->buf; int i; if (flag & RSETF_WRITE) { - logf (LOG_FATAL, "m_or set type is read-only"); - return NULL; + logf (LOG_FATAL, "multior set type is read-only"); + return NULL; } - rfd = (struct rset_mor_rfd *) xmalloc (sizeof(*rfd)); + rfd = (struct rset_multior_rfd *) xmalloc (sizeof(*rfd)); rfd->flag = flag; rfd->next = info->rfd_list; rfd->info = info; info->rfd_list = rfd; - - rfd->ispt = (ISAMC_PP *) - xmalloc (sizeof(*rfd->ispt) * info->no_isam_positions); - - rfd->ti = heap_init (info->no_isam_positions, info->key_size, info->cmp); - - ct->rset_terms[0]->nn = 0; - for (i = 0; ino_isam_positions; i++) - { - rfd->ispt[i] = isc_pp_open (info->isc, info->isam_positions[i]); - - ct->rset_terms[0]->nn += isc_pp_num (rfd->ispt[i]); - - if (isc_pp_read (rfd->ispt[i], rfd->ti->tmpbuf)) - heap_insert (rfd->ti, rfd->ti->tmpbuf, i); - else - { - isc_pp_close (rfd->ispt[i]); - rfd->ispt[i] = NULL; - } + rfd->h = heap_create( info->no_rsets, info->key_size, info->cmp); + rfd->prevvalue=0; + rfd->hits=0; + rfd->items=(struct heap_item *) xmalloc(info->no_rsets*sizeof(*rfd->items)); + for (i=0; ino_rsets; i++){ + rfd->items[i].rset=info->rsets[i]; + rfd->items[i].buf=xmalloc(info->key_size); + rfd->items[i].fd=rset_open(info->rsets[i],RSETF_READ); +/* if (item_readbuf(&(rfd->items[i]))) */ + if ( rset_read(rfd->items[i].rset, rfd->items[i].fd, + rfd->items[i].buf) ) + heap_insert(rfd->h, &(rfd->items[i])); } - rfd->position = info->no_save_positions; - - if (ct->no_rset_terms == 1) - rfd->countp = &ct->rset_terms[0]->count; - else - rfd->countp = 0; - rfd->pbuf = xmalloc (info->key_size); - - r_rewind (rfd); return rfd; - */ } static void r_close (RSFD rfd) { - /* - struct rset_mor_info *info = ((struct rset_mor_rfd*)rfd)->info; - struct rset_mor_rfd **rfdp; + struct rset_multior_rfd *mrfd = (struct rset_multior_rfd *) rfd; + struct rset_multior_info *info = mrfd->info; + struct rset_multior_rfd **rfdp; int i; for (rfdp = &info->rfd_list; *rfdp; rfdp = &(*rfdp)->next) @@ -261,101 +277,96 @@ static void r_close (RSFD rfd) { *rfdp = (*rfdp)->next; - heap_close (((struct rset_mor_rfd *) rfd)->ti); - for (i = 0; ino_isam_positions; i++) - if (((struct rset_mor_rfd *) rfd)->ispt[i]) - isc_pp_close (((struct rset_mor_rfd *) rfd)->ispt[i]); - xfree (((struct rset_mor_rfd *)rfd)->ispt); - xfree (((struct rset_mor_rfd *)rfd)->pbuf); - xfree (rfd); + heap_destroy (mrfd->h); + for (i = 0; ino_rsets; i++) { + if (mrfd->items[i].fd) + rset_close(info->rsets[i],mrfd->items[i].fd); + xfree(mrfd->items[i].buf); + } + xfree(mrfd->items); + if (mrfd->prevvalue) + xfree(mrfd->prevvalue); + xfree(mrfd); return; } logf (LOG_FATAL, "r_close but no rfd match!"); assert (0); - */ } static void r_delete (RSET ct) { - /* - struct rset_mor_info *info = (struct rset_mor_info *) ct->buf; + struct rset_multior_info *info = (struct rset_multior_info *) ct->buf; int i; assert (info->rfd_list == NULL); - xfree (info->isam_positions); - - for (i = 0; ino_rset_terms; i++) - rset_term_destroy (ct->rset_terms[i]); - */ /* FIXME - Watch out! */ - /* - xfree (ct->rset_terms); - - xfree (info); - */ + for(i=0;ino_rsets;i++) + rset_delete(info->rsets[i]); + xfree(info->rsets); + xfree(info); } static void r_rewind (RSFD rfd) { + assert(!"rewind not implemented yet"); } -static int r_read (RSFD rfd, void *buf, int *term_index) +static int r_forward(RSET ct, RSFD rfd, void *buf, + int (*cmpfunc)(const void *p1, const void *p2), + const void *untilbuf) { + struct rset_multior_rfd *mrfd = (struct rset_multior_rfd *) rfd; + struct rset_multior_info *info = mrfd->info; + struct heap_item it; + int rdres; + if (heap_empty(mrfd->h)) return 0; - /* - struct rset_mor_rfd *mrfd = (struct rset_mor_rfd *) rfd; - struct trunc_info *ti = mrfd->ti; - int n = ti->indx[ti->ptr[1]]; + if (cmpfunc) + assert(cmpfunc==mrfd->info->cmp); + it = *(mrfd->h->heap[1]); + memcpy(buf,it.buf, info->key_size); + (mrfd->hits)++; + if (untilbuf) + rdres=rset_forward(it.rset, it.fd, it.buf, cmpfunc,untilbuf); + else + rdres=rset_read(it.rset, it.fd, it.buf); + if ( rdres ) + heap_balance(mrfd->h); + else + heap_delete(mrfd->h); + return 1; - if (!ti->heapnum) - return 0; - *term_index = 0; - memcpy (buf, ti->heap[ti->ptr[1]], ti->keysize); - if (((struct rset_mor_rfd *) rfd)->position) - { - if (isc_pp_read (((struct rset_mor_rfd *) rfd)->ispt[n], ti->tmpbuf)) - { - heap_delete (ti); - if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) - ((struct rset_mor_rfd *) rfd)->position--; - heap_insert (ti, ti->tmpbuf, n); - } - else - heap_delete (ti); - if (mrfd->countp && ( - *mrfd->countp == 0 || (*ti->cmp)(buf, mrfd->pbuf) > 1)) - { - memcpy (mrfd->pbuf, buf, ti->keysize); - (*mrfd->countp)++; - } - return 1; - } - while (1) - { - if (!isc_pp_read (((struct rset_mor_rfd *) rfd)->ispt[n], ti->tmpbuf)) - { - heap_delete (ti); - break; - } - if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) - { - heap_delete (ti); - heap_insert (ti, ti->tmpbuf, n); - break; - } +} + +static int r_read (RSFD rfd, void *buf) +{ + return r_forward(0,rfd, buf,0,0); +} + +static void r_pos (RSFD rfd, double *current, double *total) +{ + struct rset_multior_rfd *mrfd = (struct rset_multior_rfd *) rfd; + struct rset_multior_info *info = mrfd->info; + double cur, tot; + double scur=0.0, stot=0.0; + int i; + for (i=0; ino_rsets; i++){ + rset_pos(mrfd->items[i].rset, mrfd->items[i].fd, &cur, &tot); + logf(LOG_LOG, "r_pos: %d %0.1f %0.1f", i, cur,tot); + scur += cur; + stot += tot; } - if (mrfd->countp && ( - *mrfd->countp == 0 || (*ti->cmp)(buf, mrfd->pbuf) > 1)) - { - memcpy (mrfd->pbuf, buf, ti->keysize); - (*mrfd->countp)++; + if (stot <1.0) { /* nothing there */ + *current=0; + *total=0; + return; } - return 1; - */ + *current=mrfd->hits; + *total=*current*stot/scur; } static int r_write (RSFD rfd, const void *buf) { - logf (LOG_FATAL, "mor set type is read-only"); + logf (LOG_FATAL, "multior set type is read-only"); return -1; }