2 * Copyright (c) 1996-1998, Index Data.
3 * See the file LICENSE for details.
6 * merge-d.c: merge routines for isamd
10 * - single-entry optimizing
11 * - study and optimize block sizes (later)
17 * There is aconfusion about the block addresses. cat or type is the category,
18 * pos or block is the block number. pp structures keep these two separate,
19 * and combine when saving the pp. The next pointer in the pp structure is
20 * also a combined address, but needs to be combined every time it is needed,
21 * and separated when the partss are needed... This is done with the isamd_
22 * _block, _type, and _addr macros. The _addr takes block and type as args,
23 * in that order. This conflicts with the order these are often mentioned in
24 * the debug log calls, and other places, leading to small mistakes here
33 #include "../index/index.h"
47 static char *hexdump(unsigned char *p, int len, char *buff) {
48 static char localbuff[128];
50 if (!buff) buff=localbuff;
53 sprintf(bytebuff,"%02x",*p);
55 strcat(buff,bytebuff);
56 if (len) strcat(buff,",");
62 static int separateDiffBlock(ISAMD_PP pp)
64 return ( 0 != pp->next);
68 static void getDiffInfo(ISAMD_PP pp, int diffidx)
69 { /* builds the diff info structures from a diffblock */
70 int maxinfos = pp->is->method->filecat[pp->cat].bsize / 5 +1;
71 /* Each diff takes at least 5 bytes. Probably more, but this is safe */
72 int i=1; /* [0] is used for the main data */
73 int diffsz= maxinfos * sizeof(struct ISAMD_DIFF_s);
74 pp->diffinfo = xmalloc( diffsz );
75 memset(pp->diffinfo,'\0',diffsz);
76 if (pp->is->method->debug > 4)
77 logf(LOG_LOG,"isamd_getDiffInfo: %d (%d:%d), ix=%d mx=%d",
78 isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, diffidx,maxinfos);
82 if ( diffidx+sizeof(int) > pp->is->method->filecat[pp->cat].bsize )
84 if (pp->is->method->debug > 4)
85 logf(LOG_LOG,"isamd_getDiffInfo:Near end (no room for len) at ix=%d n=%d",
87 return; /* whole block done */
89 memcpy( &pp->diffinfo[i].maxidx, &pp->diffbuf[diffidx], sizeof(int) );
90 if (0==pp->diffinfo[i].maxidx)
92 if (pp->is->method->debug > 4)
93 logf(LOG_LOG,"isamd_getDiffInfo:End mark at ix=%d n=%d",
95 return; /* end marker */
97 diffidx += sizeof(int);
98 pp->diffinfo[i].decodeData = (*pp->is->method->code_start)(ISAMD_DECODE);
99 pp->diffinfo[i].diffidx = diffidx;
100 if (pp->is->method->debug > 4)
101 logf(LOG_LOG,"isamd_getDiff[%d]:%d-%d %s",
102 i,diffidx-sizeof(int),pp->diffinfo[i].maxidx,
103 hexdump((char *)&pp->diffbuf[diffidx-4],8,0) );
104 diffidx=pp->diffinfo[i].maxidx;
105 if ( diffidx > pp->is->method->filecat[pp->cat].bsize )
106 return; /* whole block done */
109 assert ("too many diff sequences in the block");
112 static void loadDiffs(ISAMD_PP pp)
113 { /* assumes pp is a firstblock! */
117 return; /* no diffs to talk about */
119 { /* separate diff block, load it */
120 pp->diffbuf= xmalloc( pp->is->method->filecat[pp->cat].bsize);
121 diffaddr=pp->diffs/2;
122 isamd_read_block (pp->is, isamd_type(diffaddr),
123 isamd_block(diffaddr), pp->diffbuf );
124 diffidx= ISAMD_BLOCK_OFFSET_N;
127 { /* integrated block, just set the pointers */
128 pp->diffbuf = pp->buf;
129 diffidx = pp->size; /* size is the beginning of diffs, diffidx the end*/
131 getDiffInfo(pp,diffidx);
135 void isamd_free_diffs(ISAMD_PP pp)
140 for (i=0;pp->diffinfo[i].decodeData;i++)
141 (*pp->is->method->code_stop)(ISAMD_DECODE,pp->diffinfo[i].decodeData);
143 if (pp->diffbuf != pp->buf)
145 } /* isamd_free_diffs */
148 /* Reads one item and corrects for the diffs, if any */
149 /* return 1 for ok, 0 for eof */
150 int isamd_read_item (ISAMD_PP pp, char **dst)
155 int winner=0; /* which diff holds the day */
156 int i; /* looping diffs */
159 if (pp->diffs==0) /* no diffs, just read the thing */
160 return isamd_read_main_item(pp,dst);
167 if (0==pp->diffinfo[0].key.sysno)
168 { /* 0 is special case, main data. */
169 keyptr=(char*) &(pp->diffinfo[0].key);
170 pp->diffinfo[0].mode = ! isamd_read_main_item(pp,&keyptr);
171 if (pp->is->method->debug > 4)
172 logf(LOG_LOG,"isamd_read_item: read main %d.%d (%x.%x)",
173 pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno,
174 pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno);
175 } /* get main data */
177 for (i=1; (!retry) && (pp->diffinfo[i].decodeData); i++)
179 if (pp->is->method->debug > 4)
180 logf(LOG_LOG,"isamd_read_item: considering d%d %d.%d ix=%d mx=%d",
181 i, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
182 pp->diffinfo[i].diffidx, pp->diffinfo[i].maxidx);
184 if ( (0==pp->diffinfo[i].key.sysno) &&
185 (pp->diffinfo[i].diffidx < pp->diffinfo[i].maxidx))
186 {/* read a new one, if possible */
187 codeptr= codestart = &(pp->diffbuf[pp->diffinfo[i].diffidx]);
188 keyptr=(char *)&(pp->diffinfo[i].key);
189 (*pp->is->method->code_item)(ISAMD_DECODE,
190 pp->diffinfo[i].decodeData, &keyptr, &codeptr);
191 pp->diffinfo[i].diffidx += codeptr-codestart;
192 pp->diffinfo[i].mode = pp->diffinfo[i].key.seqno & 1;
193 pp->diffinfo[i].key.seqno = pp->diffinfo[i].key.seqno >>1 ;
194 if (pp->is->method->debug > 4)
195 logf(LOG_LOG,"isamd_read_item: read diff[%d] %d.%d (%x.%x)",i,
196 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
197 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);
199 if ( 0!= pp->diffinfo[i].key.sysno)
200 { /* got a key, compare */
201 cmp=key_compare(&pp->diffinfo[i].key, &pp->diffinfo[winner].key);
202 if (0==pp->diffinfo[winner].key.sysno)
203 cmp=-1; /* end of main sequence, take all diffs */
206 if (pp->is->method->debug > 4)
207 logf(LOG_LOG,"isamd_read_item: ins %d<%d %d.%d (%x.%x) < %d.%d (%x.%x)",
209 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
210 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
211 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno,
212 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);
213 if (pp->diffinfo[i].mode) /* insert diff, should always be */
216 assert("delete diff for nonexisting item");
217 /* is an assert too steep here?*/
221 if (!pp->diffinfo[i].mode) /* delete diff. should always be */
223 if (pp->is->method->debug > 4)
224 logf(LOG_LOG,"isamd_read_item: del %d at%d %d.%d (%x.%x)",
226 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
227 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);
228 pp->diffinfo[winner].key.sysno=0; /* delete it */
231 if (pp->is->method->debug > 4)
232 logf(LOG_LOG,"isamd_read_item: duplicate ins %d at%d %d.%d (%x.%x)",
234 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
235 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);
236 /* skip the insert, since we already have it in the base */
237 pp->diffinfo[i].key.sysno=0; /* done with the delete */
238 retry=1; /* start all over again */
240 /* else it is a later key, its turn will come */
242 } /* for each diff */
245 if ( pp->diffinfo[winner].key.sysno)
247 if (pp->is->method->debug > 4)
248 logf(LOG_LOG,"isamd_read_item: got %d %d.%d (%x.%x)",
250 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno,
251 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);
252 memcpy(*dst, &pp->diffinfo[winner].key, sizeof(struct it_key) );
253 *dst += sizeof(struct it_key);
254 pp->diffinfo[winner].key.sysno=0; /* used that one up */
259 if (pp->is->method->debug > 4)
260 logf(LOG_LOG,"isamd_read_item: eof w=%d %d.%d (%x.%x)",
262 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno,
263 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);
264 assert(winner==0); /* if nothing found, nothing comes from a diff */
269 } /* isamd_read_item */
272 static void isamd_reduceblock(ISAMD_PP pp)
273 /* takes a large block, and reduces its category if possible */
274 /* Presumably the first block in an isam-list */
277 return; /* existing block, do not touch */
278 if (pp->is->method->debug > 2)
279 logf(LOG_LOG,"isamd_reduce: start p=%d c=%d sz=%d",
280 pp->pos, pp->cat, pp->size);
281 while ( ( pp->cat > 0 ) && (!pp->next) &&
282 (pp->offset < pp->is->method->filecat[pp->cat-1].bsize ) )
284 pp->pos = isamd_alloc_block(pp->is, pp->cat);
285 if (pp->is->method->debug > 2)
286 logf(LOG_LOG,"isamd_reduce: got p=%d c=%d sz=%d",
287 pp->pos, pp->cat, pp->size);
291 static int isamd_build_first_block(ISAMD is, ISAMD_I data)
293 char i_item[128]; /* one input item */
296 int i_mode; /* 0 for delete, 1 for insert */
304 char *c_ptr = codebuff;
311 firstpp=pp=isamd_pp_open(is, isamd_addr(0,is->max_cat));
312 firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1;
313 encoder_data=(*is->method->code_start)(ISAMD_ENCODE);
314 maxsize = is->method->filecat[is->max_cat].bsize;
316 if (is->method->debug >3)
317 logf(LOG_LOG,"isamd_bld start: p=%d c=%d sz=%d maxsz=%d ",
318 pp->pos, pp->cat, pp->size, maxsize);
320 /* read first input */
322 i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
324 if (is->method->debug >3)
325 logf(LOG_LOG,"isamd: build_fi start: m=%d %s",
326 i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) );
331 { /* ignore deletes here, should not happen */
335 (*is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr);
336 codelen = c_ptr - codebuff;
337 assert ( (codelen<128) && (codelen>0));
338 if (is->method->debug >3)
339 logf(LOG_LOG,"isamd:build: coded into %s (nk=%d)",
340 hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1);
342 if (pp->offset + codelen > maxsize )
343 { /* oops, block full - get a new one */
345 { /* special case: it was the first block. Save much later */
347 { /* firstpp not allocated yet, do so now, */
348 /* to keep blocks in order. Don't save yet, though */
349 firstpp->pos = isamd_alloc_block(is, firstpp->cat);
351 newblock = isamd_alloc_block(is, firstpp->cat);
352 firstpp->next = isamd_addr(newblock,firstpp->cat);
353 /* keep the largest category */
354 pp=isamd_pp_open(is,isamd_addr(0,firstpp->cat));/*don't load*/
356 pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N;
358 if (is->method->debug >3)
359 logf(LOG_LOG,"isamd_build: Alloc2 f=%d (%d:%d) n=%d(%d:%d)",
360 isamd_addr(firstpp->pos,firstpp->cat),
361 firstpp->cat, firstpp->pos,
362 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos );
365 { /* it was not the first block */
366 newblock = isamd_alloc_block(is, firstpp->cat);
367 pp->next = isamd_addr(newblock,firstpp->cat);
368 isamd_buildlaterblock(pp);
369 isamd_write_block(is,pp->cat,pp->pos,pp->buf);
370 pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N;
372 pp->cat = firstpp->cat;
373 pp->pos = isamd_block(firstpp->next);
375 /* reset encoging and code again */
376 (*is->method->code_reset)(encoder_data);
379 (*is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr);
380 codelen = c_ptr - codebuff;
381 assert ( (codelen<128) && (codelen>0));
382 if (is->method->debug >3)
383 logf(LOG_LOG,"isamd:build: recoded into %s (nk=%d)",
384 hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1);
387 /* write the data into pp, now we have room */
388 memcpy(&(pp->buf[pp->offset]),codebuff,codelen);
389 pp->offset += codelen;
394 /* (try to) read the next item */
396 i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
398 if ( (i_more) && (is->method->debug >3) )
399 logf(LOG_LOG,"isamd: build_fi start: m=%d %s",
400 i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) );
405 /* order of things: Better to save firstpp first, if there are just two */
406 /* blocks, but last if there are blocks in between, as these have already */
407 /* been saved... optimise later */
409 /* save the first block */
410 isamd_reduceblock(firstpp);
411 isamd_buildfirstblock(firstpp);
412 isamd_write_block(is,firstpp->cat,firstpp->pos,firstpp->buf);
413 retval = isamd_addr(firstpp->pos,firstpp->cat);
415 if (firstpp!=pp){ /* and the last one */
416 pp->next = 0;/* just to be sure */
417 isamd_buildlaterblock(pp);
418 isamd_write_block(is,pp->cat,pp->pos,pp->buf);
422 isamd_pp_close(firstpp);
425 } /* build_first_block */
429 static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data)
431 struct it_key i_key; /* one input item */
432 char *i_item = (char *) &i_key; /* same as chars */
435 int i_mode; /* 0 for delete, 1 for insert */
441 int retval=ipos; /* by default we do not change the firstblock addr */
446 char *c_ptr = codebuff;
449 pp=firstpp=isamd_pp_open(is, ipos);
451 /* TODO: Turn these ifs around! Check first diffs==0, and create new */
452 /* according to separateDiffBlock. if !=0, read according to its type */
453 /* bit. Much more robust this way around! */
455 if (separateDiffBlock(firstpp))
456 { /* multi-block item, get the diff block */
457 if (firstpp->diffs==0)
458 { /* allocate first diff block */
459 pp=isamd_pp_open(is,isamd_addr(0,firstpp->cat));
460 pp->pos = isamd_alloc_block(pp->is, pp->cat);
461 firstpp->diffs = pp->pos*2 +1;
462 diffidx = pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N;
463 if (is->method->debug >3)
464 logf(LOG_LOG,"isamd_appd: alloc diff (%d) %d %d:%d ix=%d",
466 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
470 { /* find pointers within the existing block */
471 pp=isamd_pp_open(is, firstpp->diffs/2);
472 diffidx = pp->offset= pp->size;
473 if (is->method->debug >3)
474 logf(LOG_LOG,"isamd_appd: load diff (%d) %d (%d:%d) ix=%d",
476 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
481 { /* single-block item, get idx right */
482 diffidx= pp->diffs/2;
483 if (is->method->debug >3)
484 logf(LOG_LOG,"isamd_appd: diffs in head %d (%d:%d) ix=%d sz=%d",
485 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
488 { /* no diffs yet, make them */
490 pp->diffs = diffidx *2 +0;
494 encoder_data=(*is->method->code_start)(ISAMD_ENCODE);
495 maxsize = is->method->filecat[pp->cat].bsize;
497 difflenidx = diffidx;
498 diffidx+=sizeof(int); /* difflen will be stored here */
500 /* read first input */
502 i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
504 if (is->method->debug >3)
505 logf(LOG_LOG,"isamd_appd: start with m=%d %s",
506 i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) );
510 assert( ((i_key.seqno<<1)>>1) == i_key.seqno); /* can spare the bit */
511 i_key.seqno = i_key.seqno * 2 + i_mode;
514 (*is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr);
515 codelen = c_ptr - codebuff;
516 assert ( (codelen<128) && (codelen>0));
517 if (is->method->debug >3)
518 logf(LOG_LOG,"isamd_appd: coded into %d: %s (nk=%d) (ix=%d)",
519 codelen, hexdump(codebuff, codelen,hexbuff),
520 firstpp->numKeys,diffidx);
522 if (diffidx + codelen > maxsize )
524 logf(LOG_LOG,"isamd_appd: block full (ix=%d mx=%d)",
526 return 0; /*!*/ /* do something about it!!! */
530 memcpy(&(pp->buf[diffidx]),codebuff,codelen);
533 firstpp->numKeys++; /* insert diff */
535 firstpp->numKeys--; /* delete diff */
536 memcpy(&(pp->buf[difflenidx]),&diffidx,sizeof(diffidx));
538 firstpp->diffs =diffidx*2+0;
542 /* try to read the next input */
544 i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
545 if ( (i_more) && (is->method->debug >3) )
546 logf(LOG_LOG,"isamd_appd: got m=%d %s",
547 i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) );
550 /* clear the next difflen, if room for such */
551 difflenidx = diffidx;
552 while ( (difflenidx-diffidx<=sizeof(int)) && (difflenidx<maxsize))
553 pp->buf[difflenidx++]='\0';
555 /* ok, save the block(s) */
557 { /* save the diff block */
558 pp->next = 0; /* just to be sure */
559 isamd_buildlaterblock(pp);
560 isamd_write_block(is,pp->cat,pp->pos,pp->buf);
564 isamd_buildfirstblock(firstpp);
565 isamd_write_block(is,firstpp->cat,firstpp->pos,firstpp->buf);
566 retval = isamd_addr(firstpp->pos,firstpp->cat);
567 isamd_pp_close(firstpp);
574 ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data)
579 retval = isamd_build_first_block(is,data);
581 retval = append_diffs(is,ipos,data);
588 * $Log: merge-d.c,v $
589 * Revision 1.3 1999-07-21 14:24:50 heikki
590 * isamd write and read functions ok, except when diff block full.
591 * (merge not yet done)
593 * Revision 1.1 1999/07/14 13:14:47 heikki