From 6feeb67bdd353384cc16289c7db4fcb2e62cb74f Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Thu, 11 Jul 2002 16:16:00 +0000 Subject: [PATCH] Fixed a bug in isamd, failed to store a single key when its bits did not fit into a singleton. --- index/invstat.c | 47 +++++++++++++++++++++++++++++++++++++++++++---- isamc/isamd.c | 17 +++++++++++------ isamc/merge-d.c | 27 ++++++++++++++++++++++++--- test/dmoz/zebra-b.cfg | 5 ++++- test/dmoz/zebra-c.cfg | 5 ++++- test/dmoz/zebra-d.cfg | 8 +++++++- 6 files changed, 93 insertions(+), 16 deletions(-) diff --git a/index/invstat.c b/index/invstat.c index f5bc6aa..27636ae 100644 --- a/index/invstat.c +++ b/index/invstat.c @@ -30,7 +30,8 @@ struct inv_stat_info { #define SINGLETON_TYPE 8 /* the type to use for singletons that */ /* have no block and no block type */ -static void print_dict_item (ZebraMaps zm, const char *s, int count ) +static void print_dict_item (ZebraMaps zm, const char *s, int count, + int firstsys, int firstseq, int lastsys, int lastseq ) { int reg_type = s[1]; char keybuf[IT_MAX_WORD+1]; @@ -48,7 +49,8 @@ static void print_dict_item (ZebraMaps zm, const char *s, int count ) } *to = '\0'; /* yaz_log (LOG_LOG, "%s", keybuf); */ - printf("%10d %s\n",count, keybuf); + printf("%10d %s %d.%d - %d.%d\n",count, keybuf, + firstsys,firstseq, lastsys,lastseq); } static int inv_stat_handle (char *name, const char *info, int pos, @@ -58,6 +60,10 @@ static int inv_stat_handle (char *name, const char *info, int pos, int i = 0; struct inv_stat_info *stat_info = (struct inv_stat_info*) client; ISAMS_P isam_p; + int firstsys=-1; + int firstseq=-1; + int lastsys=-1; + int lastseq=-1; stat_info->no_dict_entries++; stat_info->no_dict_bytes += strlen(name); @@ -79,6 +85,13 @@ static int inv_stat_handle (char *name, const char *info, int pos, stat_info->cksum = stat_info->cksum * 65509 + key.sysno + 11 * key.seqno; occurx++; + if (-1==firstsys) + { + firstseq=key.seqno; + firstsys=key.sysno; + } + lastsys=key.sysno; + 
lastseq=key.seqno; } assert (occurx == occur); stat_info->no_isam_entries[0] += occur; @@ -106,6 +119,13 @@ static int inv_stat_handle (char *name, const char *info, int pos, stat_info->cksum = stat_info->cksum * 65509 + key.sysno + 11 * key.seqno; occurx++; + if (-1==firstsys) + { + firstseq=key.seqno; + firstsys=key.sysno; + } + lastsys=key.sysno; + lastseq=key.seqno; } assert (occurx == occur); stat_info->no_isam_entries[isc_type(isam_p)] += occur; @@ -125,6 +145,13 @@ static int inv_stat_handle (char *name, const char *info, int pos, stat_info->cksum = stat_info->cksum * 65509 + key.sysno + 11 * key.seqno; occurx++; + if (-1==firstsys) + { + firstseq=key.seqno; + firstsys=key.sysno; + } + lastsys=key.sysno; + lastseq=key.seqno; if ( pp->is->method->debug >8 ) logf (LOG_LOG,"sysno=%d seqno=%d (%x/%x) oc=%d/%d ofs=%d ", key.sysno, key.seqno, @@ -160,6 +187,13 @@ static int inv_stat_handle (char *name, const char *info, int pos, stat_info->cksum = stat_info->cksum * 65509 + key.sysno + 11 * key.seqno; occur++; + if (-1==firstsys) + { + firstseq=key.seqno; + firstsys=key.sysno; + } + lastsys=key.sysno; + lastseq=key.seqno; } isamb_pp_close_x (pp, &size, &blocks); stat_info->isamb_blocks[cat] += blocks; @@ -174,7 +208,8 @@ static int inv_stat_handle (char *name, const char *info, int pos, i++; ++(stat_info->isam_occurrences[i]); if (stat_info->dumpwords) - print_dict_item(stat_info->zh->reg->zebra_maps, name, occur); + print_dict_item(stat_info->zh->reg->zebra_maps, name, occur, + firstsys,firstseq, lastsys, lastseq); return 0; } @@ -341,7 +376,11 @@ void zebra_register_statistics (ZebraHandle zh, int dumpdict) /* * * $Log: invstat.c,v $ - * Revision 1.30 2002-07-11 13:03:01 heikki + * Revision 1.31 2002-07-11 16:16:00 heikki + * Fixed a bug in isamd, failed to store a single key when its bits + * did not fit into a singleton. 
+ * + * Revision 1.30 2002/07/11 13:03:01 heikki * Added dumpdict command line option to dump the * dictionary before doing the usual stats * diff --git a/isamc/isamd.c b/isamc/isamd.c index 13754fb..f06cda0 100644 --- a/isamc/isamd.c +++ b/isamc/isamd.c @@ -1,7 +1,7 @@ /* * Copyright (c) 1995-1998, Index Data. * See the file LICENSE for details. - * $Id: isamd.c,v 1.20 2002-06-19 10:29:18 adam Exp $ + * $Id: isamd.c,v 1.21 2002-07-11 16:16:00 heikki Exp $ * * Isamd - isam with diffs * Programmed by: Heikki Levanto @@ -547,12 +547,13 @@ void isamd_pp_close (ISAMD_PP pp) (*is->method->code_stop)(ISAMD_DECODE, pp->decodeClientData); isamd_free_diffs(pp); /* see merge-d.h */ - xfree (pp->buf); - xfree (pp); if (is->method->debug > 5) - logf (LOG_LOG, "isamd_pp_close %p %d=%d:%d sz=%d n=%d=%d:%d", + logf (LOG_LOG, "isamd_pp_close %p %d=%d:%d sz=%d n=%d=%d:%d nk=%d", pp, isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, pp->size, - pp->next, isamd_type(pp->next), isamd_block(pp->next) ); + pp->next, isamd_type(pp->next), isamd_block(pp->next), + pp->numKeys ); + xfree (pp->buf); + xfree (pp); } @@ -850,7 +851,11 @@ void isamd_pp_dump (ISAMD is, ISAMD_P ipos) /* * $Log: isamd.c,v $ - * Revision 1.20 2002-06-19 10:29:18 adam + * Revision 1.21 2002-07-11 16:16:00 heikki + * Fixed a bug in isamd, failed to store a single key when its bits + * did not fit into a singleton. + * + * Revision 1.20 2002/06/19 10:29:18 adam * align block sizes for isam sys. Better plot for test * * Revision 1.19 1999/11/30 13:48:04 adam diff --git a/isamc/merge-d.c b/isamc/merge-d.c index 29fc630..ba10b9e 100644 --- a/isamc/merge-d.c +++ b/isamc/merge-d.c @@ -3,7 +3,7 @@ * See the file LICENSE for details. 
* Heikki Levanto * - * $Id: merge-d.c,v 1.25 1999-11-30 13:48:04 adam Exp $ + * $Id: merge-d.c,v 1.26 2002-07-11 16:16:00 heikki Exp $ * * bugs * sinleton-bit has to be in the high end, not low, so as not to confuse @@ -232,7 +232,18 @@ static int filter_only_one(FILTER F) return ( (0 != F->r1) && (0 == F->r2)); } - +/* We may need backfilling, if we read a lonely key to make */ +/* a singleton, but its bits will not fit in. Then we need to */ +/* process it normally, which means reading it again. So we */ +/* need to unread it first. Luckily the filter is empty at that */ +/* point */ +static void filter_backfill(FILTER F, struct it_key *k, int mode) +{ + assert(F->r1 == FILTER_NOTYET ); /* not overwriting data! */ + F->k1=*k; + F->m1=mode; + F->r1=1; /* ok read */ +} /*************************************************************** @@ -1091,6 +1102,12 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) filter_read(F,&k,&mode); assert(mode); rc = singleton_encode(&k); + if (!rc) + { + if (is->method->debug >9) + logf(LOG_LOG,"isamd_appd: singleton didn't fit, backfilling"); + filter_backfill(F,&k, mode); + } if (is->method->debug >9) logf(LOG_LOG,"isamd_appd: singleton %d (%x)", rc,rc); @@ -1122,7 +1139,11 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) /* * $Log: merge-d.c,v $ - * Revision 1.25 1999-11-30 13:48:04 adam + * Revision 1.26 2002-07-11 16:16:00 heikki + * Fixed a bug in isamd, failed to store a single key when its bits + * did not fit into a singleton. + * + * Revision 1.25 1999/11/30 13:48:04 adam * Improved installation. Updated for inclusion of YAZ header files. * * Revision 1.24 1999/10/05 09:57:40 heikki diff --git a/test/dmoz/zebra-b.cfg b/test/dmoz/zebra-b.cfg index 44a2bb4..b3c99b3 100644 --- a/test/dmoz/zebra-b.cfg +++ b/test/dmoz/zebra-b.cfg @@ -1,4 +1,3 @@ -# $Id: zebra-b.cfg,v 1.2 2002-06-19 08:32:34 adam Exp $ profilePath: .:../../tab:../../../yaz/tab # Files that describe the attribute sets supported.
@@ -11,4 +10,8 @@ recordtype: grs.sgml #storekeys: 1 #storedata: 1 #recordId: (bib1,identifier-standard) + +notimestamps: 1 + isam: b +register: reg-b:100M diff --git a/test/dmoz/zebra-c.cfg b/test/dmoz/zebra-c.cfg index dd8ddca..7533073 100644 --- a/test/dmoz/zebra-c.cfg +++ b/test/dmoz/zebra-c.cfg @@ -1,4 +1,3 @@ -# $Id: zebra-c.cfg,v 1.2 2002-06-19 08:32:34 adam Exp $ profilePath: .:../../tab:../../../yaz/tab # Files that describe the attribute sets supported. @@ -11,4 +10,8 @@ recordtype: grs.sgml #storekeys: 1 #storedata: 1 #recordId: (bib1,identifier-standard) + +notimestamps: 1 + isam: c +register: reg-c:100M diff --git a/test/dmoz/zebra-d.cfg b/test/dmoz/zebra-d.cfg index 1eb083f..679c733 100644 --- a/test/dmoz/zebra-d.cfg +++ b/test/dmoz/zebra-d.cfg @@ -1,4 +1,3 @@ -# $Id: zebra-d.cfg,v 1.1 2002-06-19 09:00:28 adam Exp $ profilePath: .:../../tab:../../../yaz/tab # Files that describe the attribute sets supported. @@ -11,4 +10,11 @@ recordtype: grs.sgml #storekeys: 1 #storedata: 1 #recordId: (bib1,identifier-standard) + +notimestamps: 1 + isam: d +register: reg-d:100M + +isamddebug:6 + -- 1.7.10.4