From 8fa59ec675af0ea4f15f157401c61121b5cb5782 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 23 Nov 2007 13:52:52 +0000 Subject: [PATCH] Factor records system mgt into recindex.c, records.c. --- index/Makefile.am | 5 +- index/recindex.c | 952 ++--------------------------------------------------- index/recindex.h | 13 +- index/recindxp.h | 9 +- index/records.c | 934 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 981 insertions(+), 932 deletions(-) create mode 100644 index/records.c diff --git a/index/Makefile.am b/index/Makefile.am index a4c84b6..2391b2e 100644 --- a/index/Makefile.am +++ b/index/Makefile.am @@ -1,4 +1,4 @@ -## $Id: Makefile.am,v 1.64 2007-11-01 14:56:07 adam Exp $ +## $Id: Makefile.am,v 1.65 2007-11-23 13:52:52 adam Exp $ aux_libs = \ ../rset/libidzebra-rset.la \ @@ -93,7 +93,8 @@ libidzebra_2_0_la_SOURCES = \ limit.c \ orddict.c orddict.h \ rank.h rank1.c ranksimilarity.c rankstatic.c \ - recindex.c recindex.h recindxp.h reckeys.c reckeys.h recstat.c retrieve.c \ + records.c recindex.c recindex.h recindxp.h reckeys.c reckeys.h \ + recstat.c retrieve.c \ rpnscan.c rpnsearch.c rpnfacet.c sortidx.c symtab.c stream.c \ update_path.c update_file.c trunc.c untrans.c isam_methods.c \ zaptterm.c zebraapi.c zinfo.c zinfo.h zsets.c key_block.c key_block.h \ diff --git a/index/recindex.c b/index/recindex.c index 8de2ca8..a8f35df 100644 --- a/index/recindex.c +++ b/index/recindex.c @@ -1,4 +1,4 @@ -/* $Id: recindex.c,v 1.57 2007-11-23 13:11:08 adam Exp $ +/* $Id: recindex.c,v 1.58 2007-11-23 13:52:52 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -20,70 +20,57 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#define RIDX_CHUNK 128 - -/* - * Format of first block - * next (8 bytes) - * ref_count (2 bytes) - * block (500 bytes) - * - * Format of subsequent blocks - * next (8 bytes) - * block (502 bytes) - * - * Format of each record - * sysno - * (length, data) - pairs - * length = 0 if same as previous - */ #include #include #include #include #include -#include "recindxp.h" +#include "recindex.h" -#if HAVE_BZLIB_H -#include -#endif +#define RIDX_CHUNK 128 -/* Modify argument to if below: 1=normal, 0=sysno testing */ -#if 1 -/* If this is used sysno are not converted (no testing) */ -#define FAKE_OFFSET 0 -#define USUAL_RANGE 6000000000LL -#else -/* Use a fake > 2^32 offset so we can test for proper 64-bit handling */ -#define FAKE_OFFSET 6000000000LL -#define USUAL_RANGE 2000000000LL -#endif +struct recindex { + char *index_fname; + BFile index_BFile; +}; -static zint rec_sysno_to_ext(zint sysno) +recindex_t recindex_open(BFiles bfs, int rw) { - assert(sysno >= 0 && sysno <= USUAL_RANGE); - return sysno + FAKE_OFFSET; + recindex_t p = xmalloc(sizeof(*p)); + p->index_fname = "reci"; + p->index_BFile = bf_open(bfs, p->index_fname, RIDX_CHUNK, rw); + if (p->index_BFile == NULL) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s", p->index_fname); + xfree(p); + return 0; + } + return p; } -zint rec_sysno_to_int(zint sysno) +void recindex_close(recindex_t p) { - assert(sysno >= FAKE_OFFSET && sysno <= FAKE_OFFSET + USUAL_RANGE); - return sysno - FAKE_OFFSET; + if (p) + { + if (p->index_BFile) + bf_close(p->index_BFile); + xfree(p); + } } -static int rec_read_head(recindex_t p, void *buf) +int recindex_read_head(recindex_t p, void *buf) { return bf_read(p->index_BFile, 0, 0, 0, buf); } -static const char *recindex_get_fname(recindex_t p) +const char *recindex_get_fname(recindex_t p) { return p->index_fname; } -static ZEBRA_RES rec_write_head(recindex_t p, const void *buf, size_t len) +ZEBRA_RES recindex_write_head(recindex_t p, const void *buf, size_t len) { int r; @@ -99,20 +86,8 @@ static ZEBRA_RES rec_write_head(recindex_t p, const void *buf, size_t len) return ZEBRA_OK; } -static void rec_tmp_expand(Records p, int size) -{ - if (p->tmp_size < size + 2048 || - p->tmp_size < p->head.block_size[REC_BLOCK_TYPES-1]*2) - { - xfree(p->tmp_buf); - p->tmp_size = size + (int) - (p->head.block_size[REC_BLOCK_TYPES-1])*2 + 2048; - p->tmp_buf = (char *) xmalloc(p->tmp_size); - } -} - -static int read_indx(recindex_t p, zint sysno, void *buf, int itemsize, - int ignoreError) +int recindex_read_indx(recindex_t p, zint sysno, void *buf, int itemsize, + int ignoreError) { int r; zint pos = (sysno-1)*itemsize; @@ -134,7 +109,7 @@ static int read_indx(recindex_t p, zint sysno, void *buf, int itemsize, return r; } -static void write_indx(recindex_t p, zint sysno, void *buf, int itemsize) +void recindex_write_indx(recindex_t p, zint sysno, void *buf, int itemsize) { zint pos = (sysno-1)*itemsize; int off = CAST_ZINT_TO_INT(pos%RIDX_CHUNK); @@ -149,871 +124,6 @@ static void write_indx(recindex_t p, zint sysno, void *buf, int itemsize) (char*) buf + sz1); } -static ZEBRA_RES rec_release_blocks(Records p, zint sysno) -{ - struct record_index_entry entry; - zint freeblock; - char block_and_ref[sizeof(zint) + sizeof(short)]; - int dst_type; - int first = 1; - - if (read_indx(p->recindex, sysno, &entry, sizeof(entry), 1) != 1) - return ZEBRA_FAIL; - - freeblock = entry.next; - assert(freeblock > 0); - dst_type = CAST_ZINT_TO_INT(freeblock & 7); - assert(dst_type < REC_BLOCK_TYPES); - freeblock = freeblock / 8; - while (freeblock) - { - if (bf_read(p->data_BFile[dst_type], freeblock, 0, - first ? sizeof(block_and_ref) : sizeof(zint), - block_and_ref) != 1) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in rec_del_single"); - return ZEBRA_FAIL; - } - if (first) - { - short ref; - memcpy(&ref, block_and_ref + sizeof(freeblock), sizeof(ref)); - --ref; - memcpy(block_and_ref + sizeof(freeblock), &ref, sizeof(ref)); - if (ref) - { - if (bf_write(p->data_BFile[dst_type], freeblock, 0, - sizeof(block_and_ref), block_and_ref)) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in rec_del_single"); - return ZEBRA_FAIL; - } - return ZEBRA_OK; - } - first = 0; - } - - if (bf_write(p->data_BFile[dst_type], freeblock, 0, sizeof(freeblock), - &p->head.block_free[dst_type])) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in rec_del_single"); - return ZEBRA_FAIL; - } - p->head.block_free[dst_type] = freeblock; - memcpy(&freeblock, block_and_ref, sizeof(freeblock)); - - p->head.block_used[dst_type]--; - } - p->head.total_bytes -= entry.size; - return ZEBRA_OK; -} - -static ZEBRA_RES rec_delete_single(Records p, Record rec) -{ - struct record_index_entry entry; - - /* all data in entry must be reset, since it's written verbatim */ - memset(&entry, '\0', sizeof(entry)); - if (rec_release_blocks(p, rec_sysno_to_int(rec->sysno)) != ZEBRA_OK) - return ZEBRA_FAIL; - - entry.next = p->head.index_free; - entry.size = 0; - p->head.index_free = rec_sysno_to_int(rec->sysno); - write_indx(p->recindex, rec_sysno_to_int(rec->sysno), &entry, sizeof(entry)); - return ZEBRA_OK; -} - -static ZEBRA_RES rec_write_tmp_buf(Records p, int size, zint *sysnos) -{ - struct record_index_entry entry; - int no_written = 0; - char *cptr = p->tmp_buf; - zint block_prev = -1, block_free; - int dst_type = 0; - int i; - - /* all data in entry must be reset, since it's written verbatim */ - memset(&entry, '\0', sizeof(entry)); - - for (i = 1; i= p->head.block_move[i]) - dst_type = i; - while (no_written < size) - { - block_free = p->head.block_free[dst_type]; - if (block_free) - { - if (bf_read(p->data_BFile[dst_type], - block_free, 0, sizeof(*p->head.block_free), - &p->head.block_free[dst_type]) != 1) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at free block " - ZINT_FORMAT, - p->data_fname[dst_type], block_free); - return ZEBRA_FAIL; - } - } - else - block_free = p->head.block_last[dst_type]++; - if (block_prev == -1) - { - entry.next = block_free*8 + dst_type; - entry.size = size; - p->head.total_bytes += size; - while (*sysnos > 0) - { - write_indx(p->recindex, *sysnos, &entry, sizeof(entry)); - sysnos++; - } - } - else - { - memcpy(cptr, &block_free, sizeof(block_free)); - bf_write(p->data_BFile[dst_type], block_prev, 0, 0, cptr); - cptr = p->tmp_buf + no_written; - } - block_prev = block_free; - no_written += CAST_ZINT_TO_INT(p->head.block_size[dst_type]) - - sizeof(zint); - p->head.block_used[dst_type]++; - } - assert(block_prev != -1); - block_free = 0; - memcpy(cptr, &block_free, sizeof(block_free)); - bf_write(p->data_BFile[dst_type], block_prev, 0, - sizeof(block_free) + (p->tmp_buf+size) - cptr, cptr); - return ZEBRA_OK; -} - -recindex_t recindex_open(BFiles bfs, int rw) -{ - recindex_t p = xmalloc(sizeof(*p)); - p->index_fname = "reci"; - p->index_BFile = bf_open(bfs, p->index_fname, RIDX_CHUNK, rw); - if (p->index_BFile == NULL) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s", p->index_fname); - xfree(p); - return 0; - } - return p; -} - -void recindex_close(recindex_t p) -{ - if (p) - { - if (p->index_BFile) - bf_close(p->index_BFile); - xfree(p); - } -} - - -Records rec_open(BFiles bfs, int rw, int compression_method) -{ - Records p; - int i, r; - int version; - ZEBRA_RES ret = ZEBRA_OK; - - p = (Records) xmalloc(sizeof(*p)); - memset(&p->head, '\0', sizeof(p->head)); - p->compression_method = compression_method; - p->rw = rw; - p->tmp_size = 1024; - p->recindex = recindex_open(bfs, rw); - p->tmp_buf = (char *) xmalloc(p->tmp_size); - r = rec_read_head(p->recindex, p->tmp_buf); - switch (r) - { - case 0: - memcpy(p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic)); - sprintf(p->head.version, "%3d", REC_VERSION); - p->head.index_free = 0; - p->head.index_last = 1; - p->head.no_records = 0; - p->head.total_bytes = 0; - for (i = 0; ihead.block_free[i] = 0; - p->head.block_last[i] = 1; - p->head.block_used[i] = 0; - } - p->head.block_size[0] = 128; - p->head.block_move[0] = 0; - for (i = 1; ihead.block_size[i] = p->head.block_size[i-1] * 4; - p->head.block_move[i] = p->head.block_size[i] * 24; - } - if (rw) - { - if (rec_write_head(p->recindex, &p->head, sizeof(p->head)) != ZEBRA_OK) - ret = ZEBRA_FAIL; - } - break; - case 1: - memcpy(&p->head, p->tmp_buf, sizeof(p->head)); - if (memcmp(p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic))) - { - yaz_log(YLOG_FATAL, "file %s has bad format", - recindex_get_fname(p->recindex)); - ret = ZEBRA_FAIL; - } - version = atoi(p->head.version); - if (version != REC_VERSION) - { - yaz_log(YLOG_FATAL, "file %s is version %d, but version" - " %d is required", - recindex_get_fname(p->recindex), version, REC_VERSION); - ret = ZEBRA_FAIL; - } - break; - } - for (i = 0; idata_fname[i] = (char *) xmalloc(strlen(str)+1); - strcpy(p->data_fname[i], str); - p->data_BFile[i] = NULL; - } - for (i = 0; idata_BFile[i] = - bf_open(bfs, p->data_fname[i], - CAST_ZINT_TO_INT(p->head.block_size[i]), rw))) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "bf_open %s", p->data_fname[i]); - ret = ZEBRA_FAIL; - break; - } - } - p->cache_max = 400; - p->cache_cur = 0; - p->record_cache = (struct record_cache_entry *) - xmalloc(sizeof(*p->record_cache)*p->cache_max); - zebra_mutex_init(&p->mutex); - if (ret == ZEBRA_FAIL) - rec_close(&p); - return p; -} - -static void rec_encode_unsigned(unsigned n, unsigned char *buf, int *len) -{ - (*len) = 0; - while (n > 127) - { - buf[*len] = 128 + (n & 127); - n = n >> 7; - (*len)++; - } - buf[*len] = n; - (*len)++; -} - -static void rec_decode_unsigned(unsigned *np, unsigned char *buf, int *len) -{ - unsigned n = 0; - unsigned w = 1; - (*len) = 0; - - while (buf[*len] > 127) - { - n += w*(buf[*len] & 127); - w = w << 7; - (*len)++; - } - n += w * buf[*len]; - (*len)++; - *np = n; -} - -static void rec_encode_zint(zint n, unsigned char *buf, int *len) -{ - (*len) = 0; - while (n > 127) - { - buf[*len] = (unsigned) (128 + (n & 127)); - n = n >> 7; - (*len)++; - } - buf[*len] = (unsigned) n; - (*len)++; -} - -static void rec_decode_zint(zint *np, unsigned char *buf, int *len) -{ - zint n = 0; - zint w = 1; - (*len) = 0; - - while (buf[*len] > 127) - { - n += w*(buf[*len] & 127); - w = w << 7; - (*len)++; - } - n += w * buf[*len]; - (*len)++; - *np = n; -} - -static void rec_cache_flush_block1(Records p, Record rec, Record last_rec, - char **out_buf, int *out_size, - int *out_offset) -{ - int i; - int len; - - for (i = 0; isize[i]) + 20 > *out_size) - { - int new_size = *out_offset + rec->size[i] + 65536; - char *np = (char *) xmalloc(new_size); - if (*out_offset) - memcpy(np, *out_buf, *out_offset); - xfree(*out_buf); - *out_size = new_size; - *out_buf = np; - } - if (i == 0) - { - rec_encode_zint(rec_sysno_to_int(rec->sysno), - (unsigned char *) *out_buf + *out_offset, &len); - (*out_offset) += len; - } - if (rec->size[i] == 0) - { - rec_encode_unsigned(1, (unsigned char *) *out_buf + *out_offset, - &len); - (*out_offset) += len; - } - else if (last_rec && rec->size[i] == last_rec->size[i] && - !memcmp(rec->info[i], last_rec->info[i], rec->size[i])) - { - rec_encode_unsigned(0, (unsigned char *) *out_buf + *out_offset, - &len); - (*out_offset) += len; - } - else - { - rec_encode_unsigned(rec->size[i]+1, - (unsigned char *) *out_buf + *out_offset, - &len); - (*out_offset) += len; - memcpy(*out_buf + *out_offset, rec->info[i], rec->size[i]); - (*out_offset) += rec->size[i]; - } - } -} - -static ZEBRA_RES rec_write_multiple(Records p, int saveCount) -{ - int i; - short ref_count = 0; - char compression_method; - Record last_rec = 0; - int out_size = 1000; - int out_offset = 0; - char *out_buf = (char *) xmalloc(out_size); - zint *sysnos = (zint *) xmalloc(sizeof(*sysnos) * (p->cache_cur + 1)); - zint *sysnop = sysnos; - ZEBRA_RES ret = ZEBRA_OK; - - for (i = 0; icache_cur - saveCount; i++) - { - struct record_cache_entry *e = p->record_cache + i; - switch (e->flag) - { - case recordFlagNew: - rec_cache_flush_block1(p, e->rec, last_rec, &out_buf, - &out_size, &out_offset); - *sysnop++ = rec_sysno_to_int(e->rec->sysno); - ref_count++; - e->flag = recordFlagNop; - last_rec = e->rec; - break; - case recordFlagWrite: - if (rec_release_blocks(p, rec_sysno_to_int(e->rec->sysno)) - != ZEBRA_OK) - ret = ZEBRA_FAIL; - - rec_cache_flush_block1(p, e->rec, last_rec, &out_buf, - &out_size, &out_offset); - *sysnop++ = rec_sysno_to_int(e->rec->sysno); - ref_count++; - e->flag = recordFlagNop; - last_rec = e->rec; - break; - case recordFlagDelete: - if (rec_delete_single(p, e->rec) != ZEBRA_OK) - ret = ZEBRA_FAIL; - - e->flag = recordFlagNop; - break; - default: - break; - } - } - - *sysnop = -1; - if (ref_count) - { - unsigned int csize = 0; /* indicate compression "not performed yet" */ - compression_method = p->compression_method; - switch (compression_method) - { - case REC_COMPRESS_BZIP2: -#if HAVE_BZLIB_H - csize = out_offset + (out_offset >> 6) + 620; - rec_tmp_expand(p, csize); -#ifdef BZ_CONFIG_ERROR - i = BZ2_bzBuffToBuffCompress -#else - i = bzBuffToBuffCompress -#endif - (p->tmp_buf+sizeof(zint)+sizeof(short)+ - sizeof(char), - &csize, out_buf, out_offset, 1, 0, 30); - if (i != BZ_OK) - { - yaz_log(YLOG_WARN, "bzBuffToBuffCompress error code=%d", i); - csize = 0; - } - yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count, out_offset, - csize); -#endif - break; - case REC_COMPRESS_NONE: - break; - } - if (!csize) - { - /* either no compression or compression not supported ... */ - csize = out_offset; - rec_tmp_expand(p, csize); - memcpy(p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char), - out_buf, out_offset); - csize = out_offset; - compression_method = REC_COMPRESS_NONE; - } - memcpy(p->tmp_buf + sizeof(zint), &ref_count, sizeof(ref_count)); - memcpy(p->tmp_buf + sizeof(zint)+sizeof(short), - &compression_method, sizeof(compression_method)); - - /* -------- compression */ - if (rec_write_tmp_buf(p, csize + sizeof(short) + sizeof(char), sysnos) - != ZEBRA_OK) - ret = ZEBRA_FAIL; - } - xfree(out_buf); - xfree(sysnos); - return ret; -} - -static ZEBRA_RES rec_cache_flush(Records p, int saveCount) -{ - int i, j; - ZEBRA_RES ret; - - if (saveCount >= p->cache_cur) - saveCount = 0; - - ret = rec_write_multiple(p, saveCount); - - for (i = 0; icache_cur - saveCount; i++) - { - struct record_cache_entry *e = p->record_cache + i; - rec_free(&e->rec); - } - /* i still being used ... */ - for (j = 0; jrecord_cache+j, p->record_cache+i, - sizeof(*p->record_cache)); - p->cache_cur = saveCount; - return ret; -} - -static Record *rec_cache_lookup(Records p, zint sysno, - enum recordCacheFlag flag) -{ - int i; - for (i = 0; icache_cur; i++) - { - struct record_cache_entry *e = p->record_cache + i; - if (e->rec->sysno == sysno) - { - if (flag != recordFlagNop && e->flag == recordFlagNop) - e->flag = flag; - return &e->rec; - } - } - return NULL; -} - -static ZEBRA_RES rec_cache_insert(Records p, Record rec, enum recordCacheFlag flag) -{ - struct record_cache_entry *e; - ZEBRA_RES ret = ZEBRA_OK; - - if (p->cache_cur == p->cache_max) - ret = rec_cache_flush(p, 1); - else if (p->cache_cur > 0) - { - int i, j; - int used = 0; - for (i = 0; icache_cur; i++) - { - Record r = (p->record_cache + i)->rec; - for (j = 0; jsize[j]; - } - if (used > 90000) - ret = rec_cache_flush(p, 1); - } - assert(p->cache_cur < p->cache_max); - - e = p->record_cache + (p->cache_cur)++; - e->flag = flag; - e->rec = rec_cp(rec); - return ret; -} - -ZEBRA_RES rec_close(Records *pp) -{ - Records p = *pp; - int i; - ZEBRA_RES ret = ZEBRA_OK; - - if (!p) - return ret; - - zebra_mutex_destroy(&p->mutex); - if (rec_cache_flush(p, 0) != ZEBRA_OK) - ret = ZEBRA_FAIL; - - xfree(p->record_cache); - - if (p->rw) - { - if (rec_write_head(p->recindex, &p->head, sizeof(p->head)) != ZEBRA_OK) - ret = ZEBRA_FAIL; - } - - recindex_close(p->recindex); - - for (i = 0; idata_BFile[i]) - bf_close(p->data_BFile[i]); - xfree(p->data_fname[i]); - } - xfree(p->tmp_buf); - xfree(p); - *pp = NULL; - return ret; -} - -static Record rec_get_int(Records p, zint sysno) -{ - int i, in_size, r; - Record rec, *recp; - struct record_index_entry entry; - zint freeblock; - int dst_type; - char *nptr, *cptr; - char *in_buf = 0; - char *bz_buf = 0; -#if HAVE_BZLIB_H - unsigned int bz_size; -#endif - char compression_method; - - assert(sysno > 0); - assert(p); - - if ((recp = rec_cache_lookup(p, sysno, recordFlagNop))) - return rec_cp(*recp); - - if (read_indx(p->recindex, rec_sysno_to_int(sysno), &entry, sizeof(entry), 1) < 1) - return NULL; /* record is not there! */ - - if (!entry.size) - return NULL; /* record is deleted */ - - dst_type = (int) (entry.next & 7); - assert(dst_type < REC_BLOCK_TYPES); - freeblock = entry.next / 8; - - assert(freeblock > 0); - - rec_tmp_expand(p, entry.size); - - cptr = p->tmp_buf; - r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr); - if (r < 0) - return 0; - memcpy(&freeblock, cptr, sizeof(freeblock)); - - while (freeblock) - { - zint tmp; - - cptr += p->head.block_size[dst_type] - sizeof(freeblock); - - memcpy(&tmp, cptr, sizeof(tmp)); - r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr); - if (r < 0) - return 0; - memcpy(&freeblock, cptr, sizeof(freeblock)); - memcpy(cptr, &tmp, sizeof(tmp)); - } - - rec = (Record) xmalloc(sizeof(*rec)); - rec->sysno = sysno; - memcpy(&compression_method, p->tmp_buf + sizeof(zint) + sizeof(short), - sizeof(compression_method)); - in_buf = p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char); - in_size = entry.size - sizeof(short) - sizeof(char); - switch (compression_method) - { - case REC_COMPRESS_BZIP2: -#if HAVE_BZLIB_H - bz_size = entry.size * 20 + 100; - while (1) - { - bz_buf = (char *) xmalloc(bz_size); -#ifdef BZ_CONFIG_ERROR - i = BZ2_bzBuffToBuffDecompress -#else - i = bzBuffToBuffDecompress -#endif - (bz_buf, &bz_size, in_buf, in_size, 0, 0); - yaz_log(YLOG_LOG, "decompress %5d %5d", in_size, bz_size); - if (i == BZ_OK) - break; - yaz_log(YLOG_LOG, "failed"); - xfree(bz_buf); - bz_size *= 2; - } - in_buf = bz_buf; - in_size = bz_size; -#else - yaz_log(YLOG_FATAL, "cannot decompress record(s) in BZIP2 format"); - return 0; -#endif - break; - case REC_COMPRESS_NONE: - break; - } - for (i = 0; iinfo[i] = 0; - - nptr = in_buf; /* skip ref count */ - while (nptr < in_buf + in_size) - { - zint this_sysno; - int len; - rec_decode_zint(&this_sysno, (unsigned char *) nptr, &len); - nptr += len; - - for (i = 0; i < REC_NO_INFO; i++) - { - unsigned int this_size; - rec_decode_unsigned(&this_size, (unsigned char *) nptr, &len); - nptr += len; - - if (this_size == 0) - continue; - rec->size[i] = this_size-1; - - if (rec->size[i]) - { - rec->info[i] = nptr; - nptr += rec->size[i]; - } - else - rec->info[i] = NULL; - } - if (this_sysno == rec_sysno_to_int(sysno)) - break; - } - for (i = 0; iinfo[i] && rec->size[i]) - { - char *np = xmalloc(rec->size[i]+1); - memcpy(np, rec->info[i], rec->size[i]); - np[rec->size[i]] = '\0'; - rec->info[i] = np; - } - else - { - assert(rec->info[i] == 0); - assert(rec->size[i] == 0); - } - } - xfree(bz_buf); - if (rec_cache_insert(p, rec, recordFlagNop) != ZEBRA_OK) - return 0; - return rec; -} - -Record rec_get(Records p, zint sysno) -{ - Record rec; - zebra_mutex_lock(&p->mutex); - - rec = rec_get_int(p, sysno); - zebra_mutex_unlock(&p->mutex); - return rec; -} - -Record rec_get_root(Records p) -{ - return rec_get(p, rec_sysno_to_ext(1)); -} - -static Record rec_new_int(Records p) -{ - int i; - zint sysno; - Record rec; - - assert(p); - rec = (Record) xmalloc(sizeof(*rec)); - if (1 || p->head.index_free == 0) - sysno = (p->head.index_last)++; - else - { - struct record_index_entry entry; - - if (read_indx(p->recindex, p->head.index_free, &entry, sizeof(entry), 0) < 1) - { - xfree(rec); - return 0; - } - sysno = p->head.index_free; - p->head.index_free = entry.next; - } - (p->head.no_records)++; - rec->sysno = rec_sysno_to_ext(sysno); - for (i = 0; i < REC_NO_INFO; i++) - { - rec->info[i] = NULL; - rec->size[i] = 0; - } - rec_cache_insert(p, rec, recordFlagNew); - return rec; -} - -Record rec_new(Records p) -{ - Record rec; - zebra_mutex_lock(&p->mutex); - - rec = rec_new_int(p); - zebra_mutex_unlock(&p->mutex); - return rec; -} - -ZEBRA_RES rec_del(Records p, Record *recpp) -{ - Record *recp; - ZEBRA_RES ret = ZEBRA_OK; - - zebra_mutex_lock(&p->mutex); - (p->head.no_records)--; - if ((recp = rec_cache_lookup(p, (*recpp)->sysno, recordFlagDelete))) - { - rec_free(recp); - *recp = *recpp; - } - else - { - ret = rec_cache_insert(p, *recpp, recordFlagDelete); - rec_free(recpp); - } - zebra_mutex_unlock(&p->mutex); - *recpp = NULL; - return ret; -} - -ZEBRA_RES rec_put(Records p, Record *recpp) -{ - Record *recp; - ZEBRA_RES ret = ZEBRA_OK; - - zebra_mutex_lock(&p->mutex); - if ((recp = rec_cache_lookup(p, (*recpp)->sysno, recordFlagWrite))) - { - rec_free(recp); - *recp = *recpp; - } - else - { - ret = rec_cache_insert(p, *recpp, recordFlagWrite); - rec_free(recpp); - } - zebra_mutex_unlock(&p->mutex); - *recpp = NULL; - return ret; -} - -void rec_free(Record *recpp) -{ - int i; - - if (!*recpp) - return ; - for (i = 0; i < REC_NO_INFO; i++) - xfree((*recpp)->info[i]); - xfree(*recpp); - *recpp = NULL; -} - -Record rec_cp(Record rec) -{ - Record n; - int i; - - n = (Record) xmalloc(sizeof(*n)); - n->sysno = rec->sysno; - for (i = 0; i < REC_NO_INFO; i++) - if (!rec->info[i]) - { - n->info[i] = NULL; - n->size[i] = 0; - } - else - { - n->size[i] = rec->size[i]; - n->info[i] = (char *) xmalloc(rec->size[i]+1); - memcpy(n->info[i], rec->info[i], rec->size[i]); - n->info[i][rec->size[i]] = '\0'; - } - return n; -} - - -char *rec_strdup(const char *s, size_t *len) -{ - char *p; - - if (!s) - { - *len = 0; - return NULL; - } - *len = strlen(s)+1; - p = (char *) xmalloc(*len); - strcpy(p, s); - return p; -} /* * Local variables: diff --git a/index/recindex.h b/index/recindex.h index fce488f..b92bea7 100644 --- a/index/recindex.h +++ b/index/recindex.h @@ -1,4 +1,4 @@ -/* $Id: recindex.h,v 1.31 2007-01-15 20:08:25 adam Exp $ +/* $Id: recindex.h,v 1.32 2007-11-23 13:52:52 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -107,6 +107,17 @@ enum { recInfo_sortKeys }; +typedef struct recindex *recindex_t; + +recindex_t recindex_open(BFiles bfs, int rw); +void recindex_close(recindex_t p); +int recindex_read_head(recindex_t p, void *buf); +const char *recindex_get_fname(recindex_t p); +ZEBRA_RES recindex_write_head(recindex_t p, const void *buf, size_t len); +int recindex_read_indx(recindex_t p, zint sysno, void *buf, int itemsize, + int ignoreError); +void recindex_write_indx(recindex_t p, zint sysno, void *buf, int itemsize); + YAZ_END_CDECL #endif /* diff --git a/index/recindxp.h b/index/recindxp.h index 13452b4..a38ceb4 100644 --- a/index/recindxp.h +++ b/index/recindxp.h @@ -1,4 +1,4 @@ -/* $Id: recindxp.h,v 1.22 2007-11-23 13:11:08 adam Exp $ +/* $Id: recindxp.h,v 1.23 2007-11-23 13:52:52 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -30,13 +30,6 @@ YAZ_BEGIN_CDECL #define REC_HEAD_MAGIC "recindex" #define REC_VERSION 5 -struct recindex { - char *index_fname; - BFile index_BFile; -}; - -typedef struct recindex *recindex_t; - struct records_info { int rw; int compression_method; diff --git a/index/records.c b/index/records.c new file mode 100644 index 0000000..4a24e35 --- /dev/null +++ b/index/records.c @@ -0,0 +1,934 @@ +/* $Id: records.c,v 1.1 2007-11-23 13:52:52 adam Exp $ + Copyright (C) 1995-2007 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +/* + * Format of first block + * next (8 bytes) + * ref_count (2 bytes) + * block (500 bytes) + * + * Format of subsequent blocks + * next (8 bytes) + * block (502 bytes) + * + * Format of each record + * sysno + * (length, data) - pairs + * length = 0 if same as previous + */ +#include +#include +#include +#include + +#include +#include "recindxp.h" + +#if HAVE_BZLIB_H +#include +#endif + +/* Modify argument to if below: 1=normal, 0=sysno testing */ +#if 1 +/* If this is used sysno are not converted (no testing) */ +#define FAKE_OFFSET 0 +#define USUAL_RANGE 6000000000LL + +#else +/* Use a fake > 2^32 offset so we can test for proper 64-bit handling */ +#define FAKE_OFFSET 6000000000LL +#define USUAL_RANGE 2000000000LL +#endif + +static zint rec_sysno_to_ext(zint sysno) +{ + assert(sysno >= 0 && sysno <= USUAL_RANGE); + return sysno + FAKE_OFFSET; +} + +zint rec_sysno_to_int(zint sysno) +{ + assert(sysno >= FAKE_OFFSET && sysno <= FAKE_OFFSET + USUAL_RANGE); + return sysno - FAKE_OFFSET; +} + +static void rec_tmp_expand(Records p, int size) +{ + if (p->tmp_size < size + 2048 || + p->tmp_size < p->head.block_size[REC_BLOCK_TYPES-1]*2) + { + xfree(p->tmp_buf); + p->tmp_size = size + (int) + (p->head.block_size[REC_BLOCK_TYPES-1])*2 + 2048; + p->tmp_buf = (char *) xmalloc(p->tmp_size); + } +} + +static ZEBRA_RES rec_release_blocks(Records p, zint sysno) +{ + struct record_index_entry entry; + zint freeblock; + char block_and_ref[sizeof(zint) + sizeof(short)]; + int dst_type; + int first = 1; + + if (recindex_read_indx(p->recindex, sysno, &entry, sizeof(entry), 1) != 1) + return ZEBRA_FAIL; + + freeblock = entry.next; + assert(freeblock > 0); + dst_type = CAST_ZINT_TO_INT(freeblock & 7); + assert(dst_type < REC_BLOCK_TYPES); + freeblock = freeblock / 8; + while (freeblock) + { + if (bf_read(p->data_BFile[dst_type], freeblock, 0, + first ? sizeof(block_and_ref) : sizeof(zint), + block_and_ref) != 1) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in rec_del_single"); + return ZEBRA_FAIL; + } + if (first) + { + short ref; + memcpy(&ref, block_and_ref + sizeof(freeblock), sizeof(ref)); + --ref; + memcpy(block_and_ref + sizeof(freeblock), &ref, sizeof(ref)); + if (ref) + { + if (bf_write(p->data_BFile[dst_type], freeblock, 0, + sizeof(block_and_ref), block_and_ref)) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in rec_del_single"); + return ZEBRA_FAIL; + } + return ZEBRA_OK; + } + first = 0; + } + + if (bf_write(p->data_BFile[dst_type], freeblock, 0, sizeof(freeblock), + &p->head.block_free[dst_type])) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in rec_del_single"); + return ZEBRA_FAIL; + } + p->head.block_free[dst_type] = freeblock; + memcpy(&freeblock, block_and_ref, sizeof(freeblock)); + + p->head.block_used[dst_type]--; + } + p->head.total_bytes -= entry.size; + return ZEBRA_OK; +} + +static ZEBRA_RES rec_delete_single(Records p, Record rec) +{ + struct record_index_entry entry; + + /* all data in entry must be reset, since it's written verbatim */ + memset(&entry, '\0', sizeof(entry)); + if (rec_release_blocks(p, rec_sysno_to_int(rec->sysno)) != ZEBRA_OK) + return ZEBRA_FAIL; + + entry.next = p->head.index_free; + entry.size = 0; + p->head.index_free = rec_sysno_to_int(rec->sysno); + recindex_write_indx(p->recindex, rec_sysno_to_int(rec->sysno), &entry, sizeof(entry)); + return ZEBRA_OK; +} + +static ZEBRA_RES rec_write_tmp_buf(Records p, int size, zint *sysnos) +{ + struct record_index_entry entry; + int no_written = 0; + char *cptr = p->tmp_buf; + zint block_prev = -1, block_free; + int dst_type = 0; + int i; + + /* all data in entry must be reset, since it's written verbatim */ + memset(&entry, '\0', sizeof(entry)); + + for (i = 1; i= p->head.block_move[i]) + dst_type = i; + while (no_written < size) + { + block_free = p->head.block_free[dst_type]; + if (block_free) + { + if (bf_read(p->data_BFile[dst_type], + block_free, 0, sizeof(*p->head.block_free), + &p->head.block_free[dst_type]) != 1) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at free block " + ZINT_FORMAT, + p->data_fname[dst_type], block_free); + return ZEBRA_FAIL; + } + } + else + block_free = p->head.block_last[dst_type]++; + if (block_prev == -1) + { + entry.next = block_free*8 + dst_type; + entry.size = size; + p->head.total_bytes += size; + while (*sysnos > 0) + { + recindex_write_indx(p->recindex, *sysnos, &entry, sizeof(entry)); + sysnos++; + } + } + else + { + memcpy(cptr, &block_free, sizeof(block_free)); + bf_write(p->data_BFile[dst_type], block_prev, 0, 0, cptr); + cptr = p->tmp_buf + no_written; + } + block_prev = block_free; + no_written += CAST_ZINT_TO_INT(p->head.block_size[dst_type]) + - sizeof(zint); + p->head.block_used[dst_type]++; + } + assert(block_prev != -1); + block_free = 0; + memcpy(cptr, &block_free, sizeof(block_free)); + bf_write(p->data_BFile[dst_type], block_prev, 0, + sizeof(block_free) + (p->tmp_buf+size) - cptr, cptr); + return ZEBRA_OK; +} + +Records rec_open(BFiles bfs, int rw, int compression_method) +{ + Records p; + int i, r; + int version; + ZEBRA_RES ret = ZEBRA_OK; + + p = (Records) xmalloc(sizeof(*p)); + memset(&p->head, '\0', sizeof(p->head)); + p->compression_method = compression_method; + p->rw = rw; + p->tmp_size = 1024; + p->recindex = recindex_open(bfs, rw); + p->tmp_buf = (char *) xmalloc(p->tmp_size); + r = recindex_read_head(p->recindex, p->tmp_buf); + switch (r) + { + case 0: + memcpy(p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic)); + sprintf(p->head.version, "%3d", REC_VERSION); + p->head.index_free = 0; + p->head.index_last = 1; + p->head.no_records = 0; + p->head.total_bytes = 0; + for (i = 0; ihead.block_free[i] = 0; + p->head.block_last[i] = 1; + p->head.block_used[i] = 0; + } + p->head.block_size[0] = 128; + p->head.block_move[0] = 0; + for (i = 1; ihead.block_size[i] = p->head.block_size[i-1] * 4; + p->head.block_move[i] = p->head.block_size[i] * 24; + } + if (rw) + { + if (recindex_write_head(p->recindex, &p->head, sizeof(p->head)) != ZEBRA_OK) + ret = ZEBRA_FAIL; + } + break; + case 1: + memcpy(&p->head, p->tmp_buf, sizeof(p->head)); + if (memcmp(p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic))) + { + yaz_log(YLOG_FATAL, "file %s has bad format", + recindex_get_fname(p->recindex)); + ret = ZEBRA_FAIL; + } + version = atoi(p->head.version); + if (version != REC_VERSION) + { + yaz_log(YLOG_FATAL, "file %s is version %d, but version" + " %d is required", + recindex_get_fname(p->recindex), version, REC_VERSION); + ret = ZEBRA_FAIL; + } + break; + } + for (i = 0; idata_fname[i] = (char *) xmalloc(strlen(str)+1); + strcpy(p->data_fname[i], str); + p->data_BFile[i] = NULL; + } + for (i = 0; idata_BFile[i] = + bf_open(bfs, p->data_fname[i], + CAST_ZINT_TO_INT(p->head.block_size[i]), rw))) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "bf_open %s", p->data_fname[i]); + ret = ZEBRA_FAIL; + break; + } + } + p->cache_max = 400; + p->cache_cur = 0; + p->record_cache = (struct record_cache_entry *) + xmalloc(sizeof(*p->record_cache)*p->cache_max); + zebra_mutex_init(&p->mutex); + if (ret == ZEBRA_FAIL) + rec_close(&p); + return p; +} + +static void rec_encode_unsigned(unsigned n, unsigned char *buf, int *len) +{ + (*len) = 0; + while (n > 127) + { + buf[*len] = 128 + (n & 127); + n = n >> 7; + (*len)++; + } + buf[*len] = n; + (*len)++; +} + +static void rec_decode_unsigned(unsigned *np, unsigned char *buf, int *len) +{ + unsigned n = 0; + unsigned w = 1; + (*len) = 0; + + while (buf[*len] > 127) + { + n += w*(buf[*len] & 127); + w = w << 7; + (*len)++; + } + n += w * buf[*len]; + (*len)++; + *np = n; +} + +static void rec_encode_zint(zint n, unsigned char *buf, int *len) +{ + (*len) = 0; + while (n > 127) + { + buf[*len] = (unsigned) (128 + (n & 127)); + n = n >> 7; + (*len)++; + } + buf[*len] = (unsigned) n; + (*len)++; +} + +static void rec_decode_zint(zint *np, unsigned char *buf, int *len) +{ + zint n = 0; + zint w = 1; + (*len) = 0; + + while (buf[*len] > 127) + { + n += w*(buf[*len] & 127); + w = w << 7; + (*len)++; + } + n += w * buf[*len]; + (*len)++; + *np = n; +} + +static void rec_cache_flush_block1(Records p, Record rec, Record last_rec, + char **out_buf, int *out_size, + int *out_offset) +{ + int i; + int len; + + for (i = 0; isize[i]) + 20 > *out_size) + { + int new_size = *out_offset + rec->size[i] + 65536; + char *np = (char *) xmalloc(new_size); + if (*out_offset) + memcpy(np, *out_buf, *out_offset); + xfree(*out_buf); + *out_size = new_size; + *out_buf = np; + } + if (i == 0) + { + rec_encode_zint(rec_sysno_to_int(rec->sysno), + (unsigned char *) *out_buf + *out_offset, &len); + (*out_offset) += len; + } + if (rec->size[i] == 0) + { + rec_encode_unsigned(1, (unsigned char *) *out_buf + *out_offset, + &len); + (*out_offset) += len; + } + else if (last_rec && rec->size[i] == last_rec->size[i] && + !memcmp(rec->info[i], last_rec->info[i], rec->size[i])) + { + rec_encode_unsigned(0, (unsigned char *) *out_buf + *out_offset, + &len); + (*out_offset) += len; + } + else + { + rec_encode_unsigned(rec->size[i]+1, + (unsigned char *) *out_buf + *out_offset, + &len); + (*out_offset) += len; + memcpy(*out_buf + *out_offset, rec->info[i], rec->size[i]); + (*out_offset) += rec->size[i]; + } + } +} + +static ZEBRA_RES rec_write_multiple(Records p, int saveCount) +{ + int i; + short ref_count = 0; + char compression_method; + Record last_rec = 0; + int out_size = 1000; + int out_offset = 0; + char *out_buf = (char *) xmalloc(out_size); + zint *sysnos = (zint *) xmalloc(sizeof(*sysnos) * (p->cache_cur + 1)); + zint *sysnop = sysnos; + ZEBRA_RES ret = ZEBRA_OK; + + for (i = 0; icache_cur - saveCount; i++) + { + struct record_cache_entry *e = p->record_cache + i; + switch (e->flag) + { + case recordFlagNew: + rec_cache_flush_block1(p, e->rec, last_rec, &out_buf, + &out_size, &out_offset); + *sysnop++ = rec_sysno_to_int(e->rec->sysno); + ref_count++; + e->flag = recordFlagNop; + last_rec = e->rec; + break; + case recordFlagWrite: + if (rec_release_blocks(p, rec_sysno_to_int(e->rec->sysno)) + != ZEBRA_OK) + ret = ZEBRA_FAIL; + + rec_cache_flush_block1(p, e->rec, last_rec, &out_buf, + &out_size, &out_offset); + *sysnop++ = rec_sysno_to_int(e->rec->sysno); + ref_count++; + e->flag = recordFlagNop; + last_rec = e->rec; + break; + case recordFlagDelete: + if (rec_delete_single(p, e->rec) != ZEBRA_OK) + ret = ZEBRA_FAIL; + + e->flag = recordFlagNop; + break; + default: + break; + } + } + + *sysnop = -1; + if (ref_count) + { + unsigned int csize = 0; /* indicate compression "not performed yet" */ + compression_method = p->compression_method; + switch (compression_method) + { + case REC_COMPRESS_BZIP2: +#if HAVE_BZLIB_H + csize = out_offset + (out_offset >> 6) + 620; + rec_tmp_expand(p, csize); +#ifdef BZ_CONFIG_ERROR + i = BZ2_bzBuffToBuffCompress +#else + i = bzBuffToBuffCompress +#endif + (p->tmp_buf+sizeof(zint)+sizeof(short)+ + sizeof(char), + &csize, out_buf, out_offset, 1, 0, 30); + if (i != BZ_OK) + { + yaz_log(YLOG_WARN, "bzBuffToBuffCompress error code=%d", i); + csize = 0; + } + yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count, out_offset, + csize); +#endif + break; + case REC_COMPRESS_NONE: + break; + } + if (!csize) + { + /* either no compression or compression not supported ... */ + csize = out_offset; + rec_tmp_expand(p, csize); + memcpy(p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char), + out_buf, out_offset); + csize = out_offset; + compression_method = REC_COMPRESS_NONE; + } + memcpy(p->tmp_buf + sizeof(zint), &ref_count, sizeof(ref_count)); + memcpy(p->tmp_buf + sizeof(zint)+sizeof(short), + &compression_method, sizeof(compression_method)); + + /* -------- compression */ + if (rec_write_tmp_buf(p, csize + sizeof(short) + sizeof(char), sysnos) + != ZEBRA_OK) + ret = ZEBRA_FAIL; + } + xfree(out_buf); + xfree(sysnos); + return ret; +} + +static ZEBRA_RES rec_cache_flush(Records p, int saveCount) +{ + int i, j; + ZEBRA_RES ret; + + if (saveCount >= p->cache_cur) + saveCount = 0; + + ret = rec_write_multiple(p, saveCount); + + for (i = 0; icache_cur - saveCount; i++) + { + struct record_cache_entry *e = p->record_cache + i; + rec_free(&e->rec); + } + /* i still being used ... */ + for (j = 0; jrecord_cache+j, p->record_cache+i, + sizeof(*p->record_cache)); + p->cache_cur = saveCount; + return ret; +} + +static Record *rec_cache_lookup(Records p, zint sysno, + enum recordCacheFlag flag) +{ + int i; + for (i = 0; icache_cur; i++) + { + struct record_cache_entry *e = p->record_cache + i; + if (e->rec->sysno == sysno) + { + if (flag != recordFlagNop && e->flag == recordFlagNop) + e->flag = flag; + return &e->rec; + } + } + return NULL; +} + +static ZEBRA_RES rec_cache_insert(Records p, Record rec, enum recordCacheFlag flag) +{ + struct record_cache_entry *e; + ZEBRA_RES ret = ZEBRA_OK; + + if (p->cache_cur == p->cache_max) + ret = rec_cache_flush(p, 1); + else if (p->cache_cur > 0) + { + int i, j; + int used = 0; + for (i = 0; icache_cur; i++) + { + Record r = (p->record_cache + i)->rec; + for (j = 0; jsize[j]; + } + if (used > 90000) + ret = rec_cache_flush(p, 1); + } + assert(p->cache_cur < p->cache_max); + + e = p->record_cache + (p->cache_cur)++; + e->flag = flag; + e->rec = rec_cp(rec); + return ret; +} + +ZEBRA_RES rec_close(Records *pp) +{ + Records p = *pp; + int i; + ZEBRA_RES ret = ZEBRA_OK; + + if (!p) + return ret; + + zebra_mutex_destroy(&p->mutex); + if (rec_cache_flush(p, 0) != ZEBRA_OK) + ret = ZEBRA_FAIL; + + xfree(p->record_cache); + + if (p->rw) + { + if (recindex_write_head(p->recindex, &p->head, sizeof(p->head)) != ZEBRA_OK) + ret = ZEBRA_FAIL; + } + + recindex_close(p->recindex); + + for (i = 0; idata_BFile[i]) + bf_close(p->data_BFile[i]); + xfree(p->data_fname[i]); + } + xfree(p->tmp_buf); + xfree(p); + *pp = NULL; + return ret; +} + +static Record rec_get_int(Records p, zint sysno) +{ + int i, in_size, r; + Record rec, *recp; + struct record_index_entry entry; + zint freeblock; + int dst_type; + char *nptr, *cptr; + char *in_buf = 0; + char *bz_buf = 0; +#if HAVE_BZLIB_H + unsigned int bz_size; +#endif + char compression_method; + + assert(sysno > 0); + assert(p); + + if ((recp = rec_cache_lookup(p, sysno, recordFlagNop))) + return rec_cp(*recp); + + if (recindex_read_indx(p->recindex, rec_sysno_to_int(sysno), &entry, sizeof(entry), 1) < 1) + return NULL; /* record is not there! */ + + if (!entry.size) + return NULL; /* record is deleted */ + + dst_type = (int) (entry.next & 7); + assert(dst_type < REC_BLOCK_TYPES); + freeblock = entry.next / 8; + + assert(freeblock > 0); + + rec_tmp_expand(p, entry.size); + + cptr = p->tmp_buf; + r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr); + if (r < 0) + return 0; + memcpy(&freeblock, cptr, sizeof(freeblock)); + + while (freeblock) + { + zint tmp; + + cptr += p->head.block_size[dst_type] - sizeof(freeblock); + + memcpy(&tmp, cptr, sizeof(tmp)); + r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr); + if (r < 0) + return 0; + memcpy(&freeblock, cptr, sizeof(freeblock)); + memcpy(cptr, &tmp, sizeof(tmp)); + } + + rec = (Record) xmalloc(sizeof(*rec)); + rec->sysno = sysno; + memcpy(&compression_method, p->tmp_buf + sizeof(zint) + sizeof(short), + sizeof(compression_method)); + in_buf = p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char); + in_size = entry.size - sizeof(short) - sizeof(char); + switch (compression_method) + { + case REC_COMPRESS_BZIP2: +#if HAVE_BZLIB_H + bz_size = entry.size * 20 + 100; + while (1) + { + bz_buf = (char *) xmalloc(bz_size); +#ifdef BZ_CONFIG_ERROR + i = BZ2_bzBuffToBuffDecompress +#else + i = bzBuffToBuffDecompress +#endif + (bz_buf, &bz_size, in_buf, in_size, 0, 0); + yaz_log(YLOG_LOG, "decompress %5d %5d", in_size, bz_size); + if (i == BZ_OK) + break; + yaz_log(YLOG_LOG, "failed"); + xfree(bz_buf); + bz_size *= 2; + } + in_buf = bz_buf; + in_size = bz_size; +#else + yaz_log(YLOG_FATAL, "cannot decompress record(s) in BZIP2 format"); + return 0; +#endif + break; + case REC_COMPRESS_NONE: + break; + } + for (i = 0; iinfo[i] = 0; + + nptr = in_buf; /* skip ref count */ + while (nptr < in_buf + in_size) + { + zint this_sysno; + int len; + rec_decode_zint(&this_sysno, (unsigned char *) nptr, &len); + nptr += len; + + for (i = 0; i < REC_NO_INFO; i++) + { + unsigned int this_size; + rec_decode_unsigned(&this_size, (unsigned char *) nptr, &len); + nptr += len; + + if (this_size == 0) + continue; + rec->size[i] = this_size-1; + + if (rec->size[i]) + { + rec->info[i] = nptr; + nptr += rec->size[i]; + } + else + rec->info[i] = NULL; + } + if (this_sysno == rec_sysno_to_int(sysno)) + break; + } + for (i = 0; iinfo[i] && rec->size[i]) + { + char *np = xmalloc(rec->size[i]+1); + memcpy(np, rec->info[i], rec->size[i]); + np[rec->size[i]] = '\0'; + rec->info[i] = np; + } + else + { + assert(rec->info[i] == 0); + assert(rec->size[i] == 0); + } + } + xfree(bz_buf); + if (rec_cache_insert(p, rec, recordFlagNop) != ZEBRA_OK) + return 0; + return rec; +} + +Record rec_get(Records p, zint sysno) +{ + Record rec; + zebra_mutex_lock(&p->mutex); + + rec = rec_get_int(p, sysno); + zebra_mutex_unlock(&p->mutex); + return rec; +} + +Record rec_get_root(Records p) +{ + return rec_get(p, rec_sysno_to_ext(1)); +} + +static Record rec_new_int(Records p) +{ + int i; + zint sysno; + Record rec; + + assert(p); + rec = (Record) xmalloc(sizeof(*rec)); + if (1 || p->head.index_free == 0) + sysno = (p->head.index_last)++; + else + { + struct record_index_entry entry; + + if (recindex_read_indx(p->recindex, p->head.index_free, &entry, sizeof(entry), 0) < 1) + { + xfree(rec); + return 0; + } + sysno = p->head.index_free; + p->head.index_free = entry.next; + } + (p->head.no_records)++; + rec->sysno = rec_sysno_to_ext(sysno); + for (i = 0; i < REC_NO_INFO; i++) + { + rec->info[i] = NULL; + rec->size[i] = 0; + } + rec_cache_insert(p, rec, recordFlagNew); + return rec; +} + +Record rec_new(Records p) +{ + Record rec; + zebra_mutex_lock(&p->mutex); + + rec = rec_new_int(p); + zebra_mutex_unlock(&p->mutex); + return rec; +} + +ZEBRA_RES rec_del(Records p, Record *recpp) +{ + Record *recp; + ZEBRA_RES ret = ZEBRA_OK; + + zebra_mutex_lock(&p->mutex); + (p->head.no_records)--; + if ((recp = rec_cache_lookup(p, (*recpp)->sysno, recordFlagDelete))) + { + rec_free(recp); + *recp = *recpp; + } + else + { + ret = rec_cache_insert(p, *recpp, recordFlagDelete); + rec_free(recpp); + } + zebra_mutex_unlock(&p->mutex); + *recpp = NULL; + return ret; +} + +ZEBRA_RES rec_put(Records p, Record *recpp) +{ + Record *recp; + ZEBRA_RES ret = ZEBRA_OK; + + zebra_mutex_lock(&p->mutex); + if ((recp = rec_cache_lookup(p, (*recpp)->sysno, recordFlagWrite))) + { + rec_free(recp); + *recp = *recpp; + } + else + { + ret = rec_cache_insert(p, *recpp, recordFlagWrite); + rec_free(recpp); + } + zebra_mutex_unlock(&p->mutex); + *recpp = NULL; + return ret; +} + +void rec_free(Record *recpp) +{ + int i; + + if (!*recpp) + return ; + for (i = 0; i < REC_NO_INFO; i++) + xfree((*recpp)->info[i]); + xfree(*recpp); + *recpp = NULL; +} + +Record rec_cp(Record rec) +{ + Record n; + int i; + + n = (Record) xmalloc(sizeof(*n)); + n->sysno = rec->sysno; + for (i = 0; i < REC_NO_INFO; i++) + if (!rec->info[i]) + { + n->info[i] = NULL; + n->size[i] = 0; + } + else + { + n->size[i] = rec->size[i]; + n->info[i] = (char *) xmalloc(rec->size[i]+1); + memcpy(n->info[i], rec->info[i], rec->size[i]); + n->info[i][rec->size[i]] = '\0'; + } + return n; +} + + +char *rec_strdup(const char *s, size_t *len) +{ + char *p; + + if (!s) + { + *len = 0; + return NULL; + } + *len = strlen(s)+1; + p = (char *) xmalloc(*len); + strcpy(p, s); + return p; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + -- 1.7.10.4