isamc.h isamd.h isams.h mfile.h passwddb.h recctrl.h res.h rsbool.h rset.h \
rsisam.h rsisamc.h rsisams.h rsisamd.h rsm_or.h rsnull.h rstemp.h set.h \
sortidx.h str.h zebra-lock.h zebramap.h zebrautl.h zebraver.h isamb.h \
- rsbetween.h
+ rsbetween.h rsisamb.h
/*
- * $Id: isamb.h,v 1.1 2000-10-17 12:37:09 adam Exp $
+ * Copyright (C) 2000-2002, Index Data
+ * All rights reserved.
+ * $Id: isamb.h,v 1.2 2002-04-16 22:31:42 adam Exp $
*/
#ifndef ISAMB_H
#include <isamc.h>
typedef struct ISAMB_s *ISAMB;
+typedef struct ISAMB_PP_s *ISAMB_PP;
+typedef ISAMC_P ISAMB_P;
-ISAMB isamb_open (BFiles bfs, const char *name, ISAMC_M method);
+ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M method);
void isamb_close (ISAMB isamb);
+ISAMB_P isamb_merge (ISAMB b, ISAMB_P pos, ISAMC_I data);
+
+ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos);
+
+int isamb_pp_read (ISAMB_PP pp, void *buf);
+
+void isamb_pp_close (ISAMB_PP pp);
+
+int isamb_pp_num (ISAMB_PP pp);
+
#endif
--- /dev/null
+/*
+ * Copyright (C) 2001-2002, Index Data
+ * All rights reserved.
+ *
+ * $Id: rsisamb.h,v 1.1 2002-04-16 22:31:42 adam Exp $
+ */
+
+#ifndef RSET_ISAMB_H
+#define RSET_ISAMB_H
+
+#include <rset.h>
+#include <isamb.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern const struct rset_control *rset_kind_isamb;
+
+typedef struct rset_isamb_parms /* creation parameters handed to rset_create() for rset_kind_isamb
+ * (r_create() copies the first four fields and keeps rset_term) */
+{
+ int (*cmp)(const void *p1, const void *p2); /* key comparison callback */
+ int key_size; /* size in bytes of one key */
+ ISAMB is; /* ISAMB handle the result set reads from */
+ ISAMB_P pos; /* position later passed to isamb_pp_open() */
+ RSET_TERM rset_term; /* becomes the set's single rset term */
+} rset_isamb_parms;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
* Copyright (C) 1995-2002, Index Data
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss, Heikki Levanto
- * $Id: index.h,v 1.81 2002-04-15 14:05:43 adam Exp $
+ * $Id: index.h,v 1.82 2002-04-16 22:31:42 adam Exp $
*/
#ifndef INDEX_H
#include <isam.h>
#include <isamc.h>
#include <isamd.h>
+#include <isamb.h>
#define ISAM_DEFAULT "c"
#include <yaz/data1.h>
#include <recctrl.h>
ISAM isam;
ISAMC isamc;
ISAMD isamd;
+ ISAMB isamb;
Dict dict;
Dict matchDict;
SortIdx sortIdx;
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss, Heikki Levanto
*
- * $Id: kinput.c,v 1.48 2002-04-05 08:46:26 adam Exp $
+ * $Id: kinput.c,v 1.49 2002-04-16 22:31:42 adam Exp $
*
* Bugs
* - Allocates a lot of memory for the merge process, but never releases it.
#define INP_BUF_START 60000
#define INP_BUF_ADD 400000
-static int no_diffs = 0;
-static int no_updates = 0;
-static int no_deletions = 0;
-static int no_insertions = 0;
-static int no_iterations = 0;
struct key_file {
int no; /* file no */
int heapnum;
int *ptr;
int (*cmp)(const void *p1, const void *p2);
- Dict dict;
- ISAMS isams;
- ISAM isam;
- ISAMC isamc;
- ISAMD isamd;
+ struct zebra_register *reg;
+
+ int no_diffs;
+ int no_updates;
+ int no_deletions;
+ int no_insertions;
+ int no_iterations;
};
struct heap_info *key_heap_init (int nkeys,
hi->ptr[i] = i;
hi->info.buf[i] = (char *) xmalloc (INP_NAME_MAX);
}
+ hi->no_diffs = 0;
+ hi->no_diffs = 0;
+ hi->no_updates = 0;
+ hi->no_deletions = 0;
+ hi->no_insertions = 0;
+ hi->no_iterations = 0;
return hi;
}
key_heap_delete (hi);
if ((r = key_file_read (kf, rbuf)))
key_heap_insert (hi, rbuf, r, kf);
- no_iterations++;
+ hi->no_iterations++;
return 1;
}
strcpy (this_name, hci.cur_name);
assert (hci.cur_name[1]);
- no_diffs++;
- if ((dict_info = dict_lookup (hi->dict, hci.cur_name)))
+ hi->no_diffs++;
+ if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name)))
+ {
+ memcpy (&isamc_p, dict_info+1, sizeof(ISAMC_P));
+ isamc_p2 = isc_merge (hi->reg->isamc, isamc_p, isamc_i);
+ if (!isamc_p2)
+ {
+ hi->no_deletions++;
+ if (!dict_delete (hi->reg->dict, this_name))
+ abort();
+ }
+ else
+ {
+ hi->no_updates++;
+ if (isamc_p2 != isamc_p)
+ dict_insert (hi->reg->dict, this_name,
+ sizeof(ISAMC_P), &isamc_p2);
+ }
+ }
+ else
+ {
+ isamc_p = isc_merge (hi->reg->isamc, 0, isamc_i);
+ hi->no_insertions++;
+ dict_insert (hi->reg->dict, this_name, sizeof(ISAMC_P), &isamc_p);
+ }
+ }
+ xfree (isamc_i);
+ xfree (hci.key);
+ return 0;
+}
+
+int heap_inpb (struct heap_info *hi)
+{
+ struct heap_cread_info hci;
+ ISAMC_I isamc_i = (ISAMC_I) xmalloc (sizeof(*isamc_i));
+
+ hci.key = (char *) xmalloc (KEY_SIZE);
+ hci.mode = 1;
+ hci.hi = hi;
+ hci.more = heap_read_one (hi, hci.cur_name, hci.key);
+
+ isamc_i->clientData = &hci;
+ isamc_i->read_item = heap_cread_item;
+
+ while (hci.more)
+ {
+ char this_name[INP_NAME_MAX];
+ ISAMC_P isamc_p, isamc_p2;
+ char *dict_info;
+
+ strcpy (this_name, hci.cur_name);
+ assert (hci.cur_name[1]);
+ hi->no_diffs++;
+ if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name)))
{
memcpy (&isamc_p, dict_info+1, sizeof(ISAMC_P));
- isamc_p2 = isc_merge (hi->isamc, isamc_p, isamc_i);
+ isamc_p2 = isamb_merge (hi->reg->isamb, isamc_p, isamc_i);
if (!isamc_p2)
{
- no_deletions++;
- if (!dict_delete (hi->dict, this_name))
+ hi->no_deletions++;
+ if (!dict_delete (hi->reg->dict, this_name))
abort();
}
else
{
- no_updates++;
+ hi->no_updates++;
if (isamc_p2 != isamc_p)
- dict_insert (hi->dict, this_name,
+ dict_insert (hi->reg->dict, this_name,
sizeof(ISAMC_P), &isamc_p2);
}
}
else
{
- isamc_p = isc_merge (hi->isamc, 0, isamc_i);
- no_insertions++;
- dict_insert (hi->dict, this_name, sizeof(ISAMC_P), &isamc_p);
+ isamc_p = isamb_merge (hi->reg->isamb, 0, isamc_i);
+ hi->no_insertions++;
+ dict_insert (hi->reg->dict, this_name, sizeof(ISAMC_P), &isamc_p);
}
}
xfree (isamc_i);
strcpy (this_name, hci.cur_name);
assert (hci.cur_name[1]);
- no_diffs++;
- if ((dict_info = dict_lookup (hi->dict, hci.cur_name)))
+ hi->no_diffs++;
+ if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name)))
{
memcpy (&isamd_p, dict_info+1, sizeof(ISAMD_P));
- isamd_p2 = isamd_append (hi->isamd, isamd_p, isamd_i);
+ isamd_p2 = isamd_append (hi->reg->isamd, isamd_p, isamd_i);
if (!isamd_p2)
{
- no_deletions++;
- if (!dict_delete (hi->dict, this_name))
+ hi->no_deletions++;
+ if (!dict_delete (hi->reg->dict, this_name))
abort();
}
else
{
- no_updates++;
+ hi->no_updates++;
if (isamd_p2 != isamd_p)
- dict_insert (hi->dict, this_name,
+ dict_insert (hi->reg->dict, this_name,
sizeof(ISAMD_P), &isamd_p2);
}
}
else
{
- isamd_p = isamd_append (hi->isamd, 0, isamd_i);
- no_insertions++;
- dict_insert (hi->dict, this_name, sizeof(ISAMD_P), &isamd_p);
+ isamd_p = isamd_append (hi->reg->isamd, 0, isamd_i);
+ hi->no_insertions++;
+ dict_insert (hi->reg->dict, this_name, sizeof(ISAMD_P), &isamd_p);
}
}
xfree (isamd_i);
key_buf = new_key_buf;
}
}
- no_diffs++;
+ hi->no_diffs++;
nmemb = key_buf_ptr / KEY_SIZE;
assert (nmemb * (int) KEY_SIZE == key_buf_ptr);
- if ((info = dict_lookup (hi->dict, cur_name)))
+ if ((info = dict_lookup (hi->reg->dict, cur_name)))
{
ISAM_P isam_p, isam_p2;
memcpy (&isam_p, info+1, sizeof(ISAM_P));
- isam_p2 = is_merge (hi->isam, isam_p, nmemb, key_buf);
+ isam_p2 = is_merge (hi->reg->isam, isam_p, nmemb, key_buf);
if (!isam_p2)
{
- no_deletions++;
- if (!dict_delete (hi->dict, cur_name))
+ hi->no_deletions++;
+ if (!dict_delete (hi->reg->dict, cur_name))
abort ();
}
else
{
- no_updates++;
+ hi->no_updates++;
if (isam_p2 != isam_p)
- dict_insert (hi->dict, cur_name, sizeof(ISAM_P), &isam_p2);
+ dict_insert (hi->reg->dict, cur_name,
+ sizeof(ISAM_P), &isam_p2);
}
}
else
{
ISAM_P isam_p;
- no_insertions++;
- isam_p = is_merge (hi->isam, 0, nmemb, key_buf);
- dict_insert (hi->dict, cur_name, sizeof(ISAM_P), &isam_p);
+ hi->no_insertions++;
+ isam_p = is_merge (hi->reg->isam, 0, nmemb, key_buf);
+ dict_insert (hi->reg->dict, cur_name, sizeof(ISAM_P), &isam_p);
}
memcpy (key_buf, next_key, KEY_SIZE);
strcpy (cur_name, next_name);
strcpy (this_name, hci.cur_name);
assert (hci.cur_name[1]);
- no_diffs++;
- if (!(dict_info = dict_lookup (hi->dict, hci.cur_name)))
+ hi->no_diffs++;
+ if (!(dict_info = dict_lookup (hi->reg->dict, hci.cur_name)))
{
- isams_p = isams_merge (hi->isams, isams_i);
- no_insertions++;
- dict_insert (hi->dict, this_name, sizeof(ISAMS_P), &isams_p);
+ isams_p = isams_merge (hi->reg->isams, isams_i);
+ hi->no_insertions++;
+ dict_insert (hi->reg->dict, this_name, sizeof(ISAMS_P), &isams_p);
}
else
{
progressInfo.totalOffset += kf[i]->buf_size;
}
hi = key_heap_init (nkeys, key_qsort_compare);
- hi->dict = zh->reg->dict;
- hi->isams = zh->reg->isams;
- hi->isam = zh->reg->isam;
- hi->isamc = zh->reg->isamc;
- hi->isamd = zh->reg->isamd;
+ hi->reg = zh->reg;
for (i = 1; i<=nkeys; i++)
if ((r = key_file_read (kf[i], rbuf)))
key_heap_insert (hi, rbuf, r, kf[i]);
if (zh->reg->isams)
heap_inps (hi);
- else if (zh->reg->isamc)
+ if (zh->reg->isamc)
heap_inpc (hi);
- else if (zh->reg->isam)
+ if (zh->reg->isam)
heap_inp (hi);
- else if (zh->reg->isamd)
+ if (zh->reg->isamd)
heap_inpd (hi);
+ if (zh->reg->isamb)
+ heap_inpb (hi);
for (i = 1; i<=nkeys; i++)
{
extract_get_fname_tmp (zh, rbuf, i);
unlink (rbuf);
}
- logf (LOG_LOG, "Iterations . . .%7d", no_iterations);
- logf (LOG_LOG, "Distinct words .%7d", no_diffs);
- logf (LOG_LOG, "Updates. . . . .%7d", no_updates);
- logf (LOG_LOG, "Deletions. . . .%7d", no_deletions);
- logf (LOG_LOG, "Insertions . . .%7d", no_insertions);
+ logf (LOG_LOG, "Iterations . . .%7d", hi->no_iterations);
+ logf (LOG_LOG, "Distinct words .%7d", hi->no_diffs);
+ logf (LOG_LOG, "Updates. . . . .%7d", hi->no_updates);
+ logf (LOG_LOG, "Deletions. . . .%7d", hi->no_deletions);
+ logf (LOG_LOG, "Insertions . . .%7d", hi->no_insertions);
zh->reg->key_file_no = 0;
key_heap_destroy (hi, nkeys);
/*
- * Copyright (C) 1994-1999, Index Data
+ * Copyright (C) 1994-2002, Index Data
* All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
+ * Sebastian Hammer, Adam Dickmeiss, Heikki Levanto
*
- * $Log: trunc.c,v $
- * Revision 1.23 2002-04-12 14:40:42 adam
- * Work on XPATH
- *
- * Revision 1.22 2002/04/05 08:46:26 adam
- * Zebra with full functionality
- *
- * Revision 1.21 2002/04/04 14:14:13 adam
- * Multiple registers (alpha early)
- *
- * Revision 1.20 2002/03/20 20:24:29 adam
- * Hits per term. Returned in SearchResult-1
- *
- * Revision 1.19 2001/01/16 16:56:15 heikki
- * Searching in my isam-d
- *
- * Revision 1.18 2000/05/18 12:01:36 adam
- * System call times(2) used again. More 64-bit fixes.
- *
- * Revision 1.17 2000/03/15 15:00:30 adam
- * First work on threaded version.
- *
- * Revision 1.16 1999/11/30 13:48:03 adam
- * Improved installation. Updated for inclusion of YAZ header files.
- *
- * Revision 1.15 1999/07/20 13:59:18 adam
- * Fixed bug that occurred when phrases had 0 hits.
- *
- * Revision 1.14 1999/05/26 07:49:13 adam
- * C++ compilation.
- *
- * Revision 1.13 1999/05/12 13:08:06 adam
- * First version of ISAMS.
- *
- * Revision 1.12 1999/02/02 14:51:10 adam
- * Updated WIN32 code specific sections. Changed header.
- *
- * Revision 1.11 1998/03/25 13:48:02 adam
- * Fixed bug in rset_trunc_r.
- *
- * Revision 1.10 1998/03/05 08:45:13 adam
- * New result set model and modular ranking system. Moved towards
- * descent server API. System information stored as "SGML" records.
- *
- * Revision 1.9 1998/01/12 15:04:09 adam
- * The test option (-s) only uses read-lock (and not write lock).
- *
- * Revision 1.8 1997/10/31 12:34:27 adam
- * Bug fix: memory leak.
- *
- * Revision 1.7 1997/09/29 09:07:29 adam
- * Minor change.
- *
- * Revision 1.6 1997/09/22 12:39:06 adam
- * Added get_pos method for the ranked result sets.
- *
- * Revision 1.5 1997/09/17 12:19:17 adam
- * Zebra version corresponds to YAZ version 1.4.
- * Changed Zebra server so that it doesn't depend on global common_resource.
- *
- * Revision 1.4 1996/12/23 15:30:44 adam
- * Work on truncation.
- * Bug fix: result sets weren't deleted after server shut down.
- *
- * Revision 1.3 1996/12/20 11:07:14 adam
- * Multi-or result set.
- *
- * Revision 1.2 1996/11/08 11:10:28 adam
- * Buffers used during file match got bigger.
- * Compressed ISAM support everywhere.
- * Bug fixes regarding masking characters in queries.
- * Redesigned Regexp-2 queries.
- *
- * Revision 1.1 1996/11/04 14:07:40 adam
- * Moved truncation code to trunc.c.
+ * $Id: trunc.c,v 1.24 2002-04-16 22:31:42 adam Exp $
*
*/
#include <stdio.h>
#include <rsisam.h>
#include <rsisamc.h>
#include <rsisamd.h>
+#include <rsisamb.h>
#if NEW_TRUNC
#include <rsm_or.h>
#endif
int n = ti->indx[ti->ptr[1]];
rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]);
-#if 1
+ if (preserve_position)
+ {
/* section that preserve all keys */
- heap_delete (ti);
- if (is_readkey (ispt[n], ti->tmpbuf))
- heap_insert (ti, ti->tmpbuf, n);
+ heap_delete (ti);
+ if (is_readkey (ispt[n], ti->tmpbuf))
+ heap_insert (ti, ti->tmpbuf, n);
+ else
+ is_pt_free (ispt[n]);
+ }
else
- is_pt_free (ispt[n]);
-#else
-/* section that preserve all keys with unique sysnos */
- while (1)
{
- if (!is_readkey (ispt[n], ti->tmpbuf))
- {
- heap_delete (ti);
- is_pt_free (ispt[n]);
- break;
- }
- if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
+/* section that preserve all keys with unique sysnos */
+ while (1)
{
- heap_delete (ti);
- heap_insert (ti, ti->tmpbuf, n);
- break;
+ if (!is_readkey (ispt[n], ti->tmpbuf))
+ {
+ heap_delete (ti);
+ is_pt_free (ispt[n]);
+ break;
+ }
+ if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
+ {
+ heap_delete (ti);
+ heap_insert (ti, ti->tmpbuf, n);
+ break;
+ }
}
}
-#endif
}
heap_close (ti);
xfree (ispt);
heap_close (ti);
xfree (ispt);
}
-
else if (zi->reg->isams)
{
ISAMS_PP *ispt;
heap_close (ti);
xfree (ispt);
}
+ else if (zi->reg->isamb)
+ {
+ ISAMB_PP *ispt;
+ int i;
+ struct trunc_info *ti;
+
+ ispt = (ISAMB_PP *) xmalloc (sizeof(*ispt) * (to-from));
+
+ ti = heap_init (to-from, sizeof(struct it_key),
+ key_compare_it);
+ for (i = to-from; --i >= 0; )
+ {
+ ispt[i] = isamb_pp_open (zi->reg->isamb, isam_p[from+i]);
+ if (isamb_pp_read (ispt[i], ti->tmpbuf))
+ heap_insert (ti, ti->tmpbuf, i);
+ else
+ isamb_pp_close (ispt[i]);
+ }
+ while (ti->heapnum)
+ {
+ int n = ti->indx[ti->ptr[1]];
+
+ rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]);
+
+ if (preserve_position)
+ {
+ heap_delete (ti);
+ if (isamb_pp_read (ispt[n], ti->tmpbuf))
+ heap_insert (ti, ti->tmpbuf, n);
+ else
+ isamb_pp_close (ispt[n]);
+ }
+ else
+ {
+ while (1)
+ {
+ if (!isamb_pp_read (ispt[n], ti->tmpbuf))
+ {
+ heap_delete (ti);
+ isamb_pp_close (ispt[n]);
+ break;
+ }
+ if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
+ {
+ heap_delete (ti);
+ heap_insert (ti, ti->tmpbuf, n);
+ break;
+ }
+ }
+ }
+ }
+ heap_close (ti);
+ xfree (ispt);
+ }
else
logf (LOG_WARN, "Unknown isam set in rset_trunc_r");
#endif
qsort (isam_p, no, sizeof(*isam_p), isamd_trunc_cmp);
}
+ else if (zi->reg->isamb)
+ {
+ if (no == 1)
+ {
+ rset_isamb_parms parms;
+
+ parms.key_size = sizeof(struct it_key);
+ parms.cmp = key_compare_it;
+ parms.pos = *isam_p;
+ parms.is = zi->reg->isamb;
+ parms.rset_term = rset_term_create (term, length, flags);
+ return rset_create (rset_kind_isamb, &parms);
+ }
+ qsort (isam_p, no, sizeof(*isam_p), isamd_trunc_cmp);
+ }
else
{
logf (LOG_WARN, "Unknown isam set in rset_trunc");
* Copyright (C) 1995-2002, Index Data
* All rights reserved.
*
- * $Id: zebraapi.c,v 1.56 2002-04-15 14:05:43 adam Exp $
+ * $Id: zebraapi.c,v 1.57 2002-04-16 22:31:42 adam Exp $
*/
#include <assert.h>
reg->isam = 0;
reg->isamc = 0;
reg->isamd = 0;
+ reg->isamb = 0;
reg->zei = 0;
reg->matchDict = 0;
return 0;
}
}
- else if (res_get_match (res, "isam", "i", ISAM_DEFAULT))
+ if (res_get_match (res, "isam", "i", ISAM_DEFAULT))
{
if (!(reg->isam = is_open (reg->bfs, FNAME_ISAM, key_compare, rw,
sizeof (struct it_key), res)))
return 0;
}
}
- else if (res_get_match (res, "isam", "c", ISAM_DEFAULT))
+ if (res_get_match (res, "isam", "c", ISAM_DEFAULT))
{
struct ISAMC_M_s isamc_m;
if (!(reg->isamc = isc_open (reg->bfs, FNAME_ISAMC,
return 0;
}
}
- else if (res_get_match (res, "isam", "d", ISAM_DEFAULT))
+ if (res_get_match (res, "isam", "d", ISAM_DEFAULT))
{
struct ISAMD_M_s isamd_m;
return 0;
}
}
+ if (res_get_match (res, "isam", "b", ISAM_DEFAULT))
+ {
+ struct ISAMC_M_s isamc_m;
+
+ if (!(reg->isamb = isamb_open (reg->bfs, "isamb",
+ rw, key_isamc_m(res, &isamc_m))))
+ {
+ logf (LOG_WARN, "isamb_open");
+ return 0;
+ }
+ }
reg->zei = zebraExplain_open (reg->records, reg->dh,
res, rw, reg,
explain_extract);
isc_close (reg->isamc);
if (reg->isamd)
isamd_close (reg->isamd);
+ if (reg->isamb)
+ isamb_close (reg->isamb);
rec_close (®->records);
}
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss
*
- * $Id: zsets.c,v 1.34 2002-04-04 14:14:13 adam Exp $
+ * $Id: zsets.c,v 1.35 2002-04-16 22:31:42 adam Exp $
*/
#include <stdio.h>
#include <assert.h>
do
{
kno++;
+ yaz_log (LOG_LOG, "%d:%d", key.sysno, key.seqno);
if (key.sysno != psysno)
{
score = (*rc->calc) (handle, psysno);
#include <yaz/xmalloc.h>
+#include <yaz/log.h>
#include <isamb.h>
+#include <assert.h>
+
+struct ISAMB_head { /* file header: isamb_open() reads it from block 0, isamb_close() writes it back when dirty */
+ int first_block; /* initialised to 1 by isamb_open() */
+ int last_block; /* block number new_block() hands out next (post-incremented there) */
+ int block_size; /* size in bytes of each block buffer; initialised to 1024 */
+};
+
+#define ISAMB_DATA_OFFSET 3
struct ISAMB_s { /* state behind an ISAMB handle */
BFiles bfs; /* block-file collection given to isamb_open() */
+ BFile bf; /* open block file holding the header and the tree blocks */
ISAMC_M method; /* private copy of the caller's method table */
+ int head_dirty; /* nonzero when head must be written back on close */
+
+ struct ISAMB_head head; /* in-memory copy of the file header */
+};
+
+struct ISAMB_block { /* one tree block held in memory */
+ int pos; /* block number within the file */
+ int size; /* bytes in use, counting the ISAMB_DATA_OFFSET header bytes */
+ int leaf; /* nonzero for a leaf block (stored in bytes[0]) */
+ int dirty; /* nonzero when close_block() must write the block back */
+ int offset; /* read cursor into bytes; starts at ISAMB_DATA_OFFSET */
+ unsigned char *bytes; /* block buffer of head.block_size bytes */
+ void *decodeClientData; /* decoder state from code_start(ISAMC_DECODE) */
+};
+
+struct ISAMB_PP_s { /* read cursor over one ISAMB tree */
+ ISAMB isamb; /* tree being read */
+ int level; /* index into block[] of the current (deepest open) block */
+ struct ISAMB_block **block; /* open blocks, index 0 first, down to the current one */
};
-typedef unsigned char *Bpage;
+void encode_ptr (char **dst, int pos)
+{ /* copies the raw bytes of pos to *dst and advances *dst past them */
+ memcpy (*dst, &pos, sizeof(pos));
+ (*dst) += sizeof(pos);
+}
-ISAMB isamb_open (BFiles bfs, const char *name, ISAMC_M method)
+void decode_ptr (char **src, int *pos)
+{ /* copies sizeof(int) raw bytes from *src into *pos and advances *src past them */
+ memcpy (pos, *src, sizeof(*pos));
+ (*src) += sizeof(*pos);
+}
+
+/*
+ * Opens the ISAMB file 'name' in 'bfs'; 'writeflag' is passed through to
+ * bf_open(). A private copy of 'method' is kept in the handle.
+ * Returns the new handle, or 0 if the block file could not be opened
+ * (callers test the result against 0).
+ */
+ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M method)
{
ISAMB isamb = xmalloc (sizeof(*isamb));
isamb->bfs = bfs;
isamb->method = (ISAMC_M) xmalloc (sizeof(*method));
memcpy (isamb->method, method, sizeof(*method));
+
+    /* header defaults, used unless bf_read() below loads a stored header */
+    isamb->head.first_block = 1;
+    isamb->head.last_block = 1;
+    isamb->head.block_size = 1024;
+    isamb->head_dirty = 0;
+
+    isamb->bf = bf_open (bfs, name, isamb->head.block_size, writeflag);
+    if (!isamb->bf)
+    {
+        /* do not hand out a handle without a block file behind it */
+        xfree (isamb->method);
+        xfree (isamb);
+        return 0;
+    }
+
+    /* NOTE(review): presumably leaves the defaults untouched when block 0
+       does not exist yet - confirm against bf_read() */
+    bf_read (isamb->bf, 0, 0, sizeof(struct ISAMB_head),
+             &isamb->head);
return isamb;
}
void isamb_close (ISAMB isamb)
{
+ if (isamb->head_dirty)
+ bf_write (isamb->bf, 0, 0, sizeof(struct ISAMB_head), &isamb->head);
xfree (isamb->method);
xfree (isamb);
}
-#if 0
-/* read page at pos */
-void isamb_get_block (ISAMB is, ISAMB_pos pos, Bpage *page)
+struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) /* loads block pos into a newly allocated ISAMB_block; returns 0 when pos is 0 */
{
+ struct ISAMB_block *p;
+ if (!pos)
+ return 0;
+ p = xmalloc (sizeof(*p));
+ p->pos = pos;
+ p->bytes = xmalloc (b->head.block_size);
+ bf_read (b->bf, pos, 0, 0, p->bytes); /* NOTE(review): return value is not checked */
+ p->leaf = p->bytes[0]; /* byte 0: leaf flag */
+ p->size = p->bytes[1] + 256 * p->bytes[2]; /* bytes 1-2: size, low byte first */
+ p->offset = ISAMB_DATA_OFFSET; /* cursor starts right after the header bytes */
+ p->dirty = 0;
+ p->decodeClientData = (*b->method->code_start)(ISAMC_DECODE);
+ return p;
}
-/* alloc page */
-ISAMB_pos isamb_alloc_block (ISAMB is, int block_size, Bpage *page)
+struct ISAMB_block *new_block (ISAMB b, int leaf) /* allocates an empty, zero-filled block in memory and assigns it the next block number */
{
+ struct ISAMB_block *p;
+
+ p = xmalloc (sizeof(*p));
+ p->pos = b->head.last_block++; /* take the next free block number */
+ b->head_dirty = 1; /* header changed; isamb_close() writes it back */
+ p->bytes = xmalloc (b->head.block_size);
+ memset (p->bytes, 0, b->head.block_size);
+ p->leaf = leaf;
+ p->size = ISAMB_DATA_OFFSET; /* only the header bytes are in use */
+ p->dirty = 1; /* close_block() will write the block to the file */
+ p->offset = ISAMB_DATA_OFFSET;
+ p->decodeClientData = (*b->method->code_start)(ISAMC_DECODE);
+ return p;
}
-#define isamb_page_set_leaf (p) 0[p] = 1
-#define isamb_page_set_noleaf (p) 0[p] = 0
-#define isamb_page_datalist (4+p)
-
-static void isamb_page_set_no(Bpage page, int no)
+void close_block (ISAMB b, struct ISAMB_block *p) /* writes p back if dirty, then frees it; a null p is ignored */
{
- page[1] = no & 255;
- page[2] = (no >> 8) & 255;
- page[3] = (no >> 16) & 255;
+ if (!p)
+ return;
+ if (p->dirty)
+ {
+ p->bytes[0] = p->leaf; /* refresh the header bytes before writing */
+ p->bytes[1] = p->size & 255;
+ p->bytes[2] = p->size >> 8;
+ bf_write (b->bf, p->pos, 0, 0, p->bytes);
+ }
+ (*b->method->code_stop)(ISAMC_DECODE, p->decodeClientData);
+ xfree (p->bytes);
+ xfree (p);
}
-static int isamb_page_get_no(Bpage page)
+void insert_sub (ISAMB b, struct ISAMB_block *p, const void *new_item,
+ struct ISAMB_block **sp,
+ void *sub_item, int *sub_size); /* forward declaration: insert_int() calls it before its definition */
+
+void insert_leaf (ISAMB b, struct ISAMB_block *p, const void *new_item,
+ struct ISAMB_block **sp,
+ void *sub_item, int *sub_size) /* Re-encodes leaf p with new_item merged in at its sorted
+ * position (an item comparing equal is not added again). If the result no
+ * longer fits in one block, the leaf is split: *sp receives the new right-hand
+ * leaf and sub_item / *sub_size its first item; otherwise *sp is set to 0. */
{
- return page[1] + 256*page[2] + 65536*page[3];
+ char dst_buf[2048]; /* scratch area for the re-encoded leaf */
+ char *dst = dst_buf;
+ char *src = p->bytes + ISAMB_DATA_OFFSET;
+ char *endp = p->bytes + p->size;
+ void *c1 = (*b->method->code_start)(ISAMC_DECODE);
+ void *c2 = (*b->method->code_start)(ISAMC_ENCODE);
+ char *half1 = 0; /* end of the first half in dst_buf, once chosen */
+ char *half2 = 0; /* position just after the encoded cut item */
+ char *cut = dst_buf + p->size / 2; /* split candidate: half of the old size */
+ char cut_item_buf[256];
+ int cut_item_size = 0;
+
+ while (src != endp)
+ {
+ char file_item_buf[256];
+ char *file_item = file_item_buf;
+
+ (*b->method->code_item)(ISAMC_DECODE, c1, &file_item, &src);
+ if (new_item)
+ {
+ int d = (*b->method->compare_item)(file_item_buf, new_item);
+ if (d > 0) /* first stored item greater than new_item: emit new_item before it */
+ {
+ char *item_ptr = (char*) new_item;
+ (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &item_ptr);
+ new_item = 0;
+ p->dirty = 1;
+ }
+ else if (d == 0) /* already stored: drop new_item */
+ {
+ new_item = 0;
+ }
+ }
+
+ if (!half1 && dst > cut)
+ {
+ half1 = dst; /* candidate for splitting */
+
+ file_item = file_item_buf;
+ (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &file_item);
+
+ cut_item_size = file_item - file_item_buf; /* remember the decoded cut item */
+ memcpy (cut_item_buf, file_item_buf, cut_item_size);
+
+ half2 = dst;
+ }
+ else
+ {
+ file_item = file_item_buf;
+ (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &file_item);
+ }
+ }
+ if (new_item) /* not placed yet: append it after the last stored item */
+ {
+ char *item_ptr = (char*) new_item;
+ (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &item_ptr);
+ new_item = 0;
+ p->dirty = 1;
+ }
+ p->size = dst - dst_buf + ISAMB_DATA_OFFSET;
+ if (p->size > b->head.block_size)
+ {
+ char *first_dst;
+ char *cut_item = cut_item_buf;
+
+ /* first half */
+ p->size = half1 - dst_buf + ISAMB_DATA_OFFSET;
+ memcpy (p->bytes+ISAMB_DATA_OFFSET, dst_buf, half1 - dst_buf);
+
+ /* second half */
+ *sp = new_block (b, 1);
+
+ (*b->method->code_reset)(c2); /* encode the cut item afresh as first item of the new block */
+
+ first_dst = (*sp)->bytes + ISAMB_DATA_OFFSET;
+
+ (*b->method->code_item)(ISAMC_ENCODE, c2, &first_dst, &cut_item);
+
+ memcpy (first_dst, half2, dst - half2); /* bytes encoded after the cut item are reused as they are */
+
+ (*sp)->size = (first_dst - (char*) (*sp)->bytes) + (dst - half2);
+ (*sp)->dirty = 1;
+ p->dirty = 1;
+ memcpy (sub_item, cut_item_buf, cut_item_size);
+ *sub_size = cut_item_size;
+
+ yaz_log (LOG_LOG, "l split %d / %d", p->size, (*sp)->size);
+
+ }
+ else
+ {
+ assert (p->size > ISAMB_DATA_OFFSET);
+ assert (p->size <= b->head.block_size);
+ memcpy (p->bytes+ISAMB_DATA_OFFSET, dst_buf, dst - dst_buf);
+ *sp = 0;
+ }
+ (*b->method->code_stop)(ISAMC_DECODE, c1);
+ (*b->method->code_stop)(ISAMC_ENCODE, c2);
}
-void isamb_insert_sub(ISAMB is, ISAMB_pos *pos, const void *data)
+/*
+ * Inserts new_item below internal block p.
+ *
+ * The data area of an internal block is read here as a block pointer
+ * followed by zero or more (item length, item bytes, block pointer)
+ * entries. The child to descend into is the pointer preceding the first
+ * item that compares greater than new_item, or the last pointer when no
+ * such item exists.
+ *
+ * If the child was split, its separator item and the position of the new
+ * child are inserted right after the pointer that was followed. If p then
+ * overflows it is split too: *sp receives the new block and split_item /
+ * *split_size the separator to insert in the parent. Otherwise *sp is 0.
+ */
+void insert_int (ISAMB b, struct ISAMB_block *p, const void *new_item,
+                 struct ISAMB_block **sp,
+                 void *split_item, int *split_size)
{
- const char *src;
- char dst[200];
- int no, i;
-
- isamb_get_block (is, *pos, &page);
- if (!isamb_page_isleaf (page))
+    char *startp = p->bytes + ISAMB_DATA_OFFSET;
+    char *src = startp;
+    char *endp = p->bytes + p->size;
+    int pos;
+    struct ISAMB_block *sub_p1 = 0, *sub_p2 = 0;
+    char sub_item[256];
+    int sub_size;
+
+    *sp = 0;
+
+    decode_ptr (&src, &pos);
+    while (src != endp)
+    {
+        char *entry_start = src;   /* start of this entry's length field */
+        int item_len;
+        int d;
+        decode_ptr (&src, &item_len);
+        d = (*b->method->compare_item)(src, new_item);
+        if (d > 0)
+        {
+            sub_p1 = open_block (b, pos);
+            assert (sub_p1);
+            insert_sub (b, sub_p1, new_item, &sub_p2,
+                        sub_item, &sub_size);
+            /* A separator from a child split belongs directly after the
+               pointer just followed, i.e. before this entry's length
+               field; leaving src behind the length would wedge the new
+               entry between the length and its item. */
+            src = entry_start;
+            break;
+        }
+        src += item_len;
+        decode_ptr (&src, &pos);
+    }
+    if (!sub_p1)
{
- ISAMB_pos subptr;
- src = isamb_page_datalist (page);
- no = isamb_page_get_no (page);
- decodeClientData = (*is->method->code_start)(ISAMC_DECODE);
-
- isamb_read_subptr (&subptr, &src);
- for (i = 0; i<no; i++)
- {
- const char *src0 = src;
-
- (*is->method->code_item)(ISAMC_DECODE, decodeClientData,
- dst, &src);
- if ((*is->method->compare_item)(data, dst) < 0)
- break;
-
- isamb_read_subptr (&subptr, src);
- }
- isamb_insert_sub (is, subptr, data);
- *pos = subptr;
- (*is->method->code_stop)(ISAMC_DECODE, decodeClientData);
+        /* no greater item found: descend into the last pointer */
+        sub_p1 = open_block (b, pos);
+        assert (sub_p1);
+        insert_sub (b, sub_p1, new_item, &sub_p2,
+                    sub_item, &sub_size);
}
+    if (sub_p2)
+    {
+        /* the child was split: rebuild this block with the new entry */
+        char dst_buf[2048];
+        char *dst = dst_buf;
+
+        assert (sub_size < 20);
+
+        memcpy (dst, startp, src - startp);
+
+        dst += src - startp;
+
+        encode_ptr (&dst, sub_size);      /* sub length and item */
+        memcpy (dst, sub_item, sub_size);
+        dst += sub_size;
+
+        encode_ptr (&dst, sub_p2->pos);   /* pos */
+
+        if (endp - src)                   /* remaining data */
+        {
+            memcpy (dst, src, endp - src);
+            dst += endp - src;
+        }
+        p->size = dst - dst_buf + ISAMB_DATA_OFFSET;
+        if (p->size <= b->head.block_size)
+        {
+            memcpy (startp, dst_buf, dst - dst_buf);
+        }
+        else
+        {
+            /* overflow: keep whole entries up to about half a block in p,
+               hand the next item to the caller as separator and move the
+               rest to a new internal block */
+            int p_new_size;
+            char *half;
+            src = dst_buf;
+            endp = dst;
+
+            half = src + b->head.block_size/2;
+            decode_ptr (&src, &pos);
+            while (src <= half)
+            {
+                decode_ptr (&src, split_size);
+                src += *split_size;
+                decode_ptr (&src, &pos);
+            }
+            p_new_size = src - dst_buf;
+            memcpy (p->bytes + ISAMB_DATA_OFFSET, dst_buf, p_new_size);
+            p_new_size += ISAMB_DATA_OFFSET;
+
+            decode_ptr (&src, split_size);
+            memcpy (split_item, src, *split_size);
+            src += *split_size;
+
+            *sp = new_block (b, 0);
+            (*sp)->size = endp - src;
+            memcpy ((*sp)->bytes+ISAMB_DATA_OFFSET, src, (*sp)->size);
+            (*sp)->size += ISAMB_DATA_OFFSET;
+
+            yaz_log (LOG_LOG, "i split %d -> %d %d",
+                     p->size, p_new_size, (*sp)->size);
+            p->size = p_new_size;
+        }
+        p->dirty = 1;
+        close_block (b, sub_p2);
+    }
+    close_block (b, sub_p1);
+}
+
+void insert_sub (ISAMB b, struct ISAMB_block *p, const void *new_item,
+ struct ISAMB_block **sp,
+ void *sub_item, int *sub_size)
+{ /* dispatches on the block kind: insert_leaf() for leaves, insert_int() otherwise */
+ if (p->leaf)
+ insert_leaf (b, p, new_item, sp, sub_item, sub_size);
+ else
+ insert_int (b, p, new_item, sp, sub_item, sub_size);
+}
+
+int isamb_insert_one (ISAMB b, const void *item, ISAMC_P pos)
+{ /* Inserts item into the tree rooted at block pos (a new leaf is created
+ * when pos is 0). Returns the root block number afterwards, which differs
+ * from pos when a new root had to be created, or -1 if no root block
+ * could be obtained. */
+ struct ISAMB_block *p, *sp = 0;
+ char sub_item[256];
+ int sub_size;
+
+ if (!pos)
+ p = new_block (b, 1);
+ else
+ p = open_block (b, pos);
+ if (!p)
+ return -1;
+
+ insert_sub (b, p, item, &sp, sub_item, &sub_size);
+ if (sp) /* the root was split into p and sp */
+ { /* increase level of tree by one */
+ struct ISAMB_block *p2 = new_block (b, 0);
+ char *dst = p2->bytes + p2->size;
+
+ encode_ptr (&dst, p->pos); /* new root content: left pointer, separator length and item, right pointer */
+ assert (sub_size < 20);
+ encode_ptr (&dst, sub_size);
+ memcpy (dst, sub_item, sub_size);
+ dst += sub_size;
+ encode_ptr (&dst, sp->pos);
+
+ p2->size = dst - (char*) p2->bytes;
+ pos = p2->pos; /* return new super page */
+ close_block (b, sp);
+ close_block (b, p2);
+ }
+ else
+ pos = p->pos; /* return current one (again) */
+ close_block (b, p);
+ return pos;
+}
+
+ISAMB_P isamb_merge (ISAMB b, ISAMB_P pos, ISAMC_I data) /* Reads items from data->read_item() until it returns 0 and inserts each
+ * one into the tree rooted at pos; returns the resulting root position. */
+{
+ int i_mode; /* NOTE(review): filled in by read_item() but never examined here, so every item read is inserted */
+ char item_buf[256];
+ char *item_ptr = item_buf;
+ while ((*data->read_item)(data->clientData, &item_ptr, &i_mode))
{
- src = isamb_page_datalist (page);
- no = isamb_page_get_no (page);
- decodeClientData = (*is->method->code_start)(ISAMC_DECODE);
- diff = -1;
- for (i = 0; i<no; i++)
- {
- int diff;
- (*is->method->code_item)(ISAMC_DECODE, decodeClientData,
- dst, &src);
- diff = (*is->method->compare_item)(data, dst);
- if (diff <= 0)
- break;
- }
- if (diff < 0)
- {
- int j;
- src = isamb_page_datalist (page);
- page2 = isamb_page_dup (is, page);
- dst2 = isamb_page_datalist (page2);
- src2 = data;
- for (j = 0; j <= no; j++)
- {
- if ( i == j)
- (*is->method->code_item)(ISAMC_ENCODE, encodeClientData,
- &dst2, &src2);
- if (j < no)
- {
- char *dst0 = dst;
- (*is->method->code_item)(ISAMC_DECODE, decodeClientData,
- &dst, &src);
- (*is->method->code_item)(ISAMC_ENCODE, encodeClientData,
- &dst2, &dst0);
- }
- }
- }
+ item_ptr = item_buf; /* point back at the buffer start for the next read */
+ pos = isamb_insert_one (b, item_buf, pos); /* NOTE(review): a -1 result would be used as pos on the next round */
}
+ return pos;
}
-/* insert data(input) in table is(input) at pos(input/output) */
-int isamb_insert (ISAMB is, ISAMB_pos *pos, const void *data)
+
+/*
+ * Opens a read cursor on the tree rooted at block pos and descends along
+ * the first pointer of each internal block until a leaf is reached, so the
+ * first isamb_pp_read() yields the first item.
+ *
+ * When open_block() returns 0 (pos is 0) the corresponding slot is left
+ * null; isamb_pp_read() then reports end of data and isamb_pp_close()
+ * skips the null block.
+ */
+ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos)
{
- void *decodeClientData;
+    ISAMB_PP pp = xmalloc (sizeof(*pp));
+    int no_slots = 10;              /* capacity of pp->block[] */
- Bpage page;
- if (*pos == 0)
+    pp->isamb = isamb;
+    pp->block = xmalloc (no_slots * sizeof(*pp->block));
+
+    pp->level = 0;
+    while (1)
{
- *pos = isamb_alloc_block (is, 1024, &page);
- isamb_page_set_leaf (page);
- isamb_page_set_no (page, 0);
+        struct ISAMB_block *p = open_block (isamb, pos);
+        char *src;
+
+        pp->block[pp->level] = p;
+        if (!p)                     /* nothing stored at pos */
+            break;
+        if (p->bytes[0])            /* leaf */
+            break;
+
+        /* one slot is needed for the child and one for the terminating
+           null stored after the loop */
+        assert (pp->level + 2 < no_slots);
+
+        src = (char *) p->bytes + p->offset;
+        decode_ptr (&src, &pos);
+        p->offset = src - (char*) p->bytes;
+        pp->level++;
}
- else /* find leaf ... */
+    pp->block[pp->level+1] = 0;
+    return pp;
+}
+
+void isamb_pp_close (ISAMB_PP pp)
+{ /* closes every block from index 0 up to pp->level and frees the cursor; a null pp is ignored */
+ int i;
+ if (!pp)
+ return;
+ for (i = 0; i <= pp->level; i++)
+ close_block (pp->isamb, pp->block[i]);
+ xfree (pp->block);
+ xfree (pp);
+}
+
+int isamb_pp_read (ISAMB_PP pp, void *buf)
+{ /* Decodes the next item of the cursor into buf. Returns 1 when an item
+ * was stored, 0 when there are no more items (or the current block is null). */
+ char *dst = buf;
+ char *src;
+ struct ISAMB_block *p = pp->block[pp->level];
+ if (!p)
+ return 0;
+
+ while (p->offset == p->size) /* current block fully consumed */
{
- isamb_insert_sub (is, pos, const void *data);
+ int pos, item_len;
+ while (p->offset == p->size) /* climb until a block with unread entries is found */
+ {
+ if (pp->level == 0)
+ return 0;
+ close_block (pp->isamb, pp->block[pp->level]);
+ pp->block[pp->level] = 0;
+ (pp->level)--;
+ p = pp->block[pp->level];
+ assert (p->bytes[0] == 0); /* must be int */
+ }
+ src = p->bytes + p->offset;
+
+ decode_ptr (&src, &item_len); /* skip the separator item, then take the pointer after it */
+ src += item_len;
+ decode_ptr (&src, &pos);
+
+ p->offset = src - (char*) p->bytes;
+ ++(pp->level);
+
+ while (1) /* descend along first pointers until a leaf is open */
+ {
+ pp->block[pp->level] = p = open_block (pp->isamb, pos);
+
+ if (p->bytes[0]) /* leaf */
+ {
+ break;
+ }
+ src = p->bytes + p->offset;
+ decode_ptr (&src, &pos);
+ p->offset = src - (char*) p->bytes;
+ pp->level++;
+ }
}
+ assert (p->offset < p->size);
+ assert (p->bytes[0]);
+ src = p->bytes + p->offset;
+ (*pp->isamb->method->code_item)(ISAMC_DECODE, p->decodeClientData,
+ &dst, &src);
+ p->offset = src - (char*) p->bytes;
+ return 1;
+}
+
+int isamb_pp_num (ISAMB_PP pp)
+{ /* NOTE(review): always returns 1 and ignores pp - looks like a placeholder for an item count */
+ return 1;
}
-#endif
-## $Id: Makefile.am,v 1.3 2002-04-09 15:24:13 heikki Exp $
+## $Id: Makefile.am,v 1.4 2002-04-16 22:31:42 adam Exp $
noinst_LIBRARIES = librset.a
librset_a_SOURCES = rset.c rstemp.c rsisam.c rsnull.c rsbool.c rsbetween.c \
- rsisamc.c rsm_or.c rsisams.c rsisamd.c
+ rsisamc.c rsm_or.c rsisams.c rsisamd.c rsisamb.c
INCLUDES = -I$(srcdir)/../include @YAZINC@
--- /dev/null
+/*
+ * Copyright (C) 1994-2002, Index Data
+ * All rights reserved.
+ *
+ * $Id: rsisamb.c,v 1.1 2002-04-16 22:31:42 adam Exp $
+ */
+
+
+#include <stdio.h>
+#include <assert.h>
+#include <zebrautl.h>
+#include <rsisamb.h>
+
+static void *r_create(RSET ct, const struct rset_control *sel, void *parms);
+static RSFD r_open (RSET ct, int flag);
+static void r_close (RSFD rfd);
+static void r_delete (RSET ct);
+static void r_rewind (RSFD rfd);
+static int r_count (RSET ct);
+static int r_read (RSFD rfd, void *buf, int *term_index);
+static int r_write (RSFD rfd, const void *buf);
+
+/* Method table for ISAMB-backed result sets, exported as rset_kind_isamb. */
+static const struct rset_control control =
+{
+    "isamb",    /* kind name; previously "isamc", which names a different rset kind */
+    r_create,
+    r_open,
+    r_close,
+    r_delete,
+    r_rewind,
+    r_count,
+    r_read,
+    r_write,
+};
+
+const struct rset_control *rset_kind_isamb = &control;
+
+struct rset_pp_info { /* per-open-handle state; this is what r_open() returns as RSFD */
+ ISAMB_PP pt; /* ISAMB read cursor */
+ struct rset_pp_info *next; /* next handle in info->ispt_list */
+ struct rset_isamb_info *info; /* owning result set state */
+ int *countp; /* points at the term's count field, incremented in r_read() */
+ void *buf; /* copy of the last key that was counted (key_size bytes) */
+};
+
+struct rset_isamb_info { /* per-result-set state created by r_create() */
+ ISAMB is; /* ISAMB handle to read from */
+ ISAMB_P pos; /* position given to isamb_pp_open() on every r_open() */
+ int key_size; /* size in bytes of one key */
+ int (*cmp)(const void *p1, const void *p2); /* key comparison callback */
+ struct rset_pp_info *ispt_list; /* list of currently open handles */
+};
+
+static void *r_create(RSET ct, const struct rset_control *sel, void *parms)
+{ /* builds the rset_isamb_info for a new set from the rset_isamb_parms in parms and returns it */
+ rset_isamb_parms *pt = (rset_isamb_parms *) parms;
+ struct rset_isamb_info *info;
+
+ ct->flags |= RSET_FLAG_VOLATILE;
+ info = (struct rset_isamb_info *) xmalloc (sizeof(*info));
+ info->is = pt->is;
+ info->pos = pt->pos;
+ info->key_size = pt->key_size;
+ info->cmp = pt->cmp;
+ info->ispt_list = NULL; /* no open handles yet */
+ ct->no_rset_terms = 1; /* the set carries exactly one term */
+ ct->rset_terms = (RSET_TERM *) xmalloc (sizeof(*ct->rset_terms));
+ ct->rset_terms[0] = pt->rset_term; /* released again in r_delete() */
+ return info;
+}
+
+RSFD r_open (RSET ct, int flag)
+{ /* opens a read handle on the set; returns NULL when RSETF_WRITE is requested */
+ struct rset_isamb_info *info = (struct rset_isamb_info *) ct->buf;
+ struct rset_pp_info *ptinfo;
+
+ logf (LOG_DEBUG, "risamb_open");
+ if (flag & RSETF_WRITE)
+ {
+ logf (LOG_FATAL, "ISAMB set type is read-only");
+ return NULL;
+ }
+ ptinfo = (struct rset_pp_info *) xmalloc (sizeof(*ptinfo));
+ ptinfo->next = info->ispt_list; /* push the new handle onto the open-handle list */
+ info->ispt_list = ptinfo;
+ ptinfo->pt = isamb_pp_open (info->is, info->pos);
+ ptinfo->info = info;
+ if (ct->rset_terms[0]->nn < 0) /* nn not set yet: take it from isamb_pp_num() */
+ ct->rset_terms[0]->nn = isamb_pp_num (ptinfo->pt);
+ ct->rset_terms[0]->count = 0; /* counting restarts with every open */
+ ptinfo->countp = &ct->rset_terms[0]->count;
+ ptinfo->buf = xmalloc (info->key_size);
+ return ptinfo;
+}
+
+static void r_close (RSFD rfd)
+{ /* unlinks rfd from its set's open-handle list and frees it; logs and asserts if rfd is not in the list */
+ struct rset_isamb_info *info = ((struct rset_pp_info*) rfd)->info;
+ struct rset_pp_info **ptinfop;
+
+ for (ptinfop = &info->ispt_list; *ptinfop; ptinfop = &(*ptinfop)->next)
+ if (*ptinfop == rfd)
+ {
+ xfree ((*ptinfop)->buf);
+ isamb_pp_close ((*ptinfop)->pt);
+ *ptinfop = (*ptinfop)->next; /* unlink before freeing */
+ xfree (rfd);
+ return;
+ }
+ logf (LOG_FATAL, "r_close but no rfd match!");
+ assert (0);
+}
+
+static void r_delete (RSET ct)
+{ /* frees the set's term, term array and private state; asserts that no handles are still open */
+ struct rset_isamb_info *info = (struct rset_isamb_info *) ct->buf;
+
+ logf (LOG_DEBUG, "rsisamb_delete");
+ assert (info->ispt_list == NULL);
+ rset_term_destroy (ct->rset_terms[0]);
+ xfree (ct->rset_terms);
+ xfree (info);
+}
+
+static void r_rewind (RSFD rfd)
+{ /* rewinding is not implemented for this set type: logs and aborts the process */
+ logf (LOG_DEBUG, "rsisamb_rewind");
+ abort ();
+}
+
+static int r_count (RSET ct)
+{ /* always reports 0; ct is not examined */
+ return 0;
+}
+
+static int r_read (RSFD rfd, void *buf, int *term_index)
+{ /* reads the next key into buf via isamb_pp_read() and returns its result; *term_index is always set to 0 */
+ struct rset_pp_info *pinfo = (struct rset_pp_info *) rfd;
+ int r;
+ *term_index = 0;
+ r = isamb_pp_read(pinfo->pt, buf);
+ if (r > 0)
+ {
+ if (*pinfo->countp == 0 || (*pinfo->info->cmp)(buf, pinfo->buf) > 1) /* count the first key and every key for which cmp() against the
+ * last counted key exceeds 1 - presumably a change of record; confirm
+ * against the comparison function's return values */
+ {
+ memcpy (pinfo->buf, buf, pinfo->info->key_size);
+ (*pinfo->countp)++;
+ }
+ }
+ return r;
+}
+
+static int r_write (RSFD rfd, const void *buf)
+{ /* writing is not supported: logs the error and returns -1 */
+ logf (LOG_FATAL, "ISAMB set type is read-only");
+ return -1;
+}