/*
- * Copyright (C) 1994-1999, Index Data
+ * Copyright (C) 1994-2000, Index Data
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss, Heikki Levanto
*
* (log at the end)
+ *
+ * Bugs
+ * - Allocates a lot of memory for the merge process, but never releases it.
+ * Doesn't matter, as the program terminates soon after.
*/
#include <assert.h>
#include "index.h"
+#include "zserver.h"
#define KEY_SIZE (1+sizeof(struct it_key))
#define INP_NAME_MAX 768
/* handler invoked in each read */
void (*readHandler)(struct key_file *keyp, void *rinfo);
void *readInfo;
+ Res res;
};
-void getFnameTmp (char *fname, int no)
+/*
+ * getFnameTmp: write the path of temporary key file number `no' into
+ * `fname' as "<dir>/key<no>.tmp".  <dir> is the "keyTmpDir" setting looked
+ * up in `res'; "." is passed as the third argument of res_get_def
+ * (presumably the fallback when the setting is absent - confirm against
+ * the res API).
+ * The path is produced with sprintf, so nothing here limits how much is
+ * written: `fname' must be big enough for the directory plus file name.
+ */
+void getFnameTmp (Res res, char *fname, int no)
{
const char *pre;
- pre = res_get_def (common_resource, "keyTmpDir", ".");
+ pre = res_get_def (res, "keyTmpDir", ".");
+ sprintf (fname, "%s/key%d.tmp", pre, no);
+}
+
+/*
+ * extract_get_fname_tmp: same path construction as getFnameTmp, but the
+ * resource handle is taken from the Zebra handle (zh->service->res)
+ * instead of being passed in.  The same unbounded sprintf applies.
+ */
+void extract_get_fname_tmp (ZebraHandle zh, char *fname, int no)
+{
+ const char *pre;
+
+ pre = res_get_def (zh->service->res, "keyTmpDir", ".");
sprintf (fname, "%s/key%d.tmp", pre, no);
}
{
int nr = 0, r = 0, fd;
char fname[1024];
- getFnameTmp (fname, f->no);
+ getFnameTmp (f->res, fname, f->no);
fd = open (fname, O_BINARY|O_RDONLY);
+
+ f->buf_ptr = 0;
+ f->buf_size = 0;
if (fd == -1)
{
- logf (LOG_FATAL|LOG_ERRNO, "cannot open %s", fname);
- exit (1);
+ logf (LOG_WARN|LOG_ERRNO, "cannot open %s", fname);
+ return ;
}
if (!f->length)
{
if ((f->length = lseek (fd, 0L, SEEK_END)) == (off_t) -1)
{
- logf (LOG_FATAL|LOG_ERRNO, "cannot seek %s", fname);
- exit (1);
+ logf (LOG_WARN|LOG_ERRNO, "cannot seek %s", fname);
+ close (fd);
+ return ;
}
}
if (lseek (fd, f->offset, SEEK_SET) == -1)
{
- logf (LOG_FATAL|LOG_ERRNO, "cannot seek %s", fname);
- exit (1);
+ logf (LOG_WARN|LOG_ERRNO, "cannot seek %s", fname);
+ close(fd);
+ return ;
}
while (f->chunk - nr > 0)
{
}
if (r == -1)
{
- logf (LOG_FATAL|LOG_ERRNO, "read of %s", fname);
- exit (1);
+ logf (LOG_WARN|LOG_ERRNO, "read of %s", fname);
+ close (fd);
+ return;
}
f->buf_size = nr;
- f->buf_ptr = 0;
if (f->readHandler)
(*f->readHandler)(f, f->readInfo);
close (fd);
}
-struct key_file *key_file_init (int no, int chunk)
+struct key_file *key_file_init (int no, int chunk, Res res)
{
struct key_file *f;
f = (struct key_file *) xmalloc (sizeof(*f));
+ f->res = res;
f->sysno = 0;
f->seqno = 0;
f->no = no;
int *ptr;
int (*cmp)(const void *p1, const void *p2);
Dict dict;
+ ISAMS isams;
+#if ZMBOL
ISAM isam;
ISAMC isamc;
- ISAMS isams;
- ISAMH isamh;
+ ISAMD isamd;
+#endif
};
struct heap_info *key_heap_init (int nkeys,
return 1;
}
+#if ZMBOL
int heap_inpc (struct heap_info *hi)
{
struct heap_cread_info hci;
return 0;
}
-int heap_inps (struct heap_info *hi)
+int heap_inpd (struct heap_info *hi)
{
struct heap_cread_info hci;
- ISAMS_I isams_i = (ISAMS_I) xmalloc (sizeof(*isams_i));
+ ISAMD_I isamd_i = (ISAMD_I) xmalloc (sizeof(*isamd_i));
hci.key = (char *) xmalloc (KEY_SIZE);
hci.mode = 1;
hci.hi = hi;
hci.more = heap_read_one (hi, hci.cur_name, hci.key);
- isams_i->clientData = &hci;
- isams_i->read_item = heap_cread_item;
+ isamd_i->clientData = &hci;
+ isamd_i->read_item = heap_cread_item;
while (hci.more)
{
char this_name[INP_NAME_MAX];
- ISAMS_P isams_p;
- char *dict_info;
-
- strcpy (this_name, hci.cur_name);
- assert (hci.cur_name[1]);
- no_diffs++;
- if (!(dict_info = dict_lookup (hi->dict, hci.cur_name)))
- {
- isams_p = isams_merge (hi->isams, isams_i);
- no_insertions++;
- dict_insert (hi->dict, this_name, sizeof(ISAMS_P), &isams_p);
- }
- else
- abort();
- }
- xfree (isams_i);
- return 0;
-}
-
-int heap_inph (struct heap_info *hi)
-{
- struct heap_cread_info hci;
- ISAMH_I isamh_i = (ISAMH_I) xmalloc (sizeof(*isamh_i));
-
- hci.key = (char *) xmalloc (KEY_SIZE);
- hci.mode = 1;
- hci.hi = hi;
- hci.more = heap_read_one (hi, hci.cur_name, hci.key);
-
- isamh_i->clientData = &hci;
- isamh_i->read_item = heap_cread_item;
-
- while (hci.more)
- {
- char this_name[INP_NAME_MAX];
- ISAMH_P isamh_p, isamh_p2;
+ ISAMD_P isamd_p, isamd_p2;
char *dict_info;
strcpy (this_name, hci.cur_name);
no_diffs++;
if ((dict_info = dict_lookup (hi->dict, hci.cur_name)))
{
- memcpy (&isamh_p, dict_info+1, sizeof(ISAMH_P));
- isamh_p2 = isamh_append (hi->isamh, isamh_p, isamh_i);
- if (!isamh_p2)
+ memcpy (&isamd_p, dict_info+1, sizeof(ISAMD_P));
+ isamd_p2 = isamd_append (hi->isamd, isamd_p, isamd_i);
+ if (!isamd_p2)
{
no_deletions++;
if (!dict_delete (hi->dict, this_name))
else
{
no_updates++;
- if (isamh_p2 != isamh_p)
+ if (isamd_p2 != isamd_p)
dict_insert (hi->dict, this_name,
- sizeof(ISAMH_P), &isamh_p2);
+ sizeof(ISAMD_P), &isamd_p2);
}
}
else
{
- isamh_p = isamh_append (hi->isamh, 0, isamh_i);
+ isamd_p = isamd_append (hi->isamd, 0, isamd_i);
no_insertions++;
- dict_insert (hi->dict, this_name, sizeof(ISAMH_P), &isamh_p);
+ dict_insert (hi->dict, this_name, sizeof(ISAMD_P), &isamd_p);
}
}
- xfree (isamh_i);
+ xfree (isamd_i);
return 0;
}
return 0;
}
+#endif
+
+/*
+ * heap_inps: feed the keys held in the merge heap `hi' into the ISAMS
+ * index (hi->isams) and record each new entry in the dictionary (hi->dict).
+ *
+ * For every name delivered by the heap:
+ *  - if the name is not yet in the dictionary, isams_merge is called with
+ *    an ISAMS_I whose read_item callback is heap_cread_item, and the
+ *    returned ISAMS_P is stored in the dictionary under that name;
+ *  - if the name is already present, a LOG_FATAL message is logged and
+ *    the loop stops, leaving the remaining keys unprocessed.
+ *
+ * The loop relies on the read_item callback to advance hci.more and
+ * hci.cur_name (presumably done inside heap_cread_item - not visible here).
+ *
+ * Returns 0 in all cases, including the "unsupported update" case.
+ *
+ * Both scratch allocations (the ISAMS_I and the key buffer) are released
+ * before returning; this function can be reached from zebra_index_merge,
+ * which returns to its caller instead of exiting, so the key buffer must
+ * not be left allocated on each merge.
+ */
+int heap_inps (struct heap_info *hi)
+{
+    struct heap_cread_info hci;
+    ISAMS_I isams_i = (ISAMS_I) xmalloc (sizeof(*isams_i));
+
+    hci.key = (char *) xmalloc (KEY_SIZE);
+    hci.mode = 1;
+    hci.hi = hi;
+    hci.more = heap_read_one (hi, hci.cur_name, hci.key);
+
+    isams_i->clientData = &hci;
+    isams_i->read_item = heap_cread_item;
+
+    while (hci.more)
+    {
+        char this_name[INP_NAME_MAX];
+        ISAMS_P isams_p;
+
+        /* keep a private copy: cur_name is shared with the reader state */
+        strcpy (this_name, hci.cur_name);
+        assert (hci.cur_name[1]);
+        no_diffs++;
+        if (!dict_lookup (hi->dict, hci.cur_name))
+        {
+            isams_p = isams_merge (hi->isams, isams_i);
+            no_insertions++;
+            dict_insert (hi->dict, this_name, sizeof(ISAMS_P), &isams_p);
+        }
+        else
+        {
+            /* only first-time insertion is handled for this index type */
+            logf (LOG_FATAL, "isams doesn't support this kind of update");
+            break;
+        }
+    }
+    xfree (isams_i);
+    xfree (hci.key);
+    return 0;
+}
+
struct progressInfo {
time_t startTime;
time_t lastTime;
#define R_OK 4
#endif
-void key_input (BFiles bfs, int nkeys, int cache)
+/*
+ * zebra_index_merge: merge the temporary key files belonging to `zh' into
+ * the dictionary and ISAM structures of zh->service, then delete the
+ * temporary files and log the merge counters.
+ *
+ * zh->key_file_no gives the number of key files.  A negative value means
+ * "unknown": files key1.tmp, key2.tmp, ... are probed with access() until
+ * the first unreadable one, and the function returns at once if none is
+ * found.  On completion zh->key_file_no is reset to 0.
+ *
+ * The key_file array, the key_file objects and the heap are not released
+ * here.
+ */
+void zebra_index_merge (ZebraHandle zh)
+{
+    struct key_file **files;
+    struct heap_info *heap;
+    struct progressInfo progress;
+    char scratch[1024];
+    int file_count = zh->key_file_no;
+    int n, len;
+
+    if (file_count < 0)
+    {
+        char probe[1024];
+
+        /* count consecutive readable key files, starting at number 1 */
+        for (file_count = 0; ; file_count++)
+        {
+            extract_get_fname_tmp (zh, probe, file_count + 1);
+            if (access (probe, R_OK) == -1)
+                break;
+        }
+        if (file_count == 0)
+            return;
+    }
+
+    /* slot 0 is unused: key files are numbered from 1 */
+    files = (struct key_file **) xmalloc ((1 + file_count) * sizeof(*files));
+
+    progress.totalBytes = 0;
+    progress.totalOffset = 0;
+    time (&progress.startTime);
+    time (&progress.lastTime);
+
+    for (n = 1; n <= file_count; n++)
+    {
+        struct key_file *f = key_file_init (n, 8192, zh->service->res);
+
+        f->readHandler = progressFunc;
+        f->readInfo = &progress;
+        progress.totalBytes += f->length;
+        progress.totalOffset += f->buf_size;
+        files[n] = f;
+    }
+
+    heap = key_heap_init (file_count, key_qsort_compare);
+    heap->dict = zh->service->dict;
+    heap->isams = zh->service->isams;
+#if ZMBOL
+    heap->isam = zh->service->isam;
+    heap->isamc = zh->service->isamc;
+    heap->isamd = zh->service->isamd;
+#endif
+
+    /* seed the heap with the first key of every non-empty file */
+    for (n = 1; n <= file_count; n++)
+    {
+        len = key_file_read (files[n], scratch);
+        if (len)
+            key_heap_insert (heap, scratch, len, files[n]);
+    }
+
+    /* exactly one index type is driven; isams takes precedence */
+    if (zh->service->isams)
+        heap_inps (heap);
+#if ZMBOL
+    else if (zh->service->isamc)
+        heap_inpc (heap);
+    else if (zh->service->isam)
+        heap_inp (heap);
+    else if (zh->service->isamd)
+        heap_inpd (heap);
+#endif
+
+    /* the temporary key files are no longer needed */
+    for (n = 1; n <= file_count; n++)
+    {
+        extract_get_fname_tmp (zh, scratch, n);
+        unlink (scratch);
+    }
+
+    logf (LOG_LOG, "Iterations . . .%7d", no_iterations);
+    logf (LOG_LOG, "Distinct words .%7d", no_diffs);
+    logf (LOG_LOG, "Updates. . . . .%7d", no_updates);
+    logf (LOG_LOG, "Deletions. . . .%7d", no_deletions);
+    logf (LOG_LOG, "Insertions . . .%7d", no_insertions);
+    zh->key_file_no = 0;
+}
+
+void key_input (BFiles bfs, int nkeys, int cache, Res res)
{
Dict dict;
+ ISAMS isams = NULL;
+#if ZMBOL
ISAM isam = NULL;
ISAMC isamc = NULL;
- ISAMS isams = NULL;
- ISAMH isamh = NULL;
+ ISAMD isamd = NULL;
+#endif
struct key_file **kf;
char rbuf[1024];
int i, r;
nkeys = 0;
while (1)
{
- getFnameTmp (fname, nkeys+1);
+ getFnameTmp (res, fname, nkeys+1);
if (access (fname, R_OK) == -1)
break;
nkeys++;
logf (LOG_FATAL, "dict_open fail");
exit (1);
}
- if (res_get_match (common_resource, "isam", "s", NULL))
+ if (res_get_match (res, "isam", "s", ISAM_DEFAULT))
{
+ struct ISAMS_M_s isams_m;
isams = isams_open (bfs, FNAME_ISAMS, 1,
- key_isams_m (common_resource));
+ key_isams_m (res, &isams_m));
if (!isams)
{
logf (LOG_FATAL, "isams_open fail");
exit (1);
}
+ logf (LOG_LOG, "isams opened");
}
- else if (res_get_match (common_resource, "isam", "i", NULL))
+#if ZMBOL
+ else if (res_get_match (res, "isam", "i", ISAM_DEFAULT))
{
isam = is_open (bfs, FNAME_ISAM, key_compare, 1,
- sizeof(struct it_key), common_resource);
+ sizeof(struct it_key), res);
if (!isam)
{
logf (LOG_FATAL, "is_open fail");
exit (1);
}
}
- else if (res_get_match (common_resource, "isam", "h", NULL))
+ else if (res_get_match (res, "isam", "d", ISAM_DEFAULT))
{
- isamh = isamh_open (bfs, FNAME_ISAMH, 1,
- key_isamh_m (common_resource));
- if (!isamh)
+ struct ISAMD_M_s isamd_m;
+ isamd = isamd_open (bfs, FNAME_ISAMD, 1,
+ key_isamd_m (res,&isamd_m));
+ if (!isamd)
{
- logf (LOG_FATAL, "isamh_open fail");
+ logf (LOG_FATAL, "isamd_open fail");
exit (1);
}
}
- else
+ else if (res_get_match (res, "isam", "c", ISAM_DEFAULT))
{
+ struct ISAMC_M_s isamc_m;
isamc = isc_open (bfs, FNAME_ISAMC, 1,
- key_isamc_m (common_resource));
+ key_isamc_m (res, &isamc_m));
if (!isamc)
{
logf (LOG_FATAL, "isc_open fail");
exit (1);
}
}
+#endif
kf = (struct key_file **) xmalloc ((1+nkeys) * sizeof(*kf));
progressInfo.totalBytes = 0;
progressInfo.totalOffset = 0;
time (&progressInfo.lastTime);
for (i = 1; i<=nkeys; i++)
{
- kf[i] = key_file_init (i, 32768);
+ kf[i] = key_file_init (i, 8192, res);
kf[i]->readHandler = progressFunc;
kf[i]->readInfo = &progressInfo;
progressInfo.totalBytes += kf[i]->length;
}
hi = key_heap_init (nkeys, key_qsort_compare);
hi->dict = dict;
+ hi->isams = isams;
+#if ZMBOL
hi->isam = isam;
hi->isamc = isamc;
- hi->isams = isams;
- hi->isamh = isamh;
+ hi->isamd = isamd;
+#endif
for (i = 1; i<=nkeys; i++)
if ((r = key_file_read (kf[i], rbuf)))
key_heap_insert (hi, rbuf, r, kf[i]);
- if (isamc)
- heap_inpc (hi);
- else if (isams)
+ if (isams)
heap_inps (hi);
+#if ZMBOL
+ else if (isamc)
+ heap_inpc (hi);
else if (isam)
heap_inp (hi);
- else if (isamh)
- heap_inph (hi);
+ else if (isamd)
+ heap_inpd (hi);
+#endif
dict_close (dict);
+ if (isams)
+ isams_close (isams);
+#if ZMBOL
if (isam)
is_close (isam);
if (isamc)
isc_close (isamc);
- if (isams)
- isams_close (isams);
- if (isamh)
- isamh_close (isamh);
+ if (isamd)
+ isamd_close (isamd);
+#endif
for (i = 1; i<=nkeys; i++)
{
- getFnameTmp (rbuf, i);
+ getFnameTmp (res, rbuf, i);
unlink (rbuf);
}
logf (LOG_LOG, "Iterations . . .%7d", no_iterations);
logf (LOG_LOG, "Updates. . . . .%7d", no_updates);
logf (LOG_LOG, "Deletions. . . .%7d", no_deletions);
logf (LOG_LOG, "Insertions . . .%7d", no_insertions);
+
+ /* xmalloc_trav("unfreed"); while hunting leaks */
}
/*
* $Log: kinput.c,v $
- * Revision 1.35 1999-06-30 15:07:23 heikki
+ * Revision 1.44 2000-05-18 12:01:36 adam
+ * System call times(2) used again. More 64-bit fixes.
+ *
+ * Revision 1.43 2000/03/20 19:08:36 adam
+ * Added remote record import using Z39.50 extended services and Segment
+ * Requests.
+ *
+ * Revision 1.42 1999/12/01 21:58:48 adam
+ * Proper handle of illegal use of isams.
+ *
+ * Revision 1.41 1999/11/30 13:48:03 adam
+ * Improved installation. Updated for inclusion of YAZ header files.
+ *
+ * Revision 1.40 1999/09/08 12:12:39 adam
+ * Removed log message.
+ *
+ * Revision 1.39 1999/08/18 10:39:20 heikki
+ * Added a comment on memory leaks
+ *
+ * Revision 1.38 1999/08/18 08:38:04 heikki
+ * Memory leak hunting
+ *
+ * Revision 1.37 1999/07/14 13:21:34 heikki
+ * Added isam-d files. Compiles (almost) clean. Doesn't work at all
+ *
+ * Revision 1.36 1999/07/14 10:59:26 adam
+ * Changed functions isc_getmethod, isams_getmethod.
+ * Improved fatal error handling (such as missing EXPLAIN schema).
+ *
+ * Revision 1.35 1999/06/30 15:07:23 heikki
* Adding isamh stuff
*
* Revision 1.34 1999/05/26 07:49:13 adam