* Sebastian Hammer, Adam Dickmeiss
*
* $Log: extract.c,v $
- * Revision 1.97 1999-07-06 12:28:04 adam
+ * Revision 1.104 2000-09-05 14:04:05 adam
+ * Updates for prefix 'yaz_' for YAZ log functions.
+ *
+ * Revision 1.103 2000/05/18 12:01:36 adam
+ * System call times(2) used again. More 64-bit fixes.
+ *
+ * Revision 1.102 2000/05/15 15:32:33 adam
+ * Added 64 bit file input.
+ *
+ * Revision 1.101 2000/05/15 13:02:39 adam
+ * Minor change.
+ *
+ * Revision 1.100 2000/03/20 19:08:36 adam
+ * Added remote record import using Z39.50 extended services and Segment
+ * Requests.
+ *
+ * Revision 1.99 2000/02/24 10:57:02 adam
+ * Sequence number incremented after each incomplete-field.
+ *
+ * Revision 1.98 1999/09/07 07:19:21 adam
+ * Work on character mapping. Implemented replace rules.
+ *
+ * Revision 1.97 1999/07/06 12:28:04 adam
* Updated record index structure. Format includes version ID. Compression
* algorithm ID is stored for each record block.
*
#include "zinfo.h"
+/* printf-style conversion used when logging file offsets (recordOffset).
+ * "%lld" is used instead of "%Ld": ISO C defines the L length modifier
+ * only for floating-point conversions, so "%Ld" with an integer argument
+ * depends on a libc extension.
+ * NOTE(review): the offset is passed to printf/logf uncast, so this assumes
+ * its type is long long when _FILE_OFFSET_BITS is 64 and long otherwise --
+ * confirm per target platform. */
+#if _FILE_OFFSET_BITS == 64
+#define PRINTF_OFF_T "%lld"
+#else
+#define PRINTF_OFF_T "%ld"
+#endif
+
#ifndef ZEBRASDR
#define ZEBRASDR 0
#endif
char *recordCompression;
int record_compression = REC_COMPRESS_NONE;
if (!mem)
- mem = atoi(res_get_def (common_resource, "memMax", "4"))*1024*1024;
+ mem = atoi(res_get_def (common_resource, "memMax", "16"))*1024*1024;
if (mem < 50000)
mem = 50000;
key_buf = (char **) xmalloc (mem);
logf (LOG_LOG, "sorting section %d", key_file_no);
#if !SORT_EXTRA
qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_qsort_compare);
- getFnameTmp (out_fname, key_file_no);
+ getFnameTmp (common_resource, out_fname, key_file_no);
if (!(outf = fopen (out_fname, "wb")))
{
int rw = rGroup->flagRw;
if (rw)
zebraExplain_runNumberIncrement (zti, 1);
- zebraExplain_close (zti, rw, 0);
+ zebraExplain_close (zti, rw);
key_flush ();
xfree (key_buf);
rec_close (&records);
w->attrSet = VAL_BIB1;
w->attrUse = 1016;
w->reg_type = 'w';
+ w->extractCtrl = p;
}
static struct sortKey {
return;
addString (p, buf, i);
}
+ (p->seqnos[p->reg_type])++; /* to separate this from next one */
}
static void addCompleteField (RecWord *p)
static void addRecordKey (RecWord *p)
{
+ WRBUF wrbuf;
+ if ((wrbuf = zebra_replace(p->zebra_maps, p->reg_type, 0,
+ p->string, p->length)))
+ {
+ p->string = wrbuf_buf(wrbuf);
+ p->length = wrbuf_len(wrbuf);
+ }
if (zebra_maps_is_complete (p->zebra_maps, p->reg_type))
addCompleteField (p);
else
static void recordLogPreamble (int level, const char *msg, void *info)
{
struct recordLogInfo *p = (struct recordLogInfo *) info;
- FILE *outf = log_file ();
+ FILE *outf = yaz_log_file ();
if (level & LOG_LOG)
return ;
extractCtrl.fh = fi;
extractCtrl.subType = subType;
extractCtrl.init = wordInit;
- extractCtrl.addWord = addRecordKey;
- extractCtrl.addSchema = addSchema;
+ extractCtrl.tokenAdd = addRecordKey;
+ extractCtrl.schemaAdd = addSchema;
extractCtrl.dh = rGroup->dh;
for (i = 0; i<256; i++)
{
extractCtrl.flagShowRecords = !rGroup->flagRw;
if (!rGroup->flagRw)
- printf ("File: %s %ld\n", fname, (long) recordOffset);
+ printf ("File: %s " PRINTF_OFF_T "\n", fname, recordOffset);
logInfo.fname = fname;
logInfo.recordOffset = recordOffset;
if (rGroup->flagRw &&
records_processed < rGroup->fileVerboseLimit)
{
- logf (LOG_WARN, "fail %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_WARN, "fail %s %s " PRINTF_OFF_T, rGroup->recordType,
+ fname, recordOffset);
}
return 0;
}
is probably empty - unless flagShowRecords is in use */
if (!rGroup->flagRw)
return 1;
- logf (LOG_WARN, "No keys generated for file %s", fname);
- logf (LOG_WARN, " The file is probably empty");
+
+ logf (LOG_WARN, "empty %s %s " PRINTF_OFF_T, rGroup->recordType,
+ fname, recordOffset);
return 1;
}
}
/* new record */
if (deleteFlag)
{
- logf (LOG_LOG, "delete %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_LOG, "delete %s %s " PRINTF_OFF_T, rGroup->recordType,
+ fname, recordOffset);
logf (LOG_WARN, "cannot delete record above (seems new)");
return 1;
}
if (records_processed < rGroup->fileVerboseLimit)
- logf (LOG_LOG, "add %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_LOG, "add %s %s " PRINTF_OFF_T, rGroup->recordType,
+ fname, recordOffset);
rec = rec_new (records);
*sysno = rec->sysno;
if (recordAttr->runNumber == zebraExplain_runNumberIncrement (zti, 0))
{
- logf (LOG_LOG, "skipped %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_LOG, "skipped %s %s " PRINTF_OFF_T, rGroup->recordType,
+ fname, recordOffset);
rec_rm (&rec);
logRecord (0);
return 1;
/* record going to be deleted */
if (!delkeys.buf_used)
{
- logf (LOG_LOG, "delete %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_LOG, "delete %s %s " PRINTF_OFF_T,
+ rGroup->recordType, fname, recordOffset);
logf (LOG_WARN, "cannot delete file above, storeKeys false");
}
else
{
if (records_processed < rGroup->fileVerboseLimit)
- logf (LOG_LOG, "delete %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_LOG, "delete %s %s " PRINTF_OFF_T,
+ rGroup->recordType, fname, recordOffset);
records_deleted++;
if (matchStr)
dict_delete (matchDict, matchStr);
/* record going to be updated */
if (!delkeys.buf_used)
{
- logf (LOG_LOG, "update %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_LOG, "update %s %s " PRINTF_OFF_T,
+ rGroup->recordType, fname, recordOffset);
logf (LOG_WARN, "cannot update file above, storeKeys false");
}
else
{
if (records_processed < rGroup->fileVerboseLimit)
- logf (LOG_LOG, "update %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_LOG, "update %s %s " PRINTF_OFF_T,
+ rGroup->recordType, fname, recordOffset);
flushRecordKeys (*sysno, 1, &reckeys);
records_updated++;
}
xmalloc (recordAttr->recordSize);
if (lseek (fi->fd, recordOffset, SEEK_SET) < 0)
{
- logf (LOG_ERRNO|LOG_FATAL, "seek to %ld in %s",
- (long) recordOffset, fname);
+ logf (LOG_ERRNO|LOG_FATAL, "seek to " PRINTF_OFF_T " in %s",
+ recordOffset, fname);
exit (1);
}
if (read (fi->fd, rec->info[recInfo_storeData], recordAttr->recordSize)
{
if (zebraExplain_newDatabase (zti, rGroup->databaseName,
rGroup->explainDatabase))
- abort ();
+ return 0;
}
if (rGroup->flagStoreData == -1)
if (rGroup->flagStoreKeys == -1)
rGroup->flagStoreKeys = 0;
-#if ZEBRASDR
- if (rGroup->useSDR)
- {
- ZebraSdrHandle h;
- char xname[128], *xp;
-
- strncpy (xname, fname, 127);
- if (!(xp = strchr (xname, '.')))
- return 0;
- *xp = '\0';
- if (strcmp (xp+1, "sdr.bits"))
- return 0;
-
- h = zebraSdr_open (xname);
- if (!h)
- {
- logf (LOG_WARN, "sdr open %s", xname);
- return 0;
- }
- for (;;)
- {
- unsigned char *buf;
- char sdr_name[128];
- int r, segmentno;
-
- segmentno = zebraSdr_segment (h, 0);
- sprintf (sdr_name, "%%%s.%d", xname, segmentno);
-
-#if 0
- if (segmentno > 20)
- break;
-#endif
- r = zebraSdr_read (h, &buf);
-
- if (!r)
- break;
-
- fi = file_read_start (0);
- fi->sdrbuf = buf;
- fi->sdrmax = r;
- do
- {
- file_begin (fi);
- r = recordExtract (sysno, sdr_name, rGroup, deleteFlag, fi,
- recType, subType);
- } while (r && !sysno && fi->file_more);
- file_read_stop (fi);
- free (buf);
- }
- zebraSdr_close (h);
- return 1;
- }
-#endif
if (sysno && deleteFlag)
fd = -1;
else
reckeys.prevSeqNo = 0;
extractCtrl.init = wordInit;
- extractCtrl.addWord = addRecordKey;
- extractCtrl.addSchema = addSchema;
+ extractCtrl.tokenAdd = addRecordKey;
+ extractCtrl.schemaAdd = addSchema;
extractCtrl.dh = rGroup->dh;
for (i = 0; i<256; i++)
extractCtrl.seqno[i] = 0;