/*
- * Copyright (C) 1994-1999, Index Data
+ * Copyright (C) 1994-2001, Index Data
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: extract.c,v $
- * Revision 1.101 2000-05-15 13:02:39 adam
+ * Revision 1.107 2001-05-28 13:58:48 adam
+ * Call flushSortKeys when a record is skipped, so that its sort keys
+ * are not wrongly re-used for whatever record comes in next.
+ *
+ * Revision 1.106 2000/12/05 12:22:53 adam
+ * Termlist source implemented (so that we can index values of XML/SGML
+ * attributes).
+ *
+ * Revision 1.105 2000/12/05 10:01:44 adam
+ * Fixed bug regarding user-defined attribute sets.
+ *
+ * Revision 1.104 2000/09/05 14:04:05 adam
+ * Updates for prefix 'yaz_' for YAZ log functions.
+ *
+ * Revision 1.103 2000/05/18 12:01:36 adam
+ * System call times(2) used again. More 64-bit fixes.
+ *
+ * Revision 1.102 2000/05/15 15:32:33 adam
+ * Added 64 bit file input.
+ *
+ * Revision 1.101 2000/05/15 13:02:39 adam
* Minor change.
*
* Revision 1.100 2000/03/20 19:08:36 adam
#include "zinfo.h"
+/* printf conversion used when logging record offsets (see the
+ * "File:", "add", "delete", "update", "skipped" and "seek to" messages).
+ * "%lld" is the standard conversion for long long; "%Ld" is only defined
+ * for long double and works with integers merely as a libc extension.
+ * NOTE(review): assumes the offset type is long long when
+ * _FILE_OFFSET_BITS is 64 and long otherwise -- confirm per platform. */
+#if _FILE_OFFSET_BITS == 64
+#define PRINTF_OFF_T "%lld"
+#else
+#define PRINTF_OFF_T "%ld"
+#endif
+
#ifndef ZEBRASDR
#define ZEBRASDR 0
#endif
static ZebraExplainInfo zti = NULL;
+
static void logRecord (int showFlag)
{
if (!showFlag)
char *recordCompression;
int record_compression = REC_COMPRESS_NONE;
if (!mem)
- mem = atoi(res_get_def (common_resource, "memMax", "4"))*1024*1024;
+ mem = atoi(res_get_def (common_resource, "memMax", "16"))*1024*1024;
if (mem < 50000)
mem = 50000;
key_buf = (char **) xmalloc (mem);
*dst++ = lead;
+#if SU_SCHEME
+ if ((lead & 3) < 3)
+ {
+ int ch = zebraExplain_lookupSU (zti, attrSet, attrUse);
+ if (ch < 0)
+ {
+ ch = zebraExplain_addSU (zti, attrSet, attrUse);
+ }
+ assert (ch > 0);
+ memcpy (dst, &ch, sizeof(ch));
+ dst += sizeof(ch);
+ }
+#else
if (!(lead & 1))
{
memcpy (dst, &attrSet, sizeof(attrSet));
memcpy (dst, &attrUse, sizeof(attrUse));
dst += sizeof(attrUse);
}
+#endif
*dst++ = p->reg_type;
memcpy (dst, string, length);
dst += length;
while (sk)
{
struct sortKey *sk_next = sk->next;
- sortIdx_type (sortIdx, sk->attrUse);
- sortIdx_add (sortIdx, sk->string, sk->length);
+ if (cmd >= 0)
+ {
+ sortIdx_type (sortIdx, sk->attrUse);
+ sortIdx_add (sortIdx, sk->string, sk->length);
+ }
xfree (sk->string);
xfree (sk);
sk = sk_next;
static void flushRecordKeys (SYSNO sysno, int cmd, struct recKeys *reckeys)
{
+#if SU_SCHEME
+#else
unsigned char attrSet = (unsigned char) -1;
unsigned short attrUse = (unsigned short) -1;
+#endif
int seqno = 0;
int off = 0;
+ int ch = 0;
zebraExplain_recordCountIncrement (zti, cmd ? 1 : -1);
while (off < reckeys->buf_used)
{
const char *src = reckeys->buf + off;
struct it_key key;
- int lead, ch;
+ int lead;
lead = *src++;
+#if SU_SCHEME
+ if ((lead & 3) < 3)
+ {
+ memcpy (&ch, src, sizeof(ch));
+ src += sizeof(ch);
+ }
+#else
if (!(lead & 1))
{
memcpy (&attrSet, src, sizeof(attrSet));
memcpy (&attrUse, src, sizeof(attrUse));
src += sizeof(attrUse);
}
+#endif
if (key_buf_used + 1024 > (ptr_top-ptr_i)*sizeof(char*))
key_flush ();
++ptr_i;
+
key_buf[ptr_top-ptr_i] = (char*)key_buf + key_buf_used;
+#if SU_SCHEME
+#else
ch = zebraExplain_lookupSU (zti, attrSet, attrUse);
if (ch < 0)
+ {
ch = zebraExplain_addSU (zti, attrSet, attrUse);
+ yaz_log (LOG_LOG, "addSU cmd=%d set=%d use=%d SU=%d",
+ cmd, attrSet, attrUse, ch);
+ }
+#endif
assert (ch > 0);
key_buf_used += key_SU_code (ch, ((char*)key_buf) + key_buf_used);
int off = 0;
int startSeq = -1;
int i;
+ int seqno = 0;
+#if SU_SCHEME
+ int chS, ch;
+#else
short attrUse;
char attrSet;
- int seqno = 0;
+#endif
for (i = 0; i<32; i++)
ws[i] = NULL;
-
+
+#if SU_SCHEME
+ chS = zebraExplain_lookupSU (zti, attrSetS, attrUseS);
+ if (chS < 0)
+ return ws;
+#endif
while (off < reckeys->buf_used)
{
int lead;
lead = *src++;
-
+#if SU_SCHEME
+ if ((lead & 3)<3)
+ {
+ memcpy (&ch, src, sizeof(ch));
+ src += sizeof(ch);
+ }
+#else
if (!(lead & 1))
{
memcpy (&attrSet, src, sizeof(attrSet));
memcpy (&attrUse, src, sizeof(attrUse));
src += sizeof(attrUse);
}
+#endif
wstart = src;
while (*src++)
;
memcpy (&seqno, src, sizeof(seqno));
src += sizeof(seqno);
}
- if (attrUseS == attrUse && attrSetS == attrSet)
+ if (
+#if SU_SCHEME
+ ch == chS
+#else
+ attrUseS == attrUse && attrSetS == attrSet
+#endif
+ )
{
int woff;
static void recordLogPreamble (int level, const char *msg, void *info)
{
struct recordLogInfo *p = (struct recordLogInfo *) info;
- FILE *outf = log_file ();
+ FILE *outf = yaz_log_file ();
if (level & LOG_LOG)
return ;
extractCtrl.flagShowRecords = !rGroup->flagRw;
if (!rGroup->flagRw)
- printf ("File: %s %ld\n", fname, (long) recordOffset);
+ printf ("File: %s " PRINTF_OFF_T "\n", fname, recordOffset);
logInfo.fname = fname;
logInfo.recordOffset = recordOffset;
if (rGroup->flagRw &&
records_processed < rGroup->fileVerboseLimit)
{
- logf (LOG_WARN, "fail %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_WARN, "fail %s %s " PRINTF_OFF_T, rGroup->recordType,
+ fname, recordOffset);
}
return 0;
}
if (!rGroup->flagRw)
return 1;
- logf (LOG_WARN, "empty %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_WARN, "empty %s %s " PRINTF_OFF_T, rGroup->recordType,
+ fname, recordOffset);
return 1;
}
}
/* new record */
if (deleteFlag)
{
- logf (LOG_LOG, "delete %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_LOG, "delete %s %s " PRINTF_OFF_T, rGroup->recordType,
+ fname, recordOffset);
logf (LOG_WARN, "cannot delete record above (seems new)");
return 1;
}
if (records_processed < rGroup->fileVerboseLimit)
- logf (LOG_LOG, "add %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_LOG, "add %s %s " PRINTF_OFF_T, rGroup->recordType,
+ fname, recordOffset);
rec = rec_new (records);
*sysno = rec->sysno;
if (recordAttr->runNumber == zebraExplain_runNumberIncrement (zti, 0))
{
- logf (LOG_LOG, "skipped %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_LOG, "skipped %s %s " PRINTF_OFF_T, rGroup->recordType,
+ fname, recordOffset);
+ flushSortKeys (*sysno, -1);
rec_rm (&rec);
logRecord (0);
return 1;
/* record going to be deleted */
if (!delkeys.buf_used)
{
- logf (LOG_LOG, "delete %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_LOG, "delete %s %s " PRINTF_OFF_T,
+ rGroup->recordType, fname, recordOffset);
logf (LOG_WARN, "cannot delete file above, storeKeys false");
}
else
{
if (records_processed < rGroup->fileVerboseLimit)
- logf (LOG_LOG, "delete %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_LOG, "delete %s %s " PRINTF_OFF_T,
+ rGroup->recordType, fname, recordOffset);
records_deleted++;
if (matchStr)
dict_delete (matchDict, matchStr);
/* record going to be updated */
if (!delkeys.buf_used)
{
- logf (LOG_LOG, "update %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_LOG, "update %s %s " PRINTF_OFF_T,
+ rGroup->recordType, fname, recordOffset);
logf (LOG_WARN, "cannot update file above, storeKeys false");
}
else
{
if (records_processed < rGroup->fileVerboseLimit)
- logf (LOG_LOG, "update %s %s %ld", rGroup->recordType,
- fname, (long) recordOffset);
+ logf (LOG_LOG, "update %s %s " PRINTF_OFF_T,
+ rGroup->recordType, fname, recordOffset);
flushRecordKeys (*sysno, 1, &reckeys);
records_updated++;
}
xmalloc (recordAttr->recordSize);
if (lseek (fi->fd, recordOffset, SEEK_SET) < 0)
{
- logf (LOG_ERRNO|LOG_FATAL, "seek to %ld in %s",
- (long) recordOffset, fname);
+ logf (LOG_ERRNO|LOG_FATAL, "seek to " PRINTF_OFF_T " in %s",
+ recordOffset, fname);
exit (1);
}
if (read (fi->fd, rec->info[recInfo_storeData], recordAttr->recordSize)