-/* $Id: extract.c,v 1.201 2006-02-08 13:45:44 adam Exp $
- Copyright (C) 1995-2005
+/* $Id: extract.c,v 1.210 2006-05-10 12:31:08 adam Exp $
+ Copyright (C) 1995-2006
Index Data ApS
This file is part of the Zebra server.
#include <fcntl.h>
#include "index.h"
+#include "orddict.h"
#include <direntz.h>
#include <charmap.h>
ctrl->flagShowRecords = !zh->m_flag_rw;
}
-static int file_extract_record(ZebraHandle zh,
- SYSNO *sysno, const char *fname,
- int deleteFlag,
- struct file_read_info *fi,
- int force_update,
- RecType recType,
- void *recTypeClientData)
+static ZEBRA_RES file_extract_record(ZebraHandle zh,
+ SYSNO *sysno, const char *fname,
+ int deleteFlag,
+ struct file_read_info *fi,
+ int force_update,
+ RecType recType,
+ void *recTypeClientData)
{
RecordAttr *recordAttr;
int r;
{
if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0],
zh->m_explain_database))
- return 0;
+ return ZEBRA_FAIL;
}
if (fi->fd != -1)
yaz_log_init_prefix2 (0);
if (r == RECCTRL_EXTRACT_EOF)
- return 0;
+ return ZEBRA_FAIL;
else if (r == RECCTRL_EXTRACT_ERROR_GENERIC)
{
/* error occurred during extraction ... */
yaz_log (YLOG_WARN, "fail %s %s " PRINTF_OFF_T, zh->m_record_type,
fname, recordOffset);
}
- return 0;
+ return ZEBRA_FAIL;
}
else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER)
{
PRINTF_OFF_T, zh->m_record_type,
fname, recordOffset);
}
- return 0;
+ return ZEBRA_FAIL;
}
if (extractCtrl.match_criteria[0])
matchStr = extractCtrl.match_criteria;
if (!matchStr)
{
yaz_log(YLOG_WARN, "Bad match criteria");
- return 0;
+ return ZEBRA_FAIL;
}
}
if (matchStr)
{
- char *rinfo = dict_lookup (zh->reg->matchDict, matchStr);
+ int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+ char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord,
+ matchStr);
if (rinfo)
{
assert(*rinfo == sizeof(*sysno));
/* the extraction process returned no information - the record
is probably empty - unless flagShowRecords is in use */
if (!zh->m_flag_rw)
- return 1;
+ return ZEBRA_OK;
if (zh->records_processed < zh->m_file_verbose_limit)
yaz_log (YLOG_WARN, "empty %s %s " PRINTF_OFF_T, zh->m_record_type,
fname, recordOffset);
- return 1;
+ return ZEBRA_OK;
}
if (! *sysno)
yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T, zh->m_record_type,
fname, recordOffset);
yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
- return 1;
+ return ZEBRA_OK;
}
- if (zh->records_processed < zh->m_file_verbose_limit)
- yaz_log (YLOG_LOG, "add %s %s " PRINTF_OFF_T, zh->m_record_type,
- fname, recordOffset);
- rec = rec_new (zh->reg->records);
+ rec = rec_new (zh->reg->records);
+
*sysno = rec->sysno;
-
+
+ if (zh->records_processed < zh->m_file_verbose_limit)
+ {
+ if (matchStr)
+ yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T
+ " " ZINT_FORMAT " %s" ,
+ zh->m_record_type,
+ fname, recordOffset, *sysno, matchStr);
+ else
+ yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T
+ " " ZINT_FORMAT ,
+ zh->m_record_type,
+ fname, recordOffset, *sysno);
+ }
recordAttr = rec_init_attr (zh->reg->zei, rec);
recordAttr->staticrank = extractCtrl.staticrank;
if (matchStr)
{
- dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno);
+ int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+ dict_insert_ord(zh->reg->matchDict, db_ord, matchStr,
+ sizeof(*sysno), sysno);
}
+
+
#if NATTR
extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
#else
/* record going to be deleted */
if (zebra_rec_keys_empty(delkeys))
{
- yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T,
- zh->m_record_type, fname, recordOffset);
+ yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T
+ " " ZINT_FORMAT,
+ zh->m_record_type, fname, recordOffset, *sysno);
yaz_log (YLOG_WARN, "cannot delete file above, storeKeys false (1)");
}
else
{
if (zh->records_processed < zh->m_file_verbose_limit)
- yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T,
- zh->m_record_type, fname, recordOffset);
+ {
+ if (matchStr)
+ yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T
+ " " ZINT_FORMAT " %s" ,
+ zh->m_record_type,
+ fname, recordOffset, *sysno, matchStr);
+ else
+ yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T
+ " " ZINT_FORMAT ,
+ zh->m_record_type,
+ fname, recordOffset, *sysno);
+ }
zh->records_deleted++;
if (matchStr)
- dict_delete (zh->reg->matchDict, matchStr);
+ {
+ int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+ dict_delete_ord(zh->reg->matchDict, db_ord, matchStr);
+ }
rec_del (zh->reg->records, &rec);
}
rec_rm (&rec);
logRecord (zh);
- return 1;
+ return ZEBRA_OK;
}
else
{
/* flush new keys for sort&search etc */
if (zh->records_processed < zh->m_file_verbose_limit)
- yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T,
- zh->m_record_type, fname, recordOffset);
+ {
+ if (matchStr)
+ yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T
+ " " ZINT_FORMAT " %s" ,
+ zh->m_record_type,
+ fname, recordOffset, *sysno, matchStr);
+ else
+ yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T
+ " " ZINT_FORMAT ,
+ zh->m_record_type,
+ fname, recordOffset, *sysno);
+ }
recordAttr->staticrank = extractCtrl.staticrank;
#if NATTR
extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
/* commit this record */
rec_put (zh->reg->records, &rec);
logRecord (zh);
- return 1;
+ return ZEBRA_OK;
}
-int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname,
- int deleteFlag)
+ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname,
+ int deleteFlag)
{
- int r, i, fd;
+ ZEBRA_RES r = ZEBRA_OK;
+ int i, fd;
char gprefix[128];
char ext[128];
char ext_res[128];
&recTypeClientData)))
{
yaz_log(YLOG_WARN, "No such record type: %s", zh->m_record_type);
- return 0;
+ return ZEBRA_FAIL;
}
switch(recType->version)
else
strcpy (full_rep, fname);
-
if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1)
{
yaz_log (YLOG_WARN|YLOG_ERRNO, "open %s", full_rep);
zh->m_record_type = original_record_type;
- return 0;
+ return ZEBRA_FAIL;
}
}
fi = file_read_start (fd);
- do
+ while(1)
{
fi->file_moffset = fi->file_offset;
fi->file_more = 0; /* file_end not called (yet) */
fi->file_offset = fi->file_moffset;
lseek(fi->fd, fi->file_moffset, SEEK_SET);
}
+ if (r != ZEBRA_OK)
+ {
+ break;
+ }
+ if (sysno)
+ {
+ break;
+ }
}
- while (r && !sysno);
file_read_stop (fi);
if (fd != -1)
close (fd);
if (!recType)
{
- yaz_log (YLOG_WARN, "No such record type: %s", zh->m_record_type);
+ yaz_log (YLOG_WARN, "No such record type: %s", recordType);
return ZEBRA_FAIL;
}
}
}
}
- if (matchStr) {
- char *rinfo = dict_lookup (zh->reg->matchDict, matchStr);
+ if (matchStr)
+ {
+ int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+ char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord,
+ matchStr);
if (rinfo)
{
assert(*rinfo == sizeof(*sysno));
if (matchStr)
{
- dict_insert (zh->reg->matchDict, matchStr,
- sizeof(*sysno), sysno);
+ int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+ dict_insert_ord(zh->reg->matchDict, db_ord, matchStr,
+ sizeof(*sysno), sysno);
}
+
+
#if NATTR
extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
#else
pr_fname, (long) recordOffset);
zh->records_deleted++;
if (matchStr)
- dict_delete (zh->reg->matchDict, matchStr);
+ {
+ int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+ dict_delete_ord(zh->reg->matchDict, db_ord, matchStr);
+ }
rec_del (zh->reg->records, &rec);
}
rec_rm (&rec);
return 0;
}
+void extract_rec_keys_adjust(ZebraHandle zh, int is_insert,
+ zebra_rec_keys_t reckeys)
+{ /* Count the keys in reckeys per ordinal (key mem[0]) and pass each
+ * tally to zebraExplain_ord_adjust_occurrences(): (+count, +1) when
+ * is_insert is non-zero, (-count, -1) otherwise. Nothing is done when
+ * zebra_rec_keys_rewind() returns zero.
+ * NOTE(review): the last two arguments are presumably a term-occurrence
+ * delta and a document-count delta -- confirm against the declaration of
+ * zebraExplain_ord_adjust_occurrences(). */
+ ZebraExplainInfo zei = zh->reg->zei;
+ struct ord_stat { /* one tally node per distinct ordinal */
+ int no; /* number of keys seen so far with this ordinal */
+ int ord; /* the ordinal, copied from key mem[0] */
+ struct ord_stat *next; /* next node in the singly linked list */
+ };
+
+ if (zebra_rec_keys_rewind(reckeys))
+ {
+ struct ord_stat *ord_list = 0; /* head of the tally list, initially empty */
+ struct ord_stat *p;
+ size_t slen; /* filled in by zebra_rec_keys_read(); not used here */
+ const char *str; /* filled in by zebra_rec_keys_read(); not used here */
+ struct it_key key_in;
+ while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
+ {
+ int ord = key_in.mem[0];
+ /* linear search for an existing tally node for this ordinal */
+ for (p = ord_list; p ; p = p->next)
+ if (p->ord == ord)
+ {
+ p->no++;
+ break;
+ }
+ if (!p) /* search ran off the end: first key with this ordinal */
+ {
+ p = xmalloc(sizeof(*p));
+ p->no = 1;
+ p->ord = ord;
+ p->next = ord_list; /* push the new node onto the head of the list */
+ ord_list = p;
+ }
+ }
+ /* report every tally and release the list nodes along the way */
+ p = ord_list;
+ while (p)
+ {
+ struct ord_stat *p1 = p; /* keep the node so it can be freed after advancing */
+
+ if (is_insert)
+ zebraExplain_ord_adjust_occurrences(zei, p->ord, p->no, 1);
+ else
+ zebraExplain_ord_adjust_occurrences(zei, p->ord, - p->no, -1);
+ p = p->next; /* advance before the node is released */
+ xfree(p1);
+ }
+ }
+}
+
void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
int cmd,
zebra_rec_keys_t reckeys,
{
ZebraExplainInfo zei = zh->reg->zei;
+ extract_rec_keys_adjust(zh, cmd, reckeys);
+
if (!zh->reg->key_buf)
{
int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8"));
zh->reg->key_buf_used +=
key_SU_encode(ch, (char*)zh->reg->key_buf +
zh->reg->key_buf_used);
-
+
/* copy the 0-terminated stuff from str to output */
memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, str, slen);
zh->reg->key_buf_used += slen;
if (zh->m_staticrank) /* rank config enabled ? */
{
+ if (staticrank < 0)
+ {
+ yaz_log(YLOG_WARN, "staticrank = %ld. Setting to 0",
+ (long) staticrank);
+ staticrank = 0;
+ }
*keyp++ = staticrank;
key_out.len = 4;
}
ord = key.mem[0];
zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
- 0/* db */, 0/* set */, 0/* use */);
+ 0/* db */, 0/* set */, 0/* use */,
+ 0 /* string_index */);
assert(index_type);
zebra_term_untrans_iconv(zh, nmem, index_type,
&dst_term, str);
assert(key.len <= 4 && key.len > 2);
zebraExplain_lookup_ord(zh->reg->zei,
- key.mem[0], &index_type, &db, 0, 0);
+ key.mem[0], &index_type, &db, 0, 0, 0);
seqno = (int) key.mem[key.len-1];
i->prevcmd=-1;
i->keylen=0;
i->encode_handle = iscz1_start();
+ i->decode_handle = iscz1_start();
}
#define OLDENCODE 1
/* and copy & align key so we can mangle */
memcpy (&key, k+1, sizeof(struct it_key)); /* *k is insert/delete */
+#if 0
+ /* debugging */
+ key_logdump_txt(YLOG_LOG, &key, *k ? "i" : "d");
+#endif
+ assert(key.mem[0] >= 0);
+
bp0 = bp++;
iscz1_encode(i->encode_handle, &bp, &src);
+
*bp0 = (*k * 128) + bp - bp0 - 1; /* length and insert/delete combined */
if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
{
yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite");
exit (1);
}
+
+#if 0
+ /* debugging */
+ if (1)
+ {
+ struct it_key key2;
+ const char *src = bp0+1;
+ char *dst = (char*) &key2;
+ iscz1_decode(i->decode_handle, &dst, &src);
+
+ key_logdump_txt(YLOG_LOG, &key2, *k ? "i" : "d");
+
+ assert(key2.mem[1]);
+ }
+#endif
}
void encode_key_flush (struct encode_info *i, FILE *outf)
-{ /* dummy routine */
+{ /* calls iscz1_stop() on both handles held in *i; outf is not referenced in this body */
iscz1_stop(i->encode_handle);
+ iscz1_stop(i->decode_handle);
}
#else
i->prevseq=0;
}
#endif
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+