-/* $Id: extract.c,v 1.209 2006-05-10 08:13:21 adam Exp $
- Copyright (C) 1995-2005
+/* $Id: extract.c,v 1.215 2006-05-19 13:49:34 adam Exp $
+ Copyright (C) 1995-2006
Index Data ApS
This file is part of the Zebra server.
#include <direntz.h>
#include <charmap.h>
-#if _FILE_OFFSET_BITS == 64
-#define PRINTF_OFF_T "%Ld"
+#ifdef WIN32
+#define PRINTF_OFF_T "%I64d"
+#else
+/* !WIN32 */
+#if SIZEOF_OFF_T == SIZEOF_LONG_LONG
+#define PRINTF_OFF_T "%lld"
#else
#define PRINTF_OFF_T "%ld"
#endif
+#endif
+
+
#define USE_SHELLSORT 0
#if USE_SHELLSORT
}
}
+static void extract_add_index_string (RecWord *p, const char *str, int length);
+
static void extract_set_store_data_prepare(struct recExtractCtrl *p);
static void extract_init (struct recExtractCtrl *p, RecWord *w)
{
w->zebra_maps = p->zebra_maps;
w->seqno = 1;
-#if NATTR
-#else
- w->attrSet = VAL_BIB1;
- w->attrUse = 1016;
-#endif
- w->index_name = 0;
+ w->index_name = "any";
w->index_type = 'w';
w->extractCtrl = p;
w->record_id = 0;
static void searchRecordKey(ZebraHandle zh,
zebra_rec_keys_t reckeys,
- int attrSetS, int attrUseS,
+ const char *index_name,
const char **ws, int ws_length)
{
int i;
- int ch;
+ int ch = -1;
for (i = 0; i<ws_length; i++)
ws[i] = NULL;
- ch = zebraExplain_lookup_attr_su_any_index(zh->reg->zei,
- attrSetS, attrUseS);
+ if (ch < 0)
+ ch = zebraExplain_lookup_attr_str(zh->reg->zei, '0', index_name);
+ if (ch < 0)
+ ch = zebraExplain_lookup_attr_str(zh->reg->zei, 'p', index_name);
+ if (ch < 0)
+ ch = zebraExplain_lookup_attr_str(zh->reg->zei, 'w', index_name);
+
if (ch < 0)
return ;
{
const char *ws[32];
char attset_str[64], attname_str[64];
- data1_attset *attset;
int i;
- int attSet = 1, attUse = 1;
int first = 1;
for (s++; strchr(FILE_MATCH_BLANK, *s); s++)
for (; strchr(FILE_MATCH_BLANK, *s); s++)
;
- if (*s == ',')
+ if (*s != ',')
+ strcpy(attname_str, attset_str);
+ else
{
for (s++; strchr(FILE_MATCH_BLANK, *s); s++)
;
attname_str[i++] = *s;
attname_str[i] = '\0';
}
-
- if ((attset = data1_get_attset (zh->reg->dh, attset_str)))
- {
- data1_att *att;
- attSet = attset->reference;
- att = data1_getattbyname(zh->reg->dh, attset, attname_str);
- if (att)
- attUse = att->value;
- else
- attUse = atoi (attname_str);
- }
- searchRecordKey (zh, reckeys, attSet, attUse, ws, 32);
+
+ searchRecordKey (zh, reckeys, attname_str, ws, 32);
if (*s != ')')
{
ctrl->flagShowRecords = !zh->m_flag_rw;
}
+/* Emit one zero-length term for the "allrecords" index (type 'w') through
+   extract_add_index_string(), using a RecWord set up by extract_init(). */
+static void all_matches_add(struct recExtractCtrl *ctrl)
+{
+    RecWord all_rec;
+
+    /* start from what extract_init() provides, then pick the target index */
+    extract_init(ctrl, &all_rec);
+    all_rec.seqno = 1;
+    all_rec.index_type = 'w';
+    all_rec.index_name = "allrecords";
+
+    extract_add_index_string(&all_rec, "", 0);
+}
+
static ZEBRA_RES file_extract_record(ZebraHandle zh,
SYSNO *sysno, const char *fname,
int deleteFlag,
RecType recType,
void *recTypeClientData)
{
+ const char *match_str_to_print = "";
RecordAttr *recordAttr;
int r;
const char *matchStr = 0;
/* we are going to read from a file, so prepare the extraction */
zebra_rec_keys_reset(zh->reg->keys);
-#if NATTR
zebra_rec_keys_reset(zh->reg->sortKeys);
-#else
- zh->reg->sortKeys.buf_used = 0;
-#endif
recordOffset = fi->file_moffset;
extractCtrl.handle = zh;
extractCtrl.offset = fi->file_moffset;
}
return ZEBRA_FAIL;
}
+ all_matches_add(&extractCtrl);
if (extractCtrl.match_criteria[0])
- matchStr = extractCtrl.match_criteria;
+ matchStr = extractCtrl.match_criteria;
}
- /* perform match if sysno not known and if match criteria is specified */
+ /* If matchStr is set at this point, we assume it is printable.
+ For an internal matchStr (see below) nothing is printed. */
+ if (matchStr)
+ match_str_to_print = matchStr;
+
+ /* perform internal match if sysno not known and if match criteria is
+ specified already */
if (!sysno)
{
sysnotmp = 0;
if (!matchStr)
{
yaz_log(YLOG_WARN, "Bad match criteria");
+
+ if (zebra_rec_keys_empty(zh->reg->keys))
+ {
+ yaz_log(YLOG_WARN, "And no index keys");
+ }
return ZEBRA_FAIL;
}
}
*sysno = rec->sysno;
if (zh->records_processed < zh->m_file_verbose_limit)
- if (matchStr)
+ {
yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T
" " ZINT_FORMAT " %s" ,
zh->m_record_type,
- fname, recordOffset, *sysno, matchStr);
- else
- yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T
- " " ZINT_FORMAT ,
- zh->m_record_type,
- fname, recordOffset, *sysno);
-
+ fname, recordOffset, *sysno, match_str_to_print);
+ }
recordAttr = rec_init_attr (zh->reg->zei, rec);
recordAttr->staticrank = extractCtrl.staticrank;
}
-#if NATTR
extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
-#else
- extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
-#endif
extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
recordAttr->staticrank);
zh->records_inserted++;
/* record already exists */
zebra_rec_keys_t delkeys = zebra_rec_keys_open();
-#if NATTR
zebra_rec_keys_t sortKeys = zebra_rec_keys_open();
-#else
- struct sortKeys sortKeys;
-#endif
rec = rec_get (zh->reg->records, *sysno);
assert (rec);
rec->size[recInfo_delKeys],
0);
-#if NATTR
zebra_rec_keys_set_buf(sortKeys,
rec->info[recInfo_sortKeys],
rec->size[recInfo_sortKeys],
0);
extract_flushSortKeys (zh, *sysno, 0, sortKeys);
-#else
- sortKeys.buf_used = rec->size[recInfo_sortKeys];
- sortKeys.buf = rec->info[recInfo_sortKeys];
- extract_flushSortKeys (zh, *sysno, 0, &sortKeys);
-#endif
-
extract_flushRecordKeys (zh, *sysno, 0, delkeys,
recordAttr->staticrank); /* old values */
if (deleteFlag)
else
{
if (zh->records_processed < zh->m_file_verbose_limit)
- if (matchStr)
+ {
yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T
" " ZINT_FORMAT " %s" ,
zh->m_record_type,
- fname, recordOffset, *sysno, matchStr);
- else
- yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T
- " " ZINT_FORMAT ,
- zh->m_record_type,
- fname, recordOffset, *sysno);
-
-
-
+ fname, recordOffset, *sysno, match_str_to_print);
+ }
zh->records_deleted++;
if (matchStr)
{
{
/* flush new keys for sort&search etc */
if (zh->records_processed < zh->m_file_verbose_limit)
- if (matchStr)
- yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T
- " " ZINT_FORMAT " %s" ,
- zh->m_record_type,
- fname, recordOffset, *sysno, matchStr);
- else
- yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T
- " " ZINT_FORMAT ,
- zh->m_record_type,
- fname, recordOffset, *sysno);
-
+ {
+ yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T
+ " " ZINT_FORMAT " %s" ,
+ zh->m_record_type,
+ fname, recordOffset, *sysno, match_str_to_print);
+ }
recordAttr->staticrank = extractCtrl.staticrank;
-#if NATTR
extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
-#else
- extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
-#endif
extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
recordAttr->staticrank);
zh->records_updated++;
}
zebra_rec_keys_close(delkeys);
-#if NATTR
zebra_rec_keys_close(sortKeys);
-#endif
}
/* update file type */
xfree (rec->info[recInfo_fileType]);
/* update sort keys */
xfree (rec->info[recInfo_sortKeys]);
-#if NATTR
zebra_rec_keys_get_buf(zh->reg->sortKeys,
&rec->info[recInfo_sortKeys],
&rec->size[recInfo_sortKeys]);
-#else
- rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
- rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
- zh->reg->sortKeys.buf = NULL;
- zh->reg->sortKeys.buf_max = 0;
-#endif
/* save file size of original record */
zebraExplain_recordBytesIncrement (zh->reg->zei,
extractCtrl.fh = &fc;
zebra_rec_keys_reset(zh->reg->keys);
-
-#if NATTR
zebra_rec_keys_reset(zh->reg->sortKeys);
-#else
- zh->reg->sortKeys.buf_used = 0;
-#endif
+
if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0]))
{
if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0],
return ZEBRA_FAIL;
}
+ all_matches_add(&extractCtrl);
+
if (extractCtrl.match_criteria[0])
match_criteria = extractCtrl.match_criteria;
}
-#if NATTR
extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
-#else
- extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
-#endif
-
-#if 0
- print_rec_keys(zh, zh->reg->keys);
-#endif
extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
recordAttr->staticrank);
zh->records_inserted++;
{
/* record already exists */
zebra_rec_keys_t delkeys = zebra_rec_keys_open();
-#if NATTR
zebra_rec_keys_t sortKeys = zebra_rec_keys_open();
-#else
- struct sortKeys sortKeys;
-#endif
-
if (!allow_update)
{
yaz_log (YLOG_LOG, "skipped %s %s %ld",
rec->info[recInfo_delKeys],
rec->size[recInfo_delKeys],
0);
-#if NATTR
zebra_rec_keys_set_buf(sortKeys,
rec->info[recInfo_sortKeys],
rec->size[recInfo_sortKeys],
0);
-#else
- sortKeys.buf_used = rec->size[recInfo_sortKeys];
- sortKeys.buf = rec->info[recInfo_sortKeys];
-#endif
-#if NATTR
extract_flushSortKeys (zh, *sysno, 0, sortKeys);
-#else
- extract_flushSortKeys (zh, *sysno, 0, &sortKeys);
-#endif
extract_flushRecordKeys (zh, *sysno, 0, delkeys,
recordAttr->staticrank);
if (delete_flag)
yaz_log (YLOG_LOG, "update %s %s %ld", recordType,
pr_fname, (long) recordOffset);
recordAttr->staticrank = extractCtrl.staticrank;
-#if NATTR
extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
-#else
- extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
-#endif
extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
recordAttr->staticrank);
zh->records_updated++;
}
zebra_rec_keys_close(delkeys);
-#if NATTR
zebra_rec_keys_close(sortKeys);
-#endif
}
/* update file type */
xfree (rec->info[recInfo_fileType]);
/* update sort keys */
xfree (rec->info[recInfo_sortKeys]);
-#if NATTR
zebra_rec_keys_get_buf(zh->reg->sortKeys,
&rec->info[recInfo_sortKeys],
&rec->size[recInfo_sortKeys]);
-#else
- rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
- rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
- zh->reg->sortKeys.buf = NULL;
- zh->reg->sortKeys.buf_max = 0;
-#endif
/* save file size of original record */
zebraExplain_recordBytesIncrement (zh->reg->zei,
}
zebra_rec_keys_reset(zh->reg->keys);
-
-#if NATTR
zebra_rec_keys_reset(zh->reg->sortKeys);
-#else
- zh->reg->sortKeys.buf_used = 0;
-#endif
+
extractCtrl.init = extract_init;
extractCtrl.tokenAdd = extract_token_add;
extractCtrl.schemaAdd = extract_schema_add;
{
zebra_rec_keys_t delkeys = zebra_rec_keys_open();
-#if NATTR
zebra_rec_keys_t sortkeys = zebra_rec_keys_open();
-#else
- struct sortKeys sortkeys;
-#endif
zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys],
rec->size[recInfo_delKeys],
0);
extract_flushRecordKeys (zh, rec->sysno, 0, delkeys, 0);
zebra_rec_keys_close(delkeys);
-#if NATTR
+
zebra_rec_keys_set_buf(sortkeys, rec->info[recInfo_sortKeys],
rec->size[recInfo_sortKeys],
0);
extract_flushSortKeys (zh, rec->sysno, 0, sortkeys);
zebra_rec_keys_close(sortkeys);
-#else
- sortkeys.buf_used = rec->size[recInfo_sortKeys];
- sortkeys.buf = rec->info[recInfo_sortKeys];
- extract_flushSortKeys (zh, rec->sysno, 0, &sortkeys);
-#endif
}
extract_flushRecordKeys (zh, rec->sysno, 1, zh->reg->keys, 0);
-#if NATTR
extract_flushSortKeys (zh, rec->sysno, 1, zh->reg->sortKeys);
-#else
- extract_flushSortKeys (zh, rec->sysno, 1, &zh->reg->sortKeys);
-#endif
xfree (rec->info[recInfo_delKeys]);
zebra_rec_keys_get_buf(zh->reg->keys,
&rec->size[recInfo_delKeys]);
xfree (rec->info[recInfo_sortKeys]);
-#if NATTR
zebra_rec_keys_get_buf(zh->reg->sortKeys,
&rec->info[recInfo_sortKeys],
&rec->size[recInfo_sortKeys]);
-#else
- rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
- rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
- zh->reg->sortKeys.buf = NULL;
- zh->reg->sortKeys.buf_max = 0;
-#endif
return 0;
}
+/* Count the keys held in reckeys per ordinal (mem[0] of each key) and hand
+   every count to zebraExplain_ord_adjust_occurrences(): as (+count, +1)
+   when is_insert is non-zero, as (-count, -1) otherwise.
+   Nothing is reported when zebra_rec_keys_rewind() returns zero. */
+void extract_rec_keys_adjust(ZebraHandle zh, int is_insert,
+                             zebra_rec_keys_t reckeys)
+{
+    /* one list node per distinct ordinal seen */
+    struct ord_stat {
+        int no;
+        int ord;
+        struct ord_stat *next;
+    };
+    ZebraExplainInfo zei = zh->reg->zei;
+    const int sign = is_insert ? 1 : -1;
+    struct ord_stat *head = 0;
+    struct ord_stat *node;
+    const char *term;
+    size_t term_len;
+    struct it_key key;
+
+    if (!zebra_rec_keys_rewind(reckeys))
+        return;
+
+    /* first pass: tally keys per ordinal */
+    while (zebra_rec_keys_read(reckeys, &term, &term_len, &key))
+    {
+        int ord = key.mem[0];
+
+        node = head;
+        while (node && node->ord != ord)
+            node = node->next;
+
+        if (node)
+            node->no++;
+        else
+        {
+            /* first key for this ordinal: push a new node on the front */
+            node = xmalloc(sizeof(*node));
+            node->no = 1;
+            node->ord = ord;
+            node->next = head;
+            head = node;
+        }
+    }
+
+    /* second pass: report each tally and release its node */
+    node = head;
+    while (node)
+    {
+        struct ord_stat *following = node->next;
+
+        zebraExplain_ord_adjust_occurrences(zei, node->ord,
+                                            sign * node->no, sign);
+        xfree(node);
+        node = following;
+    }
+}
+
void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
int cmd,
zebra_rec_keys_t reckeys,
{
ZebraExplainInfo zei = zh->reg->zei;
+ extract_rec_keys_adjust(zh, cmd, reckeys);
+
if (!zh->reg->key_buf)
{
int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8"));
zh->reg->key_buf_used +=
key_SU_encode(ch, (char*)zh->reg->key_buf +
zh->reg->key_buf_used);
-
+
/* copy the 0-terminated stuff from str to output */
memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, str, slen);
zh->reg->key_buf_used += slen;
}
}
-void extract_add_index_string (RecWord *p, const char *str, int length)
+void extract_add_index_string(RecWord *p, const char *str, int length)
{
struct it_key key;
ZebraExplainInfo zei = zh->reg->zei;
int ch;
- if (p->index_name)
- {
- ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name);
- if (ch < 0)
- ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name);
- }
- else
- {
-#if NATTR
- return;
-#else
- ch = zebraExplain_lookup_attr_su(zei, p->index_type,
- p->attrSet, p->attrUse);
- if (ch < 0)
- ch = zebraExplain_add_attr_su(zei, p->index_type,
- p->attrSet, p->attrUse);
-#endif
- }
+ if (!p->index_name)
+ return;
+
+ ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name);
+ if (ch < 0)
+ ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name);
+
key.len = 4;
key.mem[0] = ch;
key.mem[1] = p->record_id;
zebra_rec_keys_write(zh->reg->keys, str, length, &key);
}
-#if NATTR
-static void extract_add_sort_string (RecWord *p, const char *str, int length)
+static void extract_add_sort_string(RecWord *p, const char *str, int length)
{
struct it_key key;
ZebraExplainInfo zei = zh->reg->zei;
int ch;
- if (p->index_name)
- {
- ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name);
- if (ch < 0)
- ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name);
- }
- else
- {
- return;
- }
+ if (!p->index_name)
+ return;
+
+ ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name);
+ if (ch < 0)
+ ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name);
key.len = 4;
key.mem[0] = ch;
key.mem[1] = p->record_id;
key.mem[2] = p->section_id;
key.mem[3] = p->seqno;
- zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
-}
-#else
-static void extract_add_sort_string (RecWord *p, const char *str, int length)
-{
- ZebraHandle zh = p->extractCtrl->handle;
- struct sortKeys *sk = &zh->reg->sortKeys;
- int off = 0;
-
- while (off < sk->buf_used)
+#if 0
+ if (1)
{
- int set, use, slen;
+ char strz[80];
+ int i;
- off += key_SU_decode(&set, (unsigned char *) sk->buf + off);
- off += key_SU_decode(&use, (unsigned char *) sk->buf + off);
- off += key_SU_decode(&slen, (unsigned char *) sk->buf + off);
- off += slen;
- if (p->attrSet == set && p->attrUse == use)
- return;
+ strz[0] = 0;
+ for (i = 0; i<length && i < 20; i++)
+ sprintf(strz+strlen(strz), "%02X", str[i] & 0xff);
+ /* just for debugging .. */
+ yaz_log(YLOG_LOG, "add: set=%d use=%d "
+ "record_id=%lld section_id=%lld seqno=%lld %s",
+ p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno,
+ strz);
}
- assert (off == sk->buf_used);
-
- if (sk->buf_used + IT_MAX_WORD > sk->buf_max)
- {
- char *b;
-
- b = (char *) xmalloc (sk->buf_max += 128000);
- if (sk->buf_used > 0)
- memcpy (b, sk->buf, sk->buf_used);
- xfree (sk->buf);
- sk->buf = b;
- }
- off += key_SU_encode(p->attrSet, sk->buf + off);
- off += key_SU_encode(p->attrUse, sk->buf + off);
- off += key_SU_encode(length, sk->buf + off);
- memcpy (sk->buf + off, str, length);
- sk->buf_used = off + length;
-}
#endif
+ zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
+}
void extract_add_string (RecWord *p, const char *string, int length)
{
zebraExplain_addSchema (zh->reg->zei, oid);
}
-#if NATTR
-#error not done yet with zebra_rec_keys_t
void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno,
- int cmd, struct recKeys *reckeys)
+ int cmd, zebra_rec_keys_t reckeys)
{
- SortIdx sortIdx = zh->reg->sortIdx;
- void *decode_handle = iscz1_start();
- int off = 0;
- int ch = 0;
-
- while (off < reckeys->buf_used)
+ if (zebra_rec_keys_rewind(reckeys))
{
- const char *src = reckeys->buf + off;
- struct it_key key;
- char *dst = (char*) &key;
-
- iscz1_decode(decode_handle, &dst, &src);
- assert(key.len == 4);
-
- ch = (int) key.mem[0]; /* ordinal for field/use/attribute */
-
- sortIdx_type(sortIdx, ch);
- if (cmd == 1)
- sortIdx_add(sortIdx, src, strlen(src));
- else
- sortIdx_add(sortIdx, "", 1);
-
- src += strlen(src);
- src++;
-
- off = src - reckeys->buf;
- }
- assert (off == reckeys->buf_used);
- iscz1_stop(decode_handle);
-}
-#else
-void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno,
- int cmd, struct sortKeys *sk)
-{
- SortIdx sortIdx = zh->reg->sortIdx;
- int off = 0;
+ SortIdx sortIdx = zh->reg->sortIdx;
+ size_t slen;
+ const char *str;
+ struct it_key key_in;
- sortIdx_sysno (sortIdx, sysno);
+ sortIdx_sysno (sortIdx, sysno);
- while (off < sk->buf_used)
- {
- int set, use, slen;
-
- off += key_SU_decode(&set, (unsigned char *) sk->buf + off);
- off += key_SU_decode(&use, (unsigned char *) sk->buf + off);
- off += key_SU_decode(&slen, (unsigned char *) sk->buf + off);
-
- sortIdx_type(sortIdx, use);
- if (cmd == 1)
- sortIdx_add(sortIdx, sk->buf + off, slen);
- else
- sortIdx_add(sortIdx, "", 1);
- off += slen;
+ while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
+ {
+ int ord = (int) key_in.mem[0];
+
+ sortIdx_type(sortIdx, ord);
+ if (cmd == 1)
+ sortIdx_add(sortIdx, str, slen);
+ else
+ sortIdx_add(sortIdx, "", 1);
+ }
}
}
-#endif
void encode_key_init (struct encode_info *i)
{