-/* $Id: extract.c,v 1.264 2007-10-29 13:43:57 adam Exp $
+/* $Id: extract.c,v 1.267 2007-10-31 16:56:14 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
}
}
-static void extract_flush_record_keys(ZebraHandle zh, zint sysno,
- int cmd, zebra_rec_keys_t reckeys,
- zint staticrank);
static void extract_flush_sort_keys(ZebraHandle zh, zint sysno,
int cmd, zebra_rec_keys_t skp);
static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid);
static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl)
{
- int i;
- for (i = 0; i<256; i++)
- {
- if (zebra_maps_is_positioned(zh->reg->zebra_maps, i))
- ctrl->seqno[i] = 1;
- else
- ctrl->seqno[i] = 0;
- }
ctrl->flagShowRecords = !zh->m_flag_rw;
}
{
w->seqno = 1;
w->index_name = "any";
- w->index_type = 'w';
+ w->index_type = "w";
w->extractCtrl = p;
w->record_id = 0;
w->section_id = 0;
};
-static void snippet_add_complete_field(RecWord *p, int ord)
+static void snippet_add_complete_field(RecWord *p, int ord,
+ zebra_map_t zm) /* zm: map handed to the zebra_maps_input calls below; replaces the former zh->reg->zebra_maps + p->index_type pair */
{
struct snip_rec_info *h = p->extractCtrl->handle;
- ZebraHandle zh = h->zh;
const char *b = p->term_buf;
char buf[IT_MAX_WORD+1];
const char *last = 0;
if (remain > 0)
- map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 1);
+ map = zebra_maps_input(zm, &b, remain, 1); /* priming read before the loop; only done when remain > 0 */
while (remain > 0 && i < IT_MAX_WORD)
{
{
int first = i ? 0 : 1; /* first position */
- map = zebra_maps_input(zh->reg->zebra_maps, p->index_type,
- &b, remain, first);
+ map = zebra_maps_input(zm, &b, remain, first); /* first is 1 only while i == 0 */
}
else
map = 0;
remain = p->term_len - (b - p->term_buf);
if (remain > 0)
{
- map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b,
- remain, 0);
+ map = zebra_maps_input(zm, &b, remain, 0); /* remain was just recomputed from how far b has advanced */
}
else
map = 0;
start, last - start);
}
-static void snippet_add_incomplete_field(RecWord *p, int ord)
+static void snippet_add_incomplete_field(RecWord *p, int ord, zebra_map_t zm)
{
struct snip_rec_info *h = p->extractCtrl->handle;
- ZebraHandle zh = h->zh;
const char *b = p->term_buf;
int remain = p->term_len;
int first = 1;
const char *last = b;
if (remain > 0)
- map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0);
+ map = zebra_maps_input(zm, &b, remain, 0);
while (map)
{
remain = p->term_len - (b - p->term_buf);
last = b;
if (remain > 0)
- map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b,
- remain, 0);
+ map = zebra_maps_input(zm, &b, remain, 0);
else
map = 0;
}
remain = p->term_len - (b - p->term_buf);
last = b;
if (remain > 0)
- map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0);
+ map = zebra_maps_input(zm, &b, remain, 0);
else
map = 0;
}
if (first)
{
first = 0;
- if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type))
+ if (zebra_maps_is_first_in_field(zm))
{
/* first in field marker */
p->seqno++;
{
struct snip_rec_info *h = p->extractCtrl->handle;
ZebraHandle zh = h->zh;
+ zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, p->index_type);
- if (zebra_maps_is_index(zh->reg->zebra_maps, p->index_type))
+ if (zm && zebra_maps_is_index(zm))
{
ZebraExplainInfo zei = zh->reg->zei;
int ch = zebraExplain_lookup_attr_str(
zei, zinfo_index_category_index, p->index_type, p->index_name);
- if(zebra_maps_is_complete (h->zh->reg->zebra_maps, p->index_type))
- snippet_add_complete_field(p, ch);
+ if (zebra_maps_is_complete(zm))
+ snippet_add_complete_field(p, ch, zm);
else
- snippet_add_incomplete_field(p, ch);
+ snippet_add_incomplete_field(p, ch, zm);
}
}
ws[i] = NULL;
if (ch < 0)
- ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, '0', index_name);
+ ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, "0", index_name);
if (ch < 0)
- ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, 'p', index_name);
+ ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, "p", index_name);
if (ch < 0)
- ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, 'w', index_name);
+ ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, "w", index_name);
if (ch < 0)
return ;
RecWord word;
extract_init(ctrl, &word);
word.index_name = "_ALLRECORDS";
- word.index_type = 'w';
+ word.index_type = "w";
word.seqno = 1;
extract_add_index_string(&word, zinfo_index_category_alwaysmatches,
"", 0);
char keystr[200]; /* room for zints to print */
char *dst_term = 0;
int ord = CAST_ZINT_TO_INT(key.mem[0]);
- int index_type, i;
+ const char *index_type;
+ int i;
const char *string_index;
zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
{
sprintf(dst_buf + strlen(dst_buf), " %d", str[i] & 0xff);
}
- yaz_log(level, "%s%c %s %s", keystr, index_type,
+ yaz_log(level, "%s%s %s %s", keystr, index_type,
string_index, dst_buf);
}
else
- yaz_log(level, "%s%c %s \"%s\"", keystr, index_type,
+ yaz_log(level, "%s%s %s \"%s\"", keystr, index_type,
string_index, dst_term);
nmem_reset(nmem);
yaz_log(log_level_extract, "normal=%d optimized=%d", normal, optimized);
}
-void extract_flush_record_keys(ZebraHandle zh, zint sysno, int cmd,
- zebra_rec_keys_t reckeys,
- zint staticrank)
-{
- ZebraExplainInfo zei = zh->reg->zei;
-
- extract_rec_keys_adjust(zh, cmd, reckeys);
-
- if (log_level_details)
- {
- yaz_log(log_level_details, "Keys for record " ZINT_FORMAT " %s",
- sysno, cmd ? "insert" : "delete");
- extract_rec_keys_log(zh, cmd, reckeys, log_level_details);
- }
-
- if (!zh->reg->key_block)
- {
- int mem = 1024*1024 * atoi( res_get_def( zh->res, "memmax", "8"));
- const char *key_tmp_dir = res_get_def(zh->res, "keyTmpDir", ".");
- int use_threads = atoi(res_get_def(zh->res, "threads", "1"));
- zh->reg->key_block = key_block_create(mem, key_tmp_dir, use_threads);
- }
- zebraExplain_recordCountIncrement(zei, cmd ? 1 : -1);
-
-#if 0
- yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " cmd=%d", sysno, cmd);
- print_rec_keys(zh, reckeys);
-#endif
- if (zebra_rec_keys_rewind(reckeys))
- {
- size_t slen;
- const char *str;
- struct it_key key_in;
- while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
- {
- key_block_write(zh->reg->key_block, sysno,
- &key_in, cmd, str, slen,
- staticrank, zh->m_staticrank);
- }
- }
-}
ZEBRA_RES zebra_rec_keys_to_snippets(ZebraHandle zh,
zebra_rec_keys_t reckeys,
char *dst_term = 0;
int ord;
zint seqno;
- int index_type;
+ const char *index_type;
assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
seqno = key.mem[key.len-1];
{
char dst_buf[IT_MAX_WORD];
zint seqno;
- int index_type;
+ const char *index_type;
int ord = CAST_ZINT_TO_INT(key.mem[0]);
const char *db = 0;
assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
ctrl->staticrank = atozint(valz);
}
-static void extract_add_string(RecWord *p, const char *string, int length)
+static void extract_add_string(RecWord *p, zebra_map_t zm,
+ const char *string, int length) /* length must be > 0 (asserted); zm selects which of the branches below handles the string */
{
- ZebraHandle zh = p->extractCtrl->handle;
assert(length > 0);
if (!p->index_name)
return;
- if (zebra_maps_is_index(zh->reg->zebra_maps, p->index_type))
+ if (zebra_maps_is_index(zm)) /* index map: string goes to extract_add_index_string under the index category */
{
extract_add_index_string(p, zinfo_index_category_index,
string, length);
- if (zebra_maps_is_alwaysmatches(zh->reg->zebra_maps, p->index_type))
+ if (zebra_maps_is_alwaysmatches(zm)) /* additionally works on a copy of *p with the alwaysmatches category and an empty string */
{
RecWord word;
memcpy(&word, p, sizeof(word));
&word, zinfo_index_category_alwaysmatches, "", 0);
}
}
- else if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type))
+ else if (zebra_maps_is_sort(zm)) /* sort map: string goes to extract_add_sort_string */
{
extract_add_sort_string(p, string, length);
}
- else if (zebra_maps_is_staticrank(zh->reg->zebra_maps, p->index_type))
+ else if (zebra_maps_is_staticrank(zm)) /* staticrank map: string goes to extract_add_staticrank_string */
{
extract_add_staticrank_string(p, string, length);
}
}
-static void extract_add_incomplete_field(RecWord *p)
+static void extract_add_incomplete_field(RecWord *p, zebra_map_t zm) /* zm replaces the former zh->reg->zebra_maps + p->index_type lookup */
{
- ZebraHandle zh = p->extractCtrl->handle;
const char *b = p->term_buf;
int remain = p->term_len;
int first = 1;
const char **map = 0;
if (remain > 0)
- map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0);
+ map = zebra_maps_input(zm, &b, remain, 0); /* priming read; map stays 0 when remain <= 0, so the loop below is skipped */
while (map)
{
{
remain = p->term_len - (b - p->term_buf);
if (remain > 0)
- map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b,
- remain, 0);
+ map = zebra_maps_input(zm, &b, remain, 0); /* next read; remain was recomputed from how far b has advanced */
else
map = 0;
}
buf[i++] = *(cp++);
remain = p->term_len - (b - p->term_buf);
if (remain > 0)
- map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0);
+ map = zebra_maps_input(zm, &b, remain, 0); /* same read-or-stop step as in the block above */
else
map = 0;
}
if (first)
{
first = 0;
- if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type))
+ if (zebra_maps_is_first_in_field(zm)) /* evaluated only while the first flag is still set */
{
/* first in field marker */
- extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN);
+ extract_add_string(p, zm, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN); /* the marker gets its own seqno: p->seqno is bumped right after */
p->seqno++;
}
}
- extract_add_string(p, buf, i);
+ extract_add_string(p, zm, buf, i); /* passes buf with length i; seqno is incremented afterwards */
p->seqno++;
}
}
-static void extract_add_complete_field(RecWord *p)
+static void extract_add_complete_field(RecWord *p, zebra_map_t zm) /* zm is handed to zebra_maps_input and extract_add_string; replaces the zh-based lookup */
{
- ZebraHandle zh = p->extractCtrl->handle;
const char *b = p->term_buf;
char buf[IT_MAX_WORD+1];
const char **map = 0;
int i = 0, remain = p->term_len;
if (remain > 0)
- map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 1);
+ map = zebra_maps_input(zm, &b, remain, 1); /* priming read before the loop; only done when remain > 0 */
while (remain > 0 && i < IT_MAX_WORD)
{
if (remain > 0)
{
int first = i ? 0 : 1; /* first position */
- map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, first);
+ map = zebra_maps_input(zm, &b, remain, first); /* first is 1 only while i == 0 */
}
else
map = 0;
remain = p->term_len - (b - p->term_buf);
if (remain > 0)
{
- map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b,
- remain, 0);
+ map = zebra_maps_input(zm, &b, remain, 0); /* remain was just recomputed from how far b has advanced */
}
else
map = 0;
}
if (!i)
return;
- extract_add_string(p, buf, i);
+ extract_add_string(p, zm, buf, i); /* reached only when i != 0 because of the early return just before */
}
static void extract_token_add2_index(ZebraHandle zh, zebra_index_type_t type,
static void extract_token_add2(RecWord *p)
{
- zebra_index_type_t type;
ZebraHandle zh = p->extractCtrl->handle;
- char type_tmp[2];
- type_tmp[0] = p->index_type;
- type_tmp[1] = '\0';
- type = zebra_index_type_get(zh->reg->index_types, type_tmp);
+ zebra_index_type_t type = zebra_index_type_get(zh->reg->index_types, p->index_type);
if (type)
{
if (zebra_index_type_is_index(type))
static void extract_token_add(RecWord *p)
{
ZebraHandle zh = p->extractCtrl->handle;
+ zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, p->index_type); /* map for this index type, resolved once and reused by every call below */
WRBUF wrbuf;
if (log_level_details)
{
yaz_log(log_level_details, "extract_token_add "
- "type=%c index=%s seqno=" ZINT_FORMAT " s=%.*s",
+ "type=%s index=%s seqno=" ZINT_FORMAT " s=%.*s", /* p->index_type is now formatted with %s instead of %c */
p->index_type, p->index_name,
p->seqno, p->term_len, p->term_buf);
}
- if ((wrbuf = zebra_replace(zh->reg->zebra_maps, p->index_type, 0,
- p->term_buf, p->term_len)))
+ if ((wrbuf = zebra_replace(zm, 0, p->term_buf, p->term_len))) /* a non-null result redirects p->term_buf / p->term_len to the returned buffer */
{
p->term_buf = wrbuf_buf(wrbuf);
p->term_len = wrbuf_len(wrbuf);
}
- if (zebra_maps_is_complete(zh->reg->zebra_maps, p->index_type))
- extract_add_complete_field(p);
+ if (zebra_maps_is_complete(zm)) /* the map decides between complete-field and incomplete-field handling */
+ extract_add_complete_field(p, zm);
else
- extract_add_incomplete_field(p);
+ extract_add_incomplete_field(p, zm);
}
static void extract_set_store_data_cb(struct recExtractCtrl *p,