X-Git-Url: http://sru.miketaylor.org.uk/?a=blobdiff_plain;f=index%2Fextract.c;h=fb576a2940f6bc9b33c98cfc73cf21fe91d25703;hb=25aa9d21203318cca1927f906ab4f7370e43a5fe;hp=bad358670e463d12b02b4a23b39f46182b876760;hpb=b3af54f7bf58db2a5d85b6a54b720bf6c5359c1f;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index bad3586..fb576a2 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,5 +1,5 @@ -/* $Id: extract.c,v 1.241 2006-11-21 22:17:49 adam Exp $ - Copyright (C) 1995-2006 +/* $Id: extract.c,v 1.248 2007-01-22 18:15:03 adam Exp $ + Copyright (C) 1995-2007 Index Data ApS This file is part of the Zebra server. @@ -406,6 +406,7 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, } while(1) { + int more = 0; r = zebra_extract_record_stream(zh, streamp, deleteFlag, 0, /* tst_mode */ @@ -415,11 +416,9 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, fname, 1, /* force_update */ 1, /* allow_update */ - recType, recTypeClientData); - if (r != ZEBRA_OK) - { - break; - } + recType, recTypeClientData, &more); + if (!more) + break; if (sysno) { break; @@ -453,6 +452,7 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, ZEBRA_RES res; void *clientData; RecType recType = 0; + int more = 0; if (recordType && *recordType) { @@ -481,8 +481,6 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, return ZEBRA_FAIL; } - - zebra_create_stream_mem(&stream, buf, buf_size); res = zebra_extract_record_stream(zh, &stream, @@ -494,7 +492,7 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, fname, force_update, allow_update, - recType, clientData); + recType, clientData, &more); stream.destroy(&stream); return res; } @@ -511,7 +509,8 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, int force_update, int allow_update, RecType recType, - void *recTypeClientData) + void *recTypeClientData, + int *more) { zint sysno0 = 0; @@ -520,7 +519,7 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, int r; const char *matchStr = 0; Record rec; - off_t start_offset = 0; + off_t start_offset = 0, end_offset = 0; const char *pr_fname = fname; /* filename to print .. */ int show_progress = zh->records_processed < zh->m_file_verbose_limit ? 1:0; @@ -558,7 +557,6 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, extractCtrl.match_criteria[0] = '\0'; extractCtrl.staticrank = 0; - init_extractCtrl(zh, &extractCtrl); extract_set_store_data_prepare(&extractCtrl); @@ -584,9 +582,21 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, if (extractCtrl.match_criteria[0]) match_criteria = extractCtrl.match_criteria; + + + end_offset = stream->endf(stream, 0); + + if (!end_offset) + end_offset = stream->tellf(stream); + else + stream->seekf(stream, end_offset); + } - if (!sysno) { + + *more = 1; + if (!sysno) + { sysno = &sysno0; if (match_criteria && *match_criteria) { @@ -597,7 +607,8 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, zh->m_record_id); if (!matchStr) { - yaz_log(YLOG_WARN, "Bad match criteria (recordID)"); + yaz_log (YLOG_LOG, "error %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); return ZEBRA_FAIL; } } @@ -640,6 +651,11 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, *sysno = rec->sysno; recordAttr = rec_init_attr (zh->reg->zei, rec); + if (extractCtrl.staticrank < 0) + { + yaz_log(YLOG_WARN, "Negative staticrank for record. Set to 0"); + extractCtrl.staticrank = 0; + } recordAttr->staticrank = extractCtrl.staticrank; if (matchStr) @@ -672,6 +688,10 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, recordAttr = rec_init_attr (zh->reg->zei, rec); + /* decrease total size */ + zebraExplain_recordBytesIncrement (zh->reg->zei, + - recordAttr->recordSize); + zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], @@ -714,8 +734,8 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, else { if (show_progress) - yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType, - pr_fname, (zint) ZINT_FORMAT); + yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); recordAttr->staticrank = extractCtrl.staticrank; extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys); extract_flush_record_keys(zh, *sysno, 1, zh->reg->keys, @@ -755,18 +775,8 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, &rec->info[recInfo_sortKeys], &rec->size[recInfo_sortKeys]); - /* save file size of original record */ - zebraExplain_recordBytesIncrement (zh->reg->zei, - - recordAttr->recordSize); if (stream) { - off_t end_offset = stream->endf(stream, 0); - - if (!end_offset) - end_offset = stream->tellf(stream); - else - stream->seekf(stream, end_offset); - recordAttr->recordSize = end_offset - start_offset; zebraExplain_recordBytesIncrement(zh->reg->zei, recordAttr->recordSize); @@ -1054,8 +1064,7 @@ ZEBRA_RES zebra_rec_keys_to_snippets(ZebraHandle zh, struct it_key key; while (zebra_rec_keys_read(reckeys, &str, &slen, &key)) { - char dst_buf[IT_MAX_WORD]; - char *dst_term = dst_buf; + char *dst_term = 0; int ord; zint seqno; int index_type; @@ -1149,6 +1158,20 @@ static void extract_add_sort_string(RecWord *p, const char *str, int length) zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key); } +static void extract_add_staticrank_string(RecWord *p, + const char *str, int length) +{ + char valz[40]; + struct recExtractCtrl *ctrl = p->extractCtrl; + + if (length > sizeof(valz)-1) + length = sizeof(valz)-1; + + memcpy(valz, str, length); + valz[length] = '\0'; + ctrl->staticrank = atozint(valz); +} + static void extract_add_string(RecWord *p, const char *string, int length) { ZebraHandle zh = p->extractCtrl->handle; @@ -1157,9 +1180,7 @@ static void extract_add_string(RecWord *p, const char *string, int length) if (!p->index_name) return; - if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type)) - extract_add_sort_string(p, string, length); - else + if (zebra_maps_is_index(zh->reg->zebra_maps, p->index_type)) { extract_add_index_string(p, zinfo_index_category_index, string, length); @@ -1173,6 +1194,14 @@ static void extract_add_string(RecWord *p, const char *string, int length) &word, zinfo_index_category_alwaysmatches, "", 0); } } + else if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type)) + { + extract_add_sort_string(p, string, length); + } + else if (zebra_maps_is_staticrank(zh->reg->zebra_maps, p->index_type)) + { + extract_add_staticrank_string(p, string, length); + } } static void extract_add_incomplete_field(RecWord *p) @@ -1180,20 +1209,12 @@ static void extract_add_incomplete_field(RecWord *p) ZebraHandle zh = p->extractCtrl->handle; const char *b = p->term_buf; int remain = p->term_len; + int first = 1; const char **map = 0; if (remain > 0) map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0); - if (map) - { - if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type)) - { - /* first in field marker */ - extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN); - p->seqno++; - } - } while (map) { char buf[IT_MAX_WORD+1]; @@ -1226,6 +1247,17 @@ static void extract_add_incomplete_field(RecWord *p) } if (!i) return; + + if (first) + { + first = 0; + if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type)) + { + /* first in field marker */ + extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN); + p->seqno++; + } + } extract_add_string (p, buf, i); p->seqno++; } @@ -1349,24 +1381,30 @@ static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid) void extract_flush_sort_keys(ZebraHandle zh, zint sysno, int cmd, zebra_rec_keys_t reckeys) { +#if 0 + yaz_log(YLOG_LOG, "extract_flush_sort_keys cmd=%d sysno=" ZINT_FORMAT, + cmd, sysno); + extract_rec_keys_log(zh, cmd, reckeys, YLOG_LOG); +#endif + if (zebra_rec_keys_rewind(reckeys)) { - SortIdx sortIdx = zh->reg->sortIdx; + zebra_sort_index_t si = zh->reg->sort_index; size_t slen; const char *str; struct it_key key_in; - sortIdx_sysno (sortIdx, sysno); + zebra_sort_sysno(si, sysno); while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) { int ord = CAST_ZINT_TO_INT(key_in.mem[0]); - sortIdx_type(sortIdx, ord); + zebra_sort_type(si, ord); if (cmd == 1) - sortIdx_add(sortIdx, str, slen); + zebra_sort_add(si, str, slen); else - sortIdx_add(sortIdx, "", 1); + zebra_sort_delete(si); } } }