X-Git-Url: http://sru.miketaylor.org.uk/?a=blobdiff_plain;f=index%2Fextract.c;h=fbd37acd3d4d0677cada9808fcca0cfc5320d37e;hb=1ef7d46cdc719bc71b84ea81a1e7b467f9669fba;hp=06eb1b4eebc2a9eb1e4574e1bf313c87b9106171;hpb=60ef5f615dd758097e842067a875307df11e4f07;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 06eb1b4..fbd37ac 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.197 2005-10-28 09:22:50 adam Exp $ +/* $Id: extract.c,v 1.201 2006-02-08 13:45:44 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -206,6 +206,8 @@ static void file_end (void *handle, off_t offset) } } +#define FILE_MATCH_BLANK "\t " + static char *fileMatchStr (ZebraHandle zh, zebra_rec_keys_t reckeys, const char *fname, const char *spec) @@ -216,8 +218,8 @@ static char *fileMatchStr (ZebraHandle zh, while (1) { - while (*s == ' ' || *s == '\t') - s++; + for (; *s && strchr(FILE_MATCH_BLANK, *s); s++) + ; if (!*s) break; if (*s == '(') @@ -226,21 +228,26 @@ static char *fileMatchStr (ZebraHandle zh, char attset_str[64], attname_str[64]; data1_attset *attset; int i; - char matchFlag[32]; int attSet = 1, attUse = 1; int first = 1; - - s++; - for (i = 0; *s && *s != ',' && *s != ')'; s++) - if (i < 63) + + for (s++; strchr(FILE_MATCH_BLANK, *s); s++) + ; + for (i = 0; *s && *s != ',' && *s != ')' && + !strchr(FILE_MATCH_BLANK, *s); s++) + if (i+1 < sizeof(attset_str)) attset_str[i++] = *s; attset_str[i] = '\0'; - + + for (; strchr(FILE_MATCH_BLANK, *s); s++) + ; if (*s == ',') { - s++; - for (i = 0; *s && *s != ')'; s++) - if (i < 63) + for (s++; strchr(FILE_MATCH_BLANK, *s); s++) + ; + for (i = 0; *s && *s != ')' && + !strchr(FILE_MATCH_BLANK, *s); s++) + if (i+1 < sizeof(attname_str)) attname_str[i++] = *s; attname_str[i] = '\0'; } @@ -257,12 +264,7 @@ static char *fileMatchStr (ZebraHandle zh, } searchRecordKey (zh, reckeys, attSet, attUse, ws, 32); - if (*s == ')') - { - for (i = 0; i<32; i++) - matchFlag[i] = 1; - } - else + if (*s != ')') { yaz_log (YLOG_WARN, "Missing ) in match criteria %s in group %s", spec, zh->m_group ? zh->m_group : "none"); @@ -271,7 +273,7 @@ static char *fileMatchStr (ZebraHandle zh, s++; for (i = 0; i<32; i++) - if (matchFlag[i] && ws[i]) + if (ws[i]) { if (first) { @@ -294,12 +296,12 @@ static char *fileMatchStr (ZebraHandle zh, char special[64]; const char *spec_src = NULL; const char *s1 = ++s; - while (*s1 && *s1 != ' ' && *s1 != '\t') + while (*s1 && !strchr(FILE_MATCH_BLANK, *s1)) s1++; spec_len = s1 - s; - if (spec_len > 63) - spec_len = 63; + if (spec_len > sizeof(special)-1) + spec_len = sizeof(special)-1; memcpy (special, s, spec_len); special[spec_len] = '\0'; s = s1; @@ -329,7 +331,7 @@ static char *fileMatchStr (ZebraHandle zh, while (*s && *s != stopMarker) { - if (i < 63) + if (i+1 < sizeof(tmpString)) tmpString[i++] = *s++; } if (*s) @@ -535,7 +537,11 @@ static int file_extract_record(ZebraHandle zh, { dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno); } +#if NATTR + extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); +#else extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); +#endif extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); zh->records_inserted++; @@ -582,7 +588,7 @@ static int file_extract_record(ZebraHandle zh, { yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T, zh->m_record_type, fname, recordOffset); - yaz_log (YLOG_WARN, "cannot delete file above, storeKeys false"); + yaz_log (YLOG_WARN, "cannot delete file above, storeKeys false (1)"); } else { @@ -600,25 +606,19 @@ static int file_extract_record(ZebraHandle zh, } else { - /* record going to be updated */ - if (zebra_rec_keys_empty(delkeys)) - { + /* flush new keys for sort&search etc */ + if (zh->records_processed < zh->m_file_verbose_limit) yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T, zh->m_record_type, fname, recordOffset); - yaz_log (YLOG_WARN, "cannot update file above, storeKeys false"); - } - else - { - /* flush new keys for sort&search etc */ - if (zh->records_processed < zh->m_file_verbose_limit) - yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T, - zh->m_record_type, fname, recordOffset); - recordAttr->staticrank = extractCtrl.staticrank; - extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); - extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, + recordAttr->staticrank = extractCtrl.staticrank; +#if NATTR + extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); +#else + extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); +#endif + extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); - zh->records_updated++; - } + zh->records_updated++; } zebra_rec_keys_close(delkeys); #if NATTR @@ -652,10 +652,16 @@ static int file_extract_record(ZebraHandle zh, /* update sort keys */ xfree (rec->info[recInfo_sortKeys]); +#if NATTR + zebra_rec_keys_get_buf(zh->reg->sortKeys, + &rec->info[recInfo_sortKeys], + &rec->size[recInfo_sortKeys]); +#else rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; zh->reg->sortKeys.buf = NULL; zh->reg->sortKeys.buf_max = 0; +#endif /* save file size of original record */ zebraExplain_recordBytesIncrement (zh->reg->zei, @@ -841,6 +847,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, int force_update, int allow_update) { + SYSNO sysno0 = 0; RecordAttr *recordAttr; struct recExtractCtrl extractCtrl; int r; @@ -938,14 +945,14 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, yaz_log (YLOG_WARN, "extract error: no such filter"); return ZEBRA_FAIL; } - /* match criteria */ - matchStr = NULL; if (extractCtrl.match_criteria[0]) match_criteria = extractCtrl.match_criteria; - if (! *sysno) { - char *rinfo; + if (!sysno) { + + sysno = &sysno0; + if (match_criteria && *match_criteria) { matchStr = match_criteria; } else { @@ -960,7 +967,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, } } if (matchStr) { - rinfo = dict_lookup (zh->reg->matchDict, matchStr); + char *rinfo = dict_lookup (zh->reg->matchDict, matchStr); if (rinfo) { assert(*rinfo == sizeof(*sysno)); @@ -981,8 +988,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, /* new record */ if (delete_flag) { - if (show_progress) - yaz_log (YLOG_LOG, "delete %s %s %ld", recordType, + yaz_log (YLOG_LOG, "delete %s %s %ld", recordType, pr_fname, (long) recordOffset); yaz_log (YLOG_WARN, "cannot delete record above (seems new)"); return ZEBRA_FAIL; @@ -1002,7 +1008,11 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno); } +#if NATTR + extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); +#else extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); +#endif #if 0 print_rec_keys(zh, zh->reg->keys); @@ -1023,8 +1033,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, if (!allow_update) { - if (show_progress) - yaz_log (YLOG_LOG, "skipped %s %s %ld", + yaz_log (YLOG_LOG, "skipped %s %s %ld", recordType, pr_fname, (long) recordOffset); logRecord(zh); return ZEBRA_FAIL; @@ -1039,8 +1048,15 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], 0); +#if NATTR + zebra_rec_keys_set_buf(sortKeys, + rec->info[recInfo_sortKeys], + rec->size[recInfo_sortKeys], + 0); +#else sortKeys.buf_used = rec->size[recInfo_sortKeys]; sortKeys.buf = rec->info[recInfo_sortKeys]; +#endif #if NATTR extract_flushSortKeys (zh, *sysno, 0, sortKeys); @@ -1054,13 +1070,10 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, /* record going to be deleted */ if (zebra_rec_keys_empty(delkeys)) { - if (show_progress) - { - yaz_log (YLOG_LOG, "delete %s %s %ld", recordType, - pr_fname, (long) recordOffset); - yaz_log (YLOG_WARN, "cannot delete file above, " - "storeKeys false"); - } + yaz_log (YLOG_LOG, "delete %s %s %ld", recordType, + pr_fname, (long) recordOffset); + yaz_log (YLOG_WARN, "cannot delete file above, " + "storeKeys false (3)"); } else { @@ -1078,27 +1091,18 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, } else { - /* record going to be updated */ - if (zebra_rec_keys_empty(delkeys)) - { - if (show_progress) - { - yaz_log (YLOG_LOG, "update %s %s %ld", recordType, - pr_fname, (long) recordOffset); - yaz_log (YLOG_WARN, "cannot update file above, storeKeys false"); - } - } - else - { - if (show_progress) + if (show_progress) yaz_log (YLOG_LOG, "update %s %s %ld", recordType, pr_fname, (long) recordOffset); - recordAttr->staticrank = extractCtrl.staticrank; - extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); - extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, + recordAttr->staticrank = extractCtrl.staticrank; +#if NATTR + extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); +#else + extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); +#endif + extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); - zh->records_updated++; - } + zh->records_updated++; } zebra_rec_keys_close(delkeys); #if NATTR @@ -1131,10 +1135,16 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, /* update sort keys */ xfree (rec->info[recInfo_sortKeys]); +#if NATTR + zebra_rec_keys_get_buf(zh->reg->sortKeys, + &rec->info[recInfo_sortKeys], + &rec->size[recInfo_sortKeys]); +#else rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; zh->reg->sortKeys.buf = NULL; zh->reg->sortKeys.buf_max = 0; +#endif /* save file size of original record */ zebraExplain_recordBytesIncrement (zh->reg->zei, @@ -1233,7 +1243,7 @@ int explain_extract (void *handle, Record rec, data1_node *n) zebra_rec_keys_t delkeys = zebra_rec_keys_open(); #if NATTR - zebra_rec_keys_t sortkeys = zzebra_rec_keys_open(); + zebra_rec_keys_t sortkeys = zebra_rec_keys_open(); #else struct sortKeys sortkeys; #endif @@ -1269,10 +1279,16 @@ int explain_extract (void *handle, Record rec, data1_node *n) &rec->size[recInfo_delKeys]); xfree (rec->info[recInfo_sortKeys]); +#if NATTR + zebra_rec_keys_get_buf(zh->reg->sortKeys, + &rec->info[recInfo_sortKeys], + &rec->size[recInfo_sortKeys]); +#else rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; zh->reg->sortKeys.buf = NULL; zh->reg->sortKeys.buf_max = 0; +#endif return 0; } @@ -1495,14 +1511,6 @@ void extract_flushWriteKeys (ZebraHandle zh, int final) zh->reg->key_buf_used = 0; } -void extract_add_it_key (ZebraHandle zh, - zebra_rec_keys_t *keys, - int reg_type, - const char *str, int slen, struct it_key *key) -{ - zebra_rec_keys_write(*keys, reg_type, str, slen, key); -} - ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys, zebra_snippets *snippets) @@ -1598,16 +1606,23 @@ void extract_add_index_string (RecWord *p, const char *str, int length) key.mem[3] = p->seqno; #if 0 - /* just for debugging .. */ - yaz_log(YLOG_LOG, "add: set=%d use=%d " - "record_id=%lld section_id=%lld seqno=%lld", - p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno); + if (1) + { + char strz[80]; + int i; + + strz[0] = 0; + for (i = 0; iattrSet, p->attrUse, p->record_id, p->section_id, p->seqno, + strz); + } #endif - extract_add_it_key(p->extractCtrl->handle, - &zh->reg->keys, - p->index_type, str, - length, &key); + zebra_rec_keys_write(zh->reg->keys, str, length, &key); } #if NATTR @@ -1635,10 +1650,7 @@ static void extract_add_sort_string (RecWord *p, const char *str, int length) key.mem[2] = p->section_id; key.mem[3] = p->seqno; - extract_add_it_key(p->extractCtrl->handle, - &zh->reg->sortKeys, - p->index_type, str, - length, &key); + zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key); } #else static void extract_add_sort_string (RecWord *p, const char *str, int length)