-/* $Id: extract.c,v 1.248 2007-01-22 18:15:03 adam Exp $
+/* $Id: extract.c,v 1.251 2007-03-13 13:46:11 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
static void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid);
static void extract_token_add (RecWord *p);
+/* Emits a single notice when the number of records handled so far
+ * (records_processed plus records_skipped on the handle) is exactly equal
+ * to the handle's m_file_verbose_limit.  Callers in this file invoke it
+ * just before bumping one of those two counters. */
+static void check_log_limit(ZebraHandle zh)
+{
+    /* nothing to report unless the limit is hit exactly */
+    if (zh->records_processed + zh->records_skipped
+        != zh->m_file_verbose_limit)
+        return;
+
+    yaz_log(YLOG_LOG, "More than %d file log entries. Omitting rest",
+            zh->m_file_verbose_limit);
+}
+
static void logRecord (ZebraHandle zh)
{
+ check_log_limit(zh);
++zh->records_processed;
if (!(zh->records_processed % 1000))
{
"", 0);
}
+ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh,
+ struct ZebraRecStream *stream,
+ int delete_flag,
+ int test_mode,
+ const char *recordType,
+ zint *sysno,
+ const char *match_criteria,
+ const char *fname,
+ int force_update,
+ int allow_update,
+ RecType recType,
+ void *recTypeClientData); /* forward declaration: called before its definition later in this file */
+
+
ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname,
int deleteFlag)
{
}
if (!zh->m_record_type)
{
- if (zh->records_processed < zh->m_file_verbose_limit)
+ check_log_limit(zh);
+ if (zh->records_processed + zh->records_skipped
+ < zh->m_file_verbose_limit)
yaz_log (YLOG_LOG, "? %s", fname);
+ zh->records_skipped++;
return 0;
}
/* determine match criteria */
streamp = &stream;
zebra_create_stream_fd(streamp, fd, 0);
}
- while(1)
- {
- int more = 0;
- r = zebra_extract_record_stream(zh, streamp,
- deleteFlag,
- 0, /* tst_mode */
- zh->m_record_type,
- sysno,
- 0, /*match_criteria */
- fname,
- 1, /* force_update */
- 1, /* allow_update */
- recType, recTypeClientData, &more);
- if (!more)
- break;
- if (sysno)
- {
- break;
- }
- }
+ r = zebra_extract_records_stream(zh, streamp,
+ deleteFlag,
+ 0, /* tst_mode */
+ zh->m_record_type,
+ sysno,
+ 0, /*match_criteria */
+ fname,
+ 1, /* force_update */
+ 1, /* allow_update */
+ recType, recTypeClientData);
if (streamp)
stream.destroy(streamp);
zh->m_record_type = original_record_type;
ZEBRA_RES res;
void *clientData;
RecType recType = 0;
- int more = 0;
if (recordType && *recordType)
{
zebra_create_stream_mem(&stream, buf, buf_size);
- res = zebra_extract_record_stream(zh, &stream,
- delete_flag,
- test_mode,
- recordType,
- sysno,
- match_criteria,
- fname,
- force_update,
- allow_update,
- recType, clientData, &more);
+ res = zebra_extract_records_stream(zh, &stream,
+ delete_flag,
+ test_mode,
+ recordType,
+ sysno,
+ match_criteria,
+ fname,
+ force_update,
+ allow_update,
+ recType, clientData);
stream.destroy(&stream);
return res;
}
+/*
+ * Extracts records from `stream` by calling zebra_extract_record_stream()
+ * in a loop, forwarding every argument unchanged.
+ *
+ * The loop ends when one of the following holds after a call:
+ *   - the callee left `more` at 0: ZEBRA_OK is returned, whatever the
+ *     callee itself returned;
+ *   - the callee returned something other than ZEBRA_OK: that value is
+ *     returned;
+ *   - `sysno` is non-NULL: only one record is handled and the callee's
+ *     result is returned.
+ *
+ * NOTE(review): because of the first rule, a failure reported together
+ * with more == 0 reaches the caller as ZEBRA_OK -- confirm this is the
+ * intended contract.
+ */
+ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh,
+                                       struct ZebraRecStream *stream,
+                                       int delete_flag,
+                                       int test_mode,
+                                       const char *recordType,
+                                       zint *sysno,
+                                       const char *match_criteria,
+                                       const char *fname,
+                                       int force_update,
+                                       int allow_update,
+                                       RecType recType,
+                                       void *recTypeClientData)
+{
+    ZEBRA_RES status;
+    int more;
+
+    do
+    {
+        more = 0;  /* callee sets this to 1 when further records may follow */
+        status = zebra_extract_record_stream(zh, stream,
+                                             delete_flag, test_mode,
+                                             recordType, sysno,
+                                             match_criteria, fname,
+                                             force_update, allow_update,
+                                             recType, recTypeClientData,
+                                             &more);
+        if (!more)
+            return ZEBRA_OK;
+    } while (status == ZEBRA_OK && !sysno);
+
+    return status;
+}
+
ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh,
struct ZebraRecStream *stream,
Record rec;
off_t start_offset = 0, end_offset = 0;
const char *pr_fname = fname; /* filename to print .. */
- int show_progress = zh->records_processed < zh->m_file_verbose_limit ? 1:0;
+ int show_progress = zh->records_processed + zh->records_skipped
+ < zh->m_file_verbose_limit ? 1:0;
zebra_init_log_level();
extract_set_store_data_prepare(&extractCtrl);
r = (*recType->extract)(recTypeClientData, &extractCtrl);
-
- if (r == RECCTRL_EXTRACT_EOF)
- return ZEBRA_FAIL;
- else if (r == RECCTRL_EXTRACT_ERROR_GENERIC)
+
+ switch (r)
{
+ case RECCTRL_EXTRACT_EOF:
+ return ZEBRA_FAIL;
+ case RECCTRL_EXTRACT_ERROR_GENERIC:
/* error occured during extraction ... */
yaz_log (YLOG_WARN, "extract error: generic");
return ZEBRA_FAIL;
- }
- else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER)
- {
+ case RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER:
/* error occured during extraction ... */
yaz_log (YLOG_WARN, "extract error: no such filter");
return ZEBRA_FAIL;
+ case RECCTRL_EXTRACT_SKIP:
+ if (show_progress)
+ yaz_log (YLOG_LOG, "skip %s %s " ZINT_FORMAT,
+ recordType, pr_fname, (zint) start_offset);
+ *more = 1;
+
+ end_offset = stream->endf(stream, 0);
+ if (end_offset)
+ stream->seekf(stream, end_offset);
+
+ return ZEBRA_OK;
+ case RECCTRL_EXTRACT_OK:
+ break;
+ default:
+ yaz_log (YLOG_WARN, "extract error: unknown error: %d", r);
+ return ZEBRA_FAIL;
}
-
+ end_offset = stream->endf(stream, 0);
+ if (end_offset)
+ stream->seekf(stream, end_offset);
+ else
+ end_offset = stream->tellf(stream);
+
all_matches_add(&extractCtrl);
if (extractCtrl.match_criteria[0])
match_criteria = extractCtrl.match_criteria;
-
-
- end_offset = stream->endf(stream, 0);
-
- if (!end_offset)
- end_offset = stream->tellf(stream);
- else
- stream->seekf(stream, end_offset);
-
}
-
*more = 1;
if (!sysno)
{
yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
return ZEBRA_FAIL;
}
+ else if (!force_update)
+ {
+ yaz_log (YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
+ pr_fname, (zint) start_offset);
+ yaz_log (YLOG_WARN, "cannot update record above (seems new)");
+ return ZEBRA_FAIL;
+ }
if (show_progress)
yaz_log (YLOG_LOG, "add %s %s " ZINT_FORMAT, recordType, pr_fname,
(zint) start_offset);