From: Adam Dickmeiss Date: Tue, 22 Aug 2006 13:39:22 +0000 (+0000) Subject: Common stream reader interface for record filters (struct ZebraRecStream). X-Git-Tag: ZEBRA.2.0.2~49 X-Git-Url: http://sru.miketaylor.org.uk/cgi-bin?a=commitdiff_plain;h=852d5f1f9aa0a70f7e54a68143ee86752394a2f2;p=idzebra-moved-to-github.git Common stream reader interface for record filters (struct ZebraRecStream). --- diff --git a/NEWS b/NEWS index 902a796..184eae0 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,8 @@ +Common stream reader interface for record filters (struct ZebraRecStream). + +Debian package fix: packages idzebra-2.0 + libidzebra-2.0-modules did +not depend properly on sub packages. + Experimental segment facility (for matching of words within one field/segment). diff --git a/examples/gils/zebra.cfg b/examples/gils/zebra.cfg index 7bef8ca..7fc2886 100644 --- a/examples/gils/zebra.cfg +++ b/examples/gils/zebra.cfg @@ -1,5 +1,5 @@ # Simple Zebra configuration file -# $Id: zebra.cfg,v 1.13 2006-07-03 14:27:05 adam Exp $ +# $Id: zebra.cfg,v 1.14 2006-08-22 13:39:23 adam Exp $ # # Where the schema files, attribute files, etc are located. profilePath: .:../../tab @@ -18,6 +18,8 @@ perm.anonymous: rw #rank: zvrank recordId: (bib-1,title) +storedata: 1 + modulePath: ../../index/.libs #shadow: shadow:100M # register: register:100M diff --git a/include/idzebra/recctrl.h b/include/idzebra/recctrl.h index 3c45251..4a8f384 100644 --- a/include/idzebra/recctrl.h +++ b/include/idzebra/recctrl.h @@ -1,4 +1,4 @@ -/* $Id: recctrl.h,v 1.28 2006-08-16 13:16:35 adam Exp $ +/* $Id: recctrl.h,v 1.29 2006-08-22 13:39:25 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -48,27 +48,46 @@ YAZ_BEGIN_CDECL /* 1015 */ #define ZEBRA_XPATH_ATTR_CDATA "_XPATH_ATTR_CDATA" -/* single word entity */ +/** Indexing token */ typedef struct { + /** index type ('w', 'p', .. */ unsigned index_type; + /** index name, e.g. "title" */ const char *index_name; + /** token char data */ const char *term_buf; + /** length of term_buf */ int term_len; + /** sequence number */ zint seqno; + /** segment number */ zint segment; + /** record ID */ zint record_id; + /** section ID */ zint section_id; struct recExtractCtrl *extractCtrl; } RecWord; -/* Extract record control */ +/** \brief record reader stream */ +struct ZebraRecStream { + /** client data */ + void *fh; + /** \brief read function */ + int (*readf)(struct ZebraRecStream *s, char *buf, size_t count); + /** \brief seek function */ + off_t (*seekf)(struct ZebraRecStream *s, off_t offset); + /** \brief tell function */ + off_t (*tellf)(struct ZebraRecStream *s); + /** \brief set and get of record position */ + off_t (*endf)(struct ZebraRecStream *s, off_t *offset); + /** \brief close and destroy stream */ + void (*destroy)(struct ZebraRecStream *s); +}; + +/** \brief record extract for indexing */ struct recExtractCtrl { - void *fh; /* File handle and read function */ - int (*readf)(void *fh, char *buf, size_t count); - off_t (*seekf)(void *fh, off_t offset); /* seek function */ - off_t (*tellf)(void *fh); /* tell function */ - void (*endf)(void *fh, off_t offset); /* end of record position */ - off_t offset; /* start offset */ + struct ZebraRecStream *stream; void (*init)(struct recExtractCtrl *p, RecWord *w); void *clientData; void (*tokenAdd)(RecWord *w); @@ -85,13 +104,10 @@ struct recExtractCtrl { /* Retrieve record control */ struct recRetrieveCtrl { + struct ZebraRecStream *stream; /* Input parameters ... */ Res res; /* Resource pool */ ODR odr; /* ODR used to create response */ - void *fh; /* File descriptor and read function */ - int (*readf)(void *fh, char *buf, size_t count); - off_t (*seekf)(void *fh, off_t offset); - off_t (*tellf)(void *fh); oid_value input_format; /* Preferred record syntax */ Z_RecordComposition *comp; /* formatting instructions */ char *encoding; /* preferred character encoding */ @@ -166,9 +182,6 @@ YAZ_EXPORT RecType recType_byName(RecTypes rts, Res res, const char *name, void **clientDataP); - -#define KEY_SEGMENT_SIZE 1024 - YAZ_END_CDECL #endif diff --git a/include/idzebra/recgrs.h b/include/idzebra/recgrs.h index 79717a5..521c1ea 100644 --- a/include/idzebra/recgrs.h +++ b/include/idzebra/recgrs.h @@ -1,4 +1,4 @@ -/* $Id: recgrs.h,v 1.5 2006-08-14 10:40:14 adam Exp $ +/* $Id: recgrs.h,v 1.6 2006-08-22 13:39:25 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -28,13 +28,8 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA YAZ_BEGIN_CDECL struct grs_read_info { + struct ZebraRecStream *stream; void *clientData; - int (*readf)(void *, char *, size_t); - off_t (*seekf)(void *, off_t); - off_t (*tellf)(void *); - void (*endf)(void *, off_t); - void *fh; - off_t offset; NMEM mem; data1_handle dh; }; diff --git a/index/Makefile.am b/index/Makefile.am index 5b3ab30..83367a7 100644 --- a/index/Makefile.am +++ b/index/Makefile.am @@ -1,4 +1,4 @@ -## $Id: Makefile.am,v 1.50 2006-07-06 07:56:00 adam Exp $ +## $Id: Makefile.am,v 1.51 2006-08-22 13:39:26 adam Exp $ aux_libs = \ ../rset/libidzebra-rset.la \ @@ -80,7 +80,7 @@ libidzebra_2_0_la_SOURCES = \ orddict.c orddict.h \ rank.h rank1.c ranksimilarity.c rankstatic.c \ recindex.c recindex.h recindxp.h reckeys.c reckeys.h recstat.c retrieve.c \ - sortidx.c symtab.c \ + sortidx.c symtab.c stream.c \ update_path.c update_file.c trunc.c \ zebraapi.c zinfo.c zinfo.h zserver.h zsets.c zrpn.c diff --git a/index/alvis.c b/index/alvis.c index 93f4241..40405e1 100644 --- a/index/alvis.c +++ b/index/alvis.c @@ -1,4 +1,4 @@ -/* $Id: alvis.c,v 1.2 2006-08-14 10:40:15 adam Exp $ +/* $Id: alvis.c,v 1.3 2006-08-22 13:39:26 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -336,7 +336,7 @@ static void filter_destroy(void *clientData) static int ioread_ex(void *context, char *buffer, int len) { struct recExtractCtrl *p = context; - return (*p->readf)(p->fh, buffer, len); + return p->stream->readf(p->stream, buffer, len); } static int ioclose_ex(void *context) @@ -555,7 +555,7 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p) static int ioread_ret(void *context, char *buffer, int len) { struct recRetrieveCtrl *p = context; - return (*p->readf)(p->fh, buffer, len); + return p->stream->readf(p->stream, buffer, len); } static int ioclose_ret(void *context) diff --git a/index/extract.c b/index/extract.c index e4973ab..3874dfe 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.227 2006-08-16 13:16:36 adam Exp $ +/* $Id: extract.c,v 1.228 2006-08-22 13:39:27 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -172,71 +172,6 @@ static void searchRecordKey(ZebraHandle zh, } } -struct file_read_info { - off_t file_max; /* maximum offset so far */ - off_t file_offset; /* current offset */ - off_t file_moffset; /* offset of rec/rec boundary */ - int file_more; - int fd; -}; - -static struct file_read_info *file_read_start (int fd) -{ - struct file_read_info *fi = (struct file_read_info *) - xmalloc (sizeof(*fi)); - - fi->fd = fd; - fi->file_max = 0; - fi->file_moffset = 0; - fi->file_offset = 0; - fi->file_more = 0; - return fi; -} - -static void file_read_stop (struct file_read_info *fi) -{ - xfree (fi); -} - -static off_t file_seek (void *handle, off_t offset) -{ - struct file_read_info *p = (struct file_read_info *) handle; - p->file_offset = offset; - return lseek (p->fd, offset, SEEK_SET); -} - -static off_t file_tell (void *handle) -{ - struct file_read_info *p = (struct file_read_info *) handle; - return p->file_offset; -} - -static int file_read (void *handle, char *buf, size_t count) -{ - struct file_read_info *p = (struct file_read_info *) handle; - int fd = p->fd; - int r; - r = read (fd, buf, count); - if (r > 0) - { - p->file_offset += r; - if (p->file_offset > p->file_max) - p->file_max = p->file_offset; - } - return r; -} - -static void file_end (void *handle, off_t offset) -{ - struct file_read_info *p = (struct file_read_info *) handle; - - if (offset != p->file_moffset) - { - p->file_moffset = offset; - p->file_more = 1; - } -} - #define FILE_MATCH_BLANK "\t " static char *fileMatchStr (ZebraHandle zh, @@ -409,355 +344,6 @@ static void all_matches_add(struct recExtractCtrl *ctrl) "", 0); } -static ZEBRA_RES file_extract_record(ZebraHandle zh, - SYSNO *sysno, const char *fname, - int deleteFlag, - struct file_read_info *fi, - int force_update, - RecType recType, - void *recTypeClientData) -{ - const char *match_str_to_print = ""; - RecordAttr *recordAttr; - int r; - const char *matchStr = 0; - SYSNO sysnotmp; - Record rec; - off_t recordOffset = 0; - struct recExtractCtrl extractCtrl; - - /* announce database */ - if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0])) - { - if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0], - zh->m_explain_database)) - return ZEBRA_FAIL; - } - - if (fi->fd != -1) - { - /* we are going to read from a file, so prepare the extraction */ - zebra_rec_keys_reset(zh->reg->keys); - - zebra_rec_keys_reset(zh->reg->sortKeys); - recordOffset = fi->file_moffset; - extractCtrl.handle = zh; - extractCtrl.offset = fi->file_moffset; - extractCtrl.readf = file_read; - extractCtrl.seekf = file_seek; - extractCtrl.tellf = file_tell; - extractCtrl.endf = file_end; - extractCtrl.fh = fi; - extractCtrl.init = extract_init; - extractCtrl.tokenAdd = extract_token_add; - extractCtrl.schemaAdd = extract_schema_add; - extractCtrl.dh = zh->reg->dh; - extractCtrl.match_criteria[0] = '\0'; - extractCtrl.staticrank = 0; - - extractCtrl.first_record = fi->file_offset ? 0 : 1; - - extract_set_store_data_prepare(&extractCtrl); - - init_extractCtrl(zh, &extractCtrl); - - if (!zh->m_flag_rw) - printf ("File: %s " ZINT_FORMAT "\n", fname, (zint)recordOffset); - if (zh->m_flag_rw) - { - char msg[512]; - sprintf (msg, "%s:" ZINT_FORMAT , fname, (zint)recordOffset); - yaz_log_init_prefix2 (msg); - } - - r = (*recType->extract)(recTypeClientData, &extractCtrl); - - yaz_log_init_prefix2 (0); - if (r == RECCTRL_EXTRACT_EOF) - return ZEBRA_FAIL; - else if (r == RECCTRL_EXTRACT_ERROR_GENERIC) - { - /* error occured during extraction ... */ - if (zh->m_flag_rw && - zh->records_processed < zh->m_file_verbose_limit) - { - yaz_log (YLOG_WARN, "fail %s %s " ZINT_FORMAT, - zh->m_record_type, - fname, (zint) recordOffset); - } - return ZEBRA_FAIL; - } - else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER) - { - /* error occured during extraction ... */ - if (zh->m_flag_rw && - zh->records_processed < zh->m_file_verbose_limit) - { - yaz_log (YLOG_WARN, "no filter for %s %s " - ZINT_FORMAT, zh->m_record_type, - fname, (zint) recordOffset); - } - return ZEBRA_FAIL; - } - all_matches_add(&extractCtrl); - if (extractCtrl.match_criteria[0]) - matchStr = extractCtrl.match_criteria; - } - - /* if matchStr is set now - we assume it's printable . - For internal matchStr (see below) we don't print */ - if (matchStr) - match_str_to_print = matchStr; - - /* perform internal match if sysno not known and if match criteria is - specified already */ - if (!sysno) - { - sysnotmp = 0; - sysno = &sysnotmp; - - if (matchStr == 0 && zh->m_record_id && *zh->m_record_id) - { - matchStr = fileMatchStr (zh, zh->reg->keys, fname, - zh->m_record_id); - if (!matchStr) - { - yaz_log(YLOG_WARN, "Bad match criteria"); - - if (zebra_rec_keys_empty(zh->reg->keys)) - { - yaz_log(YLOG_WARN, "And no index keys"); - } - return ZEBRA_FAIL; - } - } - if (matchStr) - { - int db_ord = zebraExplain_get_database_ord(zh->reg->zei); - char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord, - matchStr); - if (rinfo) - { - assert(*rinfo == sizeof(*sysno)); - memcpy (sysno, rinfo+1, sizeof(*sysno)); - } - } - } - if (! *sysno && zebra_rec_keys_empty(zh->reg->keys) ) - { - /* the extraction process returned no information - the record - is probably empty - unless flagShowRecords is in use */ - if (!zh->m_flag_rw) - return ZEBRA_OK; - - if (zh->records_processed < zh->m_file_verbose_limit) - yaz_log(YLOG_WARN, "empty %s %s " ZINT_FORMAT, zh->m_record_type, - fname, (zint)recordOffset); - return ZEBRA_OK; - } - - if (! *sysno) - { - /* new record */ - if (deleteFlag) - { - yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, zh->m_record_type, - fname, (zint)recordOffset); - yaz_log(YLOG_WARN, "cannot delete record above (seems new)"); - return ZEBRA_OK; - } - - rec = rec_new (zh->reg->records); - - *sysno = rec->sysno; - - if (zh->records_processed < zh->m_file_verbose_limit) - { - yaz_log(YLOG_LOG, "add %s %s " ZINT_FORMAT - " " ZINT_FORMAT " %s" , - zh->m_record_type, - fname, (zint) recordOffset, *sysno, match_str_to_print); - } - recordAttr = rec_init_attr (zh->reg->zei, rec); - recordAttr->staticrank = extractCtrl.staticrank; - - if (matchStr) - { - int db_ord = zebraExplain_get_database_ord(zh->reg->zei); - dict_insert_ord(zh->reg->matchDict, db_ord, matchStr, - sizeof(*sysno), sysno); - } - - - extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); - extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, - recordAttr->staticrank); - zh->records_inserted++; - } - else - { - /* record already exists */ - zebra_rec_keys_t delkeys = zebra_rec_keys_open(); - - zebra_rec_keys_t sortKeys = zebra_rec_keys_open(); - - rec = rec_get (zh->reg->records, *sysno); - assert (rec); - - recordAttr = rec_init_attr (zh->reg->zei, rec); - - zebra_rec_keys_set_buf(delkeys, - rec->info[recInfo_delKeys], - rec->size[recInfo_delKeys], - 0); - - zebra_rec_keys_set_buf(sortKeys, - rec->info[recInfo_sortKeys], - rec->size[recInfo_sortKeys], - 0); - extract_flushSortKeys (zh, *sysno, 0, sortKeys); - extract_flushRecordKeys (zh, *sysno, 0, delkeys, - recordAttr->staticrank); /* old values */ - if (deleteFlag) - { - /* record going to be deleted */ - if (zebra_rec_keys_empty(delkeys)) - { - yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT - " " ZINT_FORMAT, - zh->m_record_type, fname, (zint)recordOffset, *sysno); - yaz_log(YLOG_WARN, "cannot delete file above, storeKeys false (1)"); - } - else - { - if (zh->records_processed < zh->m_file_verbose_limit) - { - yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT - " " ZINT_FORMAT " %s" , - zh->m_record_type, fname, (zint) recordOffset, - *sysno, match_str_to_print); - } - zh->records_deleted++; - if (matchStr) - { - int db_ord = zebraExplain_get_database_ord(zh->reg->zei); - dict_delete_ord(zh->reg->matchDict, db_ord, matchStr); - } - rec_del (zh->reg->records, &rec); - } - rec_rm (&rec); - logRecord (zh); - return ZEBRA_OK; - } - else - { - /* flush new keys for sort&search etc */ - if (zh->records_processed < zh->m_file_verbose_limit) - { - yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT - " " ZINT_FORMAT " %s" , - zh->m_record_type, fname, (zint) recordOffset, - *sysno, match_str_to_print); - } - recordAttr->staticrank = extractCtrl.staticrank; - extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); - extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, - recordAttr->staticrank); - zh->records_updated++; - } - zebra_rec_keys_close(delkeys); - zebra_rec_keys_close(sortKeys); - } - /* update file type */ - xfree (rec->info[recInfo_fileType]); - rec->info[recInfo_fileType] = - rec_strdup (zh->m_record_type, &rec->size[recInfo_fileType]); - - /* update filename */ - xfree (rec->info[recInfo_filename]); - rec->info[recInfo_filename] = - rec_strdup (fname, &rec->size[recInfo_filename]); - - /* update delete keys */ - xfree (rec->info[recInfo_delKeys]); - if (!zebra_rec_keys_empty(zh->reg->keys) && zh->m_store_keys == 1) - { - zebra_rec_keys_get_buf(zh->reg->keys, - &rec->info[recInfo_delKeys], - &rec->size[recInfo_delKeys]); - } - else - { - rec->info[recInfo_delKeys] = NULL; - rec->size[recInfo_delKeys] = 0; - } - - /* update sort keys */ - xfree (rec->info[recInfo_sortKeys]); - - zebra_rec_keys_get_buf(zh->reg->sortKeys, - &rec->info[recInfo_sortKeys], - &rec->size[recInfo_sortKeys]); - - /* save file size of original record */ - zebraExplain_recordBytesIncrement (zh->reg->zei, - - recordAttr->recordSize); - recordAttr->recordSize = fi->file_moffset - recordOffset; - if (!recordAttr->recordSize) - recordAttr->recordSize = fi->file_max - recordOffset; - zebraExplain_recordBytesIncrement (zh->reg->zei, - recordAttr->recordSize); - - /* set run-number for this record */ - recordAttr->runNumber = zebraExplain_runNumberIncrement (zh->reg->zei, - 0); - - /* update store data */ - xfree (rec->info[recInfo_storeData]); - if (zh->store_data_buf) - { - rec->size[recInfo_storeData] = zh->store_data_size; - rec->info[recInfo_storeData] = zh->store_data_buf; - zh->store_data_buf = 0; - } - else if (zh->m_store_data) - { - rec->size[recInfo_storeData] = recordAttr->recordSize; - rec->info[recInfo_storeData] = (char *) - xmalloc (recordAttr->recordSize); - if (lseek (fi->fd, recordOffset, SEEK_SET) < 0) - { - yaz_log(YLOG_ERRNO|YLOG_FATAL, "seek to " ZINT_FORMAT " in %s", - (zint)recordOffset, fname); - exit (1); - } - if (read (fi->fd, rec->info[recInfo_storeData], recordAttr->recordSize) - < recordAttr->recordSize) - { - yaz_log (YLOG_ERRNO|YLOG_FATAL, "read %d bytes of %s", - recordAttr->recordSize, fname); - exit (1); - } - } - else - { - rec->info[recInfo_storeData] = NULL; - rec->size[recInfo_storeData] = 0; - } - /* update database name */ - xfree (rec->info[recInfo_databaseName]); - rec->info[recInfo_databaseName] = - rec_strdup (zh->basenames[0], &rec->size[recInfo_databaseName]); - - /* update offset */ - recordAttr->recordOffset = recordOffset; - - /* commit this record */ - rec_put (zh->reg->records, &rec); - logRecord (zh); - return ZEBRA_OK; -} - ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, int deleteFlag) { @@ -766,10 +352,11 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, char gprefix[128]; char ext[128]; char ext_res[128]; - struct file_read_info *fi; + struct file_read_info *fi = 0; const char *original_record_type = 0; RecType recType; void *recTypeClientData; + struct ZebraRecStream stream, *streamp; zebra_init_log_level(); @@ -826,7 +413,10 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, yaz_log(YLOG_WARN, "Bad filter version: %s", zh->m_record_type); } if (sysno && deleteFlag) - fd = -1; + { + streamp = 0; + fi = 0; + } else { char full_rep[1024]; @@ -846,19 +436,21 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, zh->m_record_type = original_record_type; return ZEBRA_FAIL; } + streamp = &stream; + zebra_create_stream_fd(streamp, fd, 0); } - fi = file_read_start (fd); while(1) { - fi->file_moffset = fi->file_offset; - fi->file_more = 0; /* file_end not called (yet) */ - r = file_extract_record (zh, sysno, fname, deleteFlag, fi, 1, - recType, recTypeClientData); - if (fi->file_more) - { /* file_end has been called so reset offset .. */ - fi->file_offset = fi->file_moffset; - lseek(fi->fd, fi->file_moffset, SEEK_SET); - } + r = zebra_extract_record_stream(zh, streamp, + deleteFlag, + 0, /* tst_mode */ + zh->m_record_type, + sysno, + 0, /*match_criteria */ + fname, + 1, /* force_update */ + 1, /* allow_update */ + recType, recTypeClientData); if (r != ZEBRA_OK) { break; @@ -868,9 +460,8 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, break; } } - file_read_stop (fi); - if (fd != -1) - close (fd); + if (streamp) + stream.destroy(streamp); zh->m_record_type = original_record_type; return r; } @@ -881,6 +472,7 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, If not, and a record is provided, then sysno is got from there */ + ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, const char *buf, size_t buf_size, int delete_flag, @@ -892,49 +484,11 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, int force_update, int allow_update) { - SYSNO sysno0 = 0; - RecordAttr *recordAttr; - struct recExtractCtrl extractCtrl; - int r; - const char *matchStr = 0; - RecType recType = NULL; + struct ZebraRecStream stream; + ZEBRA_RES res; void *clientData; - Record rec; - long recordOffset = 0; - struct zebra_fetch_control fc; - const char *pr_fname = fname; /* filename to print .. */ - int show_progress = zh->records_processed < zh->m_file_verbose_limit ? 1:0; - - zebra_init_log_level(); - - if (!pr_fname) - pr_fname = ""; /* make it printable if file is omitted */ - - fc.fd = -1; - fc.record_int_buf = buf; - fc.record_int_len = buf_size; - fc.record_int_pos = 0; - fc.offset_end = 0; - fc.record_offset = 0; - - extractCtrl.offset = 0; - extractCtrl.readf = zebra_record_int_read; - extractCtrl.seekf = zebra_record_int_seek; - extractCtrl.tellf = zebra_record_int_tell; - extractCtrl.endf = zebra_record_int_end; - extractCtrl.first_record = 1; - extractCtrl.fh = &fc; + RecType recType = 0; - zebra_rec_keys_reset(zh->reg->keys); - zebra_rec_keys_reset(zh->reg->sortKeys); - - if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0])) - { - if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0], - zh->m_explain_database)) - return ZEBRA_FAIL; - } - if (recordType && *recordType) { yaz_log(log_level, "Record type explicitly specified: %s", recordType); @@ -960,41 +514,111 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, yaz_log (YLOG_WARN, "No such record type: %s", recordType); return ZEBRA_FAIL; } - - extractCtrl.init = extract_init; - extractCtrl.tokenAdd = extract_token_add; - extractCtrl.schemaAdd = extract_schema_add; - extractCtrl.dh = zh->reg->dh; - extractCtrl.handle = zh; - extractCtrl.match_criteria[0] = '\0'; - extractCtrl.staticrank = 0; - - init_extractCtrl(zh, &extractCtrl); - extract_set_store_data_prepare(&extractCtrl); - r = (*recType->extract)(clientData, &extractCtrl); - if (r == RECCTRL_EXTRACT_EOF) - return ZEBRA_FAIL; - else if (r == RECCTRL_EXTRACT_ERROR_GENERIC) + zebra_create_stream_mem(&stream, buf, buf_size); + + res = zebra_extract_record_stream(zh, &stream, + delete_flag, + test_mode, + recordType, + sysno, + match_criteria, + fname, + force_update, + allow_update, + recType, clientData); + stream.destroy(&stream); + return res; +} + + +ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, + struct ZebraRecStream *stream, + int delete_flag, + int test_mode, + const char *recordType, + SYSNO *sysno, + const char *match_criteria, + const char *fname, + int force_update, + int allow_update, + RecType recType, + void *recTypeClientData) + +{ + SYSNO sysno0 = 0; + RecordAttr *recordAttr; + struct recExtractCtrl extractCtrl; + int r; + const char *matchStr = 0; + Record rec; + off_t start_offset = 0; + const char *pr_fname = fname; /* filename to print .. */ + int show_progress = zh->records_processed < zh->m_file_verbose_limit ? 1:0; + + zebra_init_log_level(); + + if (!pr_fname) + pr_fname = ""; /* make it printable if file is omitted */ + + zebra_rec_keys_reset(zh->reg->keys); + zebra_rec_keys_reset(zh->reg->sortKeys); + + if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0])) { - /* error occured during extraction ... */ - yaz_log (YLOG_WARN, "extract error: generic"); - return ZEBRA_FAIL; + if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0], + zh->m_explain_database)) + return ZEBRA_FAIL; } - else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER) + + if (stream) { - /* error occured during extraction ... */ - yaz_log (YLOG_WARN, "extract error: no such filter"); - return ZEBRA_FAIL; - } + off_t null_offset = 0; + extractCtrl.stream = stream; + + start_offset = stream->tellf(stream); - all_matches_add(&extractCtrl); + extractCtrl.first_record = start_offset ? 0 : 1; - if (extractCtrl.match_criteria[0]) - match_criteria = extractCtrl.match_criteria; + stream->endf(stream, &null_offset);; + + extractCtrl.init = extract_init; + extractCtrl.tokenAdd = extract_token_add; + extractCtrl.schemaAdd = extract_schema_add; + extractCtrl.dh = zh->reg->dh; + extractCtrl.handle = zh; + extractCtrl.match_criteria[0] = '\0'; + extractCtrl.staticrank = 0; + + init_extractCtrl(zh, &extractCtrl); + + extract_set_store_data_prepare(&extractCtrl); + + r = (*recType->extract)(recTypeClientData, &extractCtrl); + + if (r == RECCTRL_EXTRACT_EOF) + return ZEBRA_FAIL; + else if (r == RECCTRL_EXTRACT_ERROR_GENERIC) + { + /* error occured during extraction ... */ + yaz_log (YLOG_WARN, "extract error: generic"); + return ZEBRA_FAIL; + } + else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER) + { + /* error occured during extraction ... */ + yaz_log (YLOG_WARN, "extract error: no such filter"); + return ZEBRA_FAIL; + } + + all_matches_add(&extractCtrl); + + if (extractCtrl.match_criteria[0]) + match_criteria = extractCtrl.match_criteria; + } if (!sysno) { sysno = &sysno0; @@ -1037,14 +661,14 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, /* new record */ if (delete_flag) { - yaz_log (YLOG_LOG, "delete %s %s %ld", recordType, - pr_fname, (long) recordOffset); + yaz_log (YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); yaz_log (YLOG_WARN, "cannot delete record above (seems new)"); return ZEBRA_FAIL; } if (show_progress) - yaz_log (YLOG_LOG, "add %s %s %ld", recordType, pr_fname, - (long) recordOffset); + yaz_log (YLOG_LOG, "add %s %s " ZINT_FORMAT, recordType, pr_fname, + (zint) start_offset); rec = rec_new (zh->reg->records); *sysno = rec->sysno; @@ -1072,8 +696,8 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, zebra_rec_keys_t sortKeys = zebra_rec_keys_open(); if (!allow_update) { - yaz_log (YLOG_LOG, "skipped %s %s %ld", - recordType, pr_fname, (long) recordOffset); + yaz_log (YLOG_LOG, "skipped %s %s " ZINT_FORMAT, + recordType, pr_fname, (zint) start_offset); logRecord(zh); return ZEBRA_FAIL; } @@ -1100,16 +724,16 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, /* record going to be deleted */ if (zebra_rec_keys_empty(delkeys)) { - yaz_log(YLOG_LOG, "delete %s %s %ld", recordType, - pr_fname, (long) recordOffset); + yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); yaz_log(YLOG_WARN, "cannot delete file above, " "storeKeys false (3)"); } else { if (show_progress) - yaz_log(YLOG_LOG, "delete %s %s %ld", recordType, - pr_fname, (long) recordOffset); + yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); zh->records_deleted++; if (matchStr) { @@ -1125,8 +749,8 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, else { if (show_progress) - yaz_log(YLOG_LOG, "update %s %s %ld", recordType, - pr_fname, (long) recordOffset); + yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) ZINT_FORMAT); recordAttr->staticrank = extractCtrl.staticrank; extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, @@ -1169,15 +793,19 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, /* save file size of original record */ zebraExplain_recordBytesIncrement (zh->reg->zei, - recordAttr->recordSize); -#if 0 - recordAttr->recordSize = fi->file_moffset - recordOffset; - if (!recordAttr->recordSize) - recordAttr->recordSize = fi->file_max - recordOffset; -#else - recordAttr->recordSize = buf_size; -#endif - zebraExplain_recordBytesIncrement (zh->reg->zei, - recordAttr->recordSize); + if (stream) + { + off_t end_offset = stream->endf(stream, 0); + + if (!end_offset) + end_offset = stream->tellf(stream); + else + stream->seekf(stream, end_offset); + + recordAttr->recordSize = end_offset - start_offset; + zebraExplain_recordBytesIncrement(zh->reg->zei, + recordAttr->recordSize); + } /* set run-number for this record */ recordAttr->runNumber = @@ -1195,10 +823,15 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, } else if (zh->m_store_data) { + off_t cur_offset = stream->tellf(stream); + rec->size[recInfo_storeData] = recordAttr->recordSize; rec->info[recInfo_storeData] = (char *) xmalloc (recordAttr->recordSize); - memcpy (rec->info[recInfo_storeData], buf, recordAttr->recordSize); + stream->seekf(stream, start_offset); + stream->readf(stream, rec->info[recInfo_storeData], + recordAttr->recordSize); + stream->seekf(stream, cur_offset); } else { @@ -1211,7 +844,7 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, rec_strdup (zh->basenames[0], &rec->size[recInfo_databaseName]); /* update offset */ - recordAttr->recordOffset = recordOffset; + recordAttr->recordOffset = start_offset; /* commit this record */ rec_put (zh->reg->records, &rec); diff --git a/index/index.h b/index/index.h index be2e858..5eb29c0 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.173 2006-08-16 13:16:36 adam Exp $ +/* $Id: index.h,v 1.174 2006-08-22 13:39:27 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -363,6 +363,19 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, int force_update, int allow_update); +ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, + struct ZebraRecStream *stream, + int delete_flag, + int test_mode, + const char *recordType, + SYSNO *sysno, + const char *match_criteria, + const char *fname, + int force_update, + int allow_update, + RecType recType, + void *recTypeClientData); + #if 0 int extract_rec_in_mem (ZebraHandle zh, const char *recordType, const char *buf, size_t buf_size, @@ -373,23 +386,10 @@ int extract_rec_in_mem (ZebraHandle zh, const char *recordType, #endif void extract_flushWriteKeys (ZebraHandle zh, int final); -struct zebra_fetch_control { - off_t offset_end; - off_t record_offset; - off_t record_int_pos; - const char *record_int_buf; - int record_int_len; - int fd; -}; - -int zebra_record_ext_read (void *fh, char *buf, size_t count); -off_t zebra_record_ext_seek (void *fh, off_t offset); -off_t zebra_record_ext_tell (void *fh); -off_t zebra_record_int_seek (void *fh, off_t offset); -off_t zebra_record_int_tell (void *fh); -int zebra_record_int_read (void *fh, char *buf, size_t count); -void zebra_record_int_end (void *fh, off_t offset); - +YAZ_EXPORT void zebra_create_stream_mem(struct ZebraRecStream *stream, + const char *buf, size_t sz); +YAZ_EXPORT void zebra_create_stream_fd(struct ZebraRecStream *stream, + int fd, off_t start_offset); void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys); ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys, diff --git a/index/kcontrol.c b/index/kcontrol.c index e5122a7..71a4a69 100644 --- a/index/kcontrol.c +++ b/index/kcontrol.c @@ -1,4 +1,4 @@ -/* $Id: kcontrol.c,v 1.5 2006-08-16 13:16:36 adam Exp $ +/* $Id: kcontrol.c,v 1.6 2006-08-22 13:39:27 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -56,7 +56,6 @@ static void my_dec(struct rset_key_control *kc) struct rset_key_control *zebra_key_control_create(ZebraHandle zh) { - const char *res_val; struct rset_key_control *kc = xmalloc(sizeof(*kc)); struct context_control *cp = xmalloc(sizeof(*cp)); diff --git a/index/marcread.c b/index/marcread.c index c8a12e2..93abbc5 100644 --- a/index/marcread.c +++ b/index/marcread.c @@ -1,4 +1,4 @@ -/* $Id: marcread.c,v 1.2 2006-08-14 10:40:15 adam Exp $ +/* $Id: marcread.c,v 1.3 2006-08-22 13:39:27 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -58,7 +58,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) char *absynName; data1_marctab *marctab; - if ((*p->readf)(p->fh, buf, 5) != 5) + if (p->stream->readf(p->stream, buf, 5) != 5) return NULL; while (*buf < '0' || *buf > '9') { @@ -69,7 +69,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) for (i = 0; i<4; i++) buf[i] = buf[i+1]; - if ((*p->readf)(p->fh, buf+4, 1) != 1) + if (p->stream->readf(p->stream, buf+4, 1) != 1) return NULL; } record_length = atoi_n (buf, 5); @@ -79,7 +79,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) return NULL; } /* read remaining part - attempt to read one byte furhter... */ - read_bytes = (*p->readf)(p->fh, buf+5, record_length-4); + read_bytes = p->stream->readf(p->stream, buf+5, record_length-4); if (read_bytes < record_length-5) { yaz_log (YLOG_WARN, "Couldn't read whole MARC record"); @@ -87,11 +87,14 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) } if (read_bytes == record_length - 4) { - off_t cur_offset = (*p->tellf)(p->fh); + off_t cur_offset = p->stream->tellf(p->stream); if (cur_offset <= 27) return NULL; - if (p->endf) - (*p->endf)(p->fh, cur_offset - 1); + if (p->stream->endf) + { + off_t end_offset = cur_offset - 1; + p->stream->endf(p->stream, &end_offset); + } } absynName = mi->type; res_root = data1_mk_root (p->dh, p->mem, absynName); diff --git a/index/recgrs.c b/index/recgrs.c index e3ce3f7..c09f731 100644 --- a/index/recgrs.c +++ b/index/recgrs.c @@ -1,4 +1,4 @@ -/* $Id: recgrs.c,v 1.4 2006-08-14 10:40:15 adam Exp $ +/* $Id: recgrs.c,v 1.5 2006-08-22 13:39:27 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -952,12 +952,7 @@ static int grs_extract_sub(void *clientData, struct recExtractCtrl *p, int oidtmp[OID_SIZE]; RecWord wrd; - gri.readf = p->readf; - gri.seekf = p->seekf; - gri.tellf = p->tellf; - gri.endf = p->endf; - gri.fh = p->fh; - gri.offset = p->offset; + gri.stream = p->stream; gri.mem = mem; gri.dh = p->dh; gri.clientData = clientData; @@ -1149,12 +1144,7 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, int dummy; mem = nmem_create(); - gri.readf = p->readf; - gri.seekf = p->seekf; - gri.tellf = p->tellf; - gri.endf = NULL; - gri.fh = p->fh; - gri.offset = 0; + gri.stream = p->stream; gri.mem = mem; gri.dh = p->dh; gri.clientData = clientData; diff --git a/index/rectext.c b/index/rectext.c index 64cefe5..3a06fc3 100644 --- a/index/rectext.c +++ b/index/rectext.c @@ -1,4 +1,4 @@ -/* $Id: rectext.c,v 1.2 2006-08-14 10:40:15 adam Exp $ +/* $Id: rectext.c,v 1.3 2006-08-22 13:39:27 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -80,7 +80,7 @@ static int buf_getchar (struct filter_info *tinfo, struct buf_info *fi, char *ds { if (fi->max <= 0) return 0; - fi->max = (*fi->p->readf)(fi->p->fh, fi->buf, 4096); + fi->max = fi->p->stream->readf(fi->p->stream, fi->buf, 4096); fi->offset = 0; if (fi->max <= 0) return 0; @@ -88,8 +88,9 @@ static int buf_getchar (struct filter_info *tinfo, struct buf_info *fi, char *ds *dst = fi->buf[(fi->offset)++]; if (tinfo->sep && *dst == *tinfo->sep) { - off_t off = (*fi->p->tellf)(fi->p->fh); - (*fi->p->endf)(fi->p->fh, off - (fi->max - fi->offset)); + off_t off = fi->p->stream->tellf(fi->p->stream); + off_t end_offset = off - (fi->max - fi->offset); + fi->p->stream->endf(fi->p->stream, &end_offset); return 0; } return 1; @@ -202,7 +203,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) } if (!make_body) break; - r = (*p->readf)(p->fh, filter_buf + filter_ptr, 4096); + r = p->stream->readf(p->stream, filter_buf + filter_ptr, 4096); if (r <= 0) break; filter_ptr += r; diff --git a/index/regxread.c b/index/regxread.c index 7f88cb9..993fb53 100644 --- a/index/regxread.c +++ b/index/regxread.c @@ -1,4 +1,4 @@ -/* $Id: regxread.c,v 1.2 2006-08-14 10:40:15 adam Exp $ +/* $Id: regxread.c,v 1.3 2006-08-22 13:39:27 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -115,15 +115,15 @@ struct lexSpec { #if HAVE_TCL_H Tcl_Interp *tcl_interp; #endif - void *f_win_fh; - void (*f_win_ef)(void *, off_t); + struct ZebraRecStream *stream; + off_t (*f_win_ef)(struct ZebraRecStream *s, off_t *); int f_win_start; /* first byte of buffer is this file offset */ int f_win_end; /* last byte of buffer is this offset - 1 */ int f_win_size; /* size of buffer */ char *f_win_buf; /* buffer itself */ - int (*f_win_rf)(void *, char *, size_t); - off_t (*f_win_sf)(void *, off_t); + int (*f_win_rf)(struct ZebraRecStream *, char *, size_t); + off_t (*f_win_sf)(struct ZebraRecStream *, off_t); struct lexConcatBuf *concatBuf; int maxLevel; @@ -154,12 +154,12 @@ static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos, } if (off < 0 || start_pos >= spec->f_win_end) { - (*spec->f_win_sf)(spec->f_win_fh, start_pos); + (*spec->f_win_sf)(spec->stream, start_pos); spec->f_win_start = start_pos; if (!spec->f_win_buf) spec->f_win_buf = (char *) xmalloc (spec->f_win_size); - *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf, + *size = (*spec->f_win_rf)(spec->stream, spec->f_win_buf, spec->f_win_size); spec->f_win_end = spec->f_win_start + *size; @@ -169,7 +169,7 @@ static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos, } for (i = 0; if_win_end - start_pos; i++) spec->f_win_buf[i] = spec->f_win_buf[i + off]; - r = (*spec->f_win_rf)(spec->f_win_fh, + r = (*spec->f_win_rf)(spec->stream, spec->f_win_buf + i, spec->f_win_size - i); spec->f_win_start = start_pos; @@ -1763,10 +1763,11 @@ data1_node *lexNode (struct lexSpec *spec, int *ptr) { if (spec->f_win_ef && *ptr != F_WIN_EOF) { + off_t end_offset = *ptr; #if REGX_DEBUG yaz_log (YLOG_LOG, "regx: endf ptr=%d", *ptr); #endif - (*spec->f_win_ef)(spec->f_win_fh, *ptr); + (*spec->f_win_ef)(spec->stream, &end_offset); } return NULL; } @@ -1884,6 +1885,7 @@ data1_node *grs_read_regx (struct grs_read_info *p) int res; struct lexSpecs *specs = (struct lexSpecs *) p->clientData; struct lexSpec **curLexSpec = &specs->spec; + off_t start_offset; #if REGX_DEBUG yaz_log (YLOG_LOG, "grs_read_regx"); @@ -1901,18 +1903,19 @@ data1_node *grs_read_regx (struct grs_read_info *p) } } (*curLexSpec)->dh = p->dh; - if (!p->offset) + start_offset = p->stream->tellf(p->stream); + if (start_offset == 0) { (*curLexSpec)->f_win_start = 0; (*curLexSpec)->f_win_end = 0; - (*curLexSpec)->f_win_rf = p->readf; - (*curLexSpec)->f_win_sf = p->seekf; - (*curLexSpec)->f_win_fh = p->fh; - (*curLexSpec)->f_win_ef = p->endf; + (*curLexSpec)->f_win_rf = p->stream->readf; + (*curLexSpec)->f_win_sf = p->stream->seekf; + (*curLexSpec)->stream = p->stream; + (*curLexSpec)->f_win_ef = p->stream->endf; (*curLexSpec)->f_win_size = 500000; } (*curLexSpec)->m = p->mem; - return lexRoot (*curLexSpec, p->offset, "main"); + return lexRoot (*curLexSpec, start_offset, "main"); } static int extract_regx(void *clientData, struct recExtractCtrl *ctrl) @@ -1942,6 +1945,7 @@ data1_node *grs_read_tcl (struct grs_read_info *p) int res; struct lexSpecs *specs = (struct lexSpecs *) p->clientData; struct lexSpec **curLexSpec = &specs->spec; + off_t start_offset; #if REGX_DEBUG yaz_log (YLOG_LOG, "grs_read_tcl"); @@ -1968,18 +1972,19 @@ data1_node *grs_read_tcl (struct grs_read_info *p) } } (*curLexSpec)->dh = p->dh; - if (!p->offset) + start_offset = p->stream->tellf(p->stream); + if (start_offset == 0) { (*curLexSpec)->f_win_start = 0; (*curLexSpec)->f_win_end = 0; - (*curLexSpec)->f_win_rf = p->readf; - (*curLexSpec)->f_win_sf = p->seekf; - (*curLexSpec)->f_win_fh = p->fh; - (*curLexSpec)->f_win_ef = p->endf; + (*curLexSpec)->f_win_rf = p->stream->readf; + (*curLexSpec)->f_win_sf = p->stream->seekf; + (*curLexSpec)->stream = p->stream; + (*curLexSpec)->f_win_ef = p->stream->endf; (*curLexSpec)->f_win_size = 500000; } (*curLexSpec)->m = p->mem; - return lexRoot (*curLexSpec, p->offset, "main"); + return lexRoot (*curLexSpec, start_offset, "main"); } static int extract_tcl(void *clientData, struct recExtractCtrl *ctrl) diff --git a/index/retrieve.c b/index/retrieve.c index 3de8ff6..5c28b2a 100644 --- a/index/retrieve.c +++ b/index/retrieve.c @@ -1,4 +1,4 @@ -/* $Id: retrieve.c,v 1.43 2006-08-14 10:40:15 adam Exp $ +/* $Id: retrieve.c,v 1.44 2006-08-22 13:39:27 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -35,56 +35,8 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "index.h" #include -int zebra_record_ext_read (void *fh, char *buf, size_t count) -{ - struct zebra_fetch_control *fc = (struct zebra_fetch_control *) fh; - return read (fc->fd, buf, count); -} - -off_t zebra_record_ext_seek (void *fh, off_t offset) -{ - struct zebra_fetch_control *fc = (struct zebra_fetch_control *) fh; - return lseek (fc->fd, offset + fc->record_offset, SEEK_SET); -} - -off_t zebra_record_ext_tell (void *fh) -{ - struct zebra_fetch_control *fc = (struct zebra_fetch_control *) fh; - return lseek (fc->fd, 0, SEEK_CUR) - fc->record_offset; -} - -off_t zebra_record_int_seek (void *fh, off_t offset) -{ - struct zebra_fetch_control *fc = (struct zebra_fetch_control *) fh; - return (off_t) (fc->record_int_pos = offset); -} - -off_t zebra_record_int_tell (void *fh) -{ - struct zebra_fetch_control *fc = (struct zebra_fetch_control *) fh; - return (off_t) fc->record_int_pos; -} - -int zebra_record_int_read (void *fh, char *buf, size_t count) -{ - struct zebra_fetch_control *fc = (struct zebra_fetch_control *) fh; - int l = fc->record_int_len - fc->record_int_pos; - if (l <= 0) - return 0; - l = (l < (int) count) ? l : (int) count; - memcpy (buf, fc->record_int_buf + fc->record_int_pos, l); - fc->record_int_pos += l; - return l; -} - -void zebra_record_int_end (void *fh, off_t off) -{ - struct zebra_fetch_control *fc = (struct zebra_fetch_control *) fh; - fc->offset_end = off; -} - int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score, - zebra_snippets *hit_snippet, ODR stream, + zebra_snippets *hit_snippet, ODR odr, oid_value input_format, Z_RecordComposition *comp, oid_value *output_format, char **rec_bufp, int *rec_lenp, char **basenamep, @@ -94,7 +46,7 @@ int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score, char *fname, *file_type, *basename; RecType rt; struct recRetrieveCtrl retrieveCtrl; - struct zebra_fetch_control fc; + struct ZebraRecStream stream; RecordAttr *recordAttr; void *clientData; int raw_mode = 0; @@ -109,7 +61,7 @@ int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score, sprintf(rec_str, ZINT_FORMAT, sysno); *output_format = VAL_SUTRS; *rec_lenp = strlen(rec_str); - *rec_bufp = odr_strdup(stream, rec_str); + *rec_bufp = odr_strdup(odr, rec_str); return 0; } rec = rec_get (zh->reg->records, sysno); @@ -124,7 +76,7 @@ int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score, file_type = rec->info[recInfo_fileType]; fname = rec->info[recInfo_filename]; basename = rec->info[recInfo_databaseName]; - *basenamep = (char *) odr_malloc (stream, strlen(basename)+1); + *basenamep = (char *) odr_malloc (odr, strlen(basename)+1); strcpy (*basenamep, basename); if (comp && comp->which == Z_RecordComp_simple && @@ -172,7 +124,7 @@ int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score, } *output_format = VAL_SUTRS; *rec_lenp = wrbuf_len(wrbuf); - *rec_bufp = odr_malloc(stream, *rec_lenp); + *rec_bufp = odr_malloc(odr, *rec_lenp); memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp); wrbuf_free(wrbuf, 1); zebra_rec_keys_close(keys); @@ -191,31 +143,27 @@ int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score, return 14; } yaz_log (YLOG_DEBUG, "retrieve localno=" ZINT_FORMAT " score=%d", sysno,score); - retrieveCtrl.fh = &fc; - fc.fd = -1; + retrieveCtrl.stream = &stream; retrieveCtrl.fname = fname; if (rec->size[recInfo_storeData] > 0) { - retrieveCtrl.readf = zebra_record_int_read; - retrieveCtrl.seekf = zebra_record_int_seek; - retrieveCtrl.tellf = zebra_record_int_tell; - fc.record_int_len = rec->size[recInfo_storeData]; - fc.record_int_buf = rec->info[recInfo_storeData]; - fc.record_int_pos = 0; - yaz_log (YLOG_DEBUG, "Internal retrieve. %d bytes", fc.record_int_len); + zebra_create_stream_mem(&stream, rec->info[recInfo_storeData], + rec->size[recInfo_storeData]); if (raw_mode) { *output_format = VAL_SUTRS; *rec_lenp = rec->size[recInfo_storeData]; - *rec_bufp = (char *) odr_malloc(stream, *rec_lenp); + *rec_bufp = (char *) odr_malloc(odr, *rec_lenp); memcpy(*rec_bufp, rec->info[recInfo_storeData], *rec_lenp); rec_rm (&rec); + stream.destroy(&stream); return 0; } } else { char full_rep[1024]; + int fd; if (zh->path_reg && !yaz_is_abspath (fname)) { @@ -226,36 +174,32 @@ int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score, else strcpy (full_rep, fname); - if ((fc.fd = open (full_rep, O_BINARY|O_RDONLY)) == -1) + if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1) { yaz_log (YLOG_WARN|YLOG_ERRNO, "Retrieve fail; missing file: %s", full_rep); rec_rm (&rec); + stream.destroy(&stream); return 14; } - fc.record_offset = recordAttr->recordOffset; - - retrieveCtrl.readf = zebra_record_ext_read; - retrieveCtrl.seekf = zebra_record_ext_seek; - retrieveCtrl.tellf = zebra_record_ext_tell; - zebra_record_ext_seek (retrieveCtrl.fh, 0); + zebra_create_stream_fd(&stream, fd, recordAttr->recordOffset); if (raw_mode) { *output_format = VAL_SUTRS; *rec_lenp = recordAttr->recordSize; - *rec_bufp = (char *) odr_malloc(stream, *rec_lenp); - zebra_record_ext_read(&fc, *rec_bufp, *rec_lenp); + *rec_bufp = (char *) odr_malloc(odr, *rec_lenp); + stream.readf(&stream, *rec_bufp, *rec_lenp); rec_rm (&rec); - close (fc.fd); - return 0; + stream.destroy(&stream); + return 0; } } retrieveCtrl.localno = sysno; retrieveCtrl.staticrank = recordAttr->staticrank; retrieveCtrl.score = score; retrieveCtrl.recordSize = recordAttr->recordSize; - retrieveCtrl.odr = stream; + retrieveCtrl.odr = odr; retrieveCtrl.input_format = retrieveCtrl.output_format = input_format; retrieveCtrl.comp = comp; retrieveCtrl.encoding = zh->record_encoding; @@ -312,13 +256,15 @@ int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score, *output_format = retrieveCtrl.output_format; *rec_bufp = (char *) retrieveCtrl.rec_buf; *rec_lenp = retrieveCtrl.rec_len; - if (fc.fd != -1) - close (fc.fd); + + stream.destroy(&stream); + rec_rm (&rec); *addinfo = retrieveCtrl.addinfo; return retrieveCtrl.diagnostic; } + /* * Local variables: * c-basic-offset: 4 diff --git a/index/safari.c b/index/safari.c index 8327bda..c070cb0 100644 --- a/index/safari.c +++ b/index/safari.c @@ -1,4 +1,4 @@ -/* $Id: safari.c,v 1.3 2006-08-16 13:16:36 adam Exp $ +/* $Id: safari.c,v 1.4 2006-08-22 13:39:28 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -81,7 +81,7 @@ static int fi_getchar(struct fi_info *fi, char *dst) { if (fi->max <= 0) return 0; - fi->max = (*fi->p->readf)(fi->p->fh, fi->buf, 4096); + fi->max = fi->p->stream->readf(fi->p->stream, fi->buf, 4096); fi->offset = 0; if (fi->max <= 0) return 0; @@ -236,7 +236,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) } if (!make_body) break; - r = (*p->readf)(p->fh, filter_buf + filter_ptr, 4096); + r = p->stream->readf(p->stream, filter_buf + filter_ptr, 4096); if (r <= 0) break; filter_ptr += r; diff --git a/index/sgmlread.c b/index/sgmlread.c index 83d98d5..d5564ac 100644 --- a/index/sgmlread.c +++ b/index/sgmlread.c @@ -1,4 +1,4 @@ -/* $Id: sgmlread.c,v 1.2 2006-08-14 10:40:15 adam Exp $ +/* $Id: sgmlread.c,v 1.3 2006-08-22 13:39:28 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -31,9 +31,8 @@ struct sgml_getc_info { int buf_size; int size; int off; + struct ZebraRecStream *stream; off_t moffset; - void *fh; - int (*readf)(void *, char *, size_t); WRBUF wrbuf; }; @@ -49,7 +48,7 @@ int sgml_getc (void *clientData) p->moffset += p->off; p->off = 0; p->size = 0; - res = (*p->readf)(p->fh, p->buf, p->buf_size); + res = p->stream->readf(p->stream, p->buf, p->buf_size); if (res > 0) { p->size += res; @@ -64,19 +63,21 @@ static data1_node *grs_read_sgml (struct grs_read_info *p) data1_node *node; int res; - sgi->moffset = p->offset; - sgi->fh = p->fh; - sgi->readf = p->readf; + sgi->moffset = p->stream->tellf(p->stream); + sgi->stream = p->stream; sgi->off = 0; sgi->size = 0; - res = (*sgi->readf)(sgi->fh, sgi->buf, sgi->buf_size); + res = sgi->stream->readf(sgi->stream, sgi->buf, sgi->buf_size); if (res > 0) sgi->size += res; else return 0; - node = data1_read_nodex (p->dh, p->mem, sgml_getc, sgi, sgi->wrbuf); - if (node && p->endf) - (*p->endf)(sgi->fh, sgi->moffset + sgi->off); + node = data1_read_nodex(p->dh, p->mem, sgml_getc, sgi, sgi->wrbuf); + if (node && p->stream->endf) + { + off_t end_offset = sgi->moffset + sgi->off; + p->stream->endf(sgi->stream, &end_offset); + } return node; } diff --git a/index/stream.c b/index/stream.c new file mode 100644 index 0000000..1ce54e4 --- /dev/null +++ b/index/stream.c @@ -0,0 +1,165 @@ +/* $Id: stream.c,v 1.1 2006-08-22 13:39:28 adam Exp $ + Copyright (C) 1995-2006 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +#include +#include + +#include +#ifdef WIN32 +#include +#include +#endif +#if HAVE_UNISTD_H +#include +#endif + +#include "index.h" + +struct zebra_mem_control { + off_t offset_end; + off_t record_int_pos; + const char *record_int_buf; + int record_int_len; +}; + +struct zebra_ext_control { + off_t offset_end; + off_t record_offset; + int fd; +}; + +static off_t zebra_mem_seek(struct ZebraRecStream *s, off_t offset) +{ + struct zebra_mem_control *fc = (struct zebra_mem_control *) s->fh; + return (off_t) (fc->record_int_pos = offset); +} + +static off_t zebra_mem_tell(struct ZebraRecStream *s) +{ + struct zebra_mem_control *fc = (struct zebra_mem_control *) s->fh; + return (off_t) fc->record_int_pos; +} + +static int zebra_mem_read(struct ZebraRecStream *s, char *buf, size_t count) +{ + struct zebra_mem_control *fc = (struct zebra_mem_control *) s->fh; + int l = fc->record_int_len - fc->record_int_pos; + if (l <= 0) + return 0; + l = (l < (int) count) ? l : (int) count; + memcpy (buf, fc->record_int_buf + fc->record_int_pos, l); + fc->record_int_pos += l; + return l; +} + +static off_t zebra_mem_end(struct ZebraRecStream *s, off_t *offset) +{ + struct zebra_mem_control *fc = (struct zebra_mem_control *) s->fh; + if (offset) + fc->offset_end = *offset; + return fc->offset_end; +} + +static void zebra_mem_destroy(struct ZebraRecStream *s) +{ + struct zebra_mem_control *fc = s->fh; + xfree(fc); +} + +static int zebra_ext_read(struct ZebraRecStream *s, char *buf, size_t count) +{ + struct zebra_ext_control *fc = (struct zebra_ext_control *) s->fh; + return read(fc->fd, buf, count); +} + +static off_t zebra_ext_seek(struct ZebraRecStream *s, off_t offset) +{ + struct zebra_ext_control *fc = (struct zebra_ext_control *) s->fh; + return lseek(fc->fd, offset + fc->record_offset, SEEK_SET); +} + +static off_t zebra_ext_tell(struct ZebraRecStream *s) +{ + struct zebra_ext_control *fc = (struct zebra_ext_control *) s->fh; + return lseek(fc->fd, 0, SEEK_CUR) - fc->record_offset; +} + +static void zebra_ext_destroy(struct ZebraRecStream *s) +{ + struct zebra_ext_control *fc = s->fh; + if (fc->fd != -1) + close(fc->fd); + xfree(fc); +} + +static off_t zebra_ext_end(struct ZebraRecStream *s, off_t *offset) +{ + struct zebra_ext_control *fc = (struct zebra_ext_control *) s->fh; + if (offset) + fc->offset_end = *offset; + return fc->offset_end; +} + + +void zebra_create_stream_mem(struct ZebraRecStream *stream, + const char *buf, size_t sz) +{ + struct zebra_mem_control *fc = xmalloc(sizeof(*fc)); + fc->record_int_buf = buf; + fc->record_int_len = sz; + fc->record_int_pos = 0; + fc->offset_end = 0; + + stream->fh = fc; + stream->readf = zebra_mem_read; + stream->seekf = zebra_mem_seek; + stream->tellf = zebra_mem_tell; + stream->endf = zebra_mem_end; + stream->destroy = zebra_mem_destroy; +} + +void zebra_create_stream_fd(struct ZebraRecStream *stream, + int fd, off_t start_offset) +{ + struct zebra_ext_control *fc = xmalloc(sizeof(*fc)); + + fc->fd = fd; + fc->record_offset = start_offset; + fc->offset_end = 0; + + stream->fh = fc; + stream->readf = zebra_ext_read; + stream->seekf = zebra_ext_seek; + stream->tellf = zebra_ext_tell; + stream->endf = zebra_ext_end; + stream->destroy = zebra_ext_destroy; + zebra_ext_seek(stream, 0); +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/index/xmlread.c b/index/xmlread.c index 3bb6da3..ec42c2b 100644 --- a/index/xmlread.c +++ b/index/xmlread.c @@ -1,4 +1,4 @@ -/* $Id: xmlread.c,v 1.2 2006-08-14 10:40:15 adam Exp $ +/* $Id: xmlread.c,v 1.3 2006-08-22 13:39:28 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -156,11 +156,11 @@ static void cb_entity_decl (void *userData, const char *entityName, } -static int cb_external_entity (XML_Parser pparser, - const char *context, - const char *base, - const char *systemId, - const char *publicId) +static int cb_external_entity(XML_Parser pparser, + const char *context, + const char *base, + const char *systemId, + const char *publicId) { struct user_info *ui = (struct user_info*) XML_GetUserData(pparser); FILE *inf; @@ -392,9 +392,10 @@ static void cb_ns_end(void *userData, const char *prefix) if (prefix) yaz_log(ui->loglevel, "cb_ns_end %s", prefix); } -data1_node *zebra_read_xml (data1_handle dh, - int (*rf)(void *, char *, size_t), void *fh, - NMEM m) + +data1_node *zebra_read_xml(data1_handle dh, + struct ZebraRecStream *stream, + NMEM m) { XML_Parser parser; struct user_info uinfo; @@ -434,7 +435,7 @@ data1_node *zebra_read_xml (data1_handle dh, yaz_log (YLOG_WARN, "XML_GetBuffer fail"); break; } - r = (*rf)(fh, buf, XML_CHUNK); + r = stream->readf(stream, buf, XML_CHUNK); if (r < 0) { /* error */ @@ -484,9 +485,9 @@ struct xml_info { XML_Expat_Version expat_version; }; -static data1_node *grs_read_xml (struct grs_read_info *p) +static data1_node *grs_read_xml(struct grs_read_info *p) { - return zebra_read_xml (p->dh, p->readf, p->fh, p->mem); + return zebra_read_xml(p->dh, p->stream, p->mem); } static void *filter_init(Res res, RecType recType) diff --git a/win/makefile b/win/makefile index a92d421c6..2d9eb47 100644 --- a/win/makefile +++ b/win/makefile @@ -1,5 +1,5 @@ # Zebra makefile for MS NMAKE -# $Id: makefile,v 1.54 2006-08-14 18:12:35 adam Exp $ +# $Id: makefile,v 1.55 2006-08-22 13:39:28 adam Exp $ ########################################################### ############### Parameters @@ -353,6 +353,7 @@ ZEBRALIB_OBJS= \ $(OBJDIR)\dopen.obj \ $(OBJDIR)\drdwr.obj \ $(OBJDIR)\extract.obj \ + $(OBJDIR)\flock.obj \ $(OBJDIR)\imalloc.obj \ $(OBJDIR)\inline.obj \ $(OBJDIR)\insert.obj \ @@ -364,7 +365,6 @@ ZEBRALIB_OBJS= \ $(OBJDIR)\kcontrol.obj \ $(OBJDIR)\kinput.obj \ $(OBJDIR)\limit.obj \ - $(OBJDIR)\flock.obj \ $(OBJDIR)\lookgrep.obj \ $(OBJDIR)\lookup.obj \ $(OBJDIR)\lookupec.obj \ @@ -404,10 +404,11 @@ ZEBRALIB_OBJS= \ $(OBJDIR)\snippet.obj \ $(OBJDIR)\sortidx.obj \ $(OBJDIR)\states.obj \ + $(OBJDIR)\stream.obj \ $(OBJDIR)\symtab.obj \ + $(OBJDIR)\trunc.obj \ $(OBJDIR)\update_path.obj \ $(OBJDIR)\update_file.obj \ - $(OBJDIR)\trunc.obj \ $(OBJDIR)\xmlread.obj \ $(OBJDIR)\xpath.obj \ $(OBJDIR)\zebra-lock.obj \