1 /* $Id: extract.c,v 1.201 2006-02-08 13:45:44 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
/* PRINTF_OFF_T is the printf conversion this file uses when formatting
   off_t record offsets.  The Ld form is chosen when _FILE_OFFSET_BITS is 64;
   the ld form is presumably the other branch of the same conditional (the
   directive separating the two definitions is not visible here).
   NOTE(review): ISO C defines the L length modifier only for floating-point
   conversions, so L combined with d relies on a library extension - confirm
   that every target accepts it, or move to ll with a cast. */
38 #if _FILE_OFFSET_BITS == 64
39 #define PRINTF_OFF_T "%Ld"
41 #define PRINTF_OFF_T "%ld"
/* USE_SHELLSORT: presumably selects shellsort() instead of qsort() when
   non-zero; the code that tests it is not visible here - TODO confirm. */
44 #define USE_SHELLSORT 0
/* Shell sort of the r elements, each s bytes wide, stored at ar.
   cmp is called with pointers to two elements; a result greater than zero
   makes the loop below shift the first one upwards by one gap.
   Gaps come from a fixed table of 16 decreasing increments ending in 1.
   NOTE(review): assuming j is the insertion index, the guard j > h means the
   slot at j-h == 0 is never compared, so the first element can stay out of
   order; textbook Shell sort uses j >= h - confirm before enabling this. */
47 static void shellsort(void *ar, int r, size_t s,
48 int (*cmp)(const void *a, const void *b))
/* gap sequence, largest first */
53 static const int incs[16] = { 1391376, 463792, 198768, 86961, 33936,
54 13776, 4592, 1968, 861, 336,
55 112, 48, 21, 7, 3, 1 };
/* one gapped insertion pass per increment */
56 for ( k = 0; k < 16; k++)
57 for (h = incs[k], i = h; i < r; i++)
61 while (j > h && (*cmp)(a + s*(j-h), v) > 0)
63 memcpy (a + s*j, a + s*(j-h), s);
/* Count one more processed record on the handle and, on every 1000th
   record, log the running totals (processed, then the i/u/d counters).
   The last logged argument is not visible here; presumably the deleted
   count - TODO confirm. */
71 static void logRecord (ZebraHandle zh)
73 ++zh->records_processed;
/* only report when the counter is a multiple of 1000 */
74 if (!(zh->records_processed % 1000))
76 yaz_log (YLOG_LOG, "Records: "ZINT_FORMAT" i/u/d "
77 ZINT_FORMAT"/"ZINT_FORMAT"/"ZINT_FORMAT,
78 zh->records_processed, zh->records_inserted, zh->records_updated,
/* Forward declaration; the definition is not visible in this part of the
   file. */
83 static void extract_set_store_data_prepare(struct recExtractCtrl *p);
/* Prepare the word descriptor w for use with extract control p: w takes the
   zebra_maps of p and its attribute set is set to VAL_BIB1.  Any further
   field initialisation is not visible here. */
85 static void extract_init (struct recExtractCtrl *p, RecWord *w)
87 w->zebra_maps = p->zebra_maps;
91 w->attrSet = VAL_BIB1;
/* Scan the keys in reckeys for entries belonging to one attribute and map
   them onto the ws array by sequence number.
   zh         handle giving access to the explain info (zh->reg->zei)
   reckeys    record keys to scan
   attrSetS,
   attrUseS   attribute set / use to look for (their use in the ordinal
              lookup is presumed; the argument list is not fully visible)
   ws         array of ws_length slots; presumably filled with the matching
              terms - the store itself is not visible, TODO confirm
   ws_length  number of slots in ws */
101 static void searchRecordKey(ZebraHandle zh,
102 zebra_rec_keys_t reckeys,
103 int attrSetS, int attrUseS,
104 const char **ws, int ws_length)
109 for (i = 0; i<ws_length; i++)
/* ch: value compared against the first key component below */
112 ch = zebraExplain_lookup_attr_su_any_index(zh->reg->zei,
117 if (zebra_rec_keys_rewind(reckeys))
124 while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
126 assert(key.len <= 4 && key.len > 2);
/* the sequence number is the last component of the key */
128 seqno = key.mem[key.len-1];
130 if (key.mem[0] == ch)
/* slot index = distance from startSeq; only in-range slots are used */
136 woff = seqno - startSeq;
137 if (woff >= 0 && woff < ws_length)
/* Read state for a file being fed to a record filter.  Other code in this
   file also uses members named fd and file_more; their declarations are not
   visible here. */
144 struct file_read_info {
145 off_t file_max; /* maximum offset so far */
146 off_t file_offset; /* current offset */
147 off_t file_moffset; /* offset of rec/rec boundary */
/* Allocate (with xmalloc) a file_read_info for descriptor fd and reset the
   record boundary offset to 0.  Initialisation of the remaining members and
   the return statement are not visible here; presumably returns the new
   structure - TODO confirm. */
152 static struct file_read_info *file_read_start (int fd)
154 struct file_read_info *fi = (struct file_read_info *)
155 xmalloc (sizeof(*fi));
159 fi->file_moffset = 0;
/* Counterpart of file_read_start().  The body is not visible here;
   presumably releases fi - TODO confirm. */
165 static void file_read_stop (struct file_read_info *fi)
/* Seek callback for the record filter: handle is a struct file_read_info.
   Stores offset as the current offset and positions the descriptor there
   with lseek(SEEK_SET).  Returns the result of lseek(). */
170 static off_t file_seek (void *handle, off_t offset)
172 struct file_read_info *p = (struct file_read_info *) handle;
/* the cached offset is updated before the seek result is known */
173 p->file_offset = offset;
174 return lseek (p->fd, offset, SEEK_SET);
/* Tell callback for the record filter: returns the cached current offset
   from the struct file_read_info passed as handle (no system call). */
177 static off_t file_tell (void *handle)
179 struct file_read_info *p = (struct file_read_info *) handle;
180 return p->file_offset;
/* Read callback for the record filter: handle is a struct file_read_info.
   Reads up to count bytes into buf with read() and keeps file_max equal to
   the highest offset reached.  How file_offset is advanced and what is
   returned are not visible here; presumably the read() result - TODO
   confirm. */
183 static int file_read (void *handle, char *buf, size_t count)
185 struct file_read_info *p = (struct file_read_info *) handle;
188 r = read (fd, buf, count);
/* remember the furthest position seen so far */
192 if (p->file_offset > p->file_max)
193 p->file_max = p->file_offset;
/* End-of-record callback for the record filter: handle is a struct
   file_read_info.  When offset differs from the stored record boundary, the
   boundary (file_moffset) is moved to offset.  Any other statement in that
   branch is not visible here. */
198 static void file_end (void *handle, off_t offset)
200 struct file_read_info *p = (struct file_read_info *) handle;
202 if (offset != p->file_moffset)
204 p->file_moffset = offset;
/* Characters treated as blanks between the parts of a match specification
   (tab and space). */
209 #define FILE_MATCH_BLANK "\t "

/* Build the match string for a record from the specification spec.
   zh       handle (data1 handle, group, database names, record type)
   reckeys  keys extracted from the record, searched for attribute parts
   fname    file name, used in messages and presumably for the filename
            special - TODO confirm
   spec     sequence of parts separated by blanks; the visible code handles
            an attribute part (attset and attribute name separated by a
            comma and closed by a right parenthesis), a special name (group,
            database, filename, type) and a literal enclosed in single or
            double quotes
   The result is assembled in a function-static 2048 byte buffer, so the
   function is not reentrant.  The return statements are not visible here;
   presumably the buffer on success and NULL after a logged warning - TODO
   confirm.
   NOTE(review): the blank-skipping loops that lack a leading *s test call
   strchr() with the terminating NUL, which strchr() reports as found, so
   they do not stop at the end of spec.
   NOTE(review): the strcpy() calls into the buffer have no visible bound
   check. */
211 static char *fileMatchStr (ZebraHandle zh,
212 zebra_rec_keys_t reckeys,
213 const char *fname, const char *spec)
215 static char dstBuf[2048]; /* static here ??? */
217 const char *s = spec;
/* skip blanks before the next part */
221 for (; *s && strchr(FILE_MATCH_BLANK, *s); s++)
228 char attset_str[64], attname_str[64];
229 data1_attset *attset;
231 int attSet = 1, attUse = 1;
234 for (s++; strchr(FILE_MATCH_BLANK, *s); s++)
/* copy the attribute set name, silently truncating to the buffer size */
236 for (i = 0; *s && *s != ',' && *s != ')' &&
237 !strchr(FILE_MATCH_BLANK, *s); s++)
238 if (i+1 < sizeof(attset_str))
239 attset_str[i++] = *s;
240 attset_str[i] = '\0';
242 for (; strchr(FILE_MATCH_BLANK, *s); s++)
246 for (s++; strchr(FILE_MATCH_BLANK, *s); s++)
/* copy the attribute name, silently truncating to the buffer size */
248 for (i = 0; *s && *s != ')' &&
249 !strchr(FILE_MATCH_BLANK, *s); s++)
250 if (i+1 < sizeof(attname_str))
251 attname_str[i++] = *s;
252 attname_str[i] = '\0';
/* resolve the names to numeric set / use values */
255 if ((attset = data1_get_attset (zh->reg->dh, attset_str)))
258 attSet = attset->reference;
259 att = data1_getattbyname(zh->reg->dh, attset, attname_str);
263 attUse = atoi (attname_str);
/* collect up to 32 words of that attribute from the record keys */
265 searchRecordKey (zh, reckeys, attSet, attUse, ws, 32);
269 yaz_log (YLOG_WARN, "Missing ) in match criteria %s in group %s",
270 spec, zh->m_group ? zh->m_group : "none");
275 for (i = 0; i<32; i++)
284 dst += strlen(ws[i]);
288 yaz_log (YLOG_WARN, "Record didn't contain match"
289 " fields in (%s,%s)", attset_str, attname_str);
/* special name: runs up to the next blank, truncated to fit special */
297 const char *spec_src = NULL;
298 const char *s1 = ++s;
299 while (*s1 && !strchr(FILE_MATCH_BLANK, *s1))
303 if (spec_len > sizeof(special)-1)
304 spec_len = sizeof(special)-1;
305 memcpy (special, s, spec_len);
306 special[spec_len] = '\0';
309 if (!strcmp (special, "group"))
310 spec_src = zh->m_group;
311 else if (!strcmp (special, "database"))
312 spec_src = zh->basenames[0];
313 else if (!strcmp (special, "filename")) {
316 else if (!strcmp (special, "type"))
317 spec_src = zh->m_record_type;
322 strcpy (dst, spec_src);
323 dst += strlen (spec_src);
/* quoted literal: copied verbatim up to the matching quote character */
326 else if (*s == '\"' || *s == '\'')
328 int stopMarker = *s++;
332 while (*s && *s != stopMarker)
334 if (i+1 < sizeof(tmpString))
335 tmpString[i++] = *s++;
340 strcpy (dst, tmpString);
341 dst += strlen (tmpString);
345 yaz_log (YLOG_WARN, "Syntax error in match criteria %s in group %s",
346 spec, zh->m_group ? zh->m_group : "none");
353 yaz_log (YLOG_WARN, "No match criteria for record %s in group %s",
354 fname, zh->m_group ? zh->m_group : "none");
/* Logging context for a record.  Only the rGroup member is visible here;
   the remaining members are not shown. */
361 struct recordLogInfo {
364 struct recordGroup *rGroup;
/* Fill in the parts of ctrl that come straight from the handle: the
   zebra_maps pointer and flagShowRecords, which is set when the handle is
   not in read/write mode (m_flag_rw is zero).  The loop visits the values
   0..255 and tests zebra_maps_is_positioned() for each; what it records per
   value is not visible here. */
367 static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl)
370 for (i = 0; i<256; i++)
372 if (zebra_maps_is_positioned(zh->reg->zebra_maps, i))
377 ctrl->zebra_maps = zh->reg->zebra_maps;
378 ctrl->flagShowRecords = !zh->m_flag_rw;
/* Extract one record from an open file and insert, update or delete it in
   the register.
   zh     handle
   sysno  in/out record number; when it is 0 a match string may be used to
          look up an existing number in the match dictionary
   fname  file name, used in log messages and stored with the record
   fi     read state of the file; the record starts at fi->file_moffset
   recTypeClientData  passed to the extract function of the record type
   Further parameters (the caller passes a delete flag, an integer and the
   record type) and all return statements are not visible here.
   Where two nearly identical calls follow each other (one passing sortKeys,
   one passing its address) they are presumably the two branches of a
   preprocessor conditional that is not visible - TODO confirm. */
381 static int file_extract_record(ZebraHandle zh,
382 SYSNO *sysno, const char *fname,
384 struct file_read_info *fi,
387 void *recTypeClientData)
389 RecordAttr *recordAttr;
391 const char *matchStr = 0;
394 off_t recordOffset = 0;
395 struct recExtractCtrl extractCtrl;
397 /* announce database */
398 if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0]))
400 if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0],
401 zh->m_explain_database))
407 /* we are going to read from a file, so prepare the extraction */
408 zebra_rec_keys_reset(zh->reg->keys);
411 zebra_rec_keys_reset(zh->reg->sortKeys);
413 zh->reg->sortKeys.buf_used = 0;
/* the record starts at the last record boundary seen in this file */
415 recordOffset = fi->file_moffset;
416 extractCtrl.handle = zh;
417 extractCtrl.offset = fi->file_moffset;
418 extractCtrl.readf = file_read;
419 extractCtrl.seekf = file_seek;
420 extractCtrl.tellf = file_tell;
421 extractCtrl.endf = file_end;
423 extractCtrl.init = extract_init;
424 extractCtrl.tokenAdd = extract_token_add;
425 extractCtrl.schemaAdd = extract_schema_add;
426 extractCtrl.dh = zh->reg->dh;
427 extractCtrl.match_criteria[0] = '\0';
428 extractCtrl.staticrank = 0;
/* first_record is set only while the current file offset is still 0 */
430 extractCtrl.first_record = fi->file_offset ? 0 : 1;
432 extract_set_store_data_prepare(&extractCtrl);
434 init_extractCtrl(zh, &extractCtrl);
437 printf ("File: %s " PRINTF_OFF_T "\n", fname, recordOffset);
/* log lines written during extraction are prefixed with file:offset */
441 sprintf (msg, "%s:" PRINTF_OFF_T , fname, recordOffset);
442 yaz_log_init_prefix2 (msg);
445 r = (*recType->extract)(recTypeClientData, &extractCtrl);
447 yaz_log_init_prefix2 (0);
448 if (r == RECCTRL_EXTRACT_EOF)
450 else if (r == RECCTRL_EXTRACT_ERROR_GENERIC)
452 /* error occurred during extraction ... */
454 zh->records_processed < zh->m_file_verbose_limit)
456 yaz_log (YLOG_WARN, "fail %s %s " PRINTF_OFF_T, zh->m_record_type,
457 fname, recordOffset);
461 else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER)
463 /* error occurred during extraction ... */
465 zh->records_processed < zh->m_file_verbose_limit)
467 yaz_log (YLOG_WARN, "no filter for %s %s "
468 PRINTF_OFF_T, zh->m_record_type,
469 fname, recordOffset);
/* a match string supplied by the filter is picked up first */
473 if (extractCtrl.match_criteria[0])
474 matchStr = extractCtrl.match_criteria;
477 /* perform match if sysno not known and if match criteria is specified */
483 if (matchStr == 0 && zh->m_record_id && *zh->m_record_id)
485 matchStr = fileMatchStr (zh, zh->reg->keys, fname,
489 yaz_log(YLOG_WARN, "Bad match criteria");
/* dictionary entry layout: one length byte followed by the SYSNO */
495 char *rinfo = dict_lookup (zh->reg->matchDict, matchStr);
498 assert(*rinfo == sizeof(*sysno));
499 memcpy (sysno, rinfo+1, sizeof(*sysno));
503 if (! *sysno && zebra_rec_keys_empty(zh->reg->keys) )
505 /* the extraction process returned no information - the record
506 is probably empty - unless flagShowRecords is in use */
510 if (zh->records_processed < zh->m_file_verbose_limit)
511 yaz_log (YLOG_WARN, "empty %s %s " PRINTF_OFF_T, zh->m_record_type,
512 fname, recordOffset);
521 yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T, zh->m_record_type,
522 fname, recordOffset);
523 yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
526 if (zh->records_processed < zh->m_file_verbose_limit)
527 yaz_log (YLOG_LOG, "add %s %s " PRINTF_OFF_T, zh->m_record_type,
528 fname, recordOffset);
529 rec = rec_new (zh->reg->records);
533 recordAttr = rec_init_attr (zh->reg->zei, rec);
534 recordAttr->staticrank = extractCtrl.staticrank;
538 dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno);
541 extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
543 extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
545 extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
546 recordAttr->staticrank);
547 zh->records_inserted++;
551 /* record already exists */
552 zebra_rec_keys_t delkeys = zebra_rec_keys_open();
555 zebra_rec_keys_t sortKeys = zebra_rec_keys_open();
557 struct sortKeys sortKeys;
560 rec = rec_get (zh->reg->records, *sysno);
563 recordAttr = rec_init_attr (zh->reg->zei, rec);
565 zebra_rec_keys_set_buf(delkeys,
566 rec->info[recInfo_delKeys],
567 rec->size[recInfo_delKeys],
571 zebra_rec_keys_set_buf(sortKeys,
572 rec->info[recInfo_sortKeys],
573 rec->size[recInfo_sortKeys],
575 extract_flushSortKeys (zh, *sysno, 0, sortKeys);
577 sortKeys.buf_used = rec->size[recInfo_sortKeys];
578 sortKeys.buf = rec->info[recInfo_sortKeys];
579 extract_flushSortKeys (zh, *sysno, 0, &sortKeys);
582 extract_flushRecordKeys (zh, *sysno, 0, delkeys,
583 recordAttr->staticrank); /* old values */
586 /* record going to be deleted */
587 if (zebra_rec_keys_empty(delkeys))
589 yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T,
590 zh->m_record_type, fname, recordOffset);
591 yaz_log (YLOG_WARN, "cannot delete file above, storeKeys false (1)");
595 if (zh->records_processed < zh->m_file_verbose_limit)
596 yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T,
597 zh->m_record_type, fname, recordOffset);
598 zh->records_deleted++;
600 dict_delete (zh->reg->matchDict, matchStr);
601 rec_del (zh->reg->records, &rec);
609 /* flush new keys for sort&search etc */
610 if (zh->records_processed < zh->m_file_verbose_limit)
611 yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T,
612 zh->m_record_type, fname, recordOffset);
613 recordAttr->staticrank = extractCtrl.staticrank;
615 extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
617 extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
619 extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
620 recordAttr->staticrank);
621 zh->records_updated++;
623 zebra_rec_keys_close(delkeys);
625 zebra_rec_keys_close(sortKeys);
628 /* update file type */
629 xfree (rec->info[recInfo_fileType]);
630 rec->info[recInfo_fileType] =
631 rec_strdup (zh->m_record_type, &rec->size[recInfo_fileType]);
633 /* update filename */
634 xfree (rec->info[recInfo_filename]);
635 rec->info[recInfo_filename] =
636 rec_strdup (fname, &rec->size[recInfo_filename]);
638 /* update delete keys */
639 xfree (rec->info[recInfo_delKeys]);
640 if (!zebra_rec_keys_empty(zh->reg->keys) && zh->m_store_keys == 1)
642 zebra_rec_keys_get_buf(zh->reg->keys,
643 &rec->info[recInfo_delKeys],
644 &rec->size[recInfo_delKeys]);
648 rec->info[recInfo_delKeys] = NULL;
649 rec->size[recInfo_delKeys] = 0;
652 /* update sort keys */
653 xfree (rec->info[recInfo_sortKeys]);
656 zebra_rec_keys_get_buf(zh->reg->sortKeys,
657 &rec->info[recInfo_sortKeys],
658 &rec->size[recInfo_sortKeys]);
660 rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
661 rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
662 zh->reg->sortKeys.buf = NULL;
663 zh->reg->sortKeys.buf_max = 0;
666 /* save file size of original record */
667 zebraExplain_recordBytesIncrement (zh->reg->zei,
668 - recordAttr->recordSize);
/* size = distance to the new record boundary; when the boundary did not
   move, fall back to the highest offset read */
669 recordAttr->recordSize = fi->file_moffset - recordOffset;
670 if (!recordAttr->recordSize)
671 recordAttr->recordSize = fi->file_max - recordOffset;
672 zebraExplain_recordBytesIncrement (zh->reg->zei,
673 recordAttr->recordSize);
675 /* set run-number for this record */
676 recordAttr->runNumber = zebraExplain_runNumberIncrement (zh->reg->zei,
679 /* update store data */
680 xfree (rec->info[recInfo_storeData]);
/* a buffer prepared on the handle is handed over to the record as is */
681 if (zh->store_data_buf)
683 rec->size[recInfo_storeData] = zh->store_data_size;
684 rec->info[recInfo_storeData] = zh->store_data_buf;
685 zh->store_data_buf = 0;
/* otherwise, when storing is enabled, re-read the raw bytes from the file */
687 else if (zh->m_store_data)
689 rec->size[recInfo_storeData] = recordAttr->recordSize;
690 rec->info[recInfo_storeData] = (char *)
691 xmalloc (recordAttr->recordSize);
692 if (lseek (fi->fd, recordOffset, SEEK_SET) < 0)
694 yaz_log (YLOG_ERRNO|YLOG_FATAL, "seek to " PRINTF_OFF_T " in %s",
695 recordOffset, fname);
698 if (read (fi->fd, rec->info[recInfo_storeData], recordAttr->recordSize)
699 < recordAttr->recordSize)
701 yaz_log (YLOG_ERRNO|YLOG_FATAL, "read %d bytes of %s",
702 recordAttr->recordSize, fname);
708 rec->info[recInfo_storeData] = NULL;
709 rec->size[recInfo_storeData] = 0;
711 /* update database name */
712 xfree (rec->info[recInfo_databaseName]);
713 rec->info[recInfo_databaseName] =
714 rec_strdup (zh->basenames[0], &rec->size[recInfo_databaseName]);
717 recordAttr->recordOffset = recordOffset;
719 /* commit this record */
720 rec_put (zh->reg->records, &rec);
/* Open the file fname and run record extraction on it.
   zh     handle; zh->m_record_type and zh->m_record_id may be filled in
          from resources keyed by the file extension
   sysno  passed through to file_extract_record()
   fname  file name; when zh->path_reg is set and fname is not absolute the
          file is opened relative to zh->path_reg
   The remaining parameters (a delete flag is used below) and the return
   statements are not visible here.
   zh->m_record_type is restored to its value on entry on the visible exit
   paths; no such restore is visible for zh->m_record_id.
   NOTE(review): gprefix, ext, ext_res and full_rep are filled with
   sprintf(), strcpy() and strcat() without a visible bound check; their
   declarations are not shown, so confirm the sizes are sufficient. */
725 int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname,
732 struct file_read_info *fi;
733 const char *original_record_type = 0;
735 void *recTypeClientData;
/* resource names are prefixed with the group name followed by a dot */
737 if (!zh->m_group || !*zh->m_group)
740 sprintf (gprefix, "%s.", zh->m_group);
742 yaz_log (YLOG_DEBUG, "fileExtract %s", fname);
744 /* determine file extension */
746 for (i = strlen(fname); --i >= 0; )
749 else if (fname[i] == '.')
751 strcpy (ext, fname+i+1);
754 /* determine file type - depending on extension */
755 original_record_type = zh->m_record_type;
756 if (!zh->m_record_type)
758 sprintf (ext_res, "%srecordType.%s", gprefix, ext);
759 zh->m_record_type = res_get (zh->res, ext_res);
761 if (!zh->m_record_type)
763 if (zh->records_processed < zh->m_file_verbose_limit)
764 yaz_log (YLOG_LOG, "? %s", fname);
767 /* determine match criteria */
768 if (!zh->m_record_id)
770 sprintf (ext_res, "%srecordId.%s", gprefix, ext);
771 zh->m_record_id = res_get (zh->res, ext_res);
775 recType_byName (zh->reg->recTypes, zh->res, zh->m_record_type,
776 &recTypeClientData)))
778 yaz_log(YLOG_WARN, "No such record type: %s", zh->m_record_type);
782 switch(recType->version)
787 yaz_log(YLOG_WARN, "Bad filter version: %s", zh->m_record_type);
789 if (sysno && deleteFlag)
/* build the path that is actually opened */
795 if (zh->path_reg && !yaz_is_abspath (fname))
797 strcpy (full_rep, zh->path_reg);
798 strcat (full_rep, "/");
799 strcat (full_rep, fname);
802 strcpy (full_rep, fname);
805 if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1)
807 yaz_log (YLOG_WARN|YLOG_ERRNO, "open %s", full_rep);
808 zh->m_record_type = original_record_type;
812 fi = file_read_start (fd);
/* each record starts where reading currently stands */
815 fi->file_moffset = fi->file_offset;
816 fi->file_more = 0; /* file_end not called (yet) */
817 r = file_extract_record (zh, sysno, fname, deleteFlag, fi, 1,
818 recType, recTypeClientData);
820 { /* file_end has been called so reset offset .. */
821 fi->file_offset = fi->file_moffset;
822 lseek(fi->fd, fi->file_moffset, SEEK_SET);
829 zh->m_record_type = original_record_type;
834 If sysno is provided, then it's used to identify the record.
835 If not, and match_criteria is provided, then sysno is guessed
836 If not, and a record is provided, then sysno is got from there
/* Extract one record held in memory and insert, update or delete it in the
   register.
   zh              handle
   buf, buf_size   the record bytes and their length
   recordType      record type name; when NULL or empty the record type of
                   the handle (zh->m_record_type) is used instead
   match_criteria  optional match string; a string produced by the filter
                   replaces it, and when neither is present the record id
                   specification of the handle is evaluated
   Further parameters (a sysno pointer and a file name are used below) and
   all return statements are not visible here.
   Where two nearly identical statements follow each other (for example the
   two recordSize assignments, or the calls passing sortKeys and its
   address) they are presumably the two branches of a preprocessor
   conditional that is not visible - TODO confirm. */
839 ZEBRA_RES buffer_extract_record(ZebraHandle zh,
840 const char *buf, size_t buf_size,
843 const char *recordType,
845 const char *match_criteria,
851 RecordAttr *recordAttr;
852 struct recExtractCtrl extractCtrl;
854 const char *matchStr = 0;
855 RecType recType = NULL;
858 long recordOffset = 0;
859 struct zebra_fetch_control fc;
860 const char *pr_fname = fname; /* filename to print .. */
861 int show_progress = zh->records_processed < zh->m_file_verbose_limit ? 1:0;
864 pr_fname = "<no file>"; /* make it printable if file is omitted */
/* the filter reads from the in-memory buffer through fc */
867 fc.record_int_buf = buf;
868 fc.record_int_len = buf_size;
869 fc.record_int_pos = 0;
871 fc.record_offset = 0;
873 extractCtrl.offset = 0;
874 extractCtrl.readf = zebra_record_int_read;
875 extractCtrl.seekf = zebra_record_int_seek;
876 extractCtrl.tellf = zebra_record_int_tell;
877 extractCtrl.endf = zebra_record_int_end;
878 extractCtrl.first_record = 1;
879 extractCtrl.fh = &fc;
881 zebra_rec_keys_reset(zh->reg->keys);
884 zebra_rec_keys_reset(zh->reg->sortKeys);
886 zh->reg->sortKeys.buf_used = 0;
888 if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0]))
890 if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0],
891 zh->m_explain_database))
/* an explicit record type wins over the one configured on the handle */
895 if (recordType && *recordType)
897 yaz_log (YLOG_DEBUG, "Record type explicitly specified: %s", recordType);
898 recType = recType_byName (zh->reg->recTypes, zh->res, recordType,
903 if (!(zh->m_record_type))
905 yaz_log (YLOG_WARN, "No such record type defined");
908 yaz_log (YLOG_DEBUG, "Get record type from rgroup: %s",zh->m_record_type);
909 recType = recType_byName (zh->reg->recTypes, zh->res,
910 zh->m_record_type, &clientData);
911 recordType = zh->m_record_type;
916 yaz_log (YLOG_WARN, "No such record type: %s", zh->m_record_type);
920 extractCtrl.init = extract_init;
921 extractCtrl.tokenAdd = extract_token_add;
922 extractCtrl.schemaAdd = extract_schema_add;
923 extractCtrl.dh = zh->reg->dh;
924 extractCtrl.handle = zh;
925 extractCtrl.match_criteria[0] = '\0';
926 extractCtrl.staticrank = 0;
928 init_extractCtrl(zh, &extractCtrl);
930 extract_set_store_data_prepare(&extractCtrl);
932 r = (*recType->extract)(clientData, &extractCtrl);
934 if (r == RECCTRL_EXTRACT_EOF)
936 else if (r == RECCTRL_EXTRACT_ERROR_GENERIC)
938 /* error occurred during extraction ... */
939 yaz_log (YLOG_WARN, "extract error: generic");
942 else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER)
944 /* error occurred during extraction ... */
945 yaz_log (YLOG_WARN, "extract error: no such filter");
/* a match string produced by the filter replaces the one passed in */
949 if (extractCtrl.match_criteria[0])
950 match_criteria = extractCtrl.match_criteria;
956 if (match_criteria && *match_criteria) {
957 matchStr = match_criteria;
959 if (zh->m_record_id && *zh->m_record_id) {
960 matchStr = fileMatchStr (zh, zh->reg->keys, pr_fname,
964 yaz_log (YLOG_WARN, "Bad match criteria (recordID)");
/* dictionary entry layout: one length byte followed by the SYSNO */
970 char *rinfo = dict_lookup (zh->reg->matchDict, matchStr);
973 assert(*rinfo == sizeof(*sysno));
974 memcpy (sysno, rinfo+1, sizeof(*sysno));
978 if (zebra_rec_keys_empty(zh->reg->keys))
980 /* the extraction process returned no information - the record
981 is probably empty - unless flagShowRecords is in use */
991 yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
992 pr_fname, (long) recordOffset);
993 yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
997 yaz_log (YLOG_LOG, "add %s %s %ld", recordType, pr_fname,
998 (long) recordOffset);
999 rec = rec_new (zh->reg->records);
/* report the number of the newly created record back to the caller */
1001 *sysno = rec->sysno;
1003 recordAttr = rec_init_attr (zh->reg->zei, rec);
1004 recordAttr->staticrank = extractCtrl.staticrank;
1008 dict_insert (zh->reg->matchDict, matchStr,
1009 sizeof(*sysno), sysno);
1012 extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
1014 extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
1018 print_rec_keys(zh, zh->reg->keys);
1020 extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
1021 recordAttr->staticrank);
1022 zh->records_inserted++;
1026 /* record already exists */
1027 zebra_rec_keys_t delkeys = zebra_rec_keys_open();
1029 zebra_rec_keys_t sortKeys = zebra_rec_keys_open();
1031 struct sortKeys sortKeys;
1036 yaz_log (YLOG_LOG, "skipped %s %s %ld",
1037 recordType, pr_fname, (long) recordOffset);
1042 rec = rec_get (zh->reg->records, *sysno);
1045 recordAttr = rec_init_attr (zh->reg->zei, rec);
1047 zebra_rec_keys_set_buf(delkeys,
1048 rec->info[recInfo_delKeys],
1049 rec->size[recInfo_delKeys],
1052 zebra_rec_keys_set_buf(sortKeys,
1053 rec->info[recInfo_sortKeys],
1054 rec->size[recInfo_sortKeys],
1057 sortKeys.buf_used = rec->size[recInfo_sortKeys];
1058 sortKeys.buf = rec->info[recInfo_sortKeys];
1062 extract_flushSortKeys (zh, *sysno, 0, sortKeys);
1064 extract_flushSortKeys (zh, *sysno, 0, &sortKeys);
1066 extract_flushRecordKeys (zh, *sysno, 0, delkeys,
1067 recordAttr->staticrank);
1070 /* record going to be deleted */
1071 if (zebra_rec_keys_empty(delkeys))
1073 yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
1074 pr_fname, (long) recordOffset);
1075 yaz_log (YLOG_WARN, "cannot delete file above, "
1076 "storeKeys false (3)");
1081 yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
1082 pr_fname, (long) recordOffset);
1083 zh->records_deleted++;
1085 dict_delete (zh->reg->matchDict, matchStr);
1086 rec_del (zh->reg->records, &rec);
1095 yaz_log (YLOG_LOG, "update %s %s %ld", recordType,
1096 pr_fname, (long) recordOffset);
1097 recordAttr->staticrank = extractCtrl.staticrank;
1099 extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
1101 extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
1103 extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
1104 recordAttr->staticrank);
1105 zh->records_updated++;
1107 zebra_rec_keys_close(delkeys);
1109 zebra_rec_keys_close(sortKeys);
1112 /* update file type */
1113 xfree (rec->info[recInfo_fileType]);
1114 rec->info[recInfo_fileType] =
1115 rec_strdup (recordType, &rec->size[recInfo_fileType]);
1117 /* update filename */
1118 xfree (rec->info[recInfo_filename]);
1119 rec->info[recInfo_filename] =
1120 rec_strdup (fname, &rec->size[recInfo_filename]);
1122 /* update delete keys */
1123 xfree (rec->info[recInfo_delKeys]);
1124 if (!zebra_rec_keys_empty(zh->reg->keys) && zh->m_store_keys == 1)
1126 zebra_rec_keys_get_buf(zh->reg->keys,
1127 &rec->info[recInfo_delKeys],
1128 &rec->size[recInfo_delKeys]);
1132 rec->info[recInfo_delKeys] = NULL;
1133 rec->size[recInfo_delKeys] = 0;
1135 /* update sort keys */
1136 xfree (rec->info[recInfo_sortKeys]);
1139 zebra_rec_keys_get_buf(zh->reg->sortKeys,
1140 &rec->info[recInfo_sortKeys],
1141 &rec->size[recInfo_sortKeys]);
1143 rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
1144 rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
1145 zh->reg->sortKeys.buf = NULL;
1146 zh->reg->sortKeys.buf_max = 0;
1149 /* save file size of original record */
1150 zebraExplain_recordBytesIncrement (zh->reg->zei,
1151 - recordAttr->recordSize);
1153 recordAttr->recordSize = fi->file_moffset - recordOffset;
1154 if (!recordAttr->recordSize)
1155 recordAttr->recordSize = fi->file_max - recordOffset;
1157 recordAttr->recordSize = buf_size;
1159 zebraExplain_recordBytesIncrement (zh->reg->zei,
1160 recordAttr->recordSize);
1162 /* set run-number for this record */
1163 recordAttr->runNumber =
1164 zebraExplain_runNumberIncrement (zh->reg->zei, 0);
1166 /* update store data */
1167 xfree (rec->info[recInfo_storeData]);
1169 /* update store data */
1170 if (zh->store_data_buf)
1172 rec->size[recInfo_storeData] = zh->store_data_size;
1173 rec->info[recInfo_storeData] = zh->store_data_buf;
1174 zh->store_data_buf = 0;
/* otherwise, when storing is enabled, keep a private copy of the buffer */
1176 else if (zh->m_store_data)
1178 rec->size[recInfo_storeData] = recordAttr->recordSize;
1179 rec->info[recInfo_storeData] = (char *)
1180 xmalloc (recordAttr->recordSize);
1181 memcpy (rec->info[recInfo_storeData], buf, recordAttr->recordSize);
1185 rec->info[recInfo_storeData] = NULL;
1186 rec->size[recInfo_storeData] = 0;
1188 /* update database name */
1189 xfree (rec->info[recInfo_databaseName]);
1190 rec->info[recInfo_databaseName] =
1191 rec_strdup (zh->basenames[0], &rec->size[recInfo_databaseName]);
1194 recordAttr->recordOffset = recordOffset;
1196 /* commit this record */
1197 rec_put (zh->reg->records, &rec);
/* Re-index a record from its data1 tree.
   handle  the ZebraHandle, passed as void pointer
   rec     the record; its stored delete keys and sort keys are flushed as
           removals (when delete keys are present), then replaced by the
           newly extracted ones
   n       root of the data1 tree handed to grs_extract_tree()
   The database named in the record is selected first, and created when
   selecting it fails.  Return statements are not visible here.
   Where a zebra_rec_keys_t variant and a struct sortKeys variant of the same
   step follow each other they are presumably the two branches of a
   preprocessor conditional that is not visible - TODO confirm. */
1202 int explain_extract (void *handle, Record rec, data1_node *n)
1204 ZebraHandle zh = (ZebraHandle) handle;
1205 struct recExtractCtrl extractCtrl;
1207 if (zebraExplain_curDatabase (zh->reg->zei,
1208 rec->info[recInfo_databaseName]))
1211 if (zebraExplain_newDatabase (zh->reg->zei,
1212 rec->info[recInfo_databaseName], 0))
1216 zebra_rec_keys_reset(zh->reg->keys);
1219 zebra_rec_keys_reset(zh->reg->sortKeys);
1221 zh->reg->sortKeys.buf_used = 0;
1223 extractCtrl.init = extract_init;
1224 extractCtrl.tokenAdd = extract_token_add;
1225 extractCtrl.schemaAdd = extract_schema_add;
1226 extractCtrl.dh = zh->reg->dh;
1228 init_extractCtrl(zh, &extractCtrl);
/* overrides the value init_extractCtrl() derived from the handle */
1230 extractCtrl.flagShowRecords = 0;
1231 extractCtrl.match_criteria[0] = '\0';
1232 extractCtrl.staticrank = 0;
1233 extractCtrl.handle = handle;
1234 extractCtrl.first_record = 1;
1236 extract_set_store_data_prepare(&extractCtrl);
1239 grs_extract_tree(&extractCtrl, n);
/* remove the previously stored keys before adding the new ones */
1241 if (rec->size[recInfo_delKeys])
1243 zebra_rec_keys_t delkeys = zebra_rec_keys_open();
1246 zebra_rec_keys_t sortkeys = zebra_rec_keys_open();
1248 struct sortKeys sortkeys;
1251 zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys],
1252 rec->size[recInfo_delKeys],
1254 extract_flushRecordKeys (zh, rec->sysno, 0, delkeys, 0);
1255 zebra_rec_keys_close(delkeys);
1257 zebra_rec_keys_set_buf(sortkeys, rec->info[recInfo_sortKeys],
1258 rec->size[recInfo_sortKeys],
1261 extract_flushSortKeys (zh, rec->sysno, 0, sortkeys);
1262 zebra_rec_keys_close(sortkeys);
1264 sortkeys.buf_used = rec->size[recInfo_sortKeys];
1265 sortkeys.buf = rec->info[recInfo_sortKeys];
1266 extract_flushSortKeys (zh, rec->sysno, 0, &sortkeys);
/* add the freshly extracted keys */
1269 extract_flushRecordKeys (zh, rec->sysno, 1, zh->reg->keys, 0);
1271 extract_flushSortKeys (zh, rec->sysno, 1, zh->reg->sortKeys);
1273 extract_flushSortKeys (zh, rec->sysno, 1, &zh->reg->sortKeys);
/* store the new keys in the record so they can be removed later */
1276 xfree (rec->info[recInfo_delKeys]);
1277 zebra_rec_keys_get_buf(zh->reg->keys,
1278 &rec->info[recInfo_delKeys],
1279 &rec->size[recInfo_delKeys]);
1281 xfree (rec->info[recInfo_sortKeys]);
1283 zebra_rec_keys_get_buf(zh->reg->sortKeys,
1284 &rec->info[recInfo_sortKeys],
1285 &rec->size[recInfo_sortKeys]);
1287 rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
1288 rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
1289 zh->reg->sortKeys.buf = NULL;
1290 zh->reg->sortKeys.buf_max = 0;
/* Append the keys in reckeys to the in-memory key buffer of the register.
   zh       handle
   sysno    record number the keys belong to
   reckeys  keys to append
   Further parameters are not visible here; the body uses cmd (written as
   the delete/insert indicator byte, and non-zero counts the record as
   added) and staticrank.
   The buffer is allocated on first use; its size is the memmax resource
   (default 8) times one megabyte.  Key text grows from the start of the
   buffer, while the pointer to each entry is stored in a slot counted back
   from ptr_top.  When fewer than 1024 bytes remain between the two areas
   the buffer is written out with extract_flushWriteKeys().
   NOTE(review): the size is computed in int from atoi(), so a large memmax
   value can overflow; the range check is only partly visible here. */
1296 void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
1298 zebra_rec_keys_t reckeys,
1301 ZebraExplainInfo zei = zh->reg->zei;
/* lazy allocation of the key buffer */
1303 if (!zh->reg->key_buf)
1305 int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8"));
1308 yaz_log(YLOG_WARN, "Invalid memory setting, using default 8 MB");
1311 /* FIXME: That "8" should be in a default settings include */
1312 /* not hard-coded here! -H */
1313 zh->reg->key_buf = (char**) xmalloc (mem);
1314 zh->reg->ptr_top = mem/sizeof(char*);
1316 zh->reg->key_buf_used = 0;
1317 zh->reg->key_file_no = 0;
1319 zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1);
1321 if (zebra_rec_keys_rewind(reckeys))
1325 struct it_key key_in;
1326 while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
1329 struct it_key key_out;
1330 zint *keyp = key_out.mem;
1332 assert(key_in.len == 4);
1334 /* check for buffer overflow */
1335 if (zh->reg->key_buf_used + 1024 >
1336 (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*))
1337 extract_flushWriteKeys (zh, 0);
1340 assert(zh->reg->ptr_i > 0);
/* the pointer slot for this entry points at where its bytes start */
1341 (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] =
1342 (char*)zh->reg->key_buf + zh->reg->key_buf_used;
1344 /* encode the ordinal value (field/use/attribute) .. */
1345 ch = (int) key_in.mem[0];
1346 zh->reg->key_buf_used +=
1347 key_SU_encode(ch, (char*)zh->reg->key_buf +
1348 zh->reg->key_buf_used);
1350 /* copy the 0-terminated stuff from str to output */
1351 memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, str, slen);
1352 zh->reg->key_buf_used += slen;
1353 ((char*)zh->reg->key_buf)[(zh->reg->key_buf_used)++] = '\0';
1355 /* the delete/insert indicator */
1356 ((char*)zh->reg->key_buf)[(zh->reg->key_buf_used)++] = cmd;
1358 if (zh->m_staticrank) /* rank config enabled ? */
1360 *keyp++ = staticrank;
1366 if (key_in.mem[1]) /* filter specified record ID */
1367 *keyp++ = key_in.mem[1];
1370 *keyp++ = key_in.mem[2]; /* section_id */
1371 *keyp++ = key_in.mem[3]; /* sequence .. */
/* the assembled key follows the text and the indicator byte */
1373 memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used,
1374 &key_out, sizeof(key_out));
1375 (zh->reg->key_buf_used) += sizeof(key_out);
/* Sort the entries collected in the in-memory key buffer and either merge
   them straight into the index or write them to a numbered temporary file.
   zh     handle
   final  non-zero when this is the last flush; used by the automatic
          tempfiles policy
   Does nothing except debug logging when there is no buffer or no entries.
   The section counter key_file_no is incremented for every flush and reset
   to 0 when the direct merge path is taken.
   The later part of the body refers to key_buf, ptr_top and key_file_no
   without going through zh; presumably an older variant kept in a disabled
   preprocessor branch that is not visible - TODO confirm. */
1380 void extract_flushWriteKeys (ZebraHandle zh, int final)
1381 /* optimizing: if final=1, and no files written yet */
1382 /* push the keys directly to merge, sidestepping the */
1383 /* temp file altogether. Speeds small updates */
1386 char out_fname[200];
1388 struct encode_info encode_info;
1389 int ptr_i = zh->reg->ptr_i;
1394 if (!zh->reg->key_buf || ptr_i <= 0)
1396 yaz_log (YLOG_DEBUG, " nothing to flush section=%d buf=%p i=%d",
1397 zh->reg->key_file_no, zh->reg->key_buf, ptr_i);
1398 yaz_log (YLOG_DEBUG, " buf=%p ",
1400 yaz_log (YLOG_DEBUG, " ptr=%d ",zh->reg->ptr_i);
1401 yaz_log (YLOG_DEBUG, " reg=%p ",zh->reg);
1406 (zh->reg->key_file_no)++;
1407 yaz_log (YLOG_LOG, "sorting section %d", (zh->reg->key_file_no));
1408 yaz_log (YLOG_DEBUG, " sort_buff at %p n=%d",
1409 zh->reg->key_buf + zh->reg->ptr_top - ptr_i,ptr_i);
/* sort the ptr_i entry pointers stored at the top end of the buffer */
1411 qsort (zh->reg->key_buf + zh->reg->ptr_top - ptr_i, ptr_i,
1412 sizeof(char*), key_qsort_compare);
1414 /* zebra.cfg: tempfiles:
1415 Y: always use temp files (old way)
1416 A: use temp files, if more than one (auto)
1417 = if this is both the last and the first
1418 N: never bother with temp files (new) */
/* only the first character of the setting is examined, case-insensitively */
1420 temp_policy=toupper(res_get_def(zh->res,"tempfiles","auto")[0]);
1421 if (temp_policy != 'Y' && temp_policy != 'N' && temp_policy != 'A') {
1422 yaz_log (YLOG_WARN, "Illegal tempfiles setting '%c'. using 'Auto' ",
1427 if ( ( temp_policy =='N' ) || /* always from memory */
1428 ( ( temp_policy =='A' ) && /* automatic */
1429 (zh->reg->key_file_no == 1) && /* this is first time */
1430 (final) ) ) /* and last (=only) time */
1431 { /* go directly from memory */
1432 zh->reg->key_file_no =0; /* signal not to read files */
1433 zebra_index_merge(zh);
1435 zh->reg->key_buf_used = 0;
1439 /* Not doing directly from memory, write into a temp file */
1440 extract_get_fname_tmp (zh, out_fname, zh->reg->key_file_no);
1442 if (!(outf = fopen (out_fname, "wb")))
1444 yaz_log (YLOG_FATAL|YLOG_ERRNO, "fopen %s", out_fname);
1447 yaz_log (YLOG_LOG, "writing section %d", zh->reg->key_file_no);
1448 prevcp = cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i];
1450 encode_key_init (&encode_info);
1451 encode_key_write (cp, &encode_info, outf);
1455 cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i];
/* a change in the leading string starts a new encoded run */
1456 if (strcmp (cp, prevcp))
1458 encode_key_flush ( &encode_info, outf);
1459 encode_key_init (&encode_info);
1460 encode_key_write (cp, &encode_info, outf);
1464 encode_key_write (cp + strlen(cp), &encode_info, outf);
1466 encode_key_flush ( &encode_info, outf);
1468 qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_x_compare);
1469 extract_get_fname_tmp (out_fname, key_file_no);
1471 if (!(outf = fopen (out_fname, "wb")))
1473 yaz_log (YLOG_FATAL|YLOG_ERRNO, "fopen %s", out_fname);
1476 yaz_log (YLOG_LOG, "writing section %d", key_file_no);
1478 prevcp = key_buf[ptr_top-i];
1480 if (!--i || strcmp (prevcp, key_buf[ptr_top-i]))
1482 key_y_len = strlen(prevcp)+1;
1484 yaz_log (YLOG_LOG, "key_y_len: %2d %02x %02x %s",
1485 key_y_len, prevcp[0], prevcp[1], 2+prevcp);
1487 qsort (key_buf + ptr_top-ptr_i, ptr_i - i,
1488 sizeof(char*), key_y_compare);
1489 cp = key_buf[ptr_top-ptr_i];
1491 encode_key_init (&encode_info);
1492 encode_key_write (cp, &encode_info, outf);
1495 cp = key_buf[ptr_top-ptr_i];
1496 encode_key_write (cp+key_y_len, &encode_info, outf);
1498 encode_key_flush ( &encode_info, outf);
1501 prevcp = key_buf[ptr_top-ptr_i];
1506 yaz_log (YLOG_FATAL|YLOG_ERRNO, "fclose %s", out_fname);
1509 yaz_log (YLOG_LOG, "finished section %d", zh->reg->key_file_no);
1511 zh->reg->key_buf_used = 0;
/* Append one snippet entry to `snippets` for every key held in `reckeys`.
 * Each key that is read yields a term string plus an it_key; the last key
 * component is used as the sequence number, the index type is obtained with
 * zebraExplain_lookup_ord(), and zebra_term_untrans_iconv() is called with
 * that index type before zebra_snippets_append() stores the entry.
 * NOTE(review): the return statements and the release of `nmem` are not
 * visible in this excerpt - confirm the NMEM is destroyed on every path. */
1514 ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh,
1515 zebra_rec_keys_t reckeys,
1516 zebra_snippets *snippets)
1518 NMEM nmem = nmem_create();
/* presumably the loop below only runs when the rewind reports success */
1519 if (zebra_rec_keys_rewind(reckeys))
1524 while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
1526 char dst_buf[IT_MAX_WORD];
1527 char *dst_term = dst_buf;
/* keys are expected to carry 3 or 4 components */
1530 assert(key.len <= 4 && key.len > 2);
/* the final component is the sequence number */
1531 seqno = (int) key.mem[key.len-1];
/* only the index type is requested; db, set and use are passed as 0 */
1534 zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
1535 0/* db */, 0/* set */, 0/* use */);
1537 zebra_term_untrans_iconv(zh, nmem, index_type,
1539 zebra_snippets_append(snippets, seqno, ord, dst_term);
/* Debug helper: write every key in `reckeys` to the log at YLOG_LOG level.
 * One line is logged per key, showing the ordinal (key.mem[0]), the sequence
 * number (last key component) and the term after zebra_term_untrans() has
 * converted it into dst_buf. */
1547 void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys)
1549 yaz_log(YLOG_LOG, "print_rec_keys");
/* presumably the loop below only runs when the rewind reports success */
1550 if (zebra_rec_keys_rewind(reckeys))
1555 while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
1557 char dst_buf[IT_MAX_WORD];
/* keys are expected to carry 3 or 4 components */
1561 assert(key.len <= 4 && key.len > 2);
/* look up index type and database for the ordinal in key.mem[0] */
1563 zebraExplain_lookup_ord(zh->reg->zei,
1564 key.mem[0], &index_type, &db, 0, 0);
/* the final component is the sequence number */
1566 seqno = (int) key.mem[key.len-1];
1568 zebra_term_untrans(zh, index_type, dst_buf, str);
1570 yaz_log(YLOG_LOG, "ord=" ZINT_FORMAT " seqno=%d term=%s",
1571 key.mem[0], seqno, dst_buf);
/* Add one index term (`str`, `length` bytes) for the word described by `p`
 * to the record keys of the current handle (zh->reg->keys).
 * The attribute ordinal is obtained from the explain info, either by index
 * name (zebraExplain_lookup_attr_str / zebraExplain_add_attr_str) or by
 * attribute set and use (zebraExplain_lookup_attr_su /
 * zebraExplain_add_attr_su).
 * NOTE(review): the conditions selecting between these calls are not visible
 * in this excerpt; presumably the add call runs only when the lookup fails
 * and the two pairs belong to alternative build configurations - confirm. */
1576 void extract_add_index_string (RecWord *p, const char *str, int length)
1580 ZebraHandle zh = p->extractCtrl->handle;
1581 ZebraExplainInfo zei = zh->reg->zei;
1586 ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name);
1588 ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name);
1595 ch = zebraExplain_lookup_attr_su(zei, p->index_type,
1596 p->attrSet, p->attrUse);
1598 ch = zebraExplain_add_attr_su(zei, p->index_type,
1599 p->attrSet, p->attrUse);
/* key components 1..3 identify record, section and position of the term */
1604 key.mem[1] = p->record_id;
1605 key.mem[2] = p->section_id;
1606 key.mem[3] = p->seqno;
/* debugging aid: hex-dump at most the first 20 bytes of the term.
 * NOTE(review): the declaration of strz is not visible; 20 bytes need
 * 40 hex digits plus the terminator, i.e. at least 41 chars - confirm. */
1615 for (i = 0; i<length && i < 20; i++)
1616 sprintf(strz+strlen(strz), "%02X", str[i] & 0xff);
1617 /* just for debugging .. */
/* NOTE(review): %lld expects long long arguments - confirm the types of
 * record_id, section_id and seqno match on all platforms. */
1618 yaz_log(YLOG_LOG, "add: set=%d use=%d "
1619 "record_id=%lld section_id=%lld seqno=%lld %s",
1620 p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno,
/* hand the term and its key to the record-key collector */
1625 zebra_rec_keys_write(zh->reg->keys, str, length, &key);
/* Add one sort term (`str`, `length` bytes) for the word described by `p`
 * to the sort keys of the current handle (zh->reg->sortKeys).
 * The attribute ordinal is obtained by index name from the explain info.
 * NOTE(review): the condition between the lookup and the add call is not
 * visible in this excerpt; presumably the add runs only when the lookup
 * fails - confirm. */
1629 static void extract_add_sort_string (RecWord *p, const char *str, int length)
1633 ZebraHandle zh = p->extractCtrl->handle;
1634 ZebraExplainInfo zei = zh->reg->zei;
1639 ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name);
1641 ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name);
/* key components 1..3 identify record, section and position of the term */
1649 key.mem[1] = p->record_id;
1650 key.mem[2] = p->section_id;
1651 key.mem[3] = p->seqno;
1653 zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
/* Variant of extract_add_sort_string that works on the raw sort-key buffer
 * (struct sortKeys) of the handle: it appends an entry consisting of the
 * encoded attribute set, attribute use and length, followed by the string
 * bytes.
 * The buffer is first scanned entry by entry and each entry's set/use is
 * compared with p->attrSet / p->attrUse.
 * NOTE(review): what happens on a match and how `off` skips the stored
 * string are not visible in this excerpt - presumably a match returns early
 * so that only the first value per set/use is kept; confirm. */
1656 static void extract_add_sort_string (RecWord *p, const char *str, int length)
1658 ZebraHandle zh = p->extractCtrl->handle;
1659 struct sortKeys *sk = &zh->reg->sortKeys;
1662 while (off < sk->buf_used)
/* each stored entry starts with three encoded integers: set, use, length */
1666 off += key_SU_decode(&set, (unsigned char *) sk->buf + off);
1667 off += key_SU_decode(&use, (unsigned char *) sk->buf + off);
1668 off += key_SU_decode(&slen, (unsigned char *) sk->buf + off);
1670 if (p->attrSet == set && p->attrUse == use)
/* the scan must end exactly at the end of the used region */
1673 assert (off == sk->buf_used);
/* grow when fewer than IT_MAX_WORD bytes are free: the capacity is raised
 * by 128000 bytes and the used part is copied into the new allocation.
 * NOTE(review): release of the old buffer and the assignment of `b` to
 * sk->buf are not visible here - confirm. */
1675 if (sk->buf_used + IT_MAX_WORD > sk->buf_max)
1679 b = (char *) xmalloc (sk->buf_max += 128000);
1680 if (sk->buf_used > 0)
1681 memcpy (b, sk->buf, sk->buf_used);
/* append the entry header followed by the raw string bytes.
 * NOTE(review): the only visible headroom guarantee is IT_MAX_WORD bytes
 * for header plus `length` bytes - confirm callers bound `length`. */
1685 off += key_SU_encode(p->attrSet, sk->buf + off);
1686 off += key_SU_encode(p->attrUse, sk->buf + off);
1687 off += key_SU_encode(length, sk->buf + off);
1688 memcpy (sk->buf + off, str, length);
1689 sk->buf_used = off + length;
/* Route one non-empty term to the sort-key or the index-key collector.
 * `length` must be greater than zero (asserted). When zebra_maps_is_sort()
 * reports that the index type of `p` is a sort register the term goes to
 * extract_add_sort_string(); extract_add_index_string() is the alternative
 * (the `else` line is presumably among the lines missing from this
 * excerpt). */
1693 void extract_add_string (RecWord *p, const char *string, int length)
1695 assert (length > 0);
1696 if (zebra_maps_is_sort (p->zebra_maps, p->index_type))
1697 extract_add_sort_string (p, string, length);
1699 extract_add_index_string (p, string, length);
/* Split the term buffer of `p` (term_buf / term_len) into words and add
 * them with extract_add_string().
 * The input is run through zebra_maps_input(); mapped output equal to
 * CHR_SPACE is skipped, and mapped output different from CHR_SPACE is
 * collected into a local buffer of at most IT_MAX_WORD characters.
 * NOTE(review): the enclosing loop, the copy statements and the seqno
 * handling are not visible in this excerpt - presumably one
 * extract_add_string() call is made per collected word; confirm. */
1702 static void extract_add_incomplete_field (RecWord *p)
1704 const char *b = p->term_buf;
1705 int remain = p->term_len;
1706 const char **map = 0;
1708 yaz_log(YLOG_DEBUG, "Incomplete field, w='%.*s'", p->term_len, p->term_buf);
1711 map = zebra_maps_input(p->zebra_maps, p->index_type, &b, remain, 0);
1715 char buf[IT_MAX_WORD+1];
/* skip mapped white space */
1719 while (map && *map && **map == *CHR_SPACE)
/* bytes left = total length minus what has been consumed so far */
1721 remain = p->term_len - (b - p->term_buf);
1723 map = zebra_maps_input(p->zebra_maps, p->index_type, &b,
/* collect mapped characters until the next white space */
1731 while (map && *map && **map != *CHR_SPACE)
1733 const char *cp = *map;
/* never store more than IT_MAX_WORD characters in buf */
1735 while (i < IT_MAX_WORD && *cp)
1737 remain = p->term_len - (b - p->term_buf);
1739 map = zebra_maps_input(p->zebra_maps, p->index_type, &b, remain, 0);
1745 extract_add_string (p, buf, i);
/* Normalise the whole term buffer of `p` into a single term and add it with
 * one extract_add_string() call.
 * The input is run through zebra_maps_input(); runs of mapped CHR_SPACE are
 * skipped, a single CHR_SPACE is inserted between already collected text
 * and the next word, and the result is limited to IT_MAX_WORD characters.
 * NOTE(review): the handling of CHR_CUT, the copy statements and the loop
 * exits are not visible in this excerpt - confirm against the full file. */
1750 static void extract_add_complete_field (RecWord *p)
1752 const char *b = p->term_buf;
1753 char buf[IT_MAX_WORD+1];
1754 const char **map = 0;
1755 int i = 0, remain = p->term_len;
1757 yaz_log(YLOG_DEBUG, "Complete field, w='%.*s'",
1758 p->term_len, p->term_buf);
/* the last argument 1 marks the first position of the field */
1761 map = zebra_maps_input (p->zebra_maps, p->index_type, &b, remain, 1);
1763 while (remain > 0 && i < IT_MAX_WORD)
/* skip mapped white space */
1765 while (map && *map && **map == *CHR_SPACE)
/* bytes left = total length minus what has been consumed so far */
1767 remain = p->term_len - (b - p->term_buf);
/* still "first position" as long as nothing has been collected */
1771 int first = i ? 0 : 1; /* first position */
1772 map = zebra_maps_input(p->zebra_maps, p->index_type, &b, remain, first);
/* separate words by exactly one space, but never lead with one */
1780 if (i && i < IT_MAX_WORD)
1781 buf[i++] = *CHR_SPACE;
1782 while (map && *map && **map != *CHR_SPACE)
1784 const char *cp = *map;
1786 if (**map == *CHR_CUT)
1792 if (i >= IT_MAX_WORD)
1794 yaz_log(YLOG_DEBUG, "Adding string to index '%d'", **map);
/* never store more than IT_MAX_WORD characters in buf */
1795 while (i < IT_MAX_WORD && *cp)
1798 remain = p->term_len - (b - p->term_buf);
1801 map = zebra_maps_input (p->zebra_maps, p->index_type, &b,
1810 extract_add_string (p, buf, i);
/* Entry point for adding one token described by `p`.
 * zebra_replace() is applied to the term first; when it returns a WRBUF the
 * term buffer and length of `p` are redirected to that buffer. The token is
 * then handled by extract_add_complete_field() when zebra_maps_is_complete()
 * says the index type is a complete-field register;
 * extract_add_incomplete_field() is the alternative (the `else` line is
 * presumably among the lines missing from this excerpt).
 * NOTE(review): the log statement below refers to p->reg_type and
 * p->length while the rest of the function uses p->index_type and
 * p->term_len; it is presumably compiled out - confirm. */
1813 void extract_token_add (RecWord *p)
1817 yaz_log (YLOG_LOG, "token_add "
1818 "reg_type=%c attrSet=%d attrUse=%d seqno=%d s=%.*s",
1819 p->reg_type, p->attrSet, p->attrUse, p->seqno, p->length,
1822 if ((wrbuf = zebra_replace(p->zebra_maps, p->index_type, 0,
1823 p->term_buf, p->term_len)))
/* continue with the replaced text instead of the original term */
1825 p->term_buf = wrbuf_buf(wrbuf);
1826 p->term_len = wrbuf_len(wrbuf);
1828 if (zebra_maps_is_complete (p->zebra_maps, p->index_type))
1829 extract_add_complete_field (p);
1831 extract_add_incomplete_field(p);
/* setStoreData callback: replace the store-data buffer of the handle with a
 * private copy of `buf` (`sz` bytes).
 * Any previous buffer is freed and the fields are reset before a new block
 * of `sz` bytes is allocated and filled.
 * NOTE(review): a guard around the allocation (e.g. for an empty buffer) may
 * be among the lines missing from this excerpt - confirm. */
1834 static void extract_set_store_data_cb(struct recExtractCtrl *p,
1835 void *buf, size_t sz)
1837 ZebraHandle zh = (ZebraHandle) p->handle;
/* drop whatever was stored before */
1839 xfree(zh->store_data_buf);
1840 zh->store_data_buf = 0;
1841 zh->store_data_size = 0;
/* keep an owned copy so the caller's buffer need not stay alive */
1844 zh->store_data_buf = xmalloc(sz);
1845 zh->store_data_size = sz;
1846 memcpy(zh->store_data_buf, buf, sz);
/* Reset the store-data state of the handle behind `p` and install
 * extract_set_store_data_cb as the setStoreData callback of `p`.
 * Any buffer left over from an earlier record is freed. */
1850 static void extract_set_store_data_prepare(struct recExtractCtrl *p)
1852 ZebraHandle zh = (ZebraHandle) p->handle;
1853 xfree(zh->store_data_buf);
1854 zh->store_data_buf = 0;
1855 zh->store_data_size = 0;
1856 p->setStoreData = extract_set_store_data_cb;
/* Register the schema identified by `oid` in the explain info of the
 * handle behind `p`, by forwarding to zebraExplain_addSchema(). */
1859 void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid)
1861 ZebraHandle zh = (ZebraHandle) p->handle;
1862 zebraExplain_addSchema (zh->reg->zei, oid);
/* Unfinished variant of extract_flushSortKeys working on struct recKeys;
 * the #error below stops the build whenever this variant is selected
 * (the selecting preprocessor condition is not visible in this excerpt).
 * Visible behaviour: every entry in reckeys->buf is decoded with
 * iscz1_decode(), its first key component is used as the sort-index type,
 * and sortIdx_add() is called either with the term that follows the key or
 * with an empty string.
 * NOTE(review): the condition choosing between the two sortIdx_add() calls
 * is not visible - presumably it depends on `cmd`; confirm. */
1866 #error not done yet with zebra_rec_keys_t
1867 void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno,
1868 int cmd, struct recKeys *reckeys)
1870 SortIdx sortIdx = zh->reg->sortIdx;
1871 void *decode_handle = iscz1_start();
1875 while (off < reckeys->buf_used)
1877 const char *src = reckeys->buf + off;
1879 char *dst = (char*) &key;
/* decode one key into `key`; src is advanced by the decoder */
1881 iscz1_decode(decode_handle, &dst, &src);
1882 assert(key.len == 4);
1884 ch = (int) key.mem[0]; /* ordinal for field/use/attribute */
1886 sortIdx_type(sortIdx, ch);
1888 sortIdx_add(sortIdx, src, strlen(src));
1890 sortIdx_add(sortIdx, "", 1);
/* continue after what has been consumed from the buffer */
1895 off = src - reckeys->buf;
1897 assert (off == reckeys->buf_used);
1898 iscz1_stop(decode_handle);
/* Flush the collected sort keys in `sk` to the sort index of the handle for
 * record `sysno`.
 * The record is selected with sortIdx_sysno(); then every entry in sk->buf
 * (encoded set, use and length followed by the string) is decoded, the
 * `use` value selects the sort-index type, and sortIdx_add() is called
 * either with the stored string or with an empty string.
 * NOTE(review): the condition choosing between the two sortIdx_add() calls
 * and the advance of `off` past the string are not visible in this
 * excerpt - presumably the choice depends on `cmd`; confirm. */
1901 void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno,
1902 int cmd, struct sortKeys *sk)
1904 SortIdx sortIdx = zh->reg->sortIdx;
1907 sortIdx_sysno (sortIdx, sysno);
1909 while (off < sk->buf_used)
/* each stored entry starts with three encoded integers: set, use, length */
1913 off += key_SU_decode(&set, (unsigned char *) sk->buf + off);
1914 off += key_SU_decode(&use, (unsigned char *) sk->buf + off);
1915 off += key_SU_decode(&slen, (unsigned char *) sk->buf + off);
1917 sortIdx_type(sortIdx, use);
1919 sortIdx_add(sortIdx, sk->buf + off, slen);
1921 sortIdx_add(sortIdx, "", 1);
/* Prepare `i` for encoding a new run of keys.
 * The only step visible in this excerpt is the creation of an iscz1 encoder
 * handle; other field initialisations may be among the missing lines. */
1927 void encode_key_init (struct encode_info *i)
1936 i->encode_handle = iscz1_start();
1942 /* this is the old encode_key_write
1943 * may be deleted once we are confident that the new works
 * Visible steps: the NUL-terminated term at k is copied into i->buf, the
 * struct it_key stored after the command byte is copied out so that it is
 * aligned, it is encoded with iscz1_encode(), and the buffer is written
 * to outf with a single fwrite().
 * NOTE(review): no bound on the term length against i->buf is visible in
 * this variant - confirm the caller guarantees that it fits.
1946 void encode_key_write (char *k, struct encode_info *i, FILE *outf)
1949 char *bp = i->buf, *bp0;
1950 const char *src = (char *) &key;
1952 /* copy term to output buf */
1953 while ((*bp++ = *k++))
1955 /* and copy & align key so we can mangle */
1956 memcpy (&key, k+1, sizeof(struct it_key)); /* *k is insert/delete */
1959 iscz1_encode(i->encode_handle, &bp, &src);
/* the byte at bp0 receives the command flag times 128 plus the number of
 * bytes written after it (the assignment of bp0 is not visible here) */
1960 *bp0 = (*k * 128) + bp - bp0 - 1; /* length and insert/delete combined */
/* a failed write is logged as fatal */
1961 if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
1963 yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite");
/* Flush routine of the old encoder variant: nothing is buffered there, so
 * the only visible action is releasing the iscz1 encoder handle of `i`;
 * `outf` is not used in the visible lines. */
1968 void encode_key_flush (struct encode_info *i, FILE *outf)
1969 { /* dummy routine */
1970 iscz1_stop(i->encode_handle);
1975 /* new encode_key_write
1976 * The idea is to buffer one more key, and compare them
1977 * If we are going to delete and insert the same key,
1978 * we may as well not bother. Should make a difference in
1979 * updates with small modifications (appending to a mbox)
 * Visible flow: a non-empty k starts a new term, which is copied into
 * i->buf with its length remembered in i->keylen; otherwise output
 * resumes right after the stored term. The key that follows the command
 * byte is copied out, and then one of three things happens: it becomes
 * the buffered "previous" key when none is pending (prevsys is 0), it
 * cancels against the pending key when sysno and seqno are equal, or the
 * pending key is written to outf and replaced by the new one.
 * NOTE(review): several assignments (prevcmd, the third cancel condition,
 * the seqno reset) are on lines missing from this excerpt - confirm
 * against the full file before relying on this summary.
1981 void encode_key_write (char *k, struct encode_info *i, FILE *outf)
1986 if (*k) /* first time for new key */
1989 while ((*bp++ = *k++))
/* remember the term length, not counting the terminating NUL */
1991 i->keylen= bp - i->buf -1;
/* term, terminator and one key must fit into the encode buffer */
1992 assert(i->keylen+1+sizeof(struct it_key) < ENCODE_BUFLEN);
1996 bp=i->buf + i->keylen;
/* copy the key out so it can be accessed as an aligned struct */
2001 memcpy (&key, k+1, sizeof(struct it_key));
2002 if (0==i->prevsys) /* no previous filter, fill up */
2004 i->prevsys=key.sysno;
2005 i->prevseq=key.seqno;
2008 else if ( (i->prevsys==key.sysno) &&
2009 (i->prevseq==key.seqno) &&
2011 { /* same numbers, diff cmd, they cancel out */
2015 { /* different stuff, write previous, move buf */
/* sysno is written as a delta to the last written sysno, doubled, with
 * the command of the pending key added */
2016 bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp);
2017 if (i->sysno != i->prevsys)
2019 i->sysno = i->prevsys;
2022 else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd)
2024 return; /* ??? Filters some sort of duplicates away */
2025 /* ??? Can this ever happen -H 15oct02 */
/* seqno is written as a delta to the last written seqno */
2027 bp = encode_key_int (i->prevseq - i->seqno, bp);
2028 i->seqno = i->prevseq;
2029 i->cmd = i->prevcmd;
/* a failed write is logged as fatal */
2030 if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
2032 yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite");
2035 i->keylen=0; /* ok, it's written, forget it */
/* the incoming key becomes the new pending key */
2036 i->prevsys=key.sysno;
2037 i->prevseq=key.seqno;
/* Flush routine of the new encoder variant: write the key that
 * encode_key_write() left pending in `i` to `outf`, using the same delta
 * encoding of sysno (doubled, plus command) and seqno, then clear the
 * pending state.
 * NOTE(review): the condition guarding the early "nothing to flush" return
 * and any statements after the last visible line are not part of this
 * excerpt - confirm against the full file. */
2042 void encode_key_flush (struct encode_info *i, FILE *outf)
2043 { /* flush the last key from i */
/* output continues right after the term stored in the buffer */
2044 char *bp =i->buf + i->keylen;
2047 return; /* nothing to flush */
2050 bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp);
2051 if (i->sysno != i->prevsys)
2053 i->sysno = i->prevsys;
2056 else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd)
2058 return; /* ??? Filters some sort of duplicates away */
2059 /* ??? Can this ever happen -H 15oct02 */
2061 bp = encode_key_int (i->prevseq - i->seqno, bp);
2062 i->seqno = i->prevseq;
2063 i->cmd = i->prevcmd;
/* a failed write is logged as fatal */
2064 if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
2066 yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite");
2069 i->keylen=0; /* ok, it's written, forget it */
2070 i->prevsys=0; /* forget the values too */