1 /* $Id: extract.c,v 1.186 2005-06-14 20:28:54 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
/* PRINTF_OFF_T: printf conversion used below when logging off_t record
   offsets; the first variant is chosen when _FILE_OFFSET_BITS == 64. */
/* NOTE(review): "%Ld" is not a standard conversion for long long
   (ISO C specifies "%lld"); confirm every target libc accepts it. */
38 #if _FILE_OFFSET_BITS == 64
39 #define PRINTF_OFF_T "%Ld"
41 #define PRINTF_OFF_T "%ld"
/* Compile-time switch, set to 0 here; presumably selects shellsort()
   over the library qsort - TODO confirm at the point of use. */
44 #define USE_SHELLSORT 0
/* Shell sort over a caller-supplied array.
   ar  - array base
   r   - upper bound of the index loop below (i < r); presumably the
         element count - TODO confirm
   s   - size of one element in bytes (scales offsets, length for memcpy)
   cmp - comparison callback; a result > 0 makes the earlier element
         move up by one gap */
47 static void shellsort(void *ar, int r, size_t s,
48 int (*cmp)(const void *a, const void *b))
/* Gap table, largest gap first, ending with a gap of 1. */
53 static const int incs[16] = { 1391376, 463792, 198768, 86961, 33936,
54 13776, 4592, 1968, 861, 336,
55 112, 48, 21, 7, 3, 1 };
56 for ( k = 0; k < 16; k++)
57 for (h = incs[k], i = h; i < r; i++)
/* Shift elements that compare greater than v up by one gap. */
/* NOTE(review): the bound is j > h, so when j == h the slot at index 0
   is not compared; the textbook form uses j >= h - confirm intent. */
61 while (j > h && (*cmp)(a + s*(j-h), v) > 0)
63 memcpy (a + s*j, a + s*(j-h), s);
/* Count one processed record on zh and, on every 1000th record, write a
   progress line with the processed total and the i/u/d counters. */
71 static void logRecord (ZebraHandle zh)
73 ++zh->records_processed;
/* Only log when the counter is a multiple of 1000. */
74 if (!(zh->records_processed % 1000))
76 yaz_log (YLOG_LOG, "Records: "ZINT_FORMAT" i/u/d "
77 ZINT_FORMAT"/"ZINT_FORMAT"/"ZINT_FORMAT,
78 zh->records_processed, zh->records_inserted, zh->records_updated,
/* Forward declaration; called below on the control block before each
   extraction run. */
83 static void extract_set_store_data_prepare(struct recExtractCtrl *p);
/* Initialise a RecWord for an extraction run: share the control block's
   zebra_maps and set the attribute set to VAL_BIB1.
   Installed as the `init` callback of struct recExtractCtrl below. */
85 static void extract_init (struct recExtractCtrl *p, RecWord *w)
87 w->zebra_maps = p->zebra_maps;
89 w->attrSet = VAL_BIB1;
/* Scan the encoded keys of one record for entries whose attribute set and
   attribute use equal attrSetS / attrUseS.
   Matching terms are collected in the function-static table ws (32 slots),
   so results are overwritten by the next call and the function is not
   reentrant. */
98 static const char **searchRecordKey (ZebraHandle zh,
99 struct recKeys *reckeys,
100 int attrSetS, int attrUseS)
102 static const char *ws[32];
103 void *decode_handle = iscz1_start();
109 for (i = 0; i<32; i++)
/* Walk the key buffer one encoded entry at a time. */
112 while (off < reckeys->buf_used)
114 const char *src = reckeys->buf + off;
116 char *dst = (char*) &key;
117 int attrSet, attrUse;
/* Decode the next key into `key`; src is advanced by the decoder. */
119 iscz1_decode(decode_handle, &dst, &src);
120 assert(key.len <= 4 && key.len > 2);
/* mem[0] packs the attribute set above bit 16 and the use in the low
   16 bits; the last mem slot carries the sequence number. */
122 attrSet = (int) key.mem[0] >> 16;
123 attrUse = (int) key.mem[0] & 65535;
124 seqno = (int) key.mem[key.len-1];
126 if (attrUseS == attrUse && attrSetS == attrSet)
/* Slot index is the sequence number relative to startSeq; only offsets
   0..30 are accepted. */
132 woff = seqno - startSeq;
133 if (woff >= 0 && woff < 31)
139 off = src - reckeys->buf;
141 iscz1_stop(decode_handle);
/* The walk must end exactly at the end of the used buffer. */
142 assert (off == reckeys->buf_used);
/* Read state for one input file; passed as the opaque handle to the
   file_seek / file_tell / file_read / file_end callbacks below. */
146 struct file_read_info {
147 off_t file_max; /* maximum offset so far */
148 off_t file_offset; /* current offset */
149 off_t file_moffset; /* offset of rec/rec boundary */
/* Allocate (with xmalloc) and initialise the read state for descriptor fd;
   the record boundary offset starts at 0. */
154 static struct file_read_info *file_read_start (int fd)
156 struct file_read_info *fi = (struct file_read_info *)
157 xmalloc (sizeof(*fi));
161 fi->file_moffset = 0;
/* Counterpart of file_read_start(); presumably releases fi - TODO confirm. */
167 static void file_read_stop (struct file_read_info *fi)
/* Seek callback: remember offset as the current position and move the
   descriptor there (absolute seek). Returns the result of lseek(). */
172 static off_t file_seek (void *handle, off_t offset)
174 struct file_read_info *p = (struct file_read_info *) handle;
175 p->file_offset = offset;
176 return lseek (p->fd, offset, SEEK_SET);
/* Tell callback: report the tracked current offset (no system call). */
179 static off_t file_tell (void *handle)
181 struct file_read_info *p = (struct file_read_info *) handle;
182 return p->file_offset;
/* Read callback: read up to count bytes into buf with read() and keep
   file_max as the highest offset reached so far. */
185 static int file_read (void *handle, char *buf, size_t count)
187 struct file_read_info *p = (struct file_read_info *) handle;
190 r = read (fd, buf, count);
/* Raise the high-water mark when the current offset passes it. */
194 if (p->file_offset > p->file_max)
195 p->file_max = p->file_offset;
/* End-of-record callback: store offset as the new record boundary when it
   differs from the one already recorded. */
200 static void file_end (void *handle, off_t offset)
202 struct file_read_info *p = (struct file_read_info *) handle;
204 if (offset != p->file_moffset)
206 p->file_moffset = offset;
/* Build the match string for a record from the criteria in spec.
   zh      - handle; supplies the data1 handle, group, database and type
   reckeys - keys of the record, searched for attribute based parts
   fname   - file name, used in the warning messages
   spec    - match criteria: attribute pairs "(attset,attname)", named
             specials (group, database, filename, type) and quoted literals
   The result is assembled in the function-static dstBuf, so it is
   overwritten by the next call. */
211 static char *fileMatchStr (ZebraHandle zh,
212 struct recKeys *reckeys,
213 const char *fname, const char *spec)
215 static char dstBuf[2048]; /* static here ??? */
217 const char *s = spec;
218 static const char **w;
/* Skip blanks and tabs between criteria parts. */
222 while (*s == ' ' || *s == '\t')
228 char attset_str[64], attname_str[64];
229 data1_attset *attset;
232 int attSet = 1, attUse = 1;
/* Copy the attribute set name up to ',' or ')'. */
/* NOTE(review): no bound against the 64 byte buffers is visible on these
   copy loops - confirm a length check guards them. */
236 for (i = 0; *s && *s != ',' && *s != ')'; s++)
238 attset_str[i++] = *s;
239 attset_str[i] = '\0';
/* Copy the attribute name up to ')'. */
244 for (i = 0; *s && *s != ')'; s++)
246 attname_str[i++] = *s;
247 attname_str[i] = '\0';
/* Resolve the names through data1; attUse may also be taken from the
   numeric value of the attribute name. */
250 if ((attset = data1_get_attset (zh->reg->dh, attset_str)))
253 attSet = attset->reference;
254 att = data1_getattbyname(zh->reg->dh, attset, attname_str);
258 attUse = atoi (attname_str);
/* Collect the record's terms for this attribute pair. */
260 w = searchRecordKey (zh, reckeys, attSet, attUse);
265 for (i = 0; i<32; i++)
270 yaz_log (YLOG_WARN, "Missing ) in match criteria %s in group %s",
271 spec, zh->m_group ? zh->m_group : "none");
276 for (i = 0; i<32; i++)
277 if (matchFlag[i] && w[i])
289 yaz_log (YLOG_WARN, "Record didn't contain match"
290 " fields in (%s,%s)", attset_str, attname_str);
/* Named special: the word after the current character selects a value
   taken from the handle. */
298 const char *spec_src = NULL;
299 const char *s1 = ++s;
300 while (*s1 && *s1 != ' ' && *s1 != '\t')
306 memcpy (special, s, spec_len);
307 special[spec_len] = '\0';
310 if (!strcmp (special, "group"))
311 spec_src = zh->m_group;
312 else if (!strcmp (special, "database"))
313 spec_src = zh->basenames[0];
314 else if (!strcmp (special, "filename")) {
317 else if (!strcmp (special, "type"))
318 spec_src = zh->m_record_type;
/* NOTE(review): strcpy into dstBuf - no remaining-space check is visible
   here; confirm the total cannot exceed 2048 bytes. */
323 strcpy (dst, spec_src);
324 dst += strlen (spec_src);
/* Quoted literal: copied verbatim up to the matching quote character. */
327 else if (*s == '\"' || *s == '\'')
329 int stopMarker = *s++;
333 while (*s && *s != stopMarker)
336 tmpString[i++] = *s++;
341 strcpy (dst, tmpString);
342 dst += strlen (tmpString);
346 yaz_log (YLOG_WARN, "Syntax error in match criteria %s in group %s",
347 spec, zh->m_group ? zh->m_group : "none");
354 yaz_log (YLOG_WARN, "No match criteria for record %s in group %s",
355 fname, zh->m_group ? zh->m_group : "none");
/* Logging context for a record; holds a pointer to its record group. */
362 struct recordLogInfo {
365 struct recordGroup *rGroup;
/* Prepare keys for a new extraction run by resetting its iscz1 codec
   handle. Called below before every extraction. */
368 void create_rec_keys_codec(struct recKeys *keys)
371 iscz1_reset(keys->codec_handle);
/* Fill the handle-dependent parts of an extraction control block:
   per-register-type data for all 256 byte values, the shared zebra_maps,
   and flagShowRecords, which is set when the handle is not in
   read/write mode (m_flag_rw is zero). */
374 static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl)
377 for (i = 0; i<256; i++)
379 if (zebra_maps_is_positioned(zh->reg->zebra_maps, i))
384 ctrl->zebra_maps = zh->reg->zebra_maps;
385 ctrl->flagShowRecords = !zh->m_flag_rw;
/* Extract one record from the open file described by fi and apply the
   result to the register.
   zh                - handle; supplies register, resources and counters
   sysno             - in/out record number; when zero it may be filled in
                       from the match dictionary or from a new record
   fname             - file name, used in log lines and stored on the record
   fi                - read state; file_moffset marks where this record starts
   recTypeClientData - passed to the record type's extract handler
   A record without a known sysno is added; an existing record is either
   deleted or updated. Log lines carry record type, file name and offset. */
388 static int file_extract_record(ZebraHandle zh,
389 SYSNO *sysno, const char *fname,
391 struct file_read_info *fi,
394 void *recTypeClientData)
396 RecordAttr *recordAttr;
398 const char *matchStr = 0;
401 off_t recordOffset = 0;
403 /* announce database */
404 if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0]))
406 if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0],
407 zh->m_explain_database))
413 struct recExtractCtrl extractCtrl;
415 /* we are going to read from a file, so prepare the extraction */
416 create_rec_keys_codec(&zh->reg->keys);
418 zh->reg->sortKeys.buf_used = 0;
/* The record begins at the last boundary reported through file_end. */
420 recordOffset = fi->file_moffset;
421 extractCtrl.handle = zh;
422 extractCtrl.offset = fi->file_moffset;
423 extractCtrl.readf = file_read;
424 extractCtrl.seekf = file_seek;
425 extractCtrl.tellf = file_tell;
426 extractCtrl.endf = file_end;
428 extractCtrl.init = extract_init;
429 extractCtrl.tokenAdd = extract_token_add;
430 extractCtrl.schemaAdd = extract_schema_add;
431 extractCtrl.dh = zh->reg->dh;
432 extractCtrl.match_criteria[0] = '\0';
/* first_record is set only when nothing has been read from the file yet. */
433 extractCtrl.first_record = fi->file_offset ? 0 : 1;
435 extract_set_store_data_prepare(&extractCtrl);
437 init_extractCtrl(zh, &extractCtrl);
440 printf ("File: %s " PRINTF_OFF_T "\n", fname, recordOffset);
/* Prefix log lines written during extraction with "file:offset". */
444 sprintf (msg, "%s:" PRINTF_OFF_T , fname, recordOffset);
445 yaz_log_init_prefix2 (msg);
448 r = (*recType->extract)(recTypeClientData, &extractCtrl);
450 yaz_log_init_prefix2 (0);
451 if (r == RECCTRL_EXTRACT_EOF)
453 else if (r == RECCTRL_EXTRACT_ERROR_GENERIC)
455 /* error occurred during extraction ... */
457 zh->records_processed < zh->m_file_verbose_limit)
459 yaz_log (YLOG_WARN, "fail %s %s " PRINTF_OFF_T, zh->m_record_type,
460 fname, recordOffset);
464 else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER)
466 /* error occurred during extraction ... */
468 zh->records_processed < zh->m_file_verbose_limit)
470 yaz_log (YLOG_WARN, "no filter for %s %s "
471 PRINTF_OFF_T, zh->m_record_type,
472 fname, recordOffset);
/* Match criteria supplied by the filter are used as the match string. */
476 if (extractCtrl.match_criteria[0])
477 matchStr = extractCtrl.match_criteria;
480 /* perform match if sysno not known and if match criteria is specified */
486 if (matchStr == 0 && zh->m_record_id && *zh->m_record_id)
489 matchStr = fileMatchStr (zh, &zh->reg->keys, fname,
493 yaz_log(YLOG_WARN, "Bad match criteria");
/* A dictionary hit stores a length byte followed by the sysno. */
499 char *rinfo = dict_lookup (zh->reg->matchDict, matchStr);
502 assert(*rinfo == sizeof(*sysno));
503 memcpy (sysno, rinfo+1, sizeof(*sysno));
507 if (! *sysno && zh->reg->keys.buf_used == 0)
509 /* the extraction process returned no information - the record
510 is probably empty - unless flagShowRecords is in use */
514 if (zh->records_processed < zh->m_file_verbose_limit)
515 yaz_log (YLOG_WARN, "empty %s %s " PRINTF_OFF_T, zh->m_record_type,
516 fname, recordOffset);
525 yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T, zh->m_record_type,
526 fname, recordOffset);
527 yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
530 if (zh->records_processed < zh->m_file_verbose_limit)
531 yaz_log (YLOG_LOG, "add %s %s " PRINTF_OFF_T, zh->m_record_type,
532 fname, recordOffset);
533 rec = rec_new (zh->reg->records);
537 recordAttr = rec_init_attr (zh->reg->zei, rec);
/* Remember the new sysno under the match string, then index the keys. */
541 dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno);
543 extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
544 extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys);
546 zh->records_inserted++;
550 /* record already exists */
551 struct recKeys delkeys;
552 struct sortKeys sortKeys;
554 rec = rec_get (zh->reg->records, *sysno);
557 recordAttr = rec_init_attr (zh->reg->zei, rec);
/* Without force_update, a record already stamped with the current run
   number is skipped. */
559 if (!force_update && recordAttr->runNumber ==
560 zebraExplain_runNumberIncrement (zh->reg->zei, 0))
562 yaz_log (YLOG_LOG, "run number = " ZINT_FORMAT,
563 recordAttr->runNumber);
564 yaz_log (YLOG_LOG, "skipped %s %s " PRINTF_OFF_T,
565 zh->m_record_type, fname, recordOffset);
566 extract_flushSortKeys (zh, *sysno, -1, &zh->reg->sortKeys);
/* Flush the keys stored with the old record using cmd 0. */
571 delkeys.buf_used = rec->size[recInfo_delKeys];
572 delkeys.buf = rec->info[recInfo_delKeys];
574 sortKeys.buf_used = rec->size[recInfo_sortKeys];
575 sortKeys.buf = rec->info[recInfo_sortKeys];
577 extract_flushSortKeys (zh, *sysno, 0, &sortKeys);
578 extract_flushRecordKeys (zh, *sysno, 0, &delkeys);
581 /* record going to be deleted */
582 if (!delkeys.buf_used)
584 yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T,
585 zh->m_record_type, fname, recordOffset);
586 yaz_log (YLOG_WARN, "cannot delete file above, storeKeys false");
590 if (zh->records_processed < zh->m_file_verbose_limit)
591 yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T,
592 zh->m_record_type, fname, recordOffset);
593 zh->records_deleted++;
595 dict_delete (zh->reg->matchDict, matchStr);
596 rec_del (zh->reg->records, &rec);
604 /* record going to be updated */
605 if (!delkeys.buf_used)
607 yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T,
608 zh->m_record_type, fname, recordOffset);
609 yaz_log (YLOG_WARN, "cannot update file above, storeKeys false");
613 if (zh->records_processed < zh->m_file_verbose_limit)
614 yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T,
615 zh->m_record_type, fname, recordOffset);
616 extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
617 extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys);
618 zh->records_updated++;
622 /* update file type */
623 xfree (rec->info[recInfo_fileType]);
624 rec->info[recInfo_fileType] =
625 rec_strdup (zh->m_record_type, &rec->size[recInfo_fileType]);
627 /* update filename */
628 xfree (rec->info[recInfo_filename]);
629 rec->info[recInfo_filename] =
630 rec_strdup (fname, &rec->size[recInfo_filename]);
632 /* update delete keys */
633 xfree (rec->info[recInfo_delKeys]);
634 if (zh->reg->keys.buf_used > 0 && zh->m_store_keys == 1)
/* Ownership of the key buffer moves to the record; the register's
   pointer is cleared so the buffer is not shared. */
636 rec->size[recInfo_delKeys] = zh->reg->keys.buf_used;
637 rec->info[recInfo_delKeys] = zh->reg->keys.buf;
638 zh->reg->keys.buf = NULL;
639 zh->reg->keys.buf_max = 0;
643 rec->info[recInfo_delKeys] = NULL;
644 rec->size[recInfo_delKeys] = 0;
647 /* update sort keys */
648 xfree (rec->info[recInfo_sortKeys]);
650 rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
651 rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
652 zh->reg->sortKeys.buf = NULL;
653 zh->reg->sortKeys.buf_max = 0;
655 /* save file size of original record */
656 zebraExplain_recordBytesIncrement (zh->reg->zei,
657 - recordAttr->recordSize);
/* Size is the distance to the next record boundary; when that is zero
   the highest offset read is used instead. */
658 recordAttr->recordSize = fi->file_moffset - recordOffset;
659 if (!recordAttr->recordSize)
660 recordAttr->recordSize = fi->file_max - recordOffset;
661 zebraExplain_recordBytesIncrement (zh->reg->zei,
662 recordAttr->recordSize);
664 /* set run-number for this record */
665 recordAttr->runNumber = zebraExplain_runNumberIncrement (zh->reg->zei,
668 /* update store data */
669 xfree (rec->info[recInfo_storeData]);
670 if (zh->store_data_buf)
672 rec->size[recInfo_storeData] = zh->store_data_size;
673 rec->info[recInfo_storeData] = zh->store_data_buf;
674 zh->store_data_buf = 0;
676 else if (zh->m_store_data)
/* No prepared store buffer: re-read the raw record bytes from the file. */
678 rec->size[recInfo_storeData] = recordAttr->recordSize;
679 rec->info[recInfo_storeData] = (char *)
680 xmalloc (recordAttr->recordSize);
681 if (lseek (fi->fd, recordOffset, SEEK_SET) < 0)
683 yaz_log (YLOG_ERRNO|YLOG_FATAL, "seek to " PRINTF_OFF_T " in %s",
684 recordOffset, fname);
687 if (read (fi->fd, rec->info[recInfo_storeData], recordAttr->recordSize)
688 < recordAttr->recordSize)
690 yaz_log (YLOG_ERRNO|YLOG_FATAL, "read %d bytes of %s",
691 recordAttr->recordSize, fname);
697 rec->info[recInfo_storeData] = NULL;
698 rec->size[recInfo_storeData] = 0;
700 /* update database name */
701 xfree (rec->info[recInfo_databaseName]);
702 rec->info[recInfo_databaseName] =
703 rec_strdup (zh->basenames[0], &rec->size[recInfo_databaseName]);
706 recordAttr->recordOffset = recordOffset;
708 /* commit this record */
709 rec_put (zh->reg->records, &rec);
/* Process the file fname: work out record type and match criteria (from
   the handle, or from resources keyed by the file extension), open the
   file and run file_extract_record() on it.
   zh->m_record_type is saved on entry and restored before leaving. */
714 int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname,
721 struct file_read_info *fi;
722 const char *original_record_type = 0;
724 void *recTypeClientData;
/* Resource names are prefixed with "<group>." when a group is set. */
726 if (!zh->m_group || !*zh->m_group)
729 sprintf (gprefix, "%s.", zh->m_group);
731 yaz_log (YLOG_DEBUG, "fileExtract %s", fname);
733 /* determine file extension */
/* NOTE(review): the extension is copied with strcpy; the size of ext is
   not visible here - confirm it cannot overflow. */
735 for (i = strlen(fname); --i >= 0; )
738 else if (fname[i] == '.')
740 strcpy (ext, fname+i+1);
743 /* determine file type - depending on extension */
744 original_record_type = zh->m_record_type;
745 if (!zh->m_record_type)
747 sprintf (ext_res, "%srecordType.%s", gprefix, ext);
748 zh->m_record_type = res_get (zh->res, ext_res);
750 if (!zh->m_record_type)
752 if (zh->records_processed < zh->m_file_verbose_limit)
753 yaz_log (YLOG_LOG, "? %s", fname);
756 /* determine match criteria */
757 if (!zh->m_record_id)
759 sprintf (ext_res, "%srecordId.%s", gprefix, ext);
760 zh->m_record_id = res_get (zh->res, ext_res);
764 recType_byName (zh->reg->recTypes, zh->res, zh->m_record_type,
765 &recTypeClientData)))
767 yaz_log(YLOG_WARN, "No such record type: %s", zh->m_record_type);
771 switch(recType->version)
776 yaz_log(YLOG_WARN, "Bad filter version: %s", zh->m_record_type);
778 if (sysno && deleteFlag)
/* Relative names are resolved against zh->path_reg when it is set. */
784 if (zh->path_reg && !yaz_is_abspath (fname))
786 strcpy (full_rep, zh->path_reg);
787 strcat (full_rep, "/");
788 strcat (full_rep, fname);
791 strcpy (full_rep, fname);
794 if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1)
796 yaz_log (YLOG_WARN|YLOG_ERRNO, "open %s", full_rep);
797 zh->m_record_type = original_record_type;
801 fi = file_read_start (fd);
/* Start the next record where reading currently stands. */
804 fi->file_moffset = fi->file_offset;
805 fi->file_more = 0; /* file_end not called (yet) */
806 r = file_extract_record (zh, sysno, fname, deleteFlag, fi, 1,
807 recType, recTypeClientData);
809 { /* file_end has been called so reset offset .. */
810 fi->file_offset = fi->file_moffset;
811 lseek(fi->fd, fi->file_moffset, SEEK_SET);
818 zh->m_record_type = original_record_type;
823 If sysno is provided, then it's used to identify the record.
824 If not, and match_criteria is provided, then sysno is guessed
825 If not, and a record is provided, then sysno is got from there
/* Extract one record held in memory (buf, buf_size) and apply the result
   to the register: add it, or update/delete the existing record.
   buf, buf_size  - the record bytes; read through the zebra_record_int_*
                    callbacks
   recordType     - record type name; when empty, zh->m_record_type is used
   match_criteria - optional match string; criteria set by the filter
                    during extraction take its place
   Log lines use "<no file>" as the file name when none is given. */
828 ZEBRA_RES buffer_extract_record (ZebraHandle zh,
829 const char *buf, size_t buf_size,
832 const char *recordType,
834 const char *match_criteria,
839 RecordAttr *recordAttr;
840 struct recExtractCtrl extractCtrl;
842 const char *matchStr = 0;
843 RecType recType = NULL;
846 long recordOffset = 0;
847 struct zebra_fetch_control fc;
848 const char *pr_fname = fname; /* filename to print .. */
849 int show_progress = zh->records_processed < zh->m_file_verbose_limit ? 1:0;
852 pr_fname = "<no file>"; /* make it printable if file is omitted */
/* Read callbacks operate on the in-memory buffer, starting at position 0. */
855 fc.record_int_buf = buf;
856 fc.record_int_len = buf_size;
857 fc.record_int_pos = 0;
859 fc.record_offset = 0;
861 extractCtrl.offset = 0;
862 extractCtrl.readf = zebra_record_int_read;
863 extractCtrl.seekf = zebra_record_int_seek;
864 extractCtrl.tellf = zebra_record_int_tell;
865 extractCtrl.endf = zebra_record_int_end;
866 extractCtrl.first_record = 1;
867 extractCtrl.fh = &fc;
869 create_rec_keys_codec(&zh->reg->keys);
871 zh->reg->sortKeys.buf_used = 0;
873 if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0]))
875 if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0],
876 zh->m_explain_database))
/* An explicit recordType wins; otherwise fall back to the handle's type. */
880 if (recordType && *recordType)
882 yaz_log (YLOG_DEBUG, "Record type explicitly specified: %s", recordType);
883 recType = recType_byName (zh->reg->recTypes, zh->res, recordType,
888 if (!(zh->m_record_type))
890 yaz_log (YLOG_WARN, "No such record type defined");
893 yaz_log (YLOG_DEBUG, "Get record type from rgroup: %s",zh->m_record_type);
894 recType = recType_byName (zh->reg->recTypes, zh->res,
895 zh->m_record_type, &clientData);
896 recordType = zh->m_record_type;
901 yaz_log (YLOG_WARN, "No such record type: %s", zh->m_record_type);
905 extractCtrl.init = extract_init;
906 extractCtrl.tokenAdd = extract_token_add;
907 extractCtrl.schemaAdd = extract_schema_add;
908 extractCtrl.dh = zh->reg->dh;
909 extractCtrl.handle = zh;
910 extractCtrl.match_criteria[0] = '\0';
912 init_extractCtrl(zh, &extractCtrl);
914 extract_set_store_data_prepare(&extractCtrl);
916 r = (*recType->extract)(clientData, &extractCtrl);
918 if (r == RECCTRL_EXTRACT_EOF)
920 else if (r == RECCTRL_EXTRACT_ERROR_GENERIC)
922 /* error occurred during extraction ... */
923 yaz_log (YLOG_WARN, "extract error: generic");
926 else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER)
928 /* error occurred during extraction ... */
929 yaz_log (YLOG_WARN, "extract error: no such filter");
/* Criteria produced by the filter replace the caller's match_criteria. */
935 if (extractCtrl.match_criteria[0])
936 match_criteria = extractCtrl.match_criteria;
940 if (match_criteria && *match_criteria) {
941 matchStr = match_criteria;
943 if (zh->m_record_id && *zh->m_record_id) {
944 matchStr = fileMatchStr (zh, &zh->reg->keys, pr_fname,
948 yaz_log (YLOG_WARN, "Bad match criteria (recordID)");
/* A dictionary hit stores a length byte followed by the sysno. */
954 rinfo = dict_lookup (zh->reg->matchDict, matchStr);
957 assert(*rinfo == sizeof(*sysno));
958 memcpy (sysno, rinfo+1, sizeof(*sysno));
962 if (zh->reg->keys.buf_used == 0)
964 /* the extraction process returned no information - the record
965 is probably empty - unless flagShowRecords is in use */
976 yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
977 pr_fname, (long) recordOffset);
978 yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
982 yaz_log (YLOG_LOG, "add %s %s %ld", recordType, pr_fname,
983 (long) recordOffset);
984 rec = rec_new (zh->reg->records);
988 recordAttr = rec_init_attr (zh->reg->zei, rec);
992 dict_insert (zh->reg->matchDict, matchStr,
993 sizeof(*sysno), sysno);
995 extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
996 extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys);
998 zh->records_inserted++;
1002 /* record already exists */
1003 struct recKeys delkeys;
1004 struct sortKeys sortKeys;
1009 yaz_log (YLOG_LOG, "skipped %s %s %ld",
1010 recordType, pr_fname, (long) recordOffset);
1015 rec = rec_get (zh->reg->records, *sysno);
1018 recordAttr = rec_init_attr (zh->reg->zei, rec);
/* Without force_update, a record already stamped with the current run
   number is skipped. */
1020 if (!force_update) {
1021 if (recordAttr->runNumber ==
1022 zebraExplain_runNumberIncrement (zh->reg->zei, 0))
1025 yaz_log (YLOG_LOG, "skipped %s %s %ld", recordType,
1026 pr_fname, (long) recordOffset);
1027 extract_flushSortKeys (zh, *sysno, -1, &zh->reg->sortKeys);
/* Flush the keys stored with the old record using cmd 0. */
1034 delkeys.buf_used = rec->size[recInfo_delKeys];
1035 delkeys.buf = rec->info[recInfo_delKeys];
1037 sortKeys.buf_used = rec->size[recInfo_sortKeys];
1038 sortKeys.buf = rec->info[recInfo_sortKeys];
1040 extract_flushSortKeys (zh, *sysno, 0, &sortKeys);
1041 extract_flushRecordKeys (zh, *sysno, 0, &delkeys);
1044 /* record going to be deleted */
1045 if (!delkeys.buf_used)
1049 yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
1050 pr_fname, (long) recordOffset);
1051 yaz_log (YLOG_WARN, "cannot delete file above, "
1058 yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
1059 pr_fname, (long) recordOffset);
1060 zh->records_deleted++;
1062 dict_delete (zh->reg->matchDict, matchStr);
1063 rec_del (zh->reg->records, &rec);
1071 /* record going to be updated */
1072 if (!delkeys.buf_used)
1076 yaz_log (YLOG_LOG, "update %s %s %ld", recordType,
1077 pr_fname, (long) recordOffset);
1078 yaz_log (YLOG_WARN, "cannot update file above, storeKeys false");
1084 yaz_log (YLOG_LOG, "update %s %s %ld", recordType,
1085 pr_fname, (long) recordOffset);
1086 extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
1087 extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys);
1088 zh->records_updated++;
1092 /* update file type */
1093 xfree (rec->info[recInfo_fileType]);
1094 rec->info[recInfo_fileType] =
1095 rec_strdup (recordType, &rec->size[recInfo_fileType]);
1097 /* update filename */
1098 xfree (rec->info[recInfo_filename]);
1099 rec->info[recInfo_filename] =
1100 rec_strdup (fname, &rec->size[recInfo_filename]);
1102 /* update delete keys */
1103 xfree (rec->info[recInfo_delKeys]);
1104 if (zh->reg->keys.buf_used > 0 && zh->m_store_keys == 1)
/* Ownership of the key buffer moves to the record; the register's
   pointer is cleared so the buffer is not shared. */
1106 rec->size[recInfo_delKeys] = zh->reg->keys.buf_used;
1107 rec->info[recInfo_delKeys] = zh->reg->keys.buf;
1108 zh->reg->keys.buf = NULL;
1109 zh->reg->keys.buf_max = 0;
1113 rec->info[recInfo_delKeys] = NULL;
1114 rec->size[recInfo_delKeys] = 0;
1117 /* update sort keys */
1118 xfree (rec->info[recInfo_sortKeys]);
1120 rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
1121 rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
1122 zh->reg->sortKeys.buf = NULL;
1123 zh->reg->sortKeys.buf_max = 0;
1125 /* save file size of original record */
1126 zebraExplain_recordBytesIncrement (zh->reg->zei,
1127 - recordAttr->recordSize);
/* NOTE(review): `fi` is not among the parameters or locals visible in this
   function; the next three lines presumably belong to a disabled
   preprocessor branch, with buf_size being the live assignment - confirm. */
1129 recordAttr->recordSize = fi->file_moffset - recordOffset;
1130 if (!recordAttr->recordSize)
1131 recordAttr->recordSize = fi->file_max - recordOffset;
1133 recordAttr->recordSize = buf_size;
1135 zebraExplain_recordBytesIncrement (zh->reg->zei,
1136 recordAttr->recordSize);
1138 /* set run-number for this record */
1139 recordAttr->runNumber =
1140 zebraExplain_runNumberIncrement (zh->reg->zei, 0);
1142 /* update store data */
1143 xfree (rec->info[recInfo_storeData]);
1145 /* update store data */
1146 if (zh->store_data_buf)
1148 rec->size[recInfo_storeData] = zh->store_data_size;
1149 rec->info[recInfo_storeData] = zh->store_data_buf;
1150 zh->store_data_buf = 0;
1152 else if (zh->m_store_data)
/* No prepared store buffer: keep a copy of the caller's record bytes. */
1154 rec->size[recInfo_storeData] = recordAttr->recordSize;
1155 rec->info[recInfo_storeData] = (char *)
1156 xmalloc (recordAttr->recordSize);
1157 memcpy (rec->info[recInfo_storeData], buf, recordAttr->recordSize);
1161 rec->info[recInfo_storeData] = NULL;
1162 rec->size[recInfo_storeData] = 0;
1164 /* update database name */
1165 xfree (rec->info[recInfo_databaseName]);
1166 rec->info[recInfo_databaseName] =
1167 rec_strdup (zh->basenames[0], &rec->size[recInfo_databaseName]);
1170 recordAttr->recordOffset = recordOffset;
1172 /* commit this record */
1173 rec_put (zh->reg->records, &rec);
/* Re-index an existing record from its data1 tree n.
   handle - the ZebraHandle, passed as an opaque pointer
   rec    - the record; its stored delete/sort keys are flushed with cmd 0,
            the freshly extracted keys with cmd 1, and the new key buffers
            are then stored on the record
   n      - root of the tree handed to grs_extract_tree() */
1178 int explain_extract (void *handle, Record rec, data1_node *n)
1180 ZebraHandle zh = (ZebraHandle) handle;
1181 struct recExtractCtrl extractCtrl;
/* Select the record's database, creating it when it cannot be selected. */
1183 if (zebraExplain_curDatabase (zh->reg->zei,
1184 rec->info[recInfo_databaseName]))
1187 if (zebraExplain_newDatabase (zh->reg->zei,
1188 rec->info[recInfo_databaseName], 0))
1192 create_rec_keys_codec(&zh->reg->keys);
1194 zh->reg->sortKeys.buf_used = 0;
1196 extractCtrl.init = extract_init;
1197 extractCtrl.tokenAdd = extract_token_add;
1198 extractCtrl.schemaAdd = extract_schema_add;
1199 extractCtrl.dh = zh->reg->dh;
1201 init_extractCtrl(zh, &extractCtrl);
/* Override the value set by init_extractCtrl(). */
1203 extractCtrl.flagShowRecords = 0;
1204 extractCtrl.match_criteria[0] = '\0';
1205 extractCtrl.handle = handle;
1206 extractCtrl.first_record = 1;
1208 extract_set_store_data_prepare(&extractCtrl);
1211 grs_extract_tree(&extractCtrl, n);
/* Flush the keys previously stored on the record, if any. */
1213 if (rec->size[recInfo_delKeys])
1215 struct recKeys delkeys;
1216 struct sortKeys sortkeys;
1218 delkeys.buf_used = rec->size[recInfo_delKeys];
1219 delkeys.buf = rec->info[recInfo_delKeys];
1221 sortkeys.buf_used = rec->size[recInfo_sortKeys];
1222 sortkeys.buf = rec->info[recInfo_sortKeys];
1224 extract_flushSortKeys (zh, rec->sysno, 0, &sortkeys);
1225 extract_flushRecordKeys (zh, rec->sysno, 0, &delkeys);
1227 extract_flushRecordKeys (zh, rec->sysno, 1, &zh->reg->keys);
1228 extract_flushSortKeys (zh, rec->sysno, 1, &zh->reg->sortKeys);
/* Hand the new key buffers over to the record and detach them from the
   register. */
1230 xfree (rec->info[recInfo_delKeys]);
1231 rec->size[recInfo_delKeys] = zh->reg->keys.buf_used;
1232 rec->info[recInfo_delKeys] = zh->reg->keys.buf;
1233 zh->reg->keys.buf = NULL;
1234 zh->reg->keys.buf_max = 0;
1236 xfree (rec->info[recInfo_sortKeys]);
1237 rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
1238 rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
1239 zh->reg->sortKeys.buf = NULL;
1240 zh->reg->sortKeys.buf_max = 0;
/* Decode every key in reckeys and append it, tagged with cmd, to the
   in-memory key buffer zh->reg->key_buf.
   sysno   - record number the keys belong to
   cmd     - stored as one byte with each entry; also decides whether the
             record count is incremented (non-zero) or decremented (zero)
   reckeys - encoded keys of one record
   The buffer is allocated on first use with a size taken from the
   "memmax" resource (in megabytes, default "8"). */
1245 void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
1246 int cmd, struct recKeys *reckeys)
1248 void *decode_handle = iscz1_start();
1251 ZebraExplainInfo zei = zh->reg->zei;
1253 if (!zh->reg->key_buf)
1255 int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8"));
1258 yaz_log(YLOG_WARN, "Invalid memory setting, using default 8 MB");
1261 /* FIXME: That "8" should be in a default settings include */
1262 /* not hard-coded here! -H */
/* ptr_top is the buffer capacity counted in pointer-sized slots. */
1263 zh->reg->key_buf = (char**) xmalloc (mem);
1264 zh->reg->ptr_top = mem/sizeof(char*);
1266 zh->reg->key_buf_used = 0;
1267 zh->reg->key_file_no = 0;
1269 zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1);
1271 while (off < reckeys->buf_used)
1273 const char *src = reckeys->buf + off;
1275 char *dst = (char*) &key;
1277 iscz1_decode(decode_handle, &dst, &src);
1278 assert(key.len == 4);
/* Key data grows from the start of the buffer and the pointer table is
   indexed back from ptr_top; flush when fewer than 1024 bytes separate
   them. */
1280 if (zh->reg->key_buf_used + 1024 >
1281 (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*))
1282 extract_flushWriteKeys (zh,0);
1284 assert(zh->reg->ptr_i > 0);
/* Record where this entry starts. */
1285 (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] =
1286 (char*)zh->reg->key_buf + zh->reg->key_buf_used;
1288 ch = (int) key.mem[0]; /* ordinal for field/use/attribute */
1290 zh->reg->key_buf_used +=
1291 key_SU_encode (ch,((char*)zh->reg->key_buf) +
1292 zh->reg->key_buf_used);
/* Term bytes, then a NUL terminator, then the cmd byte. */
1294 ((char*)zh->reg->key_buf) [(zh->reg->key_buf_used)++] = *src++;
1296 ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = '\0';
1297 ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = cmd;
1300 if (key.mem[1]) /* filter specified record ID */
1301 key.mem[0] = key.mem[1];
1304 key.mem[1] = key.mem[2]; /* section_id */
1305 key.mem[2] = key.mem[3]; /* sequence .. */
1307 memcpy ((char*)zh->reg->key_buf + zh->reg->key_buf_used,
1309 (zh->reg->key_buf_used) += sizeof(key);
1310 off = src - reckeys->buf;
/* The walk must end exactly at the end of the used buffer. */
1312 assert (off == reckeys->buf_used);
1313 iscz1_stop(decode_handle);
/* Sort the pending key pointers and either merge them straight from
   memory or write them to a numbered temporary file.
   zh    - handle owning the key buffer
   final - non-zero when this is the last flush; with the "auto" policy it
           allows the direct merge when no section was written before */
1316 void extract_flushWriteKeys (ZebraHandle zh, int final)
1317 /* optimizing: if final=1, and no files written yet */
1318 /* push the keys directly to merge, sidestepping the */
1319 /* temp file altogether. Speeds small updates */
1322 char out_fname[200];
1324 struct encode_info encode_info;
1325 int ptr_i = zh->reg->ptr_i;
/* Nothing buffered: only emit debug output. */
1330 if (!zh->reg->key_buf || ptr_i <= 0)
1332 yaz_log (YLOG_DEBUG, " nothing to flush section=%d buf=%p i=%d",
1333 zh->reg->key_file_no, zh->reg->key_buf, ptr_i);
1334 yaz_log (YLOG_DEBUG, " buf=%p ",
1336 yaz_log (YLOG_DEBUG, " ptr=%d ",zh->reg->ptr_i);
1337 yaz_log (YLOG_DEBUG, " reg=%p ",zh->reg);
1342 (zh->reg->key_file_no)++;
1343 yaz_log (YLOG_LOG, "sorting section %d", (zh->reg->key_file_no));
1344 yaz_log (YLOG_DEBUG, " sort_buff at %p n=%d",
1345 zh->reg->key_buf + zh->reg->ptr_top - ptr_i,ptr_i);
/* Sort the ptr_i entry pointers starting at slot ptr_top - ptr_i. */
1347 qsort (zh->reg->key_buf + zh->reg->ptr_top - ptr_i, ptr_i,
1348 sizeof(char*), key_qsort_compare);
1350 /* zebra.cfg: tempfiles:
1351 Y: always use temp files (old way)
1352 A: use temp files, if more than one (auto)
1353 = if this is both the last and the first
1354 N: never bother with temp files (new) */
/* Only the first character of the setting is examined, upper-cased. */
1356 temp_policy=toupper(res_get_def(zh->res,"tempfiles","auto")[0]);
1357 if (temp_policy != 'Y' && temp_policy != 'N' && temp_policy != 'A') {
1358 yaz_log (YLOG_WARN, "Illegal tempfiles setting '%c'. using 'Auto' ",
1363 if ( ( temp_policy =='N' ) || /* always from memory */
1364 ( ( temp_policy =='A' ) && /* automatic */
1365 (zh->reg->key_file_no == 1) && /* this is first time */
1366 (final) ) ) /* and last (=only) time */
1367 { /* go directly from memory */
1368 zh->reg->key_file_no =0; /* signal not to read files */
1369 zebra_index_merge(zh);
1371 zh->reg->key_buf_used = 0;
1375 /* Not doing directly from memory, write into a temp file */
1376 extract_get_fname_tmp (zh, out_fname, zh->reg->key_file_no);
1378 if (!(outf = fopen (out_fname, "wb")))
1380 yaz_log (YLOG_FATAL|YLOG_ERRNO, "fopen %s", out_fname);
1383 yaz_log (YLOG_LOG, "writing section %d", zh->reg->key_file_no);
1384 prevcp = cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i];
1386 encode_key_init (&encode_info);
1387 encode_key_write (cp, &encode_info, outf);
/* A change of term (strcmp differs) flushes and restarts the encoder;
   otherwise only the part after the term string is written. */
1391 cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i];
1392 if (strcmp (cp, prevcp))
1394 encode_key_flush ( &encode_info, outf);
1395 encode_key_init (&encode_info);
1396 encode_key_write (cp, &encode_info, outf);
1400 encode_key_write (cp + strlen(cp), &encode_info, outf);
1402 encode_key_flush ( &encode_info, outf);
/* NOTE(review): the lines below use key_buf, ptr_top and key_file_no
   without going through zh->reg; presumably an older alternative writer
   kept in a disabled preprocessor branch - confirm. */
1404 qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_x_compare);
1405 extract_get_fname_tmp (out_fname, key_file_no);
1407 if (!(outf = fopen (out_fname, "wb")))
1409 yaz_log (YLOG_FATAL|YLOG_ERRNO, "fopen %s", out_fname);
1412 yaz_log (YLOG_LOG, "writing section %d", key_file_no);
1414 prevcp = key_buf[ptr_top-i];
1416 if (!--i || strcmp (prevcp, key_buf[ptr_top-i]))
1418 key_y_len = strlen(prevcp)+1;
1420 yaz_log (YLOG_LOG, "key_y_len: %2d %02x %02x %s",
1421 key_y_len, prevcp[0], prevcp[1], 2+prevcp);
1423 qsort (key_buf + ptr_top-ptr_i, ptr_i - i,
1424 sizeof(char*), key_y_compare);
1425 cp = key_buf[ptr_top-ptr_i];
1427 encode_key_init (&encode_info);
1428 encode_key_write (cp, &encode_info, outf);
1431 cp = key_buf[ptr_top-ptr_i];
1432 encode_key_write (cp+key_y_len, &encode_info, outf);
1434 encode_key_flush ( &encode_info, outf);
1437 prevcp = key_buf[ptr_top-ptr_i];
1442 yaz_log (YLOG_FATAL|YLOG_ERRNO, "fclose %s", out_fname);
1445 yaz_log (YLOG_LOG, "finished section %d", zh->reg->key_file_no);
/* The key data area is empty again after the section is written. */
1447 zh->reg->key_buf_used = 0;
/* Append one key and its term to the record key buffer zh->reg->keys.
   str, slen - term bytes and their length, copied after the encoded key
   key       - the key, encoded with the buffer's iscz1 codec
   The buffer grows by 128000 bytes whenever fewer than 1024 bytes of
   headroom remain. */
1450 void extract_add_it_key (ZebraHandle zh,
1452 const char *str, int slen, struct it_key *key)
1455 struct recKeys *keys = &zh->reg->keys;
1456 const char *src = (char*) key;
1458 if (keys->buf_used+1024 > keys->buf_max)
/* Grow: allocate the larger buffer and carry over what is already used. */
1460 char *b = (char *) xmalloc (keys->buf_max += 128000);
1461 if (keys->buf_used > 0)
1462 memcpy (b, keys->buf, keys->buf_used);
1466 dst = keys->buf + keys->buf_used;
1468 iscz1_encode(keys->codec_handle, &dst, &src);
1471 memcpy (dst, str, slen);
/* dst has advanced past the new entry; derive the new fill level. */
1474 keys->buf_used = dst - keys->buf;
/* Decode every key in reckeys and append one snippet entry per key to
   snippets: sequence number, register type byte, ordinal and the term
   converted with zebra_term_untrans_iconv().
   reckeys->buf must be non-NULL (asserted). */
1477 ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, struct recKeys *reckeys,
1478 zebra_snippets *snippets)
1480 void *decode_handle = iscz1_start();
1483 NMEM nmem = nmem_create();
1485 yaz_log(YLOG_LOG, "zebra_rec_keys_snippets buf=%p sz=%d", reckeys->buf,
1487 assert(reckeys->buf);
1488 while (off < reckeys->buf_used)
1490 const char *src = reckeys->buf + off;
1492 char *dst = (char*) &key;
1493 char dst_buf[IT_MAX_WORD];
1494 char *dst_term = dst_buf;
1496 iscz1_decode(decode_handle, &dst, &src);
1497 assert(key.len <= 4 && key.len > 2);
/* The last mem slot carries the sequence number. */
1499 seqno = (int) key.mem[key.len-1];
/* src[0] is the register type byte; the term follows at src+1. */
1501 zebra_term_untrans_iconv(zh, nmem, src[0], &dst_term, src+1);
1502 zebra_snippets_append(snippets, seqno, src[0], key.mem[0], dst_term);
1505 off = src - reckeys->buf;
1509 iscz1_stop(decode_handle);
/*
 * print_rec_keys: decode the keys held in reckeys->buf and log one line per
 * key (ordinal, sequence number, term).
 *   zh      - handle, passed to zebra_term_untrans_iconv
 *   reckeys - encoded key buffer (buf / buf_used); buf must be non-NULL
 * NOTE(review): the listing is non-contiguous; the declarations of off, key
 * and seqno and any release of nmem are not visible here.
 */
1513 void print_rec_keys(ZebraHandle zh, struct recKeys *reckeys)
1515 void *decode_handle = iscz1_start();
1518 NMEM nmem = nmem_create();
1520 yaz_log(YLOG_LOG, "print_rec_keys buf=%p sz=%d", reckeys->buf,
1522 assert(reckeys->buf);
/* one iteration per stored key */
1523 while (off < reckeys->buf_used)
1525 const char *src = reckeys->buf + off;
/* the decoder writes straight into the key structure */
1527 char *dst = (char*) &key;
1528 int attrSet, attrUse;
1529 char dst_buf[IT_MAX_WORD];
1530 char *dst_term = dst_buf;
1532 iscz1_decode(decode_handle, &dst, &src);
/* only keys with 3 or 4 components are accepted */
1533 assert(key.len <= 4 && key.len > 2);
/* the cast applies before the shift/mask: mem[0] is narrowed to int first.
   Neither value appears in the log call visible below. */
1535 attrSet = (int) key.mem[0] >> 16;
1536 attrUse = (int) key.mem[0] & 65535;
/* the last component is used as the sequence number */
1537 seqno = (int) key.mem[key.len-1];
/* src[0] and the string at src+1 are passed on -- presumably a register
   type byte followed by the term; TODO confirm */
1539 zebra_term_untrans_iconv(zh, nmem, src[0], &dst_term, src+1);
1541 yaz_log(YLOG_LOG, "ord=" ZINT_FORMAT " seqno=%d term=%s",
1542 key.mem[0], seqno, dst_term);
/* next key starts where src now points; src is presumably moved past the
   string on lines not shown */
1545 off = src - reckeys->buf;
1549 iscz1_stop(decode_handle);
/*
 * extract_add_index_string: build an index key for the word described by p
 * and pass it, together with str, to extract_add_it_key.
 *   p      - word descriptor (attribute, record/section ids, seqno, reg_type)
 *   str    - string to index
 *   length - byte count of str (forwarded on a line not shown; TODO confirm)
 * NOTE(review): the listing is non-contiguous; the declarations of key and
 * ch, the conditions choosing between the calls below, and the tail of the
 * final call are not visible here.
 */
1552 void extract_add_index_string (RecWord *p, const char *str, int length)
1556 ZebraHandle zh = p->extractCtrl->handle;
1557 ZebraExplainInfo zei = zh->reg->zei;
/* attribute given by name: look it up, then add it -- presumably the add
   only happens when the lookup fails */
1562 ch = zebraExplain_lookup_attr_str(zei, p->attrStr);
1564 ch = zebraExplain_add_attr_str(zei, p->attrStr);
/* attribute given by set/use pair: same lookup-then-add sequence */
1568 ch = zebraExplain_lookup_attr_su(zei, p->attrSet, p->attrUse);
1570 ch = zebraExplain_add_attr_su(zei, p->attrSet, p->attrUse);
/* components 1..3 of the key; component 0 is set on a line not shown */
1574 key.mem[1] = p->record_id;
1575 key.mem[2] = p->section_id;
1576 key.mem[3] = p->seqno;
1579 /* just for debugging .. */
1580 yaz_log(YLOG_LOG, "add: set=%d use=%d "
1581 "record_id=%lld section_id=%lld seqno=%lld",
1582 p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno);
/* hand the key over; the remaining arguments are on a line not shown */
1585 extract_add_it_key(p->extractCtrl->handle, p->reg_type, str,
/*
 * extract_add_sort_string: append a (attrSet, attrUse, length, bytes) entry
 * for the word p to the sort key buffer zh->reg->sortKeys, after scanning the
 * entries already present.
 * NOTE(review): the listing is non-contiguous; the rest of the signature, the
 * declarations of off/set/use/slen/b, the action taken on a match and the
 * step past each entry's payload are not visible here.
 */
1589 static void extract_add_sort_string (RecWord *p, const char *str,
1592 ZebraHandle zh = p->extractCtrl->handle;
1593 struct sortKeys *sk = &zh->reg->sortKeys;
/* walk the existing entries */
1596 while (off < sk->buf_used)
/* each entry starts with three encoded integers; key_SU_decode returns the
   number of bytes to advance by */
1600 off += key_SU_decode(&set, sk->buf + off);
1601 off += key_SU_decode(&use, sk->buf + off);
1602 off += key_SU_decode(&slen, sk->buf + off);
/* an entry for this set/use already exists; presumably the function returns
   here without adding another -- TODO confirm */
1604 if (p->attrSet == set && p->attrUse == use)
/* the scan must end exactly at the end of the used region */
1607 assert (off == sk->buf_used);
/* keep at least IT_MAX_WORD bytes of headroom */
1609 if (sk->buf_used + IT_MAX_WORD > sk->buf_max)
/* buf_max is raised by 128000 in the same expression that sizes the new block */
1613 b = (char *) xmalloc (sk->buf_max += 128000);
1614 if (sk->buf_used > 0)
1615 memcpy (b, sk->buf, sk->buf_used);
/* append the new entry: header integers, then the raw bytes */
1619 off += key_SU_encode(p->attrSet, sk->buf + off);
1620 off += key_SU_encode(p->attrUse, sk->buf + off);
1621 off += key_SU_encode(length, sk->buf + off);
1622 memcpy (sk->buf + off, str, length);
1623 sk->buf_used = off + length;
/*
 * extract_add_string: route a string either to the sort key buffer or to the
 * index, depending on zebra_maps_is_sort for p->reg_type.
 *   p      - word descriptor
 *   string - bytes to add
 *   length - byte count; must be greater than zero (asserted)
 * NOTE(review): source line 1631 between the two calls is not visible here;
 * presumably it is the `else` that makes them alternatives.
 */
1626 void extract_add_string (RecWord *p, const char *string, int length)
1628 assert (length > 0);
1629 if (zebra_maps_is_sort (p->zebra_maps, p->reg_type))
1630 extract_add_sort_string (p, string, length);
1632 extract_add_index_string (p, string, length);
/*
 * extract_add_incomplete_field: run p->term_buf (p->term_len bytes) through
 * zebra_maps_input, collect runs of non-space mapped output into a local
 * buffer and pass them to extract_add_string.
 * NOTE(review): the listing is non-contiguous; the enclosing loop structure,
 * the declaration of i, and the statements that copy into buf are not
 * visible here.
 */
1635 static void extract_add_incomplete_field (RecWord *p)
1637 const char *b = p->term_buf;
1638 int remain = p->term_len;
1639 const char **map = 0;
1641 yaz_log(YLOG_DEBUG, "Incomplete field, w='%.*s'", p->term_len, p->term_buf);
/* b is passed by address and remain is later recomputed from it, so the
   mapper is expected to advance b over the input it consumed */
1644 map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, 0);
/* collection buffer: IT_MAX_WORD bytes plus one spare */
1648 char buf[IT_MAX_WORD+1];
/* skip mapped output that begins with the space marker */
1652 while (map && *map && **map == *CHR_SPACE)
/* bytes left = total length minus what has been consumed so far */
1654 remain = p->term_len - (b - p->term_buf);
1656 map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, 0);
/* gather mapped output until the next space marker */
1663 while (map && *map && **map != *CHR_SPACE)
1665 const char *cp = *map;
/* copying is capped at IT_MAX_WORD bytes */
1667 while (i < IT_MAX_WORD && *cp)
1669 remain = p->term_len - (b - p->term_buf);
1671 map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, 0);
/* hand on the i bytes collected in buf */
1677 extract_add_string (p, buf, i);
/*
 * extract_add_complete_field: run p->term_buf (p->term_len bytes) through
 * zebra_maps_input and build a single string in buf, with runs of mapped
 * space replaced by one space marker between non-space runs; the result is
 * passed to extract_add_string once at the end.
 * NOTE(review): the listing is non-contiguous; the handling of CHR_CUT, the
 * statements that copy into buf, and any guard around the final call are not
 * visible here.
 */
1682 static void extract_add_complete_field (RecWord *p)
1684 const char *b = p->term_buf;
1685 char buf[IT_MAX_WORD+1];
1686 const char **map = 0;
1687 int i = 0, remain = p->term_len;
1689 yaz_log(YLOG_DEBUG, "Complete field, w='%.*s'",
1690 p->term_len, p->term_buf);
/* the last argument is 1 on this initial call (start of the field) */
1693 map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, remain, 1);
/* continue while input remains and the buffer is not full */
1695 while (remain > 0 && i < IT_MAX_WORD)
/* skip mapped output that begins with the space marker */
1697 while (map && *map && **map == *CHR_SPACE)
/* bytes left = total length minus what has been consumed so far */
1699 remain = p->term_len - (b - p->term_buf);
1703 int first = i ? 0 : 1; /* first position */
1704 map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, first);
/* something is already in buf: separate the next run with one space marker */
1712 if (i && i < IT_MAX_WORD)
1713 buf[i++] = *CHR_SPACE;
/* gather mapped output until the next space marker */
1714 while (map && *map && **map != *CHR_SPACE)
1716 const char *cp = *map;
/* cut marker: the action taken is on lines not shown */
1718 if (**map == *CHR_CUT)
1724 if (i >= IT_MAX_WORD)
/* logs the first mapped character as an integer */
1726 yaz_log(YLOG_DEBUG, "Adding string to index '%d'", **map);
/* copying is capped at IT_MAX_WORD bytes */
1727 while (i < IT_MAX_WORD && *cp)
1730 remain = p->term_len - (b - p->term_buf);
1733 map = zebra_maps_input (p->zebra_maps, p->reg_type, &b,
/* hand on the i bytes collected in buf */
1742 extract_add_string (p, buf, i);
/*
 * extract_token_add: entry point for adding the token described by p.
 * Applies zebra_replace to the term and then dispatches to the complete-field
 * or incomplete-field handler according to zebra_maps_is_complete.
 * NOTE(review): the listing is non-contiguous; the declaration of wrbuf, the
 * end of the log call and the `else` between the two handlers (source line
 * 1762) are not visible here.
 */
1745 void extract_token_add (RecWord *p)
/* NOTE(review): this log reads p->length and prints seqno with %d, whereas
   the rest of the function uses term_len and another log in this file prints
   p->seqno with %lld -- check whether this call is compiled at all */
1749 yaz_log (YLOG_LOG, "token_add "
1750 "reg_type=%c attrSet=%d attrUse=%d seqno=%d s=%.*s",
1751 p->reg_type, p->attrSet, p->attrUse, p->seqno, p->length,
/* a non-NULL result replaces the term buffer and length in p */
1754 if ((wrbuf = zebra_replace(p->zebra_maps, p->reg_type, 0,
1755 p->term_buf, p->term_len)))
1757 p->term_buf = wrbuf_buf(wrbuf);
1758 p->term_len = wrbuf_len(wrbuf);
1760 if (zebra_maps_is_complete (p->zebra_maps, p->reg_type))
1761 extract_add_complete_field (p);
1763 extract_add_incomplete_field(p);
/*
 * extract_set_store_data_cb: replace the handle's stored data with a private
 * copy of buf.
 *   p   - extract control block; p->handle is the ZebraHandle
 *   buf - data to copy
 *   sz  - number of bytes in buf
 * NOTE(review): the listing is non-contiguous; source lines 1774-1775 between
 * the reset and the copy are not visible -- presumably a guard on buf/sz.
 */
1766 static void extract_set_store_data_cb(struct recExtractCtrl *p,
1767 void *buf, size_t sz)
1769 ZebraHandle zh = (ZebraHandle) p->handle;
/* drop whatever was stored before and reset to the empty state */
1771 xfree(zh->store_data_buf);
1772 zh->store_data_buf = 0;
1773 zh->store_data_size = 0;
/* store an owned copy of the caller's data */
1776 zh->store_data_buf = xmalloc(sz);
1777 zh->store_data_size = sz;
1778 memcpy(zh->store_data_buf, buf, sz);
/*
 * extract_set_store_data_prepare: clear any stored data on the handle and
 * install extract_set_store_data_cb as p->setStoreData.
 *   p - extract control block; p->handle is the ZebraHandle
 */
1782 static void extract_set_store_data_prepare(struct recExtractCtrl *p)
1784 ZebraHandle zh = (ZebraHandle) p->handle;
/* free the previous buffer and reset to the empty state */
1785 xfree(zh->store_data_buf);
1786 zh->store_data_buf = 0;
1787 zh->store_data_size = 0;
/* later calls through p->setStoreData land in the callback */
1788 p->setStoreData = extract_set_store_data_cb;
/*
 * extract_schema_add: forward oid to zebraExplain_addSchema on the explain
 * info (zh->reg->zei) of the handle stored in p.
 *   p   - extract control block; p->handle is the ZebraHandle
 *   oid - schema OID to pass on
 */
1791 void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid)
1793 ZebraHandle zh = (ZebraHandle) p->handle;
1794 zebraExplain_addSchema (zh->reg->zei, oid);
/*
 * extract_flushSortKeys: walk the entries in sk and write them to the sort
 * index of zh for record sysno.
 *   zh    - handle; zh->reg->sortIdx is the destination
 *   sysno - record the entries belong to
 *   cmd   - not referenced on the visible lines; presumably selects between
 *           the two sortIdx_add calls below -- TODO confirm
 *   sk    - sort key buffer (buf / buf_used)
 * NOTE(review): the listing is non-contiguous; the declarations of
 * off/set/use/slen, the condition between the two sortIdx_add calls and the
 * step past each entry's payload are not visible here.
 */
1797 void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno,
1798 int cmd, struct sortKeys *sk)
1800 SortIdx sortIdx = zh->reg->sortIdx;
/* select the record first */
1803 sortIdx_sysno (sortIdx, sysno);
1805 while (off < sk->buf_used)
/* entry header: three encoded integers; key_SU_decode returns the number of
   bytes to advance by */
1809 off += key_SU_decode(&set, sk->buf + off);
1810 off += key_SU_decode(&use, sk->buf + off);
1811 off += key_SU_decode(&slen, sk->buf + off);
/* only `use` is passed on; `set` is decoded but unused on the visible lines */
1813 sortIdx_type(sortIdx, use);
/* either the entry's slen payload bytes ... */
1815 sortIdx_add(sortIdx, sk->buf + off, slen);
/* ... or a single NUL byte */
1817 sortIdx_add(sortIdx, "", 1);
/*
 * encode_key_init: prepare an encode_info for a new run of
 * encode_key_write calls.
 * NOTE(review): source lines 1823-1830 are not visible in this listing, so
 * only the codec-handle setup is documented; other fields of *i are
 * presumably reset there.
 */
1822 void encode_key_init (struct encode_info *i)
/* obtain a codec handle from iscz1_start() and keep it in the state */
1831 i->encode_handle = iscz1_start();
1837 /* this is the old encode_key_write
1838 * may be deleted once we are confident that the new one works
/*
 * encode_key_write (first of the two variants in this file): copy the term
 * at k into i->buf, encode the key stored after it with the codec handle in
 * i, and write the result to outf with a single fwrite.
 *   k    - term string, followed by a command byte and then the raw key
 *   i    - encoder state (buf, encode_handle)
 *   outf - output stream
 * NOTE(review): the listing is non-contiguous; the declaration of key, the
 * assignment of bp0 and the statements after the failure log are not visible
 * here. Which variant is compiled is decided by preprocessor lines not shown.
 */
1841 void encode_key_write (char *k, struct encode_info *i, FILE *outf)
1844 char *bp = i->buf, *bp0;
1845 const char *src = (char *) &key;
1847 /* copy term to output buf */
/* copies up to and including the terminating NUL; k ends one past it */
1848 while ((*bp++ = *k++))
1850 /* and copy & align key so we can mangle */
1851 memcpy (&key, k+1, sizeof(struct it_key)); /* *k is insert/delete */
/* bp is passed by address; the length computed below relies on it having
   been advanced past the encoded key */
1854 iscz1_encode(i->encode_handle, &bp, &src);
1855 *bp0 = (*k * 128) + bp - bp0 - 1; /* length and insert/delete combined */
/* term plus encoded key go out as one item */
1856 if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
1858 yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite");
/*
 * encode_key_flush (first of the two variants in this file): on the visible
 * lines nothing is written to outf; the only action is to pass the codec
 * handle held in i to iscz1_stop.
 *   i    - encoder state
 *   outf - not referenced on the visible lines
 */
1863 void encode_key_flush (struct encode_info *i, FILE *outf)
1864 { /* dummy routine */
1865 iscz1_stop(i->encode_handle);
1870 /* new encode_key_write
1871 * The idea is to buffer one more key, and compare them
1872 * If we are going to delete and insert the same key,
1873 * we may as well not bother. Should make a difference in
1874 * updates with small modifications (appending to a mbox)
/*
 * encode_key_write (buffering variant): holds one key back in the encoder
 * state (prevsys / prevseq / prevcmd) and compares it with the incoming one.
 * Per the inline comments, a pair with the same numbers and a different
 * command cancels out; otherwise the held key is written and the incoming
 * one takes its place.
 *   k    - term string (may be empty), followed by further key data that is
 *          read from k+1
 *   i    - encoder state: buf/keylen hold the current term, sysno/seqno/cmd
 *          the last written values, prev* the held-back key
 *   outf - output stream
 * NOTE(review): the listing is non-contiguous; the declarations of bp and
 * key, the third operand of the cancel condition, the bodies of several
 * branches and the updates of prevcmd are not visible here. Treat the notes
 * below as a reading of the visible lines only.
 */
1876 void encode_key_write (char *k, struct encode_info *i, FILE *outf)
1881 if (*k) /* first time for new key */
/* copy the term including its NUL into the buffer */
1884 while ((*bp++ = *k++))
/* term length without the NUL */
1886 i->keylen= bp - i->buf -1;
/* the term, its NUL and one it_key must fit in the buffer */
1887 assert(i->keylen+1+sizeof(struct it_key) < ENCODE_BUFLEN);
/* presumably the empty-term path: position bp after the term already in
   the buffer -- TODO confirm against the hidden lines */
1891 bp=i->buf + i->keylen;
/* copy the key out of the input so its fields can be read; how k is
   positioned in the empty-term case depends on lines not shown */
1896 memcpy (&key, k+1, sizeof(struct it_key));
1897 if (0==i->prevsys) /* no previous filter, fill up */
1899 i->prevsys=key.sysno;
1900 i->prevseq=key.seqno;
1903 else if ( (i->prevsys==key.sysno) &&
1904 (i->prevseq==key.seqno) &&
1906 { /* same numbers, diff cmd, they cancel out */
1910 { /* different stuff, write previous, move buf */
/* first integer: sysno delta against the last written value, doubled, plus
   the held command */
1911 bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp);
/* sysno changed: adopt the new value (further resets, if any, are hidden) */
1912 if (i->sysno != i->prevsys)
1914 i->sysno = i->prevsys;
1917 else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd)
1919 return; /* ??? Filters some sort of duplicates away */
1920 /* ??? Can this ever happen -H 15oct02 */
/* second integer: seqno delta against the last written value */
1922 bp = encode_key_int (i->prevseq - i->seqno, bp);
1923 i->seqno = i->prevseq;
1924 i->cmd = i->prevcmd;
/* everything from the start of the buffer up to bp goes out as one item */
1925 if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
1927 yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite");
1930 i->keylen=0; /* ok, it's written, forget it */
/* the incoming key becomes the held-back one */
1931 i->prevsys=key.sysno;
1932 i->prevseq=key.seqno;
1937 void encode_key_flush (struct encode_info *i, FILE *outf)
1938 { /* flush the last key from i */
1939 char *bp =i->buf + i->keylen;
1942 return; /* nothing to flush */
1945 bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp);
1946 if (i->sysno != i->prevsys)
1948 i->sysno = i->prevsys;
1951 else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd)
1953 return; /* ??? Filters some sort of duplicates away */
1954 /* ??? Can this ever happen -H 15oct02 */
1956 bp = encode_key_int (i->prevseq - i->seqno, bp);
1957 i->seqno = i->prevseq;
1958 i->cmd = i->prevcmd;
1959 if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
1961 yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite");
1964 i->keylen=0; /* ok, it's written, forget it */
1965 i->prevsys=0; /* forget the values too */