+ memcpy (dst, &attrSet, sizeof(attrSet));
+ dst += sizeof(attrSet);
+ }
+ if (!(lead & 2))
+ {
+ memcpy (dst, &attrUse, sizeof(attrUse));
+ dst += sizeof(attrUse);
+ }
+#endif
+ *dst++ = p->reg_type;
+ memcpy (dst, string, length);
+ dst += length;
+ *dst++ = '\0';
+
+ if (!diff)
+ {
+ memcpy (dst, pseqno, sizeof(*pseqno));
+ dst += sizeof(*pseqno);
+ }
+ keys->buf_used = dst - keys->buf;
+}
+
+/*
+ * Record a sort key for p's (attrSet, attrUse) pair in the register's
+ * sortKeys buffer.
+ *
+ * The buffer is a sequence of entries, each written as
+ *   key_SU_encode(attrSet) key_SU_encode(attrUse) key_SU_encode(length)
+ * followed by `length` raw bytes.  Only the first string seen for a given
+ * (attrSet, attrUse) pair is stored; a later call for the same pair returns
+ * without touching the buffer.
+ *
+ * p       word being extracted; supplies attrSet, attrUse and the handle
+ * string  key bytes (copied with memcpy, no terminator is written)
+ * length  number of bytes in string
+ */
+static void extract_add_sort_string (RecWord *p, const char *string,
+                                     int length)
+{
+    /* Room reserved for the three key_SU_encode() headers of one entry.
+     * NOTE(review): key_SU_encode is not visible here; 16 bytes per encoded
+     * integer is assumed to be a safe upper bound -- confirm. */
+    enum { SU_HEADER_RESERVE = 3 * 16 };
+    ZebraHandle zh = p->extractCtrl->handle;
+    struct sortKeys *sk = &zh->reg->sortKeys;
+    int off = 0;
+    int payload, required;
+
+    /* Walk the existing entries; bail out if this pair is already there. */
+    while (off < sk->buf_used)
+    {
+        int set, use, slen;
+
+        off += key_SU_decode(&set, sk->buf + off);
+        off += key_SU_decode(&use, sk->buf + off);
+        off += key_SU_decode(&slen, sk->buf + off);
+        off += slen;
+        if (p->attrSet == set && p->attrUse == use)
+            return;
+    }
+    assert (off == sk->buf_used);
+
+    /* The previous check reserved only IT_MAX_WORD bytes, which ignored the
+     * header bytes written below and any length above IT_MAX_WORD, so the
+     * append could run past the end of the buffer.  Size the check from
+     * what is really written, never reserving less than before. */
+    payload = length > IT_MAX_WORD ? length : IT_MAX_WORD;
+    required = sk->buf_used + SU_HEADER_RESERVE + payload;
+    if (required > sk->buf_max)
+    {
+        int new_max = sk->buf_max + 128000;
+        char *b;
+
+        if (new_max < required)
+            new_max = required;
+        sk->buf_max = new_max;
+        b = (char *) xmalloc (new_max);
+        if (sk->buf_used > 0)
+            memcpy (b, sk->buf, sk->buf_used);
+        xfree (sk->buf);
+        sk->buf = b;
+    }
+
+    /* Append the new entry: three encoded integers, then the key bytes. */
+    off += key_SU_encode(p->attrSet, sk->buf + off);
+    off += key_SU_encode(p->attrUse, sk->buf + off);
+    off += key_SU_encode(length, sk->buf + off);
+    memcpy (sk->buf + off, string, length);
+    sk->buf_used = off + length;
+}
+
+/*
+ * Hand one extracted string to the sort-key path or the index-key path,
+ * depending on what zebra_maps_is_sort() reports for p->reg_type.
+ *
+ * length must be greater than zero (asserted).
+ */
+void extract_add_string (RecWord *p, const char *string, int length)
+{
+    assert (length > 0);
+
+    if (!zebra_maps_is_sort (p->zebra_maps, p->reg_type))
+    {
+        extract_add_index_string (p, string, length);
+        return;
+    }
+    extract_add_sort_string (p, string, length);
+}
+
+/*
+ * Break p->string into words and pass each word to extract_add_string().
+ *
+ * Input is pulled through zebra_maps_input() one mapped unit at a time.
+ * Units whose first byte matches the first byte of CHR_SPACE separate
+ * words.  At most IT_MAX_WORD bytes of a word are kept; the rest of an
+ * over-long word is consumed and dropped.  p->seqno is incremented after
+ * every word that is added.
+ */
+static void extract_add_incomplete_field (RecWord *p)
+{
+    const char *cursor = p->string;
+    const char **map = 0;
+    int left = p->length;
+
+    if (left > 0)
+        map = zebra_maps_input(p->zebra_maps, p->reg_type, &cursor, left);
+
+    while (map)
+    {
+        char word[IT_MAX_WORD+1];
+        int n = 0;
+
+        /* Step over any separators in front of the next word. */
+        while (map && *map && **map == *CHR_SPACE)
+        {
+            left = p->length - (cursor - p->string);
+            map = (left > 0)
+                ? zebra_maps_input(p->zebra_maps, p->reg_type, &cursor, left)
+                : 0;
+        }
+        if (!map)
+            break;
+
+        /* Collect mapped units until a separator or the end of input. */
+        while (map && *map && **map != *CHR_SPACE)
+        {
+            const char *src;
+
+            for (src = *map; n < IT_MAX_WORD && *src; src++)
+                word[n++] = *src;
+            left = p->length - (cursor - p->string);
+            map = (left > 0)
+                ? zebra_maps_input(p->zebra_maps, p->reg_type, &cursor, left)
+                : 0;
+        }
+
+        /* Nothing collected: stop processing this field altogether. */
+        if (n == 0)
+            return;
+        extract_add_string (p, word, n);
+        p->seqno++;
+    }
+}
+
+/*
+ * Add the whole of p->string as a single key.
+ *
+ * Input is pulled through zebra_maps_input().  Leading and trailing runs of
+ * units that match CHR_SPACE are dropped, and each interior run is replaced
+ * by one CHR_SPACE byte.  The result is cut off at IT_MAX_WORD bytes and,
+ * if not empty, passed to extract_add_string() once.
+ */
+static void extract_add_complete_field (RecWord *p)
+{
+    const char *b = p->string;
+    char buf[IT_MAX_WORD+1];
+    const char **map = 0;
+    int i = 0, remain = p->length;
+
+    if (remain > 0)
+        map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, remain);
+
+    while (remain > 0 && i < IT_MAX_WORD)
+    {
+        /* Skip a run of separators. */
+        while (map && *map && **map == *CHR_SPACE)
+        {
+            remain = p->length - (b - p->string);
+            if (remain > 0)
+                map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
+            else
+                map = 0;
+        }
+        /* Stop at end of input.  Also stop when the map has no first entry:
+         * neither inner loop consumes input in that state, so without this
+         * test the outer loop would spin forever (i == 0) or pad buf with
+         * spaces up to IT_MAX_WORD (i > 0).
+         * NOTE(review): whether zebra_maps_input can return such a map is
+         * not visible here -- confirm. */
+        if (!map || !*map)
+            break;
+
+        /* One separator between words; the loop condition already
+         * guarantees i < IT_MAX_WORD at this point. */
+        if (i)
+            buf[i++] = *CHR_SPACE;
+
+        /* Copy one word, giving up as soon as buf is full. */
+        while (i < IT_MAX_WORD && map && *map && **map != *CHR_SPACE)
+        {
+            const char *cp = *map;
+
+            while (i < IT_MAX_WORD && *cp)
+                buf[i++] = *(cp++);
+            remain = p->length - (b - p->string);
+            if (remain > 0)
+                map = zebra_maps_input (p->zebra_maps, p->reg_type, &b,
+                                        remain);
+            else
+                map = 0;
+        }
+    }
+    if (!i)
+        return;
+    extract_add_string (p, buf, i);
+}
+
+/*
+ * Entry point for one token handed over by a record filter.
+ *
+ * The token is first run through zebra_replace(); when that yields a
+ * buffer, p->string and p->length are redirected to it.  The token then
+ * goes to the complete-field or the incomplete-field extractor, as chosen
+ * by zebra_maps_is_complete() for p->reg_type.
+ */
+void extract_token_add (RecWord *p)
+{
+    WRBUF replaced;
+#if 0
+    yaz_log (LOG_LOG, "token_add "
+             "reg_type=%c attrSet=%d attrUse=%d seqno=%d s=%.*s",
+             p->reg_type, p->attrSet, p->attrUse, p->seqno, p->length,
+             p->string);
+#endif
+    replaced = zebra_replace(p->zebra_maps, p->reg_type, 0,
+                             p->string, p->length);
+    if (replaced)
+    {
+        p->string = wrbuf_buf(replaced);
+        p->length = wrbuf_len(replaced);
+    }
+
+    if (!zebra_maps_is_complete (p->zebra_maps, p->reg_type))
+        extract_add_incomplete_field(p);
+    else
+        extract_add_complete_field (p);
+}
+
+/*
+ * Forward a schema OID to zebraExplain_addSchema() for the register that
+ * belongs to the handle stored in the extract control block.
+ */
+void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid)
+{
+    zebraExplain_addSchema (((ZebraHandle) (p->handle))->reg->zei, oid);
+}
+
+/*
+ * Push every entry of a sortKeys buffer into the register's sort index
+ * for record `sysno`.
+ *
+ * Each entry is decoded as three key_SU_decode() integers (set, use,
+ * length) followed by `length` key bytes.  For cmd == 1 the stored bytes
+ * are added; for any other cmd a one-byte empty string is added instead.
+ * The decoded set value is only used to advance through the buffer.
+ */
+void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno,
+                            int cmd, struct sortKeys *sk)
+{
+    SortIdx sortIdx = zh->reg->sortIdx;
+    int pos;
+
+    sortIdx_sysno (sortIdx, sysno);
+
+    for (pos = 0; pos < sk->buf_used; )
+    {
+        int set, use, slen;
+
+        pos += key_SU_decode(&set, sk->buf + pos);
+        pos += key_SU_decode(&use, sk->buf + pos);
+        pos += key_SU_decode(&slen, sk->buf + pos);
+
+        sortIdx_type(sortIdx, use);
+        if (cmd != 1)
+            sortIdx_add(sortIdx, "", 1);
+        else
+            sortIdx_add(sortIdx, sk->buf + pos, slen);
+
+        /* Step over the key bytes to reach the next entry. */
+        pos += slen;
+    }
+}
+
+/*
+ * Reset an encoder state: all sysno/seqno fields and keylen go to 0,
+ * both cmd fields go to -1.
+ */
+void encode_key_init (struct encode_info *i)
+{
+    /* state of the last key written */
+    i->sysno = 0;
+    i->seqno = 0;
+    i->cmd = -1;
+
+    /* state of the buffered ("previous") key */
+    i->prevsys = 0;
+    i->prevseq = 0;
+    i->prevcmd = -1;
+
+    i->keylen = 0;
+}
+
+/*
+ * Store d at bp in a 1 to 4 byte big-endian form and return the address
+ * just past the last byte written.
+ *
+ *   d <= 63        1 byte:  d
+ *   d <= 16383     2 bytes: 64  + (d >> 8),  low byte
+ *   d <= 4194303   3 bytes: 128 + (d >> 16), middle byte, low byte
+ *   otherwise      4 bytes: 192 + (d >> 24), then the three lower bytes
+ *
+ * Negative values take the one-byte branch, and for d >= 2^30 the sum
+ * 192 + (d >> 24) no longer fits in one byte; neither case is checked here.
+ */
+char *encode_key_int (int d, char *bp)
+{
+    if (d <= 63)
+    {
+        bp[0] = d;
+        return bp + 1;
+    }
+    if (d <= 16383)
+    {
+        bp[0] = 64 + (d>>8);
+        bp[1] = d & 255;
+        return bp + 2;
+    }
+    if (d <= 4194303)
+    {
+        bp[0] = 128 + (d>>16);
+        bp[1] = (d>>8) & 255;
+        bp[2] = d & 255;
+        return bp + 3;
+    }
+    bp[0] = 192 + (d>>24);
+    bp[1] = (d>>16) & 255;
+    bp[2] = (d>>8) & 255;
+    bp[3] = d & 255;
+    return bp + 4;
+}
+#define OLDENCODE 1
+
+#ifdef OLDENCODE
+/* This is the old encode_key_write.
+ * It may be deleted once we are confident that the new one works.
+ * HL 15-oct-2002
+ */
+/*
+ * Delta-encode one key into i->buf and write it to outf.
+ *
+ * k points at a NUL-terminated string, followed by one command byte,
+ * followed by a struct it_key.  The string (with its NUL) is copied to
+ * i->buf, then two encode_key_int() values are appended:
+ *   (sysno delta) * 2 + command byte, and the seqno delta,
+ * both relative to the values remembered in *i.  When sysno is unchanged,
+ * both seqno values are zero and the command equals the previous one, the
+ * function returns without writing anything.
+ *
+ * A failed fwrite() is logged and terminates the process with exit(1).
+ */
+void encode_key_write (char *k, struct encode_info *i, FILE *outf)
+{
+    struct it_key key;
+    char *out = i->buf;
+    char cmd;
+
+    /* Copy the string including its terminator; k ends up on the command
+     * byte that follows it. */
+    for (;;)
+    {
+        char c = *k++;
+
+        *out++ = c;
+        if (!c)
+            break;
+    }
+    cmd = *k;
+    memcpy (&key, k+1, sizeof(struct it_key));
+
+    out = encode_key_int ( (key.sysno - i->sysno) * 2 + cmd, out);
+    if (i->sysno == key.sysno)
+    {
+        if (!i->seqno && !key.seqno && i->cmd == cmd)
+            return;
+    }
+    else
+    {
+        i->sysno = key.sysno;
+        i->seqno = 0;
+    }
+
+    out = encode_key_int (key.seqno - i->seqno, out);
+    i->seqno = key.seqno;
+    i->cmd = cmd;
+
+    if (fwrite (i->buf, out - i->buf, 1, outf) != 1)
+    {
+        logf (LOG_FATAL|LOG_ERRNO, "fwrite");
+        exit (1);
+    }
+}
+
+/*
+ * Flush hook for the old encoder.  This variant of encode_key_write()
+ * writes every key immediately, so nothing is done here.
+ */
+void encode_key_flush (struct encode_info *i, FILE *outf)
+{
+    (void) i;
+    (void) outf;
+}
+
+#else
+
+/* new encode_key_write
+ * The idea is to buffer one more key, and compare them
+ * If we are going to delete and insert the same key,
+ * we may as well not bother. Should make a difference in
+ * updates with small modifications (appending to a mbox)
+ */
+void encode_key_write (char *k, struct encode_info *i, FILE *outf)
+{
+ struct it_key key;
+ char *bp;
+
+ if (*k) /* first time for new key */
+ {
+ bp = i->buf;
+ while ((*bp++ = *k++))
+ ;
+ i->keylen= bp - i->buf -1;
+ assert(i->keylen+1+sizeof(struct it_key) < ENCODE_BUFLEN);
+ }
+ else
+ {
+ bp=i->buf + i->keylen;
+ *bp++=0;
+ k++;
+ }
+
+ memcpy (&key, k+1, sizeof(struct it_key));
+ if (0==i->prevsys) /* no previous filter, fill up */
+ {
+ i->prevsys=key.sysno;
+ i->prevseq=key.seqno;
+ i->prevcmd=*k;
+ }
+ else if ( (i->prevsys==key.sysno) &&
+ (i->prevseq==key.seqno) &&
+ (i->prevcmd!=*k) )
+ { /* same numbers, diff cmd, they cancel out */
+ i->prevsys=0;
+ }
+ else
+ { /* different stuff, write previous, move buf */
+ bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp);
+ if (i->sysno != i->prevsys)
+ {
+ i->sysno = i->prevsys;
+ i->seqno = 0;
+ }
+ else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd)
+ {
+ return; /* ??? Filters some sort of duplicates away */
+ /* ??? Can this ever happen -H 15oct02 */
+ }
+ bp = encode_key_int (i->prevseq - i->seqno, bp);
+ i->seqno = i->prevseq;
+ i->cmd = i->prevcmd;
+ if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
+ {
+ logf (LOG_FATAL|LOG_ERRNO, "fwrite");
+ exit (1);
+ }
+ i->keylen=0; /* ok, it's written, forget it */
+ i->prevsys=key.sysno;
+ i->prevseq=key.seqno;
+ i->prevcmd=*k;
+ }