-/* $Id: extract.c,v 1.258 2007-05-08 14:27:23 adam Exp $
+/* $Id: extract.c,v 1.262 2007-08-31 07:02:24 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
}
}
+/* Prepare an extraction control block before a record is handed to a
+ * record type handler: seed the sequence number slot for every value
+ * 0..255 used as index type, and derive flagShowRecords from the handle.
+ */
+static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl)
+{
+    int index_type = 0;
+
+    while (index_type < 256)
+    {
+        /* positioned index types start counting at 1, all others at 0 */
+        ctrl->seqno[index_type] =
+            zebra_maps_is_positioned(zh->reg->zebra_maps, index_type) ? 1 : 0;
+        index_type++;
+    }
+    /* show-records flag is the negation of m_flag_rw */
+    ctrl->flagShowRecords = !zh->m_flag_rw;
+}
+
+
static void extract_add_index_string (RecWord *p,
zinfo_index_category_t cat,
const char *str, int length);
w->segment = 0;
}
+struct snip_rec_info { /* context passed to the snippet callbacks through extractCtrl.handle */
+ ZebraHandle zh; /* handle whose register (zebra_maps, zei) the callbacks consult */
+ zebra_snippets *snippets; /* collection that receives the extracted snippets */
+};
+
+/* Emit at most one snippet for a field handled as a single complete term
+ * (chosen by snippet_token_add when zebra_maps_is_complete() is true).
+ * The term buffer is fed through zebra_maps_input(); start tracks the input
+ * position after leading mapped whitespace and last the position after the
+ * most recent non-space unit.  If anything was collected, the raw span
+ * [start, last) is appended to h->snippets with p->seqno and ord.
+ */
+static void snippet_add_complete_field(RecWord *p, int ord)
+{
+ struct snip_rec_info *h = p->extractCtrl->handle;
+ ZebraHandle zh = h->zh;
+
+ const char *b = p->term_buf;
+ char buf[IT_MAX_WORD+1]; /* filled below but never read back; only the fill level i is used */
+ const char **map = 0;
+ int i = 0, remain = p->term_len;
+ const char *start = b;
+ const char *last = 0;
+ /* prime the scan; the final argument 1 flags the first position */
+ if (remain > 0)
+ map = zebra_maps_input (zh->reg->zebra_maps, p->index_type, &b, remain, 1);
+ /* NOTE(review): if zebra_maps_input() can return a non-NULL map whose
+ * first entry is NULL while i is still 0, neither inner loop below
+ * advances and this loop would not terminate - confirm against the
+ * zebra_maps_input() contract. */
+ while (remain > 0 && i < IT_MAX_WORD)
+ {
+ while (map && *map && **map == *CHR_SPACE) /* skip mapped whitespace */
+ {
+ remain = p->term_len - (b - p->term_buf);
+
+ if (i == 0)
+ start = b; /* set to first non-ws area */
+ if (remain > 0)
+ {
+ int first = i ? 0 : 1; /* first position */
+
+ map = zebra_maps_input(zh->reg->zebra_maps, p->index_type,
+ &b, remain, first);
+ }
+ else
+ map = 0;
+ }
+ if (!map)
+ break;
+ /* once something is collected, separate units with one CHR_SPACE */
+ if (i && i < IT_MAX_WORD)
+ buf[i++] = *CHR_SPACE;
+ while (map && *map && **map != *CHR_SPACE)
+ {
+ const char *cp = *map;
+
+ if (**map == *CHR_CUT) /* CHR_CUT resets the collected length */
+ {
+ i = 0;
+ }
+ else
+ {
+ if (i >= IT_MAX_WORD)
+ break;
+ while (i < IT_MAX_WORD && *cp)
+ buf[i++] = *(cp++);
+ }
+ last = b;
+ remain = p->term_len - (b - p->term_buf);
+ if (remain > 0)
+ {
+ map = zebra_maps_input (zh->reg->zebra_maps, p->index_type, &b,
+ remain, 0);
+ }
+ else
+ map = 0;
+ }
+ }
+ if (!i) /* nothing collected: no snippet */
+ return;
+ if (last && start != last) /* only a non-empty source span is emitted */
+ zebra_snippets_appendn(h->snippets, p->seqno, 0, ord,
+ start, last - start);
+}
+
+/* Emit snippets for a field that is split into separate terms (chosen by
+ * snippet_token_add when zebra_maps_is_complete() is false).
+ *
+ * The term buffer is fed through zebra_maps_input().  Each run of mapped
+ * whitespace that is followed by a term is appended to h->snippets with
+ * third argument 1, and each term with third argument 0; both use the
+ * raw input span [start, last) and the index ordinal ord.
+ *
+ * Side effect: p->seqno is incremented once after every term, plus once
+ * before the first term when zebra_maps_is_first_in_field() is true.
+ */
+static void snippet_add_incomplete_field(RecWord *p, int ord)
+{
+    struct snip_rec_info *h = p->extractCtrl->handle;
+    ZebraHandle zh = h->zh;
+    const char *b = p->term_buf;
+    int remain = p->term_len;
+    int first = 1;
+    const char **map = 0;
+    const char *start = b;
+    const char *last = b;
+
+    if (remain > 0)
+        map = zebra_maps_input(zh->reg->zebra_maps, p->index_type,
+                               &b, remain, 0);
+
+    while (map)
+    {
+        /* Only whether the term has any characters matters below, so the
+         * mapped characters are counted (capped at IT_MAX_WORD) rather
+         * than copied into a buffer. */
+        int collected = 0;
+
+        /* Skip spaces */
+        while (map && *map && **map == *CHR_SPACE)
+        {
+            remain = p->term_len - (b - p->term_buf);
+            last = b;
+            if (remain > 0)
+                map = zebra_maps_input(zh->reg->zebra_maps, p->index_type,
+                                       &b, remain, 0);
+            else
+                map = 0;
+        }
+        if (!map)
+            break;  /* input ended in whitespace: that run is not emitted */
+        if (start != last)
+            zebra_snippets_appendn(h->snippets, p->seqno, 1, ord,
+                                   start, last - start);
+        start = last;
+
+        /* Consume one term: every mapped unit up to the next space */
+        while (map && *map && **map != *CHR_SPACE)
+        {
+            const char *cp = *map;
+
+            while (collected < IT_MAX_WORD && *cp)
+            {
+                collected++;
+                cp++;
+            }
+            remain = p->term_len - (b - p->term_buf);
+            last = b;
+            if (remain > 0)
+                map = zebra_maps_input(zh->reg->zebra_maps, p->index_type,
+                                       &b, remain, 0);
+            else
+                map = 0;
+        }
+        if (!collected)
+            return;
+
+        if (first)
+        {
+            first = 0;
+            if (zebra_maps_is_first_in_field(zh->reg->zebra_maps,
+                                             p->index_type))
+            {
+                /* first in field marker */
+                p->seqno++;
+            }
+        }
+        if (start != last)
+            zebra_snippets_appendn(h->snippets, p->seqno, 0, ord,
+                                   start, last - start);
+        start = last;
+        p->seqno++;
+    }
+}
+
+/* Token callback used while collecting snippets.  Words whose index type
+ * is not an index according to zebra_maps_is_index() are ignored; for the
+ * rest the index ordinal is looked up from the index type and name, and
+ * the word is passed to the complete-field or incomplete-field collector
+ * depending on zebra_maps_is_complete().
+ */
+static void snippet_token_add(RecWord *p)
+{
+    struct snip_rec_info *info = p->extractCtrl->handle;
+    ZebraHandle zh = info->zh;
+    int ord;
+
+    if (!zebra_maps_is_index(zh->reg->zebra_maps, p->index_type))
+        return;
+
+    ord = zebraExplain_lookup_attr_str(zh->reg->zei,
+                                       zinfo_index_category_index,
+                                       p->index_type, p->index_name);
+
+    if (zebra_maps_is_complete(zh->reg->zebra_maps, p->index_type))
+        snippet_add_complete_field(p, ord);
+    else
+        snippet_add_incomplete_field(p, ord);
+}
+
+/* Schema callback installed by extract_snippet; it does nothing with the
+ * control block or the OID.
+ */
+static void snippet_schema_add(struct recExtractCtrl *p, Odr_oid *oid)
+{
+    /* empty: no schema handling is performed here */
+}
+
+/* Run a record type's extract handler over stream and collect snippets.
+ *
+ * zh                 handle; zh->reg and zh->reg->dh must be set (asserted)
+ * sn                 snippet collection that the token callback appends to
+ * stream             record stream handed to the extract handler
+ * rt                 record type whose extract function is invoked
+ * recTypeClientData  passed through as first argument of rt->extract
+ *
+ * The control block is set up with snippet_token_add / snippet_schema_add
+ * as callbacks and a snip_rec_info (zh + sn) as callback handle.
+ */
+void extract_snippet(ZebraHandle zh, zebra_snippets *sn,
+                     struct ZebraRecStream *stream,
+                     RecType rt, void *recTypeClientData)
+{
+    struct recExtractCtrl extractCtrl;
+    struct snip_rec_info info;
+
+    assert(zh->reg);
+    assert(zh->reg->dh);
+
+    /* callback context: lives on this stack frame for the whole call */
+    info.zh = zh;
+    info.snippets = sn;
+
+    extractCtrl.stream = stream;
+    extractCtrl.first_record = 1;
+    extractCtrl.init = extract_init;
+    extractCtrl.tokenAdd = snippet_token_add;
+    extractCtrl.schemaAdd = snippet_schema_add;
+    extractCtrl.dh = zh->reg->dh;
+    extractCtrl.handle = &info;
+    extractCtrl.match_criteria[0] = '\0';
+    extractCtrl.staticrank = 0;
+    extractCtrl.action = action_insert;
+
+    init_extractCtrl(zh, &extractCtrl);
+
+    extractCtrl.setStoreData = 0;
+
+    /* This function returns void, so the handler result cannot be
+     * reported to the caller; discard it explicitly. */
+    (void) (*rt->extract)(recTypeClientData, &extractCtrl);
+}
+
static void searchRecordKey(ZebraHandle zh,
zebra_rec_keys_t reckeys,
const char *index_name,
struct recordGroup *rGroup;
};
-static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl)
-{
- int i;
- for (i = 0; i<256; i++)
- {
- if (zebra_maps_is_positioned(zh->reg->zebra_maps, i))
- ctrl->seqno[i] = 1;
- else
- ctrl->seqno[i] = 0;
- }
- ctrl->flagShowRecords = !zh->m_flag_rw;
-}
-
static void all_matches_add(struct recExtractCtrl *ctrl)
{
RecWord word;
assert(index_type);
zebra_term_untrans_iconv(zh, nmem, index_type,
&dst_term, str);
- zebra_snippets_append(snippets, seqno, ord, dst_term);
+ zebra_snippets_append(snippets, seqno, 0, ord, dst_term);
nmem_reset(nmem);
}
}