const char *(*token_next_handler)(pp2_charset_token_t prt);
const char *(*get_sort_handler)(pp2_charset_token_t prt);
const char *(*get_display_handler)(pp2_charset_token_t prt);
+ void (*get_org_handler)(pp2_charset_token_t ptr,
+ size_t *start, size_t *len);
#if YAZ_HAVE_ICU
struct icu_chain * icu_chn;
UErrorCode icu_sts;
static const char *pp2_charset_token_a_to_z(pp2_charset_token_t prt);
static const char *pp2_get_sort_ascii(pp2_charset_token_t prt);
static const char *pp2_get_display_ascii(pp2_charset_token_t prt);
+static void pp2_get_org_ascii(pp2_charset_token_t prt,
+ size_t *start, size_t *len);
#if YAZ_HAVE_ICU
static const char *pp2_charset_token_icu(pp2_charset_token_t prt);
static const char *pp2_get_sort_icu(pp2_charset_token_t prt);
static const char *pp2_get_display_icu(pp2_charset_token_t prt);
+static void pp2_get_org_icu(pp2_charset_token_t prt,
+ size_t *start, size_t *len);
#endif
/* tokenzier handle */
#if YAZ_HAVE_ICU
yaz_icu_iter_t iter;
#endif
+ const char *cp0;
+ size_t start;
+ size_t len;
};
struct pp2_charset_fact_s {
pct->token_next_handler = pp2_charset_token_null;
pct->get_sort_handler = pp2_get_sort_ascii;
pct->get_display_handler = pp2_get_display_ascii;
+ pct->get_org_handler = pp2_get_org_ascii;
#if YAZ_HAVE_ICU
pct->icu_chn = 0;
#endif // YAZ_HAVE_ICU
pct->token_next_handler = pp2_charset_token_icu;
pct->get_sort_handler = pp2_get_sort_icu;
pct->get_display_handler = pp2_get_display_icu;
+ pct->get_org_handler = pp2_get_org_icu;
}
return pct;
}
if (pct->icu_chn)
prt->iter = icu_iter_create(pct->icu_chn);
#endif
+ prt->start = 0;
+ prt->len = 0;
return prt;
}
wrbuf_rewind(prt->norm_str);
wrbuf_rewind(prt->sort_str);
+ prt->cp0 = buf;
prt->cp = buf;
prt->last_cp = 0;
return prt->pct->get_display_handler(prt);
}
+void pp2_get_org(pp2_charset_token_t prt, size_t *start, size_t *len)
+{ /* Public entry point for querying the origin of the current token:
+   * passes prt, start and len unchanged to the get_org_handler that is
+   * installed on prt->pct (pp2_get_org_ascii or pp2_get_org_icu in this
+   * file). The handler is what writes through start and len. */
+ prt->pct->get_org_handler(prt, start, len);
+}
+
+
#define raw_char(c) (((c) >= 'a' && (c) <= 'z') ? (c) : -1)
/* original tokenizer with our tokenize interface, but we
add +1 to ensure no '\0' are in our string (except for EOF)
const char *cp = prt->cp;
int c;
+ prt->start = cp - prt->cp0;
/* skip white space */
while (*cp && (c = raw_char(tolower(*(const unsigned char *)cp))) < 0)
cp++;
wrbuf_putc(prt->norm_str, c);
cp++;
}
+ prt->len = (cp - prt->cp0) - prt->start;
prt->cp = cp;
return wrbuf_cstr(prt->norm_str);
}
}
}
+static void pp2_get_org_ascii(pp2_charset_token_t prt,
+ size_t *start, size_t *len)
+{ /* get_org handler installed by default (alongside the ascii sort and
+   * display handlers): copies the start and len fields kept on the token
+   * handle out to the caller. */
+ *start = prt->start; /* set by the a-z tokenizer as an offset from prt->cp0 */
+ *len = prt->len; /* length value last stored by the tokenizer */
+}
+
static const char *pp2_charset_token_null(pp2_charset_token_t prt)
{
const char *cp = prt->cp;
while (*cp)
cp++;
prt->cp = cp;
+ prt->len = cp - prt->cp0;
return prt->last_cp;
}
return icu_iter_get_display(prt->iter);
}
+static void pp2_get_org_icu(pp2_charset_token_t prt, size_t *start, size_t *len)
+{ /* ICU variant of the get_org handler: hands start and len straight to
+   * icu_iter_get_org_info() for the iterator stored in prt->iter.
+   * NOTE(review): presumably this yields the offset and length of the
+   * current token within the text given to the iterator - confirm
+   * against the YAZ ICU iterator documentation. */
+ icu_iter_get_org_info(prt->iter, start, len);
+}
+
#endif // YAZ_HAVE_ICU
const char *pp2_charset_token_next(pp2_charset_token_t prt);
const char *pp2_get_sort(pp2_charset_token_t prt);
const char *pp2_get_display(pp2_charset_token_t prt);
+void pp2_get_org(pp2_charset_token_t prt, size_t *start, size_t *len);
#endif
}
static void write_metadata(WRBUF w, struct conf_service *service,
- struct record_metadata **ml, int full, int indent)
+ struct record_metadata **ml, unsigned flags,
+ int indent)
{
int imeta;
{
struct conf_metadata *cmd = &service->metadata[imeta];
struct record_metadata *md;
- if (!cmd->brief && !full)
+ if (!cmd->brief && !(flags & 1))
continue;
for (md = ml[imeta]; md; md = md->next)
{
switch (cmd->type)
{
case Metadata_type_generic:
- wrbuf_xmlputs(w, md->data.text.disp);
+ if (md->data.text.snippet && (flags & 2))
+ wrbuf_puts(w, md->data.text.snippet);
+ else
+ wrbuf_xmlputs(w, md->data.text.disp);
break;
case Metadata_type_year:
wrbuf_printf(w, "%d", md->data.number.min);
}
static void write_subrecord(struct record *r, WRBUF w,
- struct conf_service *service, int show_details)
+ struct conf_service *service, unsigned flags,
+ int indent)
{
const char *name = session_setting_oneval(
client_get_database(r->client), PZ_NAME);
wrbuf_printf(w, "%u", r->checksum);
wrbuf_puts(w, "\">\n");
- write_metadata(w, service, r->metadata, show_details, 2);
+ write_metadata(w, service, r->metadata, flags, indent);
wrbuf_puts(w, " </location>\n");
}
const char *offsetstr = http_argbyname(rq, "offset");
const char *binarystr = http_argbyname(rq, "binary");
const char *checksumstr = http_argbyname(rq, "checksum");
+ const char *snippets = http_argbyname(rq, "snippets");
+ unsigned flags = (snippets && *snippets == '1') ? 3 : 1;
if (!s)
return;
}
wrbuf_printf(c->wrbuf, " <activeclients>%d</activeclients>\n",
session_active_clients(s->psession));
- write_metadata(c->wrbuf, service, rec->metadata, 1, 1);
+ write_metadata(c->wrbuf, service, rec->metadata, flags, 1);
for (r = rec->records; r; r = r->next)
- write_subrecord(r, c->wrbuf, service, 2);
+ write_subrecord(r, c->wrbuf, service, flags, 2);
response_close(c, "record");
}
show_single_stop(s->psession, rec);
const char *num = http_argbyname(rq, "num");
const char *sort = http_argbyname(rq, "sort");
int version = get_version(rq);
+ const char *snippets = http_argbyname(rq, "snippets");
+ unsigned flags = (snippets && *snippets == '1') ? 2 : 0;
int startn = 0;
int numn = 20;
struct conf_service *service = s->psession->service;
wrbuf_puts(c->wrbuf, "<hit>\n");
- write_metadata(c->wrbuf, service, rec->metadata, 0, 1);
+ write_metadata(c->wrbuf, service, rec->metadata, flags, 1);
for (ccount = 0, p = rl[i]->records; p; p = p->next, ccount++)
- write_subrecord(p, c->wrbuf, service, 0); // subrecs w/o details
+ write_subrecord(p, c->wrbuf, service, flags, 2);
wrbuf_printf(c->wrbuf, " <count>%d</count>\n", ccount);
if (strstr(sort, "relevance"))
{
struct {
const char *disp;
const char *sort;
+ const char *snippet;
} text;
struct {
int min;
return 0;
}
+/* Build a snippet of a field value with matching words marked up.
+ *
+ * The text in words is tokenized with r->prt. For every token, the
+ * org_len bytes of words starting at org_start (as reported by
+ * pp2_get_org) are appended to w_snippet via wrbuf_xmlputs_n. A token
+ * whose normalized form is non-empty and equal to the norm_str of any
+ * entry in r->entries is wrapped in <match>...</match>.
+ *
+ * r          relevance handle; supplies the tokenizer (prt) and the
+ *            list of word entries to compare against
+ * words      field content to scan
+ * name       field name; only used for log output
+ * w_snippet  buffer that receives the snippet text; it is appended to
+ *            even when no token matches
+ *
+ * Returns the number of tokens that were wrapped in <match>, so 0 means
+ * the field contains none of the entries.
+ */
+int relevance_snippet(struct relevance *r,
+                      const char *words, const char *name,
+                      WRBUF w_snippet)
+{
+    int no = 0;
+    const char *norm_str;
+#if 1
+    yaz_log(YLOG_LOG, "relevance_snippet for field=%s content=%s",
+            name, words);
+#endif
+    pp2_charset_token_first(r->prt, words, 0);
+
+    while ((norm_str = pp2_charset_token_next(r->prt)))
+    {
+        size_t org_start, org_len;
+        struct word_entry *entries;
+        int highlight = 0;
+
+        pp2_get_org(r->prt, &org_start, &org_len);
+
+        /* Every entry is visited (no early exit) so that the per-entry
+         * log line below is emitted exactly as before. */
+        for (entries = r->entries; entries; entries = entries->next)
+        {
+            yaz_log(YLOG_LOG, "Compare: %s %s", norm_str, entries->norm_str);
+            if (*norm_str && !strcmp(norm_str, entries->norm_str))
+                highlight = 1;
+        }
+        if (highlight)
+            wrbuf_puts(w_snippet, "<match>");
+
+        wrbuf_xmlputs_n(w_snippet, words + org_start, org_len);
+        if (highlight)
+            wrbuf_puts(w_snippet, "</match>");
+        no += highlight;
+    }
+    if (no)
+    {
+        yaz_log(YLOG_LOG, "SNIPPET match: %s", wrbuf_cstr(w_snippet));
+    }
+    return no;
+}
+
void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
const char *words, const char *rank,
const char *name)
void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
const char *words, const char *multiplier,
const char *name);
+int relevance_snippet(struct relevance *r,
+ const char *words, const char *name,
+ WRBUF w_snippet);
+
void relevance_donerecord(struct relevance *r, struct record_cluster *cluster);
void relevance_prepare_read(struct relevance *rel, struct reclist *rec);
rec_md->data.text.disp = p;
rec_md->data.text.sort = 0;
+ rec_md->data.text.snippet = 0;
}
else if (type == Metadata_type_year || type == Metadata_type_date)
{
"for element '%s'", value, type);
continue;
}
+
+ if (ser_md->type == Metadata_type_generic)
+ {
+ WRBUF w = wrbuf_alloc();
+ if (relevance_snippet(se->relevance,
+ (char*) value, ser_md->name, w))
+ rec_md->data.text.snippet = nmem_strdup(se->nmem,
+ wrbuf_cstr(w));
+ wrbuf_destroy(w);
+ }
+
+
wheretoput = &record->metadata[md_field_id];
while (*wheretoput)
wheretoput = &(*wheretoput)->next;
relevance_countwords(se->relevance, cluster,
(char *) value, rank, ser_md->name);
}
-
// construct facets ... unless the client already has reported them
if (ser_md->termlist && !client_has_facet(cl, (char *) type))
{