Display term and normalized term are now saved per facet.
struct pp2_charset_s {
const char *(*token_next_handler)(pp2_relevance_token_t prt);
const char *(*get_sort_handler)(pp2_relevance_token_t prt);
+ const char *(*get_display_handler)(pp2_relevance_token_t prt);
int ref_count;
#if YAZ_HAVE_ICU
struct icu_chain * icu_chn;
static const char *pp2_relevance_token_null(pp2_relevance_token_t prt);
static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt);
static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt);
+static const char *pp2_get_display_ascii(pp2_relevance_token_t prt);
#if YAZ_HAVE_ICU
static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt);
static const char *pp2_get_sort_icu(pp2_relevance_token_t prt);
+static const char *pp2_get_display_icu(pp2_relevance_token_t prt);
#endif
/* tokenizer handle */
pct->token_next_handler = pp2_relevance_token_null;
pct->get_sort_handler = pp2_get_sort_ascii;
+ pct->get_display_handler = pp2_get_display_ascii;
pct->ref_count = 1;
#if YAZ_HAVE_ICU
pct->icu_chn = 0;
pct->icu_sts = U_ZERO_ERROR;
pct->token_next_handler = pp2_relevance_token_icu;
pct->get_sort_handler = pp2_get_sort_icu;
+ pct->get_display_handler = pp2_get_display_icu;
}
#endif // YAZ_HAVE_ICU
return pct;
return prt->pct->get_sort_handler(prt);
}
+/** Return the display (original-form) term for the current token.
+ *  Dispatches to the charset's registered display handler
+ *  (ASCII or ICU variant). May return NULL when no token is current. */
+const char *pp2_get_display(pp2_relevance_token_t prt)
+{
+    return prt->pct->get_display_handler(prt);
+}
+
#define raw_char(c) (((c) >= 'a' && (c) <= 'z') ? (c) : -1)
/* original tokenizer with our tokenize interface, but we
add +1 to ensure no '\0' are in our string (except for EOF)
}
}
+/** ASCII display handler: for the plain-ASCII charset the display
+ *  form is the same as the normalized form kept in norm_str.
+ *  Returns NULL once the tokenizer has reached end of input
+ *  (last_cp == 0). */
+static const char *pp2_get_display_ascii(pp2_relevance_token_t prt)
+{
+    if (prt->last_cp == 0)
+        return 0;
+    else
+    {
+        return wrbuf_cstr(prt->norm_str);
+    }
+}
+
static const char *pp2_relevance_token_null(pp2_relevance_token_t prt)
{
const char *cp = prt->cp;
return icu_iter_get_sortkey(prt->iter);
}
+/** ICU display handler: delegate to the ICU iterator's display-term
+ *  accessor for the current token. */
+static const char *pp2_get_display_icu(pp2_relevance_token_t prt)
+{
+    return icu_iter_get_display(prt->iter);
+}
+
#endif // YAZ_HAVE_ICU
void pp2_relevance_token_destroy(pp2_relevance_token_t prt);
const char *pp2_relevance_token_next(pp2_relevance_token_t prt);
const char *pp2_get_sort(pp2_relevance_token_t prt);
+const char *pp2_get_display(pp2_relevance_token_t prt);
#if 0
typedef int pp2_charset_normalize_t(pp2_charset_t pct,
for (i = 0; i < len && i < num; i++)
{
// prevent sending empty term elements
- if (!p[i]->term || !p[i]->term[0])
+ if (!p[i]->display_term || !p[i]->display_term[0])
continue;
wrbuf_puts(c->wrbuf, "<term>");
wrbuf_puts(c->wrbuf, "<name>");
- wrbuf_xmlputs(c->wrbuf, p[i]->term);
+ wrbuf_xmlputs(c->wrbuf, p[i]->display_term);
wrbuf_puts(c->wrbuf, "</name>");
wrbuf_printf(c->wrbuf,
pp2_relevance_token_t prt;
const char *facet_component;
WRBUF facet_wrbuf = wrbuf_alloc();
+ WRBUF display_wrbuf = wrbuf_alloc();
prt = pp2_relevance_tokenize(service->facet_pct);
pp2_relevance_first(prt, value, 0);
while ((facet_component = pp2_relevance_token_next(prt)))
{
+ const char *display_component;
if (*facet_component)
{
if (wrbuf_len(facet_wrbuf))
wrbuf_puts(facet_wrbuf, " ");
wrbuf_puts(facet_wrbuf, facet_component);
}
+ display_component = pp2_get_display(prt);
+ if (display_component)
+ {
+ if (wrbuf_len(display_wrbuf))
+ wrbuf_puts(display_wrbuf, " ");
+ wrbuf_puts(display_wrbuf, display_component);
+ }
}
pp2_relevance_token_destroy(prt);
-
+
+ yaz_log(YLOG_LOG, "facet norm=%s", wrbuf_cstr(facet_wrbuf));
+ yaz_log(YLOG_LOG, "facet display=%s", wrbuf_cstr(display_wrbuf));
if (wrbuf_len(facet_wrbuf))
{
int i;
#if 0
session_log(s, YLOG_DEBUG, "Facets for %s: %s norm:%s (%d)", type, value, wrbuf_cstr(facet_wrbuf), count);
#endif
- termlist_insert(s->termlists[i].termlist, wrbuf_cstr(facet_wrbuf),
- count);
+ termlist_insert(s->termlists[i].termlist, wrbuf_cstr(display_wrbuf),
+ wrbuf_cstr(facet_wrbuf), count);
}
wrbuf_destroy(facet_wrbuf);
+ wrbuf_destroy(display_wrbuf);
}
static xmlDoc *record_to_xml(struct session *se,
}
}
-void termlist_insert(struct termlist *tl, const char *term, int freq)
+void termlist_insert(struct termlist *tl, const char *display_term,
+ const char *norm_term, int freq)
{
unsigned int bucket;
struct termlist_bucket **p;
char buf[256], *cp;
- if (strlen(term) > 255)
+ if (strlen(norm_term) > 255)
return;
- strcpy(buf, term);
+ strcpy(buf, norm_term);
/* chop right */
for (cp = buf + strlen(buf); cp != buf && strchr(",. -", cp[-1]); cp--)
cp[-1] = '\0';
bucket = jenkins_hash((unsigned char *)buf) % tl->hash_size;
for (p = &tl->hashtable[bucket]; *p; p = &(*p)->next)
{
- if (!strcmp(buf, (*p)->term.term))
+ if (!strcmp(buf, (*p)->term.norm_term))
{
(*p)->term.frequency += freq;
update_highscore(tl, &((*p)->term));
{
struct termlist_bucket *new = nmem_malloc(tl->nmem,
sizeof(struct termlist_bucket));
- new->term.term = nmem_strdup(tl->nmem, buf);
+ new->term.norm_term = nmem_strdup(tl->nmem, buf);
+ new->term.display_term = *display_term ?
+ nmem_strdup(tl->nmem, display_term) : new->term.norm_term;
new->term.frequency = freq;
new->next = 0;
*p = new;
struct termlist_score
{
-    char *term;
+    char *norm_term;    /* normalized form; used as hash/compare key */
+    char *display_term; /* display form; may alias norm_term when no
+                           separate display term was produced */
int frequency;
};
struct termlist;
struct termlist *termlist_create(NMEM nmem, int highscore_size);
-void termlist_insert(struct termlist *tl, const char *term, int freq);
+void termlist_insert(struct termlist *tl, const char *display_term,
+ const char *norm_term, int freq);
struct termlist_score **termlist_highscore(struct termlist *tl, int *len);
#endif