From a1f5729d06659c681dfd68d56c756478bbdcdf99 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 5 Dec 2007 09:29:52 +0000 Subject: [PATCH] Sort facets by set frequency. --- index/retrieve.c | 28 ++++++++++++++++++++++++---- test/api/safari1.c | 8 ++++---- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/index/retrieve.c b/index/retrieve.c index 021a7ea..ea16718 100644 --- a/index/retrieve.c +++ b/index/retrieve.c @@ -1,4 +1,4 @@ -/* $Id: retrieve.c,v 1.80 2007-12-04 12:52:33 adam Exp $ +/* $Id: retrieve.c,v 1.81 2007-12-05 09:29:52 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -629,6 +629,22 @@ zint freq_term(ZebraHandle zh, int ord, const char *term, RSET rset_set) return hits; } +int term_qsort_handle(const void *a, const void *b) +{ + const struct term_collect *l = a; + const struct term_collect *r = b; + if (l->set_occur < r->set_occur) + return 1; + else if (l->set_occur > r->set_occur) + return -1; + else + { + const char *lterm = l->term ? l->term : ""; + const char *rterm = r->term ? r->term : ""; + return strcmp(lterm, rterm); + } +} + void term_collect_freq(ZebraHandle zh, struct term_collect *col, int no_terms_collect, int ord, RSET rset) @@ -639,6 +655,7 @@ void term_collect_freq(ZebraHandle zh, if (col[i].term) col[i].set_occur = freq_term(zh, ord, col[i].term, rset); } + qsort(col, no_terms_collect, sizeof(*col), term_qsort_handle); } struct term_collect *term_collect_create(zebra_strmap_t sm, @@ -662,14 +679,17 @@ struct term_collect *term_collect_create(zebra_strmap_t sm, it = zebra_strmap_it_create(sm); while ((term = zebra_strmap_it_next(it, &data_buf, &data_len))) { + /* invariant: + col[0] has lowest oc . col[no_terms_collect-1] has highest oc */ int oc = *(int*) data_buf; int j = 0; /* insertion may be slow but terms terms will be "infrequent" and - thus number of iterations should be small below */ + thus number of iterations should be small below + */ while (j < no_terms_collect && oc > col[j].oc) j++; - if (j) - { + if (j) + { /* oc <= col[j] and oc > col[j-1] */ --j; memmove(col, col+1, sizeof(*col) * j); col[j].term = term; diff --git a/test/api/safari1.c b/test/api/safari1.c index 0a88712..4d2337b 100644 --- a/test/api/safari1.c +++ b/test/api/safari1.c @@ -1,4 +1,4 @@ -/* $Id: safari1.c,v 1.21 2007-12-04 12:52:33 adam Exp $ +/* $Id: safari1.c,v 1.22 2007-12-05 09:29:53 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -109,11 +109,11 @@ static void tst(int argc, char **argv) yaz_oid_recsyn_xml, "\n" " \n" - " old\n" - " art\n" - " gamle\n" " mand\n" " the\n" + " art\n" + " gamle\n" + " old\n" " \n" "\n"), ZEBRA_OK); #endif -- 1.7.10.4