From 3371e5ba3a8d62b6edbb338984ac333bec6df01b Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 11 Jun 2012 19:54:18 +0200 Subject: [PATCH] CCL field ranking boost. New metadata attribute frank="cclfield value" specifies boost factor value for cclfield rather than the rank value, for search terms as part of cclfield. For example, <metadata name="author" rank="2" frank="au 5"/> would specify rank weight 5 for au CCL field terms (author) and 2 for everything else. --- src/pazpar2_config.c | 19 +++++++++++--- src/pazpar2_config.h | 2 +- src/relevance.c | 21 +++++++++++---- src/relevance.h | 3 ++- src/session.c | 4 +-- test/test_http.cfg | 3 ++- test/test_http.urls | 3 +++ test/test_http_77.res | 3 +++ test/test_http_78.res | 2 ++ test/test_http_79.res | 52 +++++++++++++++++++++++++++++++++++++ test/z3950_indexdata_com_marc.xml | 2 +- 11 files changed, 99 insertions(+), 15 deletions(-) create mode 100644 test/test_http_77.res create mode 100644 test/test_http_78.res create mode 100644 test/test_http_79.res diff --git a/src/pazpar2_config.c b/src/pazpar2_config.c index bff5810..d992a33 100644 --- a/src/pazpar2_config.c +++ b/src/pazpar2_config.c @@ -76,7 +76,8 @@ static void conf_metadata_assign(NMEM nmem, int sortkey_offset, enum conf_metadata_mergekey mt, const char *facetrule, - const char *limitmap) + const char *limitmap, + const char *frank) { assert(nmem && metadata && name); @@ -98,6 +99,7 @@ static void conf_metadata_assign(NMEM nmem, metadata->mergekey = mt; metadata->facetrule = nmem_strdup_null(nmem, facetrule); metadata->limitmap = nmem_strdup_null(nmem, limitmap); + metadata->frank = nmem_strdup_null(nmem, frank); } @@ -177,7 +179,8 @@ static struct conf_metadata* conf_service_add_metadata( int sortkey_offset, enum conf_metadata_mergekey mt, const char *facetrule, - const char *limitmap + const char *limitmap, + const char *frank ) { struct conf_metadata * md = 0; @@ -189,7 +192,7 @@ static struct conf_metadata* conf_service_add_metadata( md = service->metadata + field_id; 
conf_metadata_assign(service->nmem, md, name, type, merge, setting, brief, termlist, rank, sortkey_offset, - mt, facetrule, limitmap); + mt, facetrule, limitmap, frank); return md; } @@ -294,6 +297,8 @@ static int parse_metadata(struct conf_service *service, xmlNode *n, xmlChar *xml_mergekey = 0; xmlChar *xml_limitmap = 0; xmlChar *xml_icu_chain = 0; + xmlChar *xml_frank = 0; + struct _xmlAttr *attr; for (attr = n->properties; attr; attr = attr->next) { @@ -330,6 +335,9 @@ static int parse_metadata(struct conf_service *service, xmlNode *n, else if (!xmlStrcmp(attr->name, BAD_CAST "limitmap") && attr->children && attr->children->type == XML_TEXT_NODE) xml_limitmap = attr->children->content; + else if (!xmlStrcmp(attr->name, BAD_CAST "frank") && + attr->children && attr->children->type == XML_TEXT_NODE) + xml_frank = attr->children->content; else { yaz_log(YLOG_FATAL, "Unknown metadata attribute '%s'", attr->name); @@ -470,7 +478,10 @@ static int parse_metadata(struct conf_service *service, xmlNode *n, (const char *) xml_name, type, merge, setting, brief, termlist, rank, sortkey_offset, - mergekey_type, (const char *) xml_icu_chain, (const char *) xml_limitmap); + mergekey_type, + (const char *) xml_icu_chain, + (const char *) xml_limitmap, + (const char *) xml_frank); (*md_node)++; return 0; } diff --git a/src/pazpar2_config.h b/src/pazpar2_config.h index 692d260..18dfde6 100644 --- a/src/pazpar2_config.h +++ b/src/pazpar2_config.h @@ -85,7 +85,7 @@ struct conf_metadata char *facetrule; char *limitmap; // Should be expanded into service-wide default e.g. pz:limitmap:=value setting - char *facetmap; // Should be expanded into service-wide default e.g. 
pz:facetmap:=value setting + char *frank; }; diff --git a/src/relevance.c b/src/relevance.c index 418dc9a..7f1943a 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -44,34 +44,45 @@ struct word_entry { struct word_entry *next; }; -int word_entry_match(struct word_entry *entries, const char *norm_str) +static int word_entry_match(struct word_entry *entries, const char *norm_str, + const char *frank, int *local_mult) { for (; entries; entries = entries->next) { if (!strcmp(norm_str, entries->norm_str)) + { + const char *cp = 0; + if (frank && (cp = strchr(frank, ' '))) + { + if ((cp - frank) == strlen(entries->ccl_field) && + memcmp(entries->ccl_field, frank, cp - frank) == 0) + *local_mult = atoi(cp + 1); + } return entries->termno; + } } return 0; } void relevance_countwords(struct relevance *r, struct record_cluster *cluster, - const char *words, int multiplier, const char *name) + const char *words, int multiplier, const char *name, + const char *frank) { int *mult = cluster->term_frequency_vec_tmp; const char *norm_str; int i, length = 0; - pp2_charset_token_first(r->prt, words, 0); for (i = 1; i < r->vec_len; i++) mult[i] = 0; while ((norm_str = pp2_charset_token_next(r->prt))) { - int res = word_entry_match(r->entries, norm_str); + int local_mult = multiplier; + int res = word_entry_match(r->entries, norm_str, frank, &local_mult); if (res) { assert(res < r->vec_len); - mult[res] += multiplier; + mult[res] += local_mult; } length++; } diff --git a/src/relevance.h b/src/relevance.h index f20cbec..b22a7a0 100644 --- a/src/relevance.h +++ b/src/relevance.h @@ -33,7 +33,8 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, void relevance_destroy(struct relevance **rp); void relevance_newrec(struct relevance *r, struct record_cluster *cluster); void relevance_countwords(struct relevance *r, struct record_cluster *cluster, - const char *words, int multiplier, const char *name); + const char *words, int multiplier, const char *name, + const char 
*frank); void relevance_donerecord(struct relevance *r, struct record_cluster *cluster); void relevance_prepare_read(struct relevance *rel, struct reclist *rec); diff --git a/src/session.c b/src/session.c index 105c064..4049360 100644 --- a/src/session.c +++ b/src/session.c @@ -1933,12 +1933,12 @@ static int ingest_to_cluster(struct client *cl, } } - // ranking of _all_ fields enabled ... if (rank) { relevance_countwords(se->relevance, cluster, - (char *) value, rank, ser_md->name); + (char *) value, rank, ser_md->name, + ser_md->frank); } // construct facets ... unless the client already has reported them diff --git a/test/test_http.cfg b/test/test_http.cfg index d084f9e..8904f3b 100644 --- a/test/test_http.cfg +++ b/test/test_http.cfg @@ -13,7 +13,8 @@ - + diff --git a/test/test_http.urls b/test/test_http.urls index 5da644d..47247d2 100644 --- a/test/test_http.urls +++ b/test/test_http.urls @@ -74,3 +74,6 @@ http://localhost:9763/search.pz2?session=9&command=search&query=computer&limit=M http://localhost:9763/search.pz2?session=9&command=show&block=1 http://localhost:9763/search.pz2?session=9&command=search&query=computer&limit=date%3D1977 http://localhost:9763/search.pz2?session=9&command=show&block=1 +http://localhost:9763/search.pz2?command=init +http://localhost:9763/search.pz2?session=10&command=search&query=au%3dadam +http://localhost:9763/search.pz2?session=10&command=show&block=1 diff --git a/test/test_http_77.res b/test/test_http_77.res new file mode 100644 index 0000000..c41b6b8 --- /dev/null +++ b/test/test_http_77.res @@ -0,0 +1,3 @@ + +OK10150000 + \ No newline at end of file diff --git a/test/test_http_78.res b/test/test_http_78.res new file mode 100644 index 0000000..ab63fe6 --- /dev/null +++ b/test/test_http_78.res @@ -0,0 +1,2 @@ + +OK \ No newline at end of file diff --git a/test/test_http_79.res b/test/test_http_79.res new file mode 100644 index 0000000..c4aa933 --- /dev/null +++ b/test/test_http_79.res @@ -0,0 +1,52 @@ + +OK +0 +2 +2 +0 +2 + + 
+The religious teachers of Greece +1972 +Adam, James +Greek literature +Philosophy, Ancient +Greece +Reprint of the 1909 ed., which was issued as the 1904-1906 Gifford lectures +The religious teachers of Greece +1972 +Adam, James +Greek literature +Philosophy, Ancient +Greece +Reprint of the 1909 ed., which was issued as the 1904-1906 Gifford lectures +Includes bibliographical references +XXXXXXXXXX +test-usersetting-2 data: + YYYYYYYYY +150000 +content: title the religious teachers of greece author adam james medium book + + + +Four psalms +XXIII, XXXVI, LII, CXXI +1980 +Smith, George Adam +Bible +Four psalms +XXIII, XXXVI, LII, CXXI +1980 +Smith, George Adam +Bible +Bible +Bible +Bible +XXXXXXXXXX +test-usersetting-2 data: + YYYYYYYYY +100000 +content: title four psalms author smith george adam medium book + + \ No newline at end of file diff --git a/test/z3950_indexdata_com_marc.xml b/test/z3950_indexdata_com_marc.xml index 8fe0a37..2a41e4b 100644 --- a/test/z3950_indexdata_com_marc.xml +++ b/test/z3950_indexdata_com_marc.xml @@ -6,7 +6,7 @@ - + -- 1.7.10.4