From 51f7471f0a30c9c6bf5b7a08d70646b1792b3c7c Mon Sep 17 00:00:00 2001 From: Marc Cromme Date: Wed, 25 Apr 2007 13:28:55 +0000 Subject: [PATCH] Came to the point where I can start testing sorting of records in the recordslist. But there is a nasty segfault in reclist_sort(list, sort_parms) which needs to be sorted out before proceeding - probably a global parameter referenced someplace. Just checking in to save the work so far, continuing tomorrow --- src/test_relevance.c | 304 +++++++++++++++++--------------------------------- 1 file changed, 105 insertions(+), 199 deletions(-) diff --git a/src/test_relevance.c b/src/test_relevance.c index ec393f3..a7683a5 100644 --- a/src/test_relevance.c +++ b/src/test_relevance.c @@ -1,4 +1,4 @@ -/* $Id: test_relevance.c,v 1.9 2007-04-25 07:00:33 marc Exp $ +/* $Id: test_relevance.c,v 1.10 2007-04-25 13:28:55 marc Exp $ Copyright (c) 2006-2007, Index Data. This file is part of Pazpar2. @@ -37,236 +37,142 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "reclists.h" -void test_relevance(int argc, char **argv) +void test_relevance_7bit(int argc, char **argv) { NMEM nmem = nmem_create(); - int numrecs = 10; - const char * queryterms[] = - {"abe", "fisk", 0}; - // {"ål", "økologi", "æble", 0}; - - //struct record_cluster *cluster = 0; struct conf_service *service = 0; - struct reclist *list = 0; - struct record *record = 0; - //const char *mergekey = "amergekey"; - //int total = 0; - - struct relevance *rel = 0; - //struct client *client = 0; - - - rel = relevance_create(nmem, queryterms, numrecs); - YAZ_CHECK(rel); - - list = reclist_create(nmem, numrecs); - YAZ_CHECK(list); + service = conf_service_create(nmem, 1, 2); - service = conf_service_create(nmem, 4, 3); - YAZ_CHECK(service); - - YAZ_CHECK(conf_service_add_metadata(nmem, service, 0, "title", + conf_service_add_metadata(nmem, service, 0, "title", Metadata_type_generic, Metadata_merge_unique, - 1, 1, 1, 0)); - - 
YAZ_CHECK(conf_service_add_metadata(nmem, service, 1, "author", - Metadata_type_generic, Metadata_merge_longest, - 1, 1, 1, 0)); - - YAZ_CHECK(conf_service_add_metadata(nmem, service, 2, "isbn", - Metadata_type_number, Metadata_merge_no, - 1, 1, 1, 0)); + 1, 1, 1, 0); - YAZ_CHECK(conf_service_add_metadata(nmem, service, 3, "year", - Metadata_type_year, Metadata_merge_range, - 1, 1, 1, 0)); - - YAZ_CHECK(conf_service_add_sortkey(nmem, service, 0, "relevance", - Metadata_sortkey_relevance)); - - YAZ_CHECK(conf_service_add_sortkey(nmem, service, 1, "title", - Metadata_sortkey_string)); + conf_service_add_sortkey(nmem, service, 0, "relevance", + Metadata_sortkey_relevance); - YAZ_CHECK(conf_service_add_sortkey(nmem, service, 2, "year", - Metadata_sortkey_numeric)); + conf_service_add_sortkey(nmem, service, 1, "title", + Metadata_sortkey_string); - // testing record things - record = record_create(nmem, 4, 3); - YAZ_CHECK(record); - + // setting up records + // why on earth do we have a client dangeling from the record ?? 
// record->client = client; - char * bla = "blabla"; - union data_types data_text; - data_text.text = bla; + union data_types data_ape = {"ape"}; + union data_types data_bee = {"bee"}; + union data_types data_fish = {"fish"}; + union data_types data_zebra = {"zebra"}; + + //union data_types data_year; + //data_num.number.min = 2005; + //data_num.number.max = 2007; + + int no_recs = 4; + + const char *mk_ape_fish = "ape fish"; + struct record *rec_ape_fish = 0; + rec_ape_fish + = record_create(nmem, service->num_metadata, service->num_sortkeys); + record_add_metadata(nmem, rec_ape_fish, service, "title", data_ape); + record_assign_sortkey(nmem, rec_ape_fish, service, "relevance", data_ape); + record_assign_sortkey(nmem, rec_ape_fish, service, "title", data_ape); + record_add_metadata(nmem, rec_ape_fish, service, "title", data_fish); + YAZ_CHECK(rec_ape_fish); + + const char *mk_bee_fish = "bee fish"; + struct record *rec_bee_fish = 0; + rec_bee_fish + = record_create(nmem, service->num_metadata, service->num_sortkeys); + record_add_metadata(nmem, rec_bee_fish, service, "title", data_bee); + record_assign_sortkey(nmem, rec_bee_fish, service, "relevance", data_bee); + record_assign_sortkey(nmem, rec_bee_fish, service, "title", data_bee); + record_add_metadata(nmem, rec_bee_fish, service, "title", data_fish); + YAZ_CHECK(rec_bee_fish); + + const char *mk_fish_bee = "fish bee"; + struct record *rec_fish_bee = 0; + rec_fish_bee + = record_create(nmem, service->num_metadata, service->num_sortkeys); + record_add_metadata(nmem, rec_fish_bee, service, "title", data_fish); + record_assign_sortkey(nmem, rec_fish_bee, service, "relevance", data_fish); + record_assign_sortkey(nmem, rec_fish_bee, service, "title", data_fish); + record_add_metadata(nmem, rec_fish_bee, service, "title", data_bee); + YAZ_CHECK(rec_fish_bee); - union data_types data_num; - data_num.number.min = 2; - data_num.number.max = 5; + const char *mk_zebra_bee = "zebra bee"; + struct record *rec_zebra_bee = 0; 
+ rec_zebra_bee + = record_create(nmem, service->num_metadata, service->num_sortkeys); + record_add_metadata(nmem, rec_zebra_bee, service, "title", data_zebra); + record_assign_sortkey(nmem, rec_zebra_bee, service, "relevance", data_zebra); + record_assign_sortkey(nmem, rec_zebra_bee, service, "title", data_zebra); + record_add_metadata(nmem, rec_zebra_bee, service, "title", data_bee); + YAZ_CHECK(rec_zebra_bee); - struct record_metadata * tmp_md = 0; - tmp_md = record_metadata_insert(nmem, &(record->metadata[0]), data_text); - YAZ_CHECK(tmp_md); - tmp_md = record_metadata_insert(nmem, &tmp_md, data_text); - YAZ_CHECK(tmp_md); + + struct reclist *list = 0; + list = reclist_create(nmem, no_recs); + YAZ_CHECK(list); - YAZ_CHECK(record_add_metadata_field_id(nmem, record, 3, data_num)); - YAZ_CHECK(record_add_metadata_field_id(nmem, record, 3, data_num)); + int no_merged = 0; - YAZ_CHECK(record_add_metadata(nmem, record, service, "author", data_text)); - YAZ_CHECK(record_add_metadata(nmem, record, service, "author", data_text)); + const char * queryterms[] = + {"ape", "fish", 0}; + // {"ål", "økologi", "æble", 0}; - YAZ_CHECK(record_assign_sortkey_field_id(nmem, record, 0, data_text)); - YAZ_CHECK(record_assign_sortkey_field_id(nmem, record, 1, data_text)); - YAZ_CHECK(record_assign_sortkey_field_id(nmem, record, 2, data_num)); - YAZ_CHECK(record_assign_sortkey(nmem, record, service, "relevance", data_text)); - YAZ_CHECK(record_assign_sortkey(nmem, record, service, "title", data_text)); - YAZ_CHECK(record_assign_sortkey(nmem, record, service, "year", data_num)); + struct relevance *rel = 0; + rel = relevance_create(nmem, queryterms, no_recs); + YAZ_CHECK(rel); + + struct record_cluster *cluster = 0; - - + // insert records into recordlist and get clusters + // since metadata keys differ, we get multiple clusters ?? 
+ cluster + = reclist_insert(list, service, rec_ape_fish, mk_ape_fish, &no_merged); + YAZ_CHECK(cluster); + relevance_newrec(rel, cluster); + cluster + = reclist_insert(list, service, rec_bee_fish, mk_bee_fish, &no_merged); + YAZ_CHECK(cluster); + relevance_newrec(rel, cluster); + cluster + = reclist_insert(list, service, rec_fish_bee, mk_fish_bee, &no_merged); + YAZ_CHECK(cluster); + relevance_newrec(rel, cluster); - // now we need to put some actual data into the record ... how ?? - // there is a hell of a lot spagetti code in logic.c ingest_record() - // line 694 ff snippet from there: - // this code needs to be analyzed and the fundamental stuff extracted + cluster + = reclist_insert(list, service, rec_zebra_bee, mk_zebra_bee, &no_merged); + YAZ_CHECK(cluster); + relevance_newrec(rel, cluster); -#if 0 - service->metadata[imeta].name; - md = &service->metadata[imeta]; - if (md->sortkey_offset >= 0) - sk = &service->sortkeys[md->sortkey_offset]; - - // Find out where we are putting it if (md->merge == Metadata_merge_no) - wheretoput = &res->metadata[imeta]; - else - wheretoput = &cluster->metadata[imeta]; - - - // Put it there - newm = nmem_malloc(se->nmem, sizeof(struct record_metadata)); - newm->next = 0; - if (md->type == Metadata_type_generic) - { - char *p, *pe; - for (p = (char *) value; *p && isspace(*p); p++) - ; - for (pe = p + strlen(p) - 1; - pe > p && strchr(" ,/.:([", *pe); pe--) - *pe = '\0'; - newm->data.text = nmem_strdup(se->nmem, p); - - } - else if (md->type == Metadata_type_year) - { - if (extract_years((char *) value, &first, &last) < 0) - continue; - } - else - { - yaz_log(YLOG_WARN, "Unknown type in metadata element %s", type); - continue; - } - if (md->type == Metadata_type_year && md->merge != Metadata_merge_range) - { - yaz_log(YLOG_WARN, "Only range merging supported for years"); - continue; - } - if (md->merge == Metadata_merge_unique) - { - struct record_metadata *mnode; - for (mnode = *wheretoput; mnode; mnode = mnode->next) - if 
(!strcmp((const char *) mnode->data.text, newm->data.text)) - break; - if (!mnode) - { - newm->next = *wheretoput; - *wheretoput = newm; - } - } - else if (md->merge == Metadata_merge_longest) - { - if (!*wheretoput || - strlen(newm->data.text) > strlen((*wheretoput)->data.text)) - { - *wheretoput = newm; - if (sk) - { - char *s = nmem_strdup(se->nmem, newm->data.text); - if (!cluster->sortkeys[md->sortkey_offset]) - cluster->sortkeys[md->sortkey_offset] = - nmem_malloc(se->nmem, sizeof(union data_types)); - normalize_mergekey(s, - (sk->type == Metadata_sortkey_skiparticle)); - cluster->sortkeys[md->sortkey_offset]->text = s; - } - } - } - else if (md->merge == Metadata_merge_all || md->merge == Metadata_merge_no) - { - newm->next = *wheretoput; - *wheretoput = newm; - } - else if (md->merge == Metadata_merge_range) - { - assert(md->type == Metadata_type_year); - if (!*wheretoput) - { - *wheretoput = newm; - (*wheretoput)->data.number.min = first; - (*wheretoput)->data.number.max = last; - if (sk) - cluster->sortkeys[md->sortkey_offset] = &newm->data; - } - else - { - if (first < (*wheretoput)->data.number.min) - (*wheretoput)->data.number.min = first; - if (last > (*wheretoput)->data.number.max) - (*wheretoput)->data.number.max = last; - } - if (md->rank) - relevance_countwords(se->relevance, cluster, - (char *) value, md->rank); - if (md->termlist) - { - if (md->type == Metadata_type_year) - { - char year[64]; - sprintf(year, "%d", last); - add_facet(se, (char *) type, year); - if (first != last) - { - sprintf(year, "%d", first); - add_facet(se, (char *) type, year); - } - } - else - add_facet(se, (char *) type, (char *) value); - } -#endif + YAZ_CHECK(no_recs == no_merged); - //mergekey_norm = (xmlChar *) nmem_strdup(se->nmem, (char*) mergekey); - //normalize_mergekey((char *) mergekey_norm, 0); + // now sorting according to sorting criteria, here ascending title + struct reclist_sortparms *sort_parms = 0; + reclist_sortparms_insert(nmem, &sort_parms, service, 
"title", 1); + //reclist_sortparms_insert(nmem, &sort_parms, service, "relevance", 1); + + // crashes with a fat segmentation fault! To be traced tomorrow + //reclist_sort(list, sort_parms); + + -#if 0 - // insert one record into recordlist/cluster - what's a cluster, exactly?? - cluster = reclist_insert(list, service, record, (char *)mergekey, &total); - relevance_newrec(rel, cluster); -#endif + + //mergekey_norm = (xmlChar *) nmem_strdup(se->nmem, (char*) mergekey); + //normalize_mergekey((char *) mergekey_norm, 0); @@ -295,7 +201,7 @@ int main(int argc, char **argv) YAZ_CHECK_LOG(); - test_relevance(argc, argv); + test_relevance_7bit(argc, argv); YAZ_CHECK_TERM; -- 1.7.10.4