-/* $Id: test_relevance.c,v 1.9 2007-04-25 07:00:33 marc Exp $
+/* $Id: test_relevance.c,v 1.10 2007-04-25 13:28:55 marc Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
#include "reclists.h"
-void test_relevance(int argc, char **argv)
+void test_relevance_7bit(int argc, char **argv)
{
NMEM nmem = nmem_create();
- int numrecs = 10;
- const char * queryterms[] =
- {"abe", "fisk", 0};
- // {"ål", "økologi", "æble", 0};
-
- //struct record_cluster *cluster = 0;
struct conf_service *service = 0;
- struct reclist *list = 0;
- struct record *record = 0;
- //const char *mergekey = "amergekey";
- //int total = 0;
-
- struct relevance *rel = 0;
- //struct client *client = 0;
-
-
- rel = relevance_create(nmem, queryterms, numrecs);
- YAZ_CHECK(rel);
-
- list = reclist_create(nmem, numrecs);
- YAZ_CHECK(list);
+ service = conf_service_create(nmem, 1, 2);
- service = conf_service_create(nmem, 4, 3);
- YAZ_CHECK(service);
-
- YAZ_CHECK(conf_service_add_metadata(nmem, service, 0, "title",
+ conf_service_add_metadata(nmem, service, 0, "title",
Metadata_type_generic, Metadata_merge_unique,
- 1, 1, 1, 0));
-
- YAZ_CHECK(conf_service_add_metadata(nmem, service, 1, "author",
- Metadata_type_generic, Metadata_merge_longest,
- 1, 1, 1, 0));
-
- YAZ_CHECK(conf_service_add_metadata(nmem, service, 2, "isbn",
- Metadata_type_number, Metadata_merge_no,
- 1, 1, 1, 0));
+ 1, 1, 1, 0);
- YAZ_CHECK(conf_service_add_metadata(nmem, service, 3, "year",
- Metadata_type_year, Metadata_merge_range,
- 1, 1, 1, 0));
-
- YAZ_CHECK(conf_service_add_sortkey(nmem, service, 0, "relevance",
- Metadata_sortkey_relevance));
-
- YAZ_CHECK(conf_service_add_sortkey(nmem, service, 1, "title",
- Metadata_sortkey_string));
+ conf_service_add_sortkey(nmem, service, 0, "relevance",
+ Metadata_sortkey_relevance);
- YAZ_CHECK(conf_service_add_sortkey(nmem, service, 2, "year",
- Metadata_sortkey_numeric));
+ conf_service_add_sortkey(nmem, service, 1, "title",
+ Metadata_sortkey_string);
- // testing record things
- record = record_create(nmem, 4, 3);
- YAZ_CHECK(record);
-
+ // setting up records
+
// why on earth do we have a client dangeling from the record ??
// record->client = client;
- char * bla = "blabla";
- union data_types data_text;
- data_text.text = bla;
+ union data_types data_ape = {"ape"};
+ union data_types data_bee = {"bee"};
+ union data_types data_fish = {"fish"};
+ union data_types data_zebra = {"zebra"};
+
+ //union data_types data_year;
+ //data_num.number.min = 2005;
+ //data_num.number.max = 2007;
+
+ int no_recs = 4;
+
+ // Record inserted under merge key "ape fish": "title" metadata "ape" and
+ // "fish"; the "relevance" and "title" sort keys are both set from data_ape.
+ const char *mk_ape_fish = "ape fish";
+ struct record *rec_ape_fish
+ = record_create(nmem, service->num_metadata, service->num_sortkeys);
+ // check the freshly created record before it is handed to the calls below
+ YAZ_CHECK(rec_ape_fish);
+ record_add_metadata(nmem, rec_ape_fish, service, "title", data_ape);
+ record_assign_sortkey(nmem, rec_ape_fish, service, "relevance", data_ape);
+ record_assign_sortkey(nmem, rec_ape_fish, service, "title", data_ape);
+ record_add_metadata(nmem, rec_ape_fish, service, "title", data_fish);
+
+ // Record inserted under merge key "bee fish": "title" metadata "bee" and
+ // "fish"; the "relevance" and "title" sort keys are both set from data_bee.
+ const char *mk_bee_fish = "bee fish";
+ struct record *rec_bee_fish
+ = record_create(nmem, service->num_metadata, service->num_sortkeys);
+ // check the freshly created record before it is handed to the calls below
+ YAZ_CHECK(rec_bee_fish);
+ record_add_metadata(nmem, rec_bee_fish, service, "title", data_bee);
+ record_assign_sortkey(nmem, rec_bee_fish, service, "relevance", data_bee);
+ record_assign_sortkey(nmem, rec_bee_fish, service, "title", data_bee);
+ record_add_metadata(nmem, rec_bee_fish, service, "title", data_fish);
+
+ // Record inserted under merge key "fish bee": "title" metadata "fish" and
+ // "bee"; the "relevance" and "title" sort keys are both set from data_fish.
+ const char *mk_fish_bee = "fish bee";
+ struct record *rec_fish_bee
+ = record_create(nmem, service->num_metadata, service->num_sortkeys);
+ // check the freshly created record before it is handed to the calls below
+ YAZ_CHECK(rec_fish_bee);
+ record_add_metadata(nmem, rec_fish_bee, service, "title", data_fish);
+ record_assign_sortkey(nmem, rec_fish_bee, service, "relevance", data_fish);
+ record_assign_sortkey(nmem, rec_fish_bee, service, "title", data_fish);
+ record_add_metadata(nmem, rec_fish_bee, service, "title", data_bee);
- union data_types data_num;
- data_num.number.min = 2;
- data_num.number.max = 5;
+ // Record inserted under merge key "zebra bee": "title" metadata "zebra" and
+ // "bee"; the "relevance" and "title" sort keys are both set from data_zebra.
+ const char *mk_zebra_bee = "zebra bee";
+ struct record *rec_zebra_bee
+ = record_create(nmem, service->num_metadata, service->num_sortkeys);
+ // check the freshly created record before it is handed to the calls below
+ YAZ_CHECK(rec_zebra_bee);
+ record_add_metadata(nmem, rec_zebra_bee, service, "title", data_zebra);
+ record_assign_sortkey(nmem, rec_zebra_bee, service, "relevance", data_zebra);
+ record_assign_sortkey(nmem, rec_zebra_bee, service, "title", data_zebra);
+ record_add_metadata(nmem, rec_zebra_bee, service, "title", data_bee);
- struct record_metadata * tmp_md = 0;
- tmp_md = record_metadata_insert(nmem, &(record->metadata[0]), data_text);
- YAZ_CHECK(tmp_md);
- tmp_md = record_metadata_insert(nmem, &tmp_md, data_text);
- YAZ_CHECK(tmp_md);
+
+ struct reclist *list = 0;
+ list = reclist_create(nmem, no_recs);
+ YAZ_CHECK(list);
- YAZ_CHECK(record_add_metadata_field_id(nmem, record, 3, data_num));
- YAZ_CHECK(record_add_metadata_field_id(nmem, record, 3, data_num));
+ int no_merged = 0;
- YAZ_CHECK(record_add_metadata(nmem, record, service, "author", data_text));
- YAZ_CHECK(record_add_metadata(nmem, record, service, "author", data_text));
+ const char * queryterms[] =
+ {"ape", "fish", 0};
+ // {"ål", "økologi", "æble", 0};
- YAZ_CHECK(record_assign_sortkey_field_id(nmem, record, 0, data_text));
- YAZ_CHECK(record_assign_sortkey_field_id(nmem, record, 1, data_text));
- YAZ_CHECK(record_assign_sortkey_field_id(nmem, record, 2, data_num));
- YAZ_CHECK(record_assign_sortkey(nmem, record, service, "relevance", data_text));
- YAZ_CHECK(record_assign_sortkey(nmem, record, service, "title", data_text));
- YAZ_CHECK(record_assign_sortkey(nmem, record, service, "year", data_num));
+ struct relevance *rel = 0;
+ rel = relevance_create(nmem, queryterms, no_recs);
+ YAZ_CHECK(rel);
+
+ struct record_cluster *cluster = 0;
-
-
+ // insert records into recordlist and get clusters
+ // since the merge keys differ, each record is expected to end up in its own cluster
+ cluster
+ = reclist_insert(list, service, rec_ape_fish, mk_ape_fish, &no_merged);
+ YAZ_CHECK(cluster);
+ relevance_newrec(rel, cluster);
+ cluster
+ = reclist_insert(list, service, rec_bee_fish, mk_bee_fish, &no_merged);
+ YAZ_CHECK(cluster);
+ relevance_newrec(rel, cluster);
+ cluster
+ = reclist_insert(list, service, rec_fish_bee, mk_fish_bee, &no_merged);
+ YAZ_CHECK(cluster);
+ relevance_newrec(rel, cluster);
- // now we need to put some actual data into the record ... how ??
- // there is a hell of a lot spagetti code in logic.c ingest_record()
- // line 694 ff snippet from there:
- // this code needs to be analyzed and the fundamental stuff extracted
+ cluster
+ = reclist_insert(list, service, rec_zebra_bee, mk_zebra_bee, &no_merged);
+ YAZ_CHECK(cluster);
+ relevance_newrec(rel, cluster);
-#if 0
- service->metadata[imeta].name;
- md = &service->metadata[imeta];
- if (md->sortkey_offset >= 0)
- sk = &service->sortkeys[md->sortkey_offset];
-
- // Find out where we are putting it if (md->merge == Metadata_merge_no)
- wheretoput = &res->metadata[imeta];
- else
- wheretoput = &cluster->metadata[imeta];
-
-
- // Put it there
- newm = nmem_malloc(se->nmem, sizeof(struct record_metadata));
- newm->next = 0;
- if (md->type == Metadata_type_generic)
- {
- char *p, *pe;
- for (p = (char *) value; *p && isspace(*p); p++)
- ;
- for (pe = p + strlen(p) - 1;
- pe > p && strchr(" ,/.:([", *pe); pe--)
- *pe = '\0';
- newm->data.text = nmem_strdup(se->nmem, p);
-
- }
- else if (md->type == Metadata_type_year)
- {
- if (extract_years((char *) value, &first, &last) < 0)
- continue;
- }
- else
- {
- yaz_log(YLOG_WARN, "Unknown type in metadata element %s", type);
- continue;
- }
- if (md->type == Metadata_type_year && md->merge != Metadata_merge_range)
- {
- yaz_log(YLOG_WARN, "Only range merging supported for years");
- continue;
- }
- if (md->merge == Metadata_merge_unique)
- {
- struct record_metadata *mnode;
- for (mnode = *wheretoput; mnode; mnode = mnode->next)
- if (!strcmp((const char *) mnode->data.text, newm->data.text))
- break;
- if (!mnode)
- {
- newm->next = *wheretoput;
- *wheretoput = newm;
- }
- }
- else if (md->merge == Metadata_merge_longest)
- {
- if (!*wheretoput ||
- strlen(newm->data.text) > strlen((*wheretoput)->data.text))
- {
- *wheretoput = newm;
- if (sk)
- {
- char *s = nmem_strdup(se->nmem, newm->data.text);
- if (!cluster->sortkeys[md->sortkey_offset])
- cluster->sortkeys[md->sortkey_offset] =
- nmem_malloc(se->nmem, sizeof(union data_types));
- normalize_mergekey(s,
- (sk->type == Metadata_sortkey_skiparticle));
- cluster->sortkeys[md->sortkey_offset]->text = s;
- }
- }
- }
- else if (md->merge == Metadata_merge_all || md->merge == Metadata_merge_no)
- {
- newm->next = *wheretoput;
- *wheretoput = newm;
- }
- else if (md->merge == Metadata_merge_range)
- {
- assert(md->type == Metadata_type_year);
- if (!*wheretoput)
- {
- *wheretoput = newm;
- (*wheretoput)->data.number.min = first;
- (*wheretoput)->data.number.max = last;
- if (sk)
- cluster->sortkeys[md->sortkey_offset] = &newm->data;
- }
- else
- {
- if (first < (*wheretoput)->data.number.min)
- (*wheretoput)->data.number.min = first;
- if (last > (*wheretoput)->data.number.max)
- (*wheretoput)->data.number.max = last;
- }
- if (md->rank)
- relevance_countwords(se->relevance, cluster,
- (char *) value, md->rank);
- if (md->termlist)
- {
- if (md->type == Metadata_type_year)
- {
- char year[64];
- sprintf(year, "%d", last);
- add_facet(se, (char *) type, year);
- if (first != last)
- {
- sprintf(year, "%d", first);
- add_facet(se, (char *) type, year);
- }
- }
- else
- add_facet(se, (char *) type, (char *) value);
- }
-#endif
+ YAZ_CHECK(no_recs == no_merged);
- //mergekey_norm = (xmlChar *) nmem_strdup(se->nmem, (char*) mergekey);
- //normalize_mergekey((char *) mergekey_norm, 0);
+ // now sorting according to sorting criteria, here ascending title
+ struct reclist_sortparms *sort_parms = 0;
+ reclist_sortparms_insert(nmem, &sort_parms, service, "title", 1);
+ //reclist_sortparms_insert(nmem, &sort_parms, service, "relevance", 1);
+
+ // TODO: reclist_sort() crashes with a segmentation fault here; left disabled until the cause is traced
+ //reclist_sort(list, sort_parms);
+
+
-#if 0
- // insert one record into recordlist/cluster - what's a cluster, exactly??
- cluster = reclist_insert(list, service, record, (char *)mergekey, &total);
- relevance_newrec(rel, cluster);
-#endif
+
+ //mergekey_norm = (xmlChar *) nmem_strdup(se->nmem, (char*) mergekey);
+ //normalize_mergekey((char *) mergekey_norm, 0);
YAZ_CHECK_LOG();
- test_relevance(argc, argv);
+ test_relevance_7bit(argc, argv);
YAZ_CHECK_TERM;