struct reclist_sortparms *sort_parms;
};
+static void append_merge_keys(struct record_metadata_attr **p, /* in/out: head slot of the destination list; copies of a's nodes are appended at its tail */
+ struct record_metadata_attr *a, /* source list; only read here, never modified */
+ NMEM nmem) /* memory handle that every new node and string is allocated from */
+{
+ while (*p) /* skip existing nodes so p ends up on the terminating link */
+ p = &(*p)->next;
+ for (; a; a = a->next) /* one freshly allocated copy per source node, in source order */
+ {
+ *p = (struct record_metadata_attr *) nmem_malloc(nmem, sizeof(**p));
+ (*p)->name = nmem_strdup_null(nmem, a->name); /* presumably yields NULL for a NULL name - confirm against the YAZ nmem API */
+ (*p)->value = nmem_strdup_null(nmem, a->value);
+ p = &(*p)->next; /* next is not initialized yet; it is written by the following iteration or by the final store below */
+ }
+ *p = 0; /* terminate the extended list */
+}
+
struct reclist_sortparms *reclist_parse_sortparms(NMEM nmem, const char *parms,
struct conf_service *service)
{
struct record_cluster *reclist_insert(struct reclist *l,
struct conf_service *service,
struct record *record,
- const char *merge_key, int *total)
+ struct record_metadata_attr *merge_keys,
+ int *total)
{
- unsigned int bucket;
- struct reclist_bucket **p;
struct record_cluster *cluster = 0;
+ struct record_metadata_attr *mkl = merge_keys;
+ struct reclist_bucket **p;
assert(service);
assert(l);
assert(record);
- assert(merge_key);
+ assert(merge_keys);
assert(total);
- bucket = jenkins_hash((unsigned char*) merge_key) % l->hash_size;
-
yaz_mutex_enter(l->mutex);
- for (p = &l->hashtable[bucket]; *p; p = &(*p)->hash_next)
+
+ for (; mkl; mkl = mkl->next)
{
- // We found a matching record. Merge them
- if (!strcmp(merge_key, (*p)->record->merge_key))
- {
- struct record **re;
+ const char *merge_key = mkl->value;
+ unsigned int bucket =
+ jenkins_hash((unsigned char*) merge_key) % l->hash_size;
- cluster = (*p)->record;
- for (re = &cluster->records; *re; re = &(*re)->next)
+ for (p = &l->hashtable[bucket]; *p; p = &(*p)->hash_next)
+ {
+ struct record_metadata_attr *mkr = (*p)->record->merge_keys;
+ for (; mkr; mkr = mkr->next)
{
- if ((*re)->client == record->client &&
- record_compare(record, *re, service))
+ // We found a matching record. Merge them
+ if (!strcmp(merge_key, mkr->value))
{
- yaz_mutex_leave(l->mutex);
- return 0;
+ struct record **re;
+
+ cluster = (*p)->record;
+ for (re = &cluster->records; *re; re = &(*re)->next)
+ {
+ if ((*re)->client == record->client &&
+ record_compare(record, *re, service))
+ {
+ yaz_mutex_leave(l->mutex);
+ return 0;
+ }
+ }
+ *re = record;
+ record->next = 0;
+ goto out;
}
}
- *re = record;
- record->next = 0;
- break;
}
}
+out:
if (!cluster)
{
struct reclist_bucket *new =
new->record = cluster;
new->hash_next = 0;
cluster->records = record;
- cluster->merge_key = nmem_strdup(l->nmem, merge_key);
+
+ cluster->merge_keys = 0;
+ append_merge_keys(&cluster->merge_keys, merge_keys, l->nmem);
+
cluster->relevance_score = 0;
cluster->term_frequency_vec = 0;
- cluster->recid = nmem_strdup(l->nmem, merge_key);
+ cluster->recid = merge_keys->value;
(*total)++;
cluster->metadata =
nmem_malloc(l->nmem,
xmlDoc *xdoc,
xmlNode *root,
int record_no,
- const char *mergekey_norm);
+ struct record_metadata_attr *mergekey);
static int ingest_sub_record(struct client *cl, xmlDoc *xdoc, xmlNode *root,
int record_no, NMEM nmem,
struct session_database *sdb,
- const char **mergekey_norm)
+ struct record_metadata_attr *mergekeys)
{
int ret = 0;
struct session *se = client_get_session(cl);
record_no, sdb->database->id);
return 0;
}
- if (!*mergekey_norm)
- {
- *mergekey_norm = get_mergekey(xdoc, root, cl, record_no, service, nmem,
- se->mergekey);
- }
- if (!*mergekey_norm)
- {
- session_log(se, YLOG_WARN, "Got no mergekey for record no %d from %s",
- record_no, sdb->database->id);
- return -1;
- }
session_enter(se, "ingest_sub_record");
if (client_get_session(cl) == se && se->relevance)
- ret = ingest_to_cluster(cl, xdoc, root, record_no, *mergekey_norm);
+ ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekeys);
session_leave(se, "ingest_sub_record");
return ret;
xmlDoc *xdoc = normalize_record(se, sdb, service, rec, nmem);
int r = 0;
xmlNode *root;
- const char *mergekey_norm = 0;
if (!xdoc)
return -1;
if (!strcmp((const char *) root->name, "cluster"))
{
- for (root = root->children; root; root = root->next)
- if (root->type == XML_ELEMENT_NODE)
+ xmlNode *sroot;
+ for (sroot = root->children; sroot; sroot = sroot->next)
+ if (sroot->type == XML_ELEMENT_NODE)
{
- r = ingest_sub_record(cl, xdoc, root, record_no, nmem, sdb,
- &mergekey_norm);
+ const char *mergekey_norm =
+ get_mergekey(xdoc, sroot, cl, record_no, service, nmem,
+ se->mergekey);
+
+ struct record_metadata_attr *mk = (struct record_metadata_attr*)
+ nmem_malloc(nmem, sizeof(*mk));
+ mk->name = 0;
+ mk->value = nmem_strdup(nmem, mergekey_norm);
+ mk->next = 0;
+
+ r = ingest_sub_record(cl, xdoc, sroot, record_no, nmem, sdb,
+ mk);
if (r)
break;
}
}
else if (!strcmp((const char *) root->name, "record"))
{
- r = ingest_sub_record(cl, xdoc, root, record_no, nmem, sdb,
- &mergekey_norm);
+ const char *mergekey_norm =
+ get_mergekey(xdoc, root, cl, record_no, service, nmem,
+ se->mergekey);
+ if (mergekey_norm)
+ {
+ struct record_metadata_attr *mk = (struct record_metadata_attr*)
+ nmem_malloc(nmem, sizeof(*mk));
+ mk->name = 0;
+ mk->value = nmem_strdup(nmem, mergekey_norm);
+ mk->next = 0;
+
+ r = ingest_sub_record(cl, xdoc, root, record_no, nmem, sdb, mk);
+ }
}
else
{
xmlDoc *xdoc,
xmlNode *root,
int record_no,
- const char *mergekey_norm)
+ struct record_metadata_attr *merge_keys)
{
xmlNode *n;
xmlChar *type = 0;
return -2;
}
cluster = reclist_insert(se->reclist, service, record,
- mergekey_norm, &se->total_merged);
+ merge_keys, &se->total_merged);
if (!cluster)
return 0; // complete match with existing record