return "";
}
+// Fold one finished cluster into the relevance statistics.
+//
+// Picks a representative record from the cluster, adds that record's
+// "score" field to the normalizing entry that findnorm() returns for the
+// record's client (count, sum and max), and bumps the document-frequency
+// vector: slot i (i >= 1) for every term whose frequency in the cluster is
+// positive, and slot 0 once per cluster.
+//
+// r        relevance state being updated
+// cluster  cluster whose records and term frequencies are read
+void relevance_donerecord(struct relevance *r, struct record_cluster *cluster)
+{
+    int i;
+    struct record *bestrecord = 0;
+    struct record *record;
+
+    // NOTE(review): the stated intent is "the record with the lowest
+    // position" (in this proto; later, find a better one), but this
+    // comparison replaces bestrecord whenever a later record has a HIGHER
+    // position. relevance_prepare_read() selects its record with the same
+    // comparison, so the two must be changed together; the selection is
+    // deliberately left unchanged here.
+    for (record = cluster->records; record; record = record->next)
+    {
+        if (bestrecord == 0 || bestrecord->position < record->position)
+            bestrecord = record;
+    }
+
+    // Guard against an empty cluster: there is no record to read a client
+    // or a score from, so skip the score statistics but still count the
+    // cluster in the document frequencies below.
+    if (bestrecord)
+    {
+        struct normalizing *n = findnorm(r, bestrecord->client);
+        float score;
+
+        n->count++;
+        score = atof(getfield(bestrecord, "score"));
+        n->sum += score;
+        if (n->max < score)
+            n->max = score;
+    }
+
+    // Slot 0 is handled separately: it is incremented once per cluster.
+    for (i = 1; i < r->vec_len; i++)
+    {
+        if (cluster->term_frequency_vec[i] > 0)
+            r->doc_frequency_vec[i]++;
+    }
+    r->doc_frequency_vec[0]++;
+}
+
+
+ // qsort comparison callback for floats: yields 1, -1 or 0 so that qsort orders ascending
+ static int sort_float(const void *x, const void *y)
+ {
+ const float *fx = x;
+ const float *fy = y;
- return *fx - *fy;
++ //yaz_log(YLOG_LOG,"sorting %f and %f", *fx, *fy); // ###
++ if ( *fx > *fy )
++ return 1;
++ if ( *fx < *fy )
++ return -1;
++ return 0; // do not return *fx-*fy: converted to int, any difference smaller than 1 truncates to 0
+ }
+
// Prepare for a relevance-sorted read
void relevance_prepare_read(struct relevance *rel, struct reclist *reclist,
enum conf_sortkey_type type)
int i;
float *idfvec = xmalloc(rel->vec_len * sizeof(float));
int n_clients = clients_count();
- struct client * clients[n_clients];
+ int clusternumber = 0;
+ yaz_log(YLOG_LOG,"round-robin: have %d clients", n_clients);
- for (i = 0; i < n_clients; i++)
- clients[i] = 0;
-
reclist_enter(reclist);
// Calculate document frequency vector for each term.
// get the log entries
if (type == Metadata_sortkey_relevance_h) {
struct record *record;
- int thisclient = 0;
+ struct normalizing *norm;
struct record *bestrecord = 0;
int nclust = 0;
- int tfrel = relevance; // keep the old tf/idf score;
- int robinscore;
- int solrscore;
+ int tfrel = relevance; // keep the old tf/idf score
+ int robinscore = 0;
+ int solrscore = 0;
+ int normscore;
+ const char *score;
+ const char *id;
+ const char *title;
+ char idbuf[64];
+ int mergescore = 0;
// Find the best record in a cluster - the one with lowest position
for (record = rec->records; record; record = record->next) {
if ( bestrecord == 0 || bestrecord->position < record->position )
bestrecord = record;
nclust++; // and count them all, for logging
}
- // find the client number for the record (we only have a pointer
- while ( clients[thisclient] != 0
- && clients[thisclient] != bestrecord->client )
- thisclient++;
- if ( clients[thisclient] == 0 )
- {
- yaz_log(YLOG_LOG,"round-robin: found new client at %d: p=%p\n", thisclient, bestrecord->client);
- clients[thisclient] = bestrecord->client;
- }
+ norm = findnorm(rel, bestrecord->client);
// Calculate a round-robin score
- robinscore = -(bestrecord->position * n_clients + thisclient) ;
+ robinscore = -(bestrecord->position * n_clients + norm->num) ;
wrbuf_printf(w,"round-robin score: pos=%d client=%d ncl=%d tfscore=%d score=%d\n",
- bestrecord->position, thisclient, nclust, tfrel, relevance );
+ bestrecord->position, norm->num, nclust, tfrel, relevance );
yaz_log(YLOG_LOG,"round-robin score: pos=%d client=%d ncl=%d score=%d",
- bestrecord->position, thisclient, nclust, relevance );
+ bestrecord->position, norm->num, nclust, relevance );
// Check if the record has a score field
+ score = getfield(bestrecord,"score");
++ id = getfield(bestrecord, "id");
++ title = getfield(bestrecord, "title");
+ solrscore = 10000.0 * atof(score);
-
++ // clear the id, we only want the first numerical part
++ i=0;
++ while( id[i] >= '0' && id[i] <= '9' ) {
++ idbuf[i] = id[i];
++ i++;
++ }
++ idbuf[i] = '\0';
++ if ( norm->count )
+ {
- const char *score = getfield(bestrecord,"score");
- const char *id = getfield(bestrecord, "id");
- const char *title = getfield(bestrecord, "title");
- // clear the id, we only want the first numerical part
- char idbuf[64];
- solrscore = 10000.0 * atof(score);
- i=0;
- while( id[i] >= '0' && id[i] <= '9' ) {
- idbuf[i] = id[i];
- i++;
++ //float avg = norm->sum / norm->count;
++ normscore = 10000.0 * ( atof(score) / norm->max );
++ wrbuf_printf(w, "normscore: score(%s) / max(%f) *10000 = %d\n",
++ score, norm->max, normscore);
++ } else
++ yaz_log(YLOG_LOG, "normscore: no count, can not normalize %s ", score );
++
+ // If we have a score in the best record, we probably have in them all
+ // and we can try to merge scores
+ if ( *score ) {
+ float scores[nclust];
+ float s = 0.0;
+ int i=0;
- for (record = rec->records; record; record = record->next, i++) {
- scores[i] = atof( getfield(record,"score") );
- yaz_log(YLOG_LOG,"mergescore %d: %f", i, scores[i] );
- wrbuf_printf(w,"mergeplot %d: %f x\n", clusternumber, 10000*scores[i] );
++ if ( rec->records && rec->records->next )
++ { // have more than one record
++ for (record = rec->records; record; record = record->next, i++)
++ {
++ scores[i] = atof( getfield(record,"score") );
++ yaz_log(YLOG_LOG,"mergescore %d: %f", i, scores[i] );
++ wrbuf_printf(w,"mergeplot %d: %f x\n", clusternumber, 10000*scores[i] );
++ }
++ qsort(scores, nclust, sizeof(float), sort_float );
++ for (i = 0; i<nclust; i++)
++ {
++ yaz_log(YLOG_LOG,"Sorted mergescore %d: %f + %f/%d = %f", i, s,scores[i],i+1, s+scores[i] / (i+1) );
++ wrbuf_printf(w,"Sorted mergescore %d: %f + %f/%d = %f\n", i, s,scores[i],i+1, s+scores[i] / (i+1));
++ s += scores[i] / (i+1);
++ }
++ mergescore = s * 10000;
}
- idbuf[i] = '\0';
- if ( norm->count )
- {
- float avg = norm->sum / norm->count;
- normscore = 10000.0 * ( atof(score) / norm->max );
- wrbuf_printf(w, "normscore: score(%s) / max(%f) *10000 = %d\n",
- score, norm->max, normscore);
- } else
- yaz_log(YLOG_LOG, "normscore: no count, can not normalize %s ", score );
-
- wrbuf_printf(w,"plotline: %d %d %d %d %d %d # %s %s\n",
- norm->num, bestrecord->position,
- tfrel, robinscore, solrscore, normscore, idbuf, title );
- qsort(scores, nclust, sizeof(float), sort_float );
- for (i = 0; i<nclust; i++) {
- s += scores[i] / (i+1);
- yaz_log(YLOG_LOG,"Sorted mergescore %d: %f makes %f", i, scores[i], s );
- wrbuf_printf(w,"Sorted mergescore %d: %f makes %f\n", i, scores[i], s );
++ else
++ { // only one record, take the easy way out of merging
++ mergescore = atof( score ) * 10000;
+ }
- mergescore = s * 10000;
+ wrbuf_printf(w,"mergeplot %d: x %d \n", clusternumber, mergescore );
+ // TODO - Should not use bestrecord->position, but something from rec that
+ // corresponds to the hit number, for plotting.
+ } // merge score
+ id = getfield(bestrecord, "id");
+ // clear the id, we only want the first numerical part
+ i=0;
+ while( id[i] >= '0' && id[i] <= '9' ) {
+ idbuf[i] = id[i];
+ i++;
}
- relevance = normscore; // ###
+ idbuf[i] = '\0';
+
+ title = getfield(bestrecord, "title");
- wrbuf_printf(w,"plotline: %d %d %d %d %d %d # %s %s\n",
- thisclient, bestrecord->position,
- tfrel, robinscore, solrscore, mergescore, idbuf, title );
++ wrbuf_printf(w,"plotline: %d %d %d %d %d %d %d # %s %s\n",
++ norm->num, bestrecord->position,
++ tfrel, robinscore, solrscore, normscore, mergescore, idbuf, title );
+ relevance = mergescore;
}
rec->relevance_score = relevance;
}