Clear relevancy info when result set is cleared
author Adam Dickmeiss <adam@indexdata.dk>
Mon, 6 May 2013 14:00:04 +0000 (16:00 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Mon, 6 May 2013 14:00:04 +0000 (16:00 +0200)
src/relevance.c
src/relevance.h
src/session.c
test/test_http_89.res
test/test_http_90.res

index b08d217..0551980 100644 (file)
@@ -215,6 +215,15 @@ static void pull_terms(struct relevance *res, struct ccl_rpn_node *n)
         break;
     }
 }
+void relevance_clear(struct relevance *r)
+{
+    if (r)
+    {
+        int i;
+        for (i = 0; i < r->vec_len; i++)
+            r->doc_frequency_vec[i] = 0;
+    }
+}
 
 struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
                                        struct ccl_rpn_node *query,
@@ -224,7 +233,6 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
 {
     NMEM nmem = nmem_create();
     struct relevance *res = nmem_malloc(nmem, sizeof(*res));
-    int i;
 
     res->nmem = nmem;
     res->entries = 0;
@@ -238,8 +246,6 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
     pull_terms(res, query);
 
     res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int));
-    for (i = 0; i < res->vec_len; i++)
-        res->doc_frequency_vec[i] = 0;
 
     // worker array
     res->term_frequency_vec_tmp =
@@ -249,6 +255,7 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
     res->term_pos =
         nmem_malloc(res->nmem, res->vec_len * sizeof(*res->term_pos));
 
+    relevance_clear(res);
     return res;
 }
 
index 1337b60..5a095eb 100644 (file)
@@ -32,6 +32,7 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
                                        struct ccl_rpn_node *query,
                                        int rank_cluster, double follow_factor,
                                        double lead_decay, int length_divide);
+void relevance_clear(struct relevance *r);
 void relevance_destroy(struct relevance **rp);
 void relevance_newrec(struct relevance *r, struct record_cluster *cluster);
 void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
index 187ae38..400dd04 100644 (file)
@@ -631,6 +631,8 @@ static void session_clear_set(struct session *se, struct reclist_sortparms *sp)
     se->total_records = se->total_merged = 0;
     se->num_termlists = 0;
 
+    relevance_clear(se->relevance);
+
     /* reset list of sorted results and clear to relevance search */
     se->sorted_results = nmem_malloc(se->nmem, sizeof(*se->sorted_results));
     se->sorted_results->name = nmem_strdup(se->nmem, sp->name);
index 9c542be..ec81a10 100644 (file)
@@ -75,7 +75,7 @@
   <md-subjects>PAZPAR2_NULL_c</md-subjects>
  </location>
  <count>4</count>
- <relevance>43178</relevance>
+ <relevance>41119</relevance>
  <relevance_info>
 field=title content=Computer science &amp;amp; technology :;
 computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0)));
@@ -90,9 +90,9 @@ field=title content=A plan for community college computer development.;
 computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(5)));
 computer: tf[1] += w[1](6) / length(7) (4.314286);
 relevance = 0;
-idf[1] = log(((1 + total(20))/termoccur(19));
-computer: relevance += 100000 * tf[1](4.314286) * idf[1](0.100083) (43178);
-score = relevance(43178);
+idf[1] = log(((1 + total(10))/termoccur(10));
+computer: relevance += 100000 * tf[1](4.314286) * idf[1](0.095310) (41119);
+score = relevance(41119);
  </relevance_info>
  <recid>content: date</recid>
 </hit>
@@ -176,7 +176,7 @@ score = relevance(43178);
   <md-subjects>PAZPAR2_NULL_c</md-subjects>
  </location>
  <count>4</count>
- <relevance>34361</relevance>
+ <relevance>32723</relevance>
  <relevance_info>
 field=title content=Computer processing of dynamic images from an Ang ...;
 computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0)));
@@ -188,9 +188,9 @@ field=title-remainder content=a portfolio of thematic computer maps /;
 computer: w[1] += w(5) / (1+log2(1+lead_decay(0.000000) * length(4)));
 computer: tf[1] += w[1](5) / length(6) (3.433333);
 relevance = 0;
-idf[1] = log(((1 + total(20))/termoccur(19));
-computer: relevance += 100000 * tf[1](3.433333) * idf[1](0.100083) (34361);
-score = relevance(34361);
+idf[1] = log(((1 + total(10))/termoccur(10));
+computer: relevance += 100000 * tf[1](3.433333) * idf[1](0.095310) (32723);
+score = relevance(32723);
  </relevance_info>
  <recid>content: date c</recid>
 </hit>
@@ -206,15 +206,15 @@ score = relevance(34361);
         YYYYYYYYY</md-test-usersetting-2>
  </location>
  <count>1</count>
- <relevance>12010</relevance>
+ <relevance>11437</relevance>
  <relevance_info>
 field=title content=How to program a computer;
 computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
 computer: tf[1] += w[1](6) / length(5) (1.200000);
 relevance = 0;
-idf[1] = log(((1 + total(20))/termoccur(19));
-computer: relevance += 100000 * tf[1](1.200000) * idf[1](0.100083) (12010);
-score = relevance(12010);
+idf[1] = log(((1 + total(10))/termoccur(10));
+computer: relevance += 100000 * tf[1](1.200000) * idf[1](0.095310) (11437);
+score = relevance(11437);
  </relevance_info>
  <recid>position: z3950.indexdata.com/marc-1</recid>
 </hit>
@@ -230,15 +230,15 @@ score = relevance(12010);
         YYYYYYYYY</md-test-usersetting-2>
  </location>
  <count>1</count>
- <relevance>12010</relevance>
+ <relevance>11437</relevance>
  <relevance_info>
 field=title content=How to program a computer;
 computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
 computer: tf[1] += w[1](6) / length(5) (1.200000);
 relevance = 0;
-idf[1] = log(((1 + total(20))/termoccur(19));
-computer: relevance += 100000 * tf[1](1.200000) * idf[1](0.100083) (12010);
-score = relevance(12010);
+idf[1] = log(((1 + total(10))/termoccur(10));
+computer: relevance += 100000 * tf[1](1.200000) * idf[1](0.095310) (11437);
+score = relevance(11437);
  </relevance_info>
  <recid>position: z3950.indexdata.com/marc-2</recid>
 </hit>
index eee953c..837a9ff 100644 (file)
@@ -25,7 +25,7 @@
         YYYYYYYYY</md-test-usersetting-2>
  </location>
  <count>2</count>
- <relevance>24427</relevance>
+ <relevance>48160</relevance>
  <relevance_info>
 field=title content=How to program a computer;
 computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
@@ -34,9 +34,9 @@ field=title content=How to program a computer;
 computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
 computer: tf[1] += w[1](6) / length(5) (2.400000);
 relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](2.400000) * idf[1](0.101783) (24427);
-score = relevance(24427);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](2.400000) * idf[1](0.200671) (48160);
+score = relevance(48160);
  </relevance_info>
  <recid>content: title how to program a computer author jack collins medium book</recid>
 </hit>
@@ -59,15 +59,15 @@ score = relevance(24427);
   <md-subjects>PAZPAR2_NULL_c</md-subjects>
  </location>
  <count>1</count>
- <relevance>20356</relevance>
+ <relevance>40134</relevance>
  <relevance_info>
 field=title content=Computer science &amp;amp; technology :;
 computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0)));
 computer: tf[1] += w[1](6) / length(3) (2.000000);
 relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](2.000000) * idf[1](0.101783) (20356);
-score = relevance(20356);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](2.000000) * idf[1](0.200671) (40134);
+score = relevance(40134);
  </relevance_info>
  <recid>content: title computer science technology author medium book</recid>
 </hit>
@@ -90,15 +90,15 @@ score = relevance(20356);
         YYYYYYYYY</md-test-usersetting-2>
  </location>
  <count>1</count>
- <relevance>20356</relevance>
+ <relevance>40134</relevance>
  <relevance_info>
 field=title content=The Computer Bible /;
 computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(1)));
 computer: tf[1] += w[1](6) / length(3) (2.000000);
 relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](2.000000) * idf[1](0.101783) (20356);
-score = relevance(20356);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](2.000000) * idf[1](0.200671) (40134);
+score = relevance(40134);
  </relevance_info>
  <recid>content: title the computer bible author medium book</recid>
 </hit>
@@ -126,15 +126,15 @@ score = relevance(20356);
   <md-subjects>PAZPAR2_NULL_c</md-subjects>
  </location>
  <count>1</count>
- <relevance>8724</relevance>
+ <relevance>17200</relevance>
  <relevance_info>
 field=title content=A plan for community college computer development.;
 computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(5)));
 computer: tf[1] += w[1](6) / length(7) (0.857143);
 relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](0.857143) * idf[1](0.101783) (8724);
-score = relevance(8724);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](0.857143) * idf[1](0.200671) (17200);
+score = relevance(17200);
  </relevance_info>
  <recid>content: title a plan for community college computer development author medium book</recid>
 </hit>
@@ -161,15 +161,15 @@ score = relevance(8724);
   <md-subjects>PAZPAR2_NULL_c</md-subjects>
  </location>
  <count>1</count>
- <relevance>8724</relevance>
+ <relevance>17200</relevance>
  <relevance_info>
 field=title content=Washington metropolitan area rail computer feasib ...;
 computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
 computer: tf[1] += w[1](6) / length(7) (0.857143);
 relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](0.857143) * idf[1](0.101783) (8724);
-score = relevance(8724);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](0.857143) * idf[1](0.200671) (17200);
+score = relevance(17200);
  </relevance_info>
  <recid>content: title washington metropolitan area rail computer feasibility study author englund carl r medium book</recid>
 </hit>
@@ -199,15 +199,15 @@ score = relevance(8724);
   <md-subjects>PAZPAR2_NULL_c</md-subjects>
  </location>
  <count>1</count>
- <relevance>8481</relevance>
+ <relevance>16722</relevance>
  <relevance_info>
 field=title-remainder content=a portfolio of thematic computer maps /;
 computer: w[1] += w(5) / (1+log2(1+lead_decay(0.000000) * length(4)));
 computer: tf[1] += w[1](5) / length(6) (0.833333);
 relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](0.833333) * idf[1](0.101783) (8481);
-score = relevance(8481);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](0.833333) * idf[1](0.200671) (16722);
+score = relevance(16722);
  </relevance_info>
  <recid>content: title the puget sound region author mairs john w medium book</recid>
 </hit>
@@ -242,15 +242,15 @@ score = relevance(8481);
   <md-subjects>PAZPAR2_NULL_c</md-subjects>
  </location>
  <count>1</count>
- <relevance>6106</relevance>
+ <relevance>12040</relevance>
  <relevance_info>
 field=title content=Computer processing of dynamic images from an Ang ...;
 computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0)));
 computer: tf[1] += w[1](6) / length(10) (0.600000);
 relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](0.600000) * idf[1](0.101783) (6106);
-score = relevance(6106);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](0.600000) * idf[1](0.200671) (12040);
+score = relevance(12040);
  </relevance_info>
  <recid>content: title computer processing of dynamic images from an anger scintillation camera author medium book</recid>
 </hit>
@@ -273,15 +273,15 @@ score = relevance(6106);
   <md-subjects>PAZPAR2_NULL_c</md-subjects>
  </location>
  <count>1</count>
- <relevance>6106</relevance>
+ <relevance>12040</relevance>
  <relevance_info>
 field=title content=The use of passwords for controlled access to com ...;
 computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(8)));
 computer: tf[1] += w[1](6) / length(10) (0.600000);
 relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](0.600000) * idf[1](0.101783) (6106);
-score = relevance(6106);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](0.600000) * idf[1](0.200671) (12040);
+score = relevance(12040);
  </relevance_info>
  <recid>content: title the use of passwords for controlled access to computer resources author wood helen m medium book</recid>
 </hit>
@@ -309,8 +309,8 @@ score = relevance(6106);
  <relevance>0</relevance>
  <relevance_info>
 relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](0.000000) * idf[1](0.101783) (0);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](0.000000) * idf[1](0.200671) (0);
 score = relevance(0);
  </relevance_info>
  <recid>content: title reconstruction tomography in diagnostic radiology and nuclear medicine author medium book</recid>