Merge scores, also for DBC output
author Heikki Levanto <heikki@indexdata.dk>
Wed, 4 Dec 2013 14:15:51 +0000 (15:15 +0100)
committer Heikki Levanto <heikki@indexdata.dk>
Wed, 4 Dec 2013 14:15:51 +0000 (15:15 +0100)
heikki/dbc-os/test2.cfg
heikki/dbc-os/test2.sh
heikki/solr/test3.sh
src/relevance.c

index 86540f5..6523dd5 100644 (file)
         <metadata name="available"/>
         <metadata name="due"/>
         <metadata name="thumburl" brief="yes" merge="unique"/>
+        <metadata name="score" brief="yes" />
 
     </service>
 
index bb96855..ec9f381 100755 (executable)
@@ -27,7 +27,7 @@ fi
 PIDFILE=pz2.pid
 
 # Start the gateway.
-  ./dbc-opensearch-gw.pl -1 \
+  ../../../dbc-opensearch-gw/dbc-opensearch-gw.pl -1 \
       -c dbc-opensearch-gw.cfg \
       -l dbc-opensearch-gw.log \
       @:9994 &
@@ -98,6 +98,22 @@ grep "round-robin" show.out |
   sed 's/[^0-9 ]//g' |
   awk '{print FNR,$0}'> $DF.data
 
+grep mergeplot show.out > merge.tmp
+LINENUMBER="1"
+LAST=""
+echo "0 0 0" > merge.data
+for lno in `cat merge.tmp | cut -d ' ' -f2`
+do
+  if [ "$lno" != "$LAST" ]
+  then
+    echo "Found line $lno at $LINENUMBER"
+    grep "mergeplot $lno " merge.tmp | sed "s/mergeplot/$LINENUMBER/" >> merge.data
+    LAST=$lno
+    LINENUMBER=$(($LINENUMBER + 1))
+  fi
+done
+echo "$LINENUMBER 0 0 0" >> merge.data
+
 
 
 echo '\
@@ -115,6 +131,18 @@ echo "0 notitle" >> plot.cmd
 
 gnuplot < plot.cmd
 
+
+echo "
+  set term png
+  set out \"cluster.png\"
+  set title \"$HEADLINE\"
+  plot \"merge.data\" using 1:3 with points title \"records\", \
+       \"merge.data\" using 1:4 with points title \"merged score\", \
+       \"merge.data\" using 1:5 with points title \"sum score\", \
+       \"merge.data\" using 1:6 with points title \"avg score\"
+" > plot.cmd
+cat plot.cmd | gnuplot
+
 echo
 
 echo "All done"
index 649c79c..f117635 100755 (executable)
@@ -97,7 +97,23 @@ echo "Client numbers"
 cat scores.data | cut -d' ' -f2 | sort -u
 head -10 scores.data
 
-exit 1
+grep mergeplot show.out > merge.tmp
+LINENUMBER="1"
+LAST=""
+echo "0 0 0" > merge.data
+for lno in `cat merge.tmp | cut -d ' ' -f2`
+do
+  if [ "$lno" != "$LAST" ]
+  then
+    echo "Found line $lno at $LINENUMBER"
+    grep "mergeplot $lno " merge.tmp | sed "s/mergeplot/$LINENUMBER/" >> merge.data
+    LAST=$lno
+    LINENUMBER=$(($LINENUMBER + 1))
+  fi
+done
+echo "$LINENUMBER 0 0 0" >> merge.data
+
+#exit 1
 
 T1=`grep ": 1 " scores.data | head -1 | cut -d'#' -f2 | cut -d' ' -f2`
 T2=`grep ": 2 " scores.data | head -1 | cut -d'#' -f2 | cut -d' ' -f2`
@@ -119,6 +135,16 @@ echo "
 " > plot.cmd
 cat plot.cmd | gnuplot
 
+echo "
+  set term png
+  set out \"cluster.png\"
+  set title \"$HEADLINE\"
+  plot \"merge.data\" using 1:3 with points title \"records\", \
+       \"merge.data\" using 1:4 with points title \"merged score\", \
+       \"merge.data\" using 1:5 with points title \"sum score\", \
+       \"merge.data\" using 1:6 with points title \"avg score\"
+" > plot.cmd
+cat plot.cmd | gnuplot
 
 
 echo "All done"
index 5450cae..b403048 100644 (file)
@@ -510,7 +510,7 @@ void relevance_prepare_read(struct relevance *rel, struct reclist *reclist,
             int tfrel = relevance; // keep the old tf/idf score
             int robinscore = 0;
             int solrscore = 0;
-            int normscore;
+            int normscore = 0;
             const char *score;
             const char *id;
             const char *title;
@@ -542,20 +542,21 @@ void relevance_prepare_read(struct relevance *rel, struct reclist *reclist,
                 i++;
             }
             idbuf[i] = '\0';
-            if ( norm->count )
+            if ( norm->count && *score )
             {
                 //float avg = norm->sum / norm->count;
                 normscore = 10000.0 * (  atof(score) / norm->max );
                 wrbuf_printf(w, "normscore: score(%s) / max(%f) *10000 = %d\n",
                         score, norm->max, normscore);
             } else
-                yaz_log(YLOG_LOG, "normscore: no count, can not normalize %s ", score );
+                yaz_log(YLOG_LOG, "normscore: no count, can not normalize score '%s' ", score );
 
             // If we have a score in the best record, we probably have in them all
             // and we can try to merge scores
             if ( *score ) {
                 float scores[nclust];
                 float s = 0.0;
+                float sum = 0.0;
                 int i=0;
                 if ( rec->records && rec->records->next ) 
                 { // have more than one record
@@ -563,7 +564,7 @@ void relevance_prepare_read(struct relevance *rel, struct reclist *reclist,
                     {
                         scores[i] = atof( getfield(record,"score") );
                         yaz_log(YLOG_LOG,"mergescore %d: %f", i, scores[i] );
-                        wrbuf_printf(w,"mergeplot %d: %f x\n", clusternumber, 10000*scores[i] );
+                        wrbuf_printf(w,"mergeplot %d  %f x\n", clusternumber, 10000*scores[i] );
                     }
                     qsort(scores, nclust, sizeof(float), sort_float );
                     for (i = 0; i<nclust; i++)
@@ -571,16 +572,18 @@ void relevance_prepare_read(struct relevance *rel, struct reclist *reclist,
                         yaz_log(YLOG_LOG,"Sorted mergescore %d: %f + %f/%d = %f", i, s,scores[i],i+1, s+scores[i] / (i+1) );
                         wrbuf_printf(w,"Sorted mergescore %d: %f + %f/%d = %f\n",  i, s,scores[i],i+1, s+scores[i] / (i+1));
                         s += scores[i] / (i+1);
+                        sum += scores[i];
                     }
                     mergescore = s * 10000;
+                    wrbuf_printf(w,"mergeplot %d  x %d %f %f %d\n", clusternumber, mergescore,
+                        10000.0*sum, 10000.0*sum/nclust, nclust );
+                    yaz_log(YLOG_LOG,"mergeplot %d  x %d %f %f %d", clusternumber, mergescore,
+                        10000.0*sum, 10000.0*sum/nclust, nclust );
                 }
                 else
-                { // only one record, take the easy way out of merging
+                { // only one record, take the easy way out of merging (and don't bother plotting)
                     mergescore = atof( score ) * 10000;
                 }
-                wrbuf_printf(w,"mergeplot %d: x %d \n", clusternumber, mergescore );
-                // TODO - Should not use bestrecord->position, but something from rec that
-                // corresponds to the hit number, for plotting.
             } // merge score
             id = getfield(bestrecord, "id");
             // clear the id, we only want the first numerical part