PIDFILE=pz2.pid
# Start the gateway.
- ./dbc-opensearch-gw.pl -1 \
+ ../../../dbc-opensearch-gw/dbc-opensearch-gw.pl -1 \
-c dbc-opensearch-gw.cfg \
-l dbc-opensearch-gw.log \
@:9994 &
sed 's/[^0-9 ]//g' |
awk '{print FNR,$0}'> $DF.data
+grep mergeplot show.out > merge.tmp
+LINENUMBER="1"
+LAST=""
+echo "0 0 0" > merge.data
+for lno in `cat merge.tmp | cut -d ' ' -f2`
+do
+ if [ "$lno" != "$LAST" ]
+ then
+ echo "Found line $lno at $LINENUMBER"
+ grep "mergeplot $lno " merge.tmp | sed "s/mergeplot/$LINENUMBER/" >> merge.data
+ LAST=$lno
+ LINENUMBER=$(($LINENUMBER + 1))
+ fi
+done
+echo "$LINENUMBER 0 0 0" >> merge.data
+
echo '\
gnuplot < plot.cmd
+
+echo "
+ set term png
+ set out \"cluster.png\"
+ set title \"$HEADLINE\"
+ plot \"merge.data\" using 1:3 with points title \"records\", \
+ \"merge.data\" using 1:4 with points title \"merged score\", \
+ \"merge.data\" using 1:5 with points title \"sum score\", \
+ \"merge.data\" using 1:6 with points title \"avg score\"
+" > plot.cmd
+cat plot.cmd | gnuplot
+
echo
echo "All done"
cat scores.data | cut -d' ' -f2 | sort -u
head -10 scores.data
-exit 1
+grep mergeplot show.out > merge.tmp
+LINENUMBER="1"
+LAST=""
+echo "0 0 0" > merge.data
+for lno in `cat merge.tmp | cut -d ' ' -f2`
+do
+ if [ "$lno" != "$LAST" ]
+ then
+ echo "Found line $lno at $LINENUMBER"
+ grep "mergeplot $lno " merge.tmp | sed "s/mergeplot/$LINENUMBER/" >> merge.data
+ LAST=$lno
+ LINENUMBER=$(($LINENUMBER + 1))
+ fi
+done
+echo "$LINENUMBER 0 0 0" >> merge.data
+
+#exit 1
T1=`grep ": 1 " scores.data | head -1 | cut -d'#' -f2 | cut -d' ' -f2`
T2=`grep ": 2 " scores.data | head -1 | cut -d'#' -f2 | cut -d' ' -f2`
" > plot.cmd
cat plot.cmd | gnuplot
+echo "
+ set term png
+ set out \"cluster.png\"
+ set title \"$HEADLINE\"
+ plot \"merge.data\" using 1:3 with points title \"records\", \
+ \"merge.data\" using 1:4 with points title \"merged score\", \
+ \"merge.data\" using 1:5 with points title \"sum score\", \
+ \"merge.data\" using 1:6 with points title \"avg score\"
+" > plot.cmd
+cat plot.cmd | gnuplot
echo "All done"
int tfrel = relevance; // keep the old tf/idf score
int robinscore = 0;
int solrscore = 0;
- int normscore;
+ int normscore = 0;
const char *score;
const char *id;
const char *title;
i++;
}
idbuf[i] = '\0';
- if ( norm->count )
+ if ( norm->count && *score )
{
//float avg = norm->sum / norm->count;
normscore = 10000.0 * ( atof(score) / norm->max );
wrbuf_printf(w, "normscore: score(%s) / max(%f) *10000 = %d\n",
score, norm->max, normscore);
} else
- yaz_log(YLOG_LOG, "normscore: no count, can not normalize %s ", score );
+ yaz_log(YLOG_LOG, "normscore: no count, can not normalize score '%s' ", score );
// If the best record has a score, the other records probably have one too,
// so we can try to merge the scores
if ( *score ) {
float scores[nclust];
float s = 0.0;
+ float sum = 0.0;
int i=0;
if ( rec->records && rec->records->next )
{ // have more than one record
{
scores[i] = atof( getfield(record,"score") );
yaz_log(YLOG_LOG,"mergescore %d: %f", i, scores[i] );
- wrbuf_printf(w,"mergeplot %d: %f x\n", clusternumber, 10000*scores[i] );
+ wrbuf_printf(w,"mergeplot %d %f x\n", clusternumber, 10000*scores[i] );
}
qsort(scores, nclust, sizeof(float), sort_float );
for (i = 0; i<nclust; i++)
yaz_log(YLOG_LOG,"Sorted mergescore %d: %f + %f/%d = %f", i, s,scores[i],i+1, s+scores[i] / (i+1) );
wrbuf_printf(w,"Sorted mergescore %d: %f + %f/%d = %f\n", i, s,scores[i],i+1, s+scores[i] / (i+1));
s += scores[i] / (i+1);
+ sum += scores[i];
}
mergescore = s * 10000;
+ wrbuf_printf(w,"mergeplot %d x %d %f %f %d\n", clusternumber, mergescore,
+ 10000.0*sum, 10000.0*sum/nclust, nclust );
+ yaz_log(YLOG_LOG,"mergeplot %d x %d %f %f %d", clusternumber, mergescore,
+ 10000.0*sum, 10000.0*sum/nclust, nclust );
}
else
- { // only one record, take the easy way out of merging
+ { // only one record, take the easy way out of merging (and don't bother plotting)
mergescore = atof( score ) * 10000;
}
- wrbuf_printf(w,"mergeplot %d: x %d \n", clusternumber, mergescore );
- // TODO - Should not use bestrecord->position, but something from rec that
- // corresponds to the hit number, for plotting.
} // merge score
id = getfield(bestrecord, "id");
// clear the id, we only want the first numerical part