From 9ca612132e1ef66f43d61e5cefa854ba18c6e1a6 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 19 Jun 2002 08:28:55 +0000 Subject: [PATCH] added dmoz --- test/Makefile.am | 2 +- test/dmoz/all.sh | 5 +++++ test/dmoz/con1.pl | 29 +++++++++++++++++++++++++++++ test/dmoz/con2.pl | 41 +++++++++++++++++++++++++++++++++++++++++ test/dmoz/fetch.sh | 5 +++++ test/dmoz/plot.dem | 12 ++++++++++++ test/dmoz/update.sh | 16 ++++++++++++++++ test/dmoz/zebra-b.cfg | 17 +++++++++++++++++ test/dmoz/zebra-c.cfg | 17 +++++++++++++++++ 9 files changed, 143 insertions(+), 1 deletion(-) create mode 100755 test/dmoz/all.sh create mode 100755 test/dmoz/con1.pl create mode 100755 test/dmoz/con2.pl create mode 100755 test/dmoz/fetch.sh create mode 100755 test/dmoz/plot.dem create mode 100755 test/dmoz/update.sh create mode 100644 test/dmoz/zebra-b.cfg create mode 100644 test/dmoz/zebra-c.cfg diff --git a/test/Makefile.am b/test/Makefile.am index 2a440d3..facee4d 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -1,2 +1,2 @@ -SUBDIRS=gils usmarc api +SUBDIRS=gils usmarc api dmoz diff --git a/test/dmoz/all.sh b/test/dmoz/all.sh new file mode 100755 index 0000000..36da4e8 --- /dev/null +++ b/test/dmoz/all.sh @@ -0,0 +1,5 @@ +#!/bin/sh +rm zebraidx.log +./update.sh b +./update.sh c +gnuplot plot.dem diff --git a/test/dmoz/con1.pl b/test/dmoz/con1.pl new file mode 100755 index 0000000..d2591c3 --- /dev/null +++ b/test/dmoz/con1.pl @@ -0,0 +1,29 @@ +#!/usr/bin/perl -w + +my $state = 'init'; +my $topic = ''; +my $title; +my $description; + +while ($_ = ) { + if (//) { + $topic = $1; + } + elsif (//) { + $url = $1; + } + elsif (/(.*?)<\/d:Title>/) { + $title = $1; + } + elsif (/(.*?)<\/d:Description>/) { + $description = $1; + } + elsif (/<\/ExternalPage>/) { + print "\n"; + print " $title\n"; + print " $description\n"; + print " $url\n"; + print " $topic\n"; + print "\n"; + } +} diff --git a/test/dmoz/con2.pl b/test/dmoz/con2.pl new file mode 100755 index 0000000..4cecd81 --- /dev/null +++ b/test/dmoz/con2.pl @@ -0,0 +1,41 @@ +#!/usr/bin/perl -w + +my $state = 'init'; +my $topic = ''; +my $title; +my $description; + +my $no = 0; + +while ($_ = ) { + if (//) { + $topic = $1; + } + elsif (//) { + $url = $1; + } + elsif (/(.*?)<\/d:Title>/) { + $title = $1; + } + elsif (/(.*?)<\/d:Description>/) { + $description = $1; + } + elsif (/<\/ExternalPage>/) { + if (($no % 30000) == 0) { + if ($no) { + close(XO); + } + open(XO, ">dmoz." . ($no / 30000) . ".xml"); + } + print XO "\n"; + print XO " $title\n"; + print XO " $description\n"; + print XO " $url\n"; + print XO " $topic\n"; + print XO "\n"; + $no++; + } +} +if ($no != 0) { + close(XO); +} diff --git a/test/dmoz/fetch.sh b/test/dmoz/fetch.sh new file mode 100755 index 0000000..4b32ca4 --- /dev/null +++ b/test/dmoz/fetch.sh @@ -0,0 +1,5 @@ +#!/bin/sh +if test ! -f content.rdf.u8; then + wget http://dmoz.org/rdf/content.rdf.u8.gz + gunzip content.rdf.u8.gz +fi diff --git a/test/dmoz/plot.dem b/test/dmoz/plot.dem new file mode 100755 index 0000000..7be3aba --- /dev/null +++ b/test/dmoz/plot.dem @@ -0,0 +1,12 @@ +set xlabel "runs" +set ylabel "seconds" +plot [0:] [0:] 'times-c.log' using 3 title 'ISAMC(user)' with linespoints, 'times-b.log' using 3 title 'ISAMB(user)' with linespoints, 'times-c.log' using 2 title 'ISAMC(total)' with linespoints,'times-b.log' using 2 title 'ISAMB(total)' with linespoints +set output "times.ps" +set terminal postscript +replot +set output +set terminal x11 +pause -1 "Hit return to continue" + + + diff --git a/test/dmoz/update.sh b/test/dmoz/update.sh new file mode 100755 index 0000000..1835eeb --- /dev/null +++ b/test/dmoz/update.sh @@ -0,0 +1,16 @@ +#!/bin/sh +t=$1 +test -n "$t" || exit 1 +rm -f *.mf *.LCK *.tmp +../../index/zebraidx -l zebraidx.log init +i=0 +rm -f times-$t.log +while test -f dmoz.$i.xml; do + echo -n "$i " >>times-$1.log + /usr/bin/time -f '%e %U %P' -a -o times-$t.log ../../index/zebraidx -l zebraidx.log -c zebra-$t.cfg -f 10 update dmoz.$i.xml + ../../index/zebraidx -l zebraidx.log -c zebra-$t.cfg stat + i=`expr $i + 1` + if test $i = 29; then + break + fi +done diff --git a/test/dmoz/zebra-b.cfg b/test/dmoz/zebra-b.cfg new file mode 100644 index 0000000..baa9c1e --- /dev/null +++ b/test/dmoz/zebra-b.cfg @@ -0,0 +1,17 @@ +# Simple Zebra configuration file +# $Id: zebra-b.cfg,v 1.1 2002-06-19 08:28:55 adam Exp $ +# +# Where the schema files, attribute files, etc are located. +profilePath: .:../../tab:../../../yaz/tab + +# Files that describe the attribute sets supported. +attset: bib1.att +attset: gils.att +attset: explain.att + +recordtype: grs.sgml + +#storekeys: 1 +#storedata: 1 +#recordId: (bib1,identifier-standard) +isam: b diff --git a/test/dmoz/zebra-c.cfg b/test/dmoz/zebra-c.cfg new file mode 100644 index 0000000..7229218 --- /dev/null +++ b/test/dmoz/zebra-c.cfg @@ -0,0 +1,17 @@ +# Simple Zebra configuration file +# $Id: zebra-c.cfg,v 1.1 2002-06-19 08:28:55 adam Exp $ +# +# Where the schema files, attribute files, etc are located. +profilePath: .:../../tab:../../../yaz/tab + +# Files that describe the attribute sets supported. +attset: bib1.att +attset: gils.att +attset: explain.att + +recordtype: grs.sgml + +#storekeys: 1 +#storedata: 1 +#recordId: (bib1,identifier-standard) +isam: c -- 1.7.10.4