From: Sebastian Hammer Date: Wed, 20 Dec 2006 20:47:16 +0000 (+0000) Subject: Reorganized source tree X-Git-Tag: before.append.child~93 X-Git-Url: http://sru.miketaylor.org.uk/cgi-bin?a=commitdiff_plain;h=fe3383c0559a453df1e5076fc6faac6d1a11685c;p=pazpar2-moved-to-github.git Reorganized source tree --- diff --git a/Makefile b/Makefile deleted file mode 100644 index b5eefb2..0000000 --- a/Makefile +++ /dev/null @@ -1,45 +0,0 @@ -# ParaZ. Copyright (C) 2000-2004, Index Data ApS -# All rights reserved. -# $Id: Makefile,v 1.6 2006-12-19 04:49:34 quinn Exp $ - -SHELL=/bin/sh - -CC=gcc - -YAZCONF=yaz-config -YAZLIBS=`$(YAZCONF) --libs` -YAZCFLAGS=`$(YAZCONF) --cflags` - -PROG=pazpar2 -PROGO=pazpar2.o eventl.o util.o command.o http.o http_command.o termlists.o \ - reclists.o relevance.o - -all: $(PROG) - -$(PROG): $(PROGO) - $(CC) $(CFLAGS) $(YAZCFLAGS) -o $(PROG) $(PROGO) $(YAZLIBS) - -.c.o: - $(CC) -c $(CFLAGS) -I. $(YAZCFLAGS) $< - -clean: - rm -f *.[oa] test core mon.out gmon.out errlist $(PROG) - - -## Dependencies go below - -command.o: command.c command.h util.h eventl.h pazpar2.h termlists.h \ - relevance.h reclists.h -eventl.o: eventl.c eventl.h -http.o: http.c command.h util.h eventl.h pazpar2.h termlists.h \ - relevance.h reclists.h http.h http_command.h -http_command.o: http_command.c command.h util.h eventl.h pazpar2.h \ - termlists.h relevance.h reclists.h http.h http_command.h -pazpar2.o: pazpar2.c pazpar2.h termlists.h relevance.h reclists.h \ - eventl.h command.h http.h -reclists.o: reclists.c pazpar2.h termlists.h relevance.h reclists.h \ - eventl.h -relevance.o: relevance.c relevance.h pazpar2.h termlists.h eventl.h \ - reclists.h -termlists.o: termlists.c termlists.h -util.o: util.c diff --git a/bibs.pz b/bibs.pz deleted file mode 100644 index bafdf1c..0000000 --- a/bibs.pz +++ /dev/null @@ -1,12 +0,0 @@ -target amicus.nlc-bnc.ca/NL -target melvyl.cdlib.org/cdl90 -target z3950.loc.gov:7090/voyager -target library2.mcmaster.ca/MORRIS -target melvyl.cdlib.org/CDL90 -target info.library.mun.ca:2200/innopac -target 137.122.27.60:210/innopac -target z3950.rlg.org/BIB -target 141.117.13.11:210/innopac -target siris-libraries.si.edu -target tpldynix.torontopubliclibrary.ca:210/server -target sirsi.library.utoronto.ca:2200/UNICORN diff --git a/command.c b/command.c deleted file mode 100644 index 1b9fd6d..0000000 --- a/command.c +++ /dev/null @@ -1,392 +0,0 @@ -/* $Id: command.c,v 1.5 2006-12-03 06:43:24 quinn Exp $ */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "command.h" -#include "util.h" -#include "eventl.h" -#include "pazpar2.h" - -extern IOCHAN channel_list; - -struct command_session { - IOCHAN channel; - char *outbuf; - - int outbuflen; - int outbufwrit; - - struct session *psession; -}; - -void command_destroy(struct command_session *s); -void command_prompt(struct command_session *s); -void command_puts(struct command_session *s, const char *buf); - -static int cmd_quit(struct command_session *s, char **argv, int argc) -{ - IOCHAN i = s->channel; - close(iochan_getfd(i)); - iochan_destroy(i); - command_destroy(s); - return 0; -} - -#ifdef GAGA -static int cmd_load(struct command_session *s, char **argv, int argc) -{ - if (argc != 2) { - command_puts(s, "Usage: load filename\n"); - } - if (load_targets(s->psession, argv[1]) < 0) - command_puts(s, "Failed to open file\n"); - return 1; -} -#endif - -static int cmd_search(struct command_session *s, char **argv, int argc) -{ - if (argc != 2) - { - command_puts(s, "Usage: search word\n"); - return 1; - } - search(s->psession, argv[1]); - return 1; -} - -static int cmd_hitsbytarget(struct command_session *s, char **argv, int argc) -{ - int count; - int i; - - struct hitsbytarget *ht = hitsbytarget(s->psession, &count); - for (i = 0; i < count; i++) - { - char buf[1024]; - - sprintf(buf, "%s: %d (%d records, diag=%d, state=%s conn=%d)\n", ht[i].id, ht[i].hits, - ht[i].records, ht[i].diagnostic, ht[i].state, ht[i].connected); - command_puts(s, buf); - } - return 1; -} - -static int cmd_show(struct command_session *s, char **argv, int argc) -{ - struct record **recs; - int num = 10; - int merged, total; - int i; - - if (argc == 2) - num = atoi(argv[1]); - - recs = show(s->psession, 0, &num, &merged, &total); - - for (i = 0; i < num; i++) - { - int rc; - struct record *cnode; - struct record *r = recs[i]; - - command_puts(s, r->merge_key); - for (rc = 1, cnode = r->next_cluster; cnode; cnode = cnode->next_cluster, rc++) - ; - if (rc > 1) - { - char buf[256]; - sprintf(buf, " (%d records)", rc); - command_puts(s, buf); - } - command_puts(s, "\n"); - } - return 1; -} - -static int cmd_stat(struct command_session *s, char **argv, int argc) -{ - char buf[1024]; - struct statistics stat; - - statistics(s->psession, &stat); - sprintf(buf, "Number of connections: %d\n", stat.num_clients); - command_puts(s, buf); - if (stat.num_no_connection) - { - sprintf(buf, "#No_connection: %d\n", stat.num_no_connection); - command_puts(s, buf); - } - if (stat.num_connecting) - { - sprintf(buf, "#Connecting: %d\n", stat.num_connecting); - command_puts(s, buf); - } - if (stat.num_initializing) - { - sprintf(buf, "#Initializing: %d\n", stat.num_initializing); - command_puts(s, buf); - } - if (stat.num_searching) - { - sprintf(buf, "#Searching: %d\n", stat.num_searching); - command_puts(s, buf); - } - if (stat.num_presenting) - { - sprintf(buf, "#Ppresenting: %d\n", stat.num_presenting); - command_puts(s, buf); - } - if (stat.num_idle) - { - sprintf(buf, "#Idle: %d\n", stat.num_idle); - command_puts(s, buf); - } - if (stat.num_failed) - { - sprintf(buf, "#Failed: %d\n", stat.num_failed); - command_puts(s, buf); - } - if (stat.num_error) - { - sprintf(buf, "#Error: %d\n", stat.num_error); - command_puts(s, buf); - } - return 1; -} - -static struct { - char *cmd; - int (*fun)(struct command_session *s, char *argv[], int argc); -} cmd_array[] = { - {"quit", cmd_quit}, -#ifdef GAGA - {"load", cmd_load}, -#endif - {"find", cmd_search}, - {"ht", cmd_hitsbytarget}, - {"stat", cmd_stat}, - {"show", cmd_show}, - {0,0} -}; - -void command_command(struct command_session *s, char *command) -{ - char *p; - char *argv[20]; - int argc = 0; - int i; - int res = -1; - - p = command; - while (*p) - { - while (isspace(*p)) - p++; - if (!*p) - break; - argv[argc++] = p; - while (*p && !isspace(*p)) - p++; - if (!*p) - break; - *(p++) = '\0'; - } - if (argc) { - for (i = 0; cmd_array[i].cmd; i++) - { - if (!strcmp(cmd_array[i].cmd, argv[0])) { - res = (cmd_array[i].fun)(s, argv, argc); - - break; - } - } - if (res < 0) { - command_puts(s, "Unknown command.\n"); - command_prompt(s); - } - else if (res == 1) { - command_prompt(s); - } - } - else - command_prompt(s); - -} - - -static void command_io(IOCHAN i, int event) -{ - int res; - char buf[1024]; - struct command_session *s; - - s = iochan_getdata(i); - - - switch (event) - { - case EVENT_INPUT: - res = read(iochan_getfd(i), buf, 1024); - if (res <= 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "read command"); - close(iochan_getfd(i)); - iochan_destroy(i); - command_destroy(s); - return; - } - if (!index(buf, '\n')) { - yaz_log(YLOG_WARN|YLOG_ERRNO, "Did not receive complete command"); - close(iochan_getfd(i)); - iochan_destroy(i); - command_destroy(s); - return; - } - buf[res] = '\0'; - command_command(s, buf); - break; - case EVENT_OUTPUT: - if (!s->outbuflen || s->outbufwrit < 0) - { - yaz_log(YLOG_WARN, "Called with outevent but no data"); - iochan_clearflag(i, EVENT_OUTPUT); - } - else - { - res = write(iochan_getfd(i), s->outbuf + s->outbufwrit, s->outbuflen - - s->outbufwrit); - if (res < 0) { - yaz_log(YLOG_WARN|YLOG_ERRNO, "write command"); - close(iochan_getfd(i)); - iochan_destroy(i); - command_destroy(s); - } - else - { - s->outbufwrit += res; - if (s->outbufwrit >= s->outbuflen) - { - s->outbuflen = s->outbufwrit = 0; - iochan_clearflag(i, EVENT_OUTPUT); - } - } - } - break; - default: - yaz_log(YLOG_WARN, "Bad voodoo on socket"); - } -} - -void command_puts(struct command_session *s, const char *buf) -{ - int len = strlen(buf); - memcpy(s->outbuf + s->outbuflen, buf, len); - s->outbuflen += len; - iochan_setflag(s->channel, EVENT_OUTPUT); -} - -void command_prompt(struct command_session *s) -{ - command_puts(s, "Pazpar2> "); -} - - -/* Accept a new command connection */ -static void command_accept(IOCHAN i, int event) -{ - struct sockaddr_in addr; - int fd = iochan_getfd(i); - socklen_t len; - int s; - IOCHAN c; - struct command_session *ses; - int flags; - - len = sizeof addr; - if ((s = accept(fd, (struct sockaddr *) &addr, &len)) < 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "accept"); - return; - } - if ((flags = fcntl(s, F_GETFL, 0)) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl"); - if (fcntl(s, F_SETFL, flags | O_NONBLOCK) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2"); - - yaz_log(YLOG_LOG, "New command connection"); - c = iochan_create(s, command_io, EVENT_INPUT | EVENT_EXCEPT); - - ses = xmalloc(sizeof(*ses)); - ses->outbuf = xmalloc(50000); - ses->outbuflen = 0; - ses->outbufwrit = 0; - ses->channel = c; - ses->psession = new_session(); - iochan_setdata(c, ses); - - command_puts(ses, "Welcome to pazpar2\n\n"); - command_prompt(ses); - - c->next = channel_list; - channel_list = c; -} - -void command_destroy(struct command_session *s) { - xfree(s->outbuf); - xfree(s); -} - -/* Create a command-channel listener */ -void command_init(int port) -{ - IOCHAN c; - int l; - struct protoent *p; - struct sockaddr_in myaddr; - int one = 1; - - yaz_log(YLOG_LOG, "Command port is %d", port); - if (!(p = getprotobyname("tcp"))) { - abort(); - } - if ((l = socket(PF_INET, SOCK_STREAM, p->p_proto)) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "socket"); - if (setsockopt(l, SOL_SOCKET, SO_REUSEADDR, (char*) - &one, sizeof(one)) < 0) - abort(); - - bzero(&myaddr, sizeof myaddr); - myaddr.sin_family = AF_INET; - myaddr.sin_addr.s_addr = INADDR_ANY; - myaddr.sin_port = htons(port); - if (bind(l, (struct sockaddr *) &myaddr, sizeof myaddr) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "bind"); - if (listen(l, SOMAXCONN) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "listen"); - - c = iochan_create(l, command_accept, EVENT_INPUT | EVENT_EXCEPT); - //iochan_setdata(c, &l); - c->next = channel_list; - channel_list = c; -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/command.h b/command.h deleted file mode 100644 index 25e5b99..0000000 --- a/command.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef COMMAND_H -#define COMMAND_H - -void command_init(int port); - -#endif diff --git a/danish.pz b/danish.pz deleted file mode 100644 index ba1cc52..0000000 --- a/danish.pz +++ /dev/null @@ -1,21 +0,0 @@ -target cosmos.dnlb.dk:9909/DNL01 -target odin.dtv.dk:210/DTV01 -target dvjb1.kvl.dk:9909/dvj01 -target hermes.lib.cbs.dk:2100/S -target a800.hha.dk:9909/hba01 -target z3950.kb.dk:2100/kgl01 -target hans.ruc.dk:2210/S -target a500.aub.auc.dk:9909/AUB01 -target z3950.dbc.dk:213/def1forsk -target z3950.dbc.dk:213/def1total -target z3950.dbc.dk:213/danbibv2 -target hcb.bibnet.dk:2100/S -target z3950.bibsys.no:2100/BIBSYS -target z3950.libris.kb.se:210/libr -target bagel.indexdata.dk/gils -target www.deff.dk:2100/Default -target www.deff.dk:2102/Default -connect -init -search @attr 1=4 danmarks -present diff --git a/defport.pz b/defport.pz deleted file mode 100644 index de0df23..0000000 --- a/defport.pz +++ /dev/null @@ -1,5 +0,0 @@ -target www.deff.dk:2102/Default -connect -init -search @attr 1=4 danmarks -present diff --git a/etc/bibs.pz b/etc/bibs.pz new file mode 100644 index 0000000..bafdf1c --- /dev/null +++ b/etc/bibs.pz @@ -0,0 +1,12 @@ +target amicus.nlc-bnc.ca/NL +target melvyl.cdlib.org/cdl90 +target z3950.loc.gov:7090/voyager +target library2.mcmaster.ca/MORRIS +target melvyl.cdlib.org/CDL90 +target info.library.mun.ca:2200/innopac +target 137.122.27.60:210/innopac +target z3950.rlg.org/BIB +target 141.117.13.11:210/innopac +target siris-libraries.si.edu +target tpldynix.torontopubliclibrary.ca:210/server +target sirsi.library.utoronto.ca:2200/UNICORN diff --git a/etc/danish.pz b/etc/danish.pz new file mode 100644 index 0000000..ba1cc52 --- /dev/null +++ b/etc/danish.pz @@ -0,0 +1,21 @@ +target cosmos.dnlb.dk:9909/DNL01 +target odin.dtv.dk:210/DTV01 +target dvjb1.kvl.dk:9909/dvj01 +target hermes.lib.cbs.dk:2100/S +target a800.hha.dk:9909/hba01 +target z3950.kb.dk:2100/kgl01 +target hans.ruc.dk:2210/S +target a500.aub.auc.dk:9909/AUB01 +target z3950.dbc.dk:213/def1forsk +target z3950.dbc.dk:213/def1total +target z3950.dbc.dk:213/danbibv2 +target hcb.bibnet.dk:2100/S +target z3950.bibsys.no:2100/BIBSYS +target z3950.libris.kb.se:210/libr +target bagel.indexdata.dk/gils +target www.deff.dk:2100/Default +target www.deff.dk:2102/Default +connect +init +search @attr 1=4 danmarks +present diff --git a/etc/defport.pz b/etc/defport.pz new file mode 100644 index 0000000..de0df23 --- /dev/null +++ b/etc/defport.pz @@ -0,0 +1,5 @@ +target www.deff.dk:2102/Default +connect +init +search @attr 1=4 danmarks +present diff --git a/etc/many.pz b/etc/many.pz new file mode 100644 index 0000000..db1bd9c --- /dev/null +++ b/etc/many.pz @@ -0,0 +1,299 @@ +target 12.4.242.230:210/Unicorn +target 128.119.168.1:210/INNOPAC +target 128.119.168.2:210/INNOPAC +target 128.197.130.200:210/INNOPAC +target 128.218.15.173:210/INNOPAC +target 128.83.82.249:210/INNOPAC +target 129.234.4.6:210/INNOPAC +target 129.65.20.27:210/Innopac +target 129.78.72.7:210/INNOPAC +target 130.102.42.45:210/Innopac +target 130.111.64.52:210/INNOPAC +target 130.206.134.239:210/INNOPAC +target 130.212.18.200:210/Innopac +target 130.91.144.191:210/INNOPAC +target 131.165.177.76:2100/S +target 131.165.97.2:210/S +target 131.232.13.6:210/INNOPAC +target 134.115.152.130:210/Innopac +target 134.154.30.10:210/Innopac +target 137.187.166.250:210/INNOPAC +target 137.45.212.100:210/INNOPAC +target 141.215.16.4:210/INNOPAC +target 144.37.1.4:210/Innopac +target 147.143.2.5:210/INNOPAC +target 147.175.67.227:1111/DEFAULT +target 147.175.67.227:2222/DEFAULT +target 147.29.116.113:2100/DAB01 +target 147.29.116.113:2100/KGB01 +target 147.29.116.113:2100/PVS +target 147.29.116.113:2100/vej01 +target 149.130.90.2:210/INNOPAC +target 149.175.20.20:210/INNOPAC +target 161.210.213.100:210/INNOPAC +target 169.139.225.46:210/horizon +target 192.188.131.54:2200/UNICORN +target 192.197.190.4:210/Advance +target 192.65.218.23:210/DREWDB +target 193.162.157.130:2100/S +target 193.43.102.194:21210/ADVANCE +target 193.52.199.5:21210/ADVANCE +target 193.59.172.100:210/INNOPAC +target 194.182.134.04:210/S +target 194.239.156.133:2100/S +target 195.215.203.98:2100/S +target 195.249.206.204:210/Default +target 195.249.206.204:210/default +target 195.41.8.218:2100/S +target 195.50.60.15:2100/CB +target 195.50.60.15:2101/FB +target 195.50.60.15:2102/MB +target 195.50.60.15:2103/PB +target 196.6.221.16:210/INNOPAC +target 198.62.73.72:210/INNOPAC +target 199.184.19.10:210/innopac +target 199.88.71.7:210/INNOPAC +target 202.14.152.4:210/INNOPAC +target 202.40.151.2:210/INNOPAC +target 202.40.216.17:210/INNOPAC +target 206.48.3.167:7090/Voyager +target 207.194.254.193:210/advance +target Exlibris.albertslundbib.dk:2100/S +target Exlibris.daimi.au.dk:2104/FYS01 +target Exlibris.daimi.au.dk:2104/GEO01 +target Exlibris.daimi.au.dk:2104/KEM01 +target Exlibris.daimi.au.dk:2104/MAT01 +target Exlibris.daimi.au.dk:2104/VID01 +target Z3950.maribo.integrabib.dk:210/Default +target Z3950cat.bl.uk:9909/BLAC +target acorn.library.vanderbilt.edu:2200/ACORN +target advance.biblio.polymtl.ca:210/ADVANCE +target advance.lib.rmit.edu.au:21210/Advance +target albert.rit.edu:210/INNOPAC +target aleph.mcgill.ca:210/MUSE +target aleph.unibas.ch:9909/DSV01 +target alpha.svkol.cz:9909/SVK01 +target alpha.svkol.cz:9909/SVK02 +target atrium.bib.umontreal.ca:210/ADVANCE +target bagel.indexdata.dk:210/gils +target bagel.indexdata.dk:210/marc +target bcr1.larioja.org:210/AbsysE +target bib.gladsaxe.dk:2100/S +target bib.gladsaxe.dk:2101/H +target bib.gladsaxe.dk:2101/S +target biblio.crusca.fi.it:9909/ADC01 +target biblio.hec.ca:210/hec +target biblios.dic.uchile.cl:210/BELLO +target biblioteca.uc3m.es:2200/unicorn +target bobcat.nyu.edu:210/ADVANCE +target books.luther.edu:210/innopac +target brocar.unavarra.es:9999/libros +target brocar.unavarra.es:9999/revistas +target cat.cisti.nrc.ca:210/INNOPAC +target cat.lib.grin.edu:210/innopac +target catalog.bedfordlibrary.org:210/Innopac +target catalog.crl.edu:210/INNOPAC +target catalog.lib.jhu.edu:210/horizon +target cisne.sim.ucm.es:210/INNOPAC +target clavis.ucalgary.ca:2200/UNICORN +target cornelia.whoi.edu:7090/VOYAGER +target corpsgeo1.usace.army.mil:2210/geo1 +target csulib.ctstateu.edu:210/INNOPAC +target echea.ru.ac.za:210/INNOPAC +target edcsns17.cr.usgs.gov:6675/AVHRR +target edcsns17.cr.usgs.gov:6675/DOQ +target edcsns17.cr.usgs.gov:6675/HAZARDS +target edcsns17.cr.usgs.gov:6675/LANDSAT_MSS +target edcsns17.cr.usgs.gov:6675/LANDSAT_TM +target edcsns17.cr.usgs.gov:6675/NAPP +target edcsns17.cr.usgs.gov:6675/NHAP +target eleanor.lib.gla.ac.uk:210/INNOPAC +target eusa.library.net:5666/eusa +target explore.up.ac.za:210/INNOPAC +target gaudi.ua.es:2200/unicorn +target gb.glostrupbib.dk:2100/S +target hcb.bibnet.dk:2100/S +target helios.nlib.ee:210/INNOPAC +target hermes.lib.cbs.dk:2100/S +target hkbulib.hkbu.edu.hk:210/INNOPAC +target ikast.ikast.bibnet.dk:2100/S +target indexgeo.com.au:6668/dataset +target indexgeo.net:5511/act +target info.library.mun.ca:2200/UNICORN +target innopac.lib.kth.se:210/innopac +target innopac.wits.ac.za:210/INNOPAC +target itec.mty.itesm.mx:210/innopac +target jasper.acadiau.ca:2200/UNICORN +target jenda.lib.nccu.edu.tw:210/INNOPAC +target kat.vkol.cz:9909/SVK01 +target kat.vkol.cz:9909/SVK02 +target kraka.birkerod.bibnet.dk:2100/S +target ksclib.keene.edu:210/INNOPAC +target l1.uwaterloo.ca:7090/VOYAGER +target lance.missouri.edu:210/INNOPAC +target lib.leeds.ac.uk:210/INNOPAC +target lib.soas.ac.uk:210/innopac +target libadfa.adfa.edu.au:7090/VOYAGER +target libcat.qut.edu.au:210/INNOPAC +target liber.acadlib.lv:211/codex.previous +target liber.acadlib.lv:211/liber1 +target liber.acadlib.lv:212/liber1 +target library.anu.edu.au:210/INNOPAC +target library.ballarat.edu.au:210/INNOPAC +target library.bangor.ac.uk:210/INNOPAC +target library.bilgi.edu.tr:210/INNOPAC +target library.brad.ac.uk:210/xxDefault +target library.brunel.ac.uk:2200/UNICORN +target library.daemen.edu:210/innopac +target library.deakin.edu.au:210/INNOPAC +target library.gu.edu.au:21210/ADVANCE +target library.hud.ac.uk:210/HORIZON +target library.hull.ac.uk:210/INNOPAC +target library.latrobe.edu.au:210/INNOPAC +target library.lbc.edu:7090/voyager +target library.mdx.ac.uk:210/mdx +target library.newcastle.edu.au:210/INNOPAC +target library.ox.ac.uk:210/ADVANCE +target library.tcd.ie:210/advance +target library.ucc.ie:210/INNOPAC +target library.uh.edu:210/INNOPAC +target library.vu.edu.au:210/INNOPAC +target library2.open.ac.uk:7090/voyager +target libsys.lib.hull.ac.uk:210/INNOPAC +target libuni01.ccc.govt.nz:2200/unicorn +target libuni01.ccc.govt.nz:2220/cinch +target libuni01.ccc.govt.nz:2220/papers +target libunix.ku.edu.tr:210/INNOPAC +target linc.nus.edu.sg:210/innopac +target lion.swem.wm.edu:2200/unicorn +target loke.dcbib.dk:2100/S +target lrpapp.cc.umanitoba.ca:2200/unicorn +target malad2.mala.bc.ca:2200/UNICORN +target malad2.mala.bc.ca:2200/unicorn +target marte.biblioteca.upm.es:2200/unicorn +target medupe.ub.bw:210/INNOPAC +target melvyl.cdlib.org:210/CDL90 +target mercury.concordia.ca:210/Innopac +target merihobu.utlib.ee:210/INNOPAC +target merlinweb.ville.montreal.qc.ca:2100/Z3950S +target nbinet.ncl.edu.tw:210/INNOPAC +target ncsulib.lib.ncsu.edu:210/MARION +target newlib.ci.lubbock.tx.us:2200/unicorn +target nobis.njl.dk:210/S +target nrhcat.library.nrhtx.com:210/INNOPAC +target oda.fynbib.dk:2100/S +target odin2.bib.sdu.dk:210/Horizon +target odin2.bib.sdu.dk:210/otb +target opac.nebis.ch:9909/NEBIS +target opac.sbn.it:2100/nopac +target opac.sbn.it:3950/nopac +target opac.shu.ac.uk:210/INNOPAC +target opac.unifi.it:210/OPAC +target opac.utmem.edu:210/INNOPAC +target orac.lib.uts.edu.au:210/INNOPAC +target pollux.dslak.co.nz:210/MARNSL +target prodorbis.library.yale.edu:7090/voyager +target quest.unb.ca:2200/unicorn +target rebiun.crue.org:210/absysREBIUN +target roble.unizar.es:210/INNOPAC +target rs6000.nshpl.library.ns.ca:210/AVR +target rs6000.nshpl.library.ns.ca:210/CBR +target rs6000.nshpl.library.ns.ca:210/CEH +target rs6000.nshpl.library.ns.ca:210/CUR +target rs6000.nshpl.library.ns.ca:210/ECR +target rs6000.nshpl.library.ns.ca:210/NSP +target rs6000.nshpl.library.ns.ca:210/PAR +target rs6000.nshpl.library.ns.ca:210/SSR +target rs6000.nshpl.library.ns.ca:210/WCR +target sabio.library.arizona.edu:210/innopac +target salty.slcpl.lib.ut.us:210/INNOPAC +target scotty.mhsl.uab.edu:7090/VOYAGER +target serapis.leedsmet.ac.uk:2200/unicorn +target serapis.lmu.ac.uk:2200/unicorn +target sflwww.er.usgs.gov:251/sflwwwmeta +target silkbib.bib.dk:2100/S +target sirsi.library.utoronto.ca:2200/UNICORN +target star.tsl.state.tx.us:2200/unicorn +target strife.library.uwa.edu.au:210/INNOPAC +target strife.library.uwa.edu.au:210/innopac +target sundog.usask.ca:210/INNOPAC +target tegument.nlm.nih.gov:7090/VOYAGER +target titus.folger.edu:7090/VOYAGER +target tora.htk.dk:2100/S +target troy.lib.sfu.ca:210/INNOPAC +target unicorn.lib.ic.ac.uk:2200/IC +target unicorn.qmced.ac.uk:2200/unicorn +target unicornio.umb.edu.co:2200/Unicorn +target ustlib.ust.hk:210/INNOPAC +target vax.lib.state.ca.us:210/marion +target vlsirsi.rdg.ac.uk:2200/UNICORN +target voyager.its.csiro.au:7090/VOYAGER +target voyager.tcs.tulane.edu:7090/VOYAGER +target voyager.wrlc.org:7090/VOYAGER +target www.agralin.nl:210/clcz3950 +target www.bibliotek.taarnby.dk:210/S +target www.congreso.es:2100/ABSYSBCD +target www.csc.noaa.gov:2210/CSC_Products +target www.eevl.ac.uk:2100/eevlacuk +target www.grimes.lib.ia.us:210/main +target www.knihovna.cz:8888/un_cat +target www.library.nps.gov:7090/VOYAGER +target www.sbn.it:2100/nopac +target www.scran.ac.uk:3950/default +target yulib001.mc.yu.edu:1111/DEFAULT +target z39.libis.lt:210/KNYGOS +target z3950.ahds.ac.uk:210/CMF +target z3950.ahds.ac.uk:210/DC +target z3950.bcl.jcyl.es:210/AbsysBCL +target z3950.bcl.jcyl.es:210/AbsysCCFL +target z3950.bergen.folkebibl.no:210/data +target z3950.biblos.pk.edu.pl:4210/books +target z3950.bibsys.no:2100/BIBSYS +target z3950.bibsys.no:2100/PERI +target z3950.copac.ac.uk:210/copac +target z3950.copac.ac.uk:2100/COPAC +target z3950.deich.folkebibl.no:210/data +target z3950.dragsholm.integrabib.dk:210/default +target z3950.fcla.edu:210/CF +target z3950.fcla.edu:210/FA +target z3950.fcla.edu:210/FI +target z3950.fcla.edu:210/FO +target z3950.fcla.edu:210/QB +target z3950.fcla.edu:210/RF +target z3950.fcla.edu:210/SR +target z3950.fcla.edu:210/ST +target z3950.gbv.de:20010/GVK +target z3950.gbv.de:20011/GVK +target z3950.gbv.de:20011/GVK +target z3950.gbv.de:210/GVK +target z3950.gbv.de:210/GVK +target z3950.haderslev.integrabib.dk:210/default +target z3950.haslev.integrabib.dk:210/Default +target z3950.hoerning.integrabib.dk:210/default +target z3950.kb.dk:2100/BGF01 +target z3950.kb.dk:2100/BHS01 +target z3950.kb.dk:2100/DRA01 +target z3950.kb.dk:2100/KBB01 +target z3950.kb.dk:2100/KBH01 +target z3950.kb.dk:2100/KGL01 +target z3950.libh.uoc.gr:210/default +target z3950.libr.uoc.gr:210/logios +target z3950.libr.uoc.gr:210/ptolemeos_ii +target z3950.library.wisc.edu:210/madison +target z3950.libris.kb.se:210/libr +target z3950.lillehammer.folkebibl.no:210/data +target z3950.loc.gov:7090/voyager +target z3950.nakskov.integrabib.dk:210/default +target z3950.nb.no:2100/Norbok +target z3950.nb.no:2100/Norper +target z3950.nb.no:2100/Sambok +target z3950.nls.uk:7290/voyager +target z3950.ovid.com:2213/eric +target z3950.ovid.com:2213/pmed +target z3950.rdn.ac.uk:210/xxdefault +target z3950.ringsted.integrabib.dk:210/Default +target z3950.skagen.integrabib.dk:210/default +target z3950.sydfalster.integrabib.dk:210/default +target z3950.trondheim.folkebibl.no:210/data +target z3950.vallensbaek.integrabib.dk:210/default +target z3950.vejen.integrabib.dk:210/default diff --git a/etc/slow.pz b/etc/slow.pz new file mode 100644 index 0000000..933ee67 --- /dev/null +++ b/etc/slow.pz @@ -0,0 +1 @@ +target localhost:9999/Slow diff --git a/etc/test.pz b/etc/test.pz new file mode 100644 index 0000000..0fb10c8 --- /dev/null +++ b/etc/test.pz @@ -0,0 +1 @@ +target localhost:9999/Default diff --git a/eventl.c b/eventl.c deleted file mode 100644 index 87bd1ba..0000000 --- a/eventl.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * ParaZ - a simple tool for harvesting performance data for parallel - * operations using Z39.50. - * Copyright (c) 2000-2004 Index Data ApS - * See LICENSE file for details. - */ - -/* - * $Id: eventl.c,v 1.2 2006-12-12 02:36:24 quinn Exp $ - * Based on revision YAZ' server/eventl.c 1.29. - */ - -#include -#include -#ifdef WIN32 -#include -#else -#include -#endif -#include -#include -#include - -#include -#include -#include -#include -#include "eventl.h" -#include - -IOCHAN iochan_create(int fd, IOC_CALLBACK cb, int flags) -{ - IOCHAN new_iochan; - - if (!(new_iochan = (IOCHAN)xmalloc(sizeof(*new_iochan)))) - return 0; - new_iochan->destroyed = 0; - new_iochan->fd = fd; - new_iochan->flags = flags; - new_iochan->fun = cb; - new_iochan->force_event = 0; - new_iochan->last_event = new_iochan->max_idle = 0; - new_iochan->next = NULL; - return new_iochan; -} - -int event_loop(IOCHAN *iochans) -{ - do /* loop as long as there are active associations to process */ - { - IOCHAN p, nextp; - fd_set in, out, except; - int res, max; - static struct timeval nullto = {0, 0}, to; - struct timeval *timeout; - - FD_ZERO(&in); - FD_ZERO(&out); - FD_ZERO(&except); - timeout = &to; /* hang on select */ - to.tv_sec = 30; - to.tv_usec = 0; - max = 0; - for (p = *iochans; p; p = p->next) - { - if (p->fd < 0) - continue; - if (p->force_event) - timeout = &nullto; /* polling select */ - if (p->flags & EVENT_INPUT) - FD_SET(p->fd, &in); - if (p->flags & EVENT_OUTPUT) - FD_SET(p->fd, &out); - if (p->flags & EVENT_EXCEPT) - FD_SET(p->fd, &except); - if (p->fd > max) - max = p->fd; - } - if ((res = select(max + 1, &in, &out, &except, timeout)) < 0) - { - if (errno == EINTR) - continue; - else - abort(); - } - for (p = *iochans; p; p = p->next) - { - int force_event = p->force_event; - time_t now = time(0); - - p->force_event = 0; - if (!p->destroyed && ((p->max_idle && now - p->last_event > - p->max_idle) || force_event == EVENT_TIMEOUT)) - { - p->last_event = now; - (*p->fun)(p, EVENT_TIMEOUT); - } - if (p->fd < 0) - continue; - if (!p->destroyed && (FD_ISSET(p->fd, &in) || - force_event == EVENT_INPUT)) - { - p->last_event = now; - (*p->fun)(p, EVENT_INPUT); - } - if (!p->destroyed && (FD_ISSET(p->fd, &out) || - force_event == EVENT_OUTPUT)) - { - p->last_event = now; - (*p->fun)(p, EVENT_OUTPUT); - } - if (!p->destroyed && (FD_ISSET(p->fd, &except) || - force_event == EVENT_EXCEPT)) - { - p->last_event = now; - (*p->fun)(p, EVENT_EXCEPT); - } - } - for (p = *iochans; p; p = nextp) - { - nextp = p->next; - - if (p->destroyed) - { - IOCHAN tmp = p, pr; - - /* Now reset the pointers */ - if (p == *iochans) - *iochans = p->next; - else - { - for (pr = *iochans; pr; pr = pr->next) - if (pr->next == p) - break; - assert(pr); /* grave error if it weren't there */ - pr->next = p->next; - } - if (nextp == p) - nextp = p->next; - xfree(tmp); - } - } - } - while (*iochans); - return 0; -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/eventl.h b/eventl.h deleted file mode 100644 index 9e10817..0000000 --- a/eventl.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 1995-1999, Index Data - * See the file LICENSE for details. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: eventl.h,v $ - * Revision 1.3 2006-12-12 02:36:24 quinn - * Implemented session timeout; ping command - * - * Revision 1.2 2006/11/18 05:00:38 quinn - * Added record retrieval, etc. - * - * Revision 1.1.1.1 2006/11/14 20:44:38 quinn - * PazPar2 - * - * Revision 1.1.1.1 2000/02/23 14:40:18 heikki - * Original import to cvs - * - * Revision 1.11 1999/04/20 09:56:48 adam - * Added 'name' paramter to encoder/decoder routines (typedef Odr_fun). - * Modified all encoders/decoders to reflect this change. - * - * Revision 1.10 1998/01/29 13:30:23 adam - * Better event handle system for NT/Unix. - * - * Revision 1.9 1997/09/01 09:31:48 adam - * Removed definition statserv_remove from statserv.h to eventl.h. - * - * Revision 1.8 1995/06/19 12:39:09 quinn - * Fixed bug in timeout code. Added BER dumper. - * - * Revision 1.7 1995/06/16 10:31:34 quinn - * Added session timeout. - * - * Revision 1.6 1995/05/16 08:51:02 quinn - * License, documentation, and memory fixes - * - * Revision 1.5 1995/05/15 11:56:37 quinn - * Asynchronous facilities. Restructuring of seshigh code. - * - * Revision 1.4 1995/03/27 08:34:23 quinn - * Added dynamic server functionality. - * Released bindings to session.c (is now redundant) - * - * Revision 1.3 1995/03/15 08:37:42 quinn - * Now we're pretty much set for nonblocking I/O. - * - * Revision 1.2 1995/03/14 10:28:00 quinn - * More work on demo server. - * - * Revision 1.1 1995/03/10 18:22:45 quinn - * The rudiments of an asynchronous server. - * - */ - -#ifndef EVENTL_H -#define EVENTL_H - -#include - -struct iochan; - -typedef void (*IOC_CALLBACK)(struct iochan *i, int event); - -typedef struct iochan -{ - int fd; - int flags; -#define EVENT_INPUT 0x01 -#define EVENT_OUTPUT 0x02 -#define EVENT_EXCEPT 0x04 -#define EVENT_TIMEOUT 0x08 -#define EVENT_WORK 0x10 - int force_event; - IOC_CALLBACK fun; - void *data; - int destroyed; - time_t last_event; - time_t max_idle; - - struct iochan *next; -} *IOCHAN; - -#define iochan_destroy(i) (void)((i)->destroyed = 1) -#define iochan_getfd(i) ((i)->fd) -#define iochan_setfd(i, f) ((i)->fd = (f)) -#define iochan_getdata(i) ((i)->data) -#define iochan_setdata(i, d) ((i)->data = d) -#define iochan_getflags(i) ((i)->flags) -#define iochan_setflags(i, d) ((i)->flags = d) -#define iochan_setflag(i, d) ((i)->flags |= d) -#define iochan_clearflag(i, d) ((i)->flags &= ~(d)) -#define iochan_getflag(i, d) ((i)->flags & d ? 1 : 0) -#define iochan_getfun(i) ((i)->fun) -#define iochan_setfun(i, d) ((i)->fun = d) -#define iochan_setevent(i, e) ((i)->force_event = (e)) -#define iochan_getnext(i) ((i)->next) -#define iochan_settimeout(i, t) ((i)->max_idle = (t), (i)->last_event = time(0)) -#define iochan_activity(i) ((i)->last_event = time(0)) - -IOCHAN iochan_create(int fd, IOC_CALLBACK cb, int flags); -int event_loop(IOCHAN *iochans); - -#endif diff --git a/http.c b/http.c deleted file mode 100644 index bf47090..0000000 --- a/http.c +++ /dev/null @@ -1,854 +0,0 @@ -/* - * $Id: http.c,v 1.6 2006-12-19 04:49:34 quinn Exp $ - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "command.h" -#include "util.h" -#include "eventl.h" -#include "pazpar2.h" -#include "http.h" -#include "http_command.h" - -static void proxy_io(IOCHAN i, int event); -static struct http_channel *http_create(void); -static void http_destroy(IOCHAN i); - -extern IOCHAN channel_list; - -static struct sockaddr_in *proxy_addr = 0; // If this is set, we proxy normal HTTP requests -static char proxy_url[256] = ""; -static struct http_buf *http_buf_freelist = 0; -static struct http_channel *http_channel_freelist = 0; - -static struct http_buf *http_buf_create() -{ - struct http_buf *r; - - if (http_buf_freelist) - { - r = http_buf_freelist; - http_buf_freelist = http_buf_freelist->next; - } - else - r = xmalloc(sizeof(struct http_buf)); - r->offset = 0; - r->len = 0; - r->next = 0; - return r; -} - -static void http_buf_destroy(struct http_buf *b) -{ - b->next = http_buf_freelist; - http_buf_freelist = b; -} - -static void http_buf_destroy_queue(struct http_buf *b) -{ - struct http_buf *p; - while (b) - { - p = b->next; - http_buf_destroy(b); - b = p; - } -} - -#ifdef GAGA -// Calculate length of chain -static int http_buf_len(struct http_buf *b) -{ - int sum = 0; - for (; b; b = b->next) - sum += b->len; - return sum; -} -#endif - -static struct http_buf *http_buf_bybuf(char *b, int len) -{ - struct http_buf *res = 0; - struct http_buf **p = &res; - - while (len) - { - *p = http_buf_create(); - int tocopy = len; - if (tocopy > HTTP_BUF_SIZE) - tocopy = HTTP_BUF_SIZE; - memcpy((*p)->buf, b, tocopy); - (*p)->len = tocopy; - len -= tocopy; - b += tocopy; - p = &(*p)->next; - } - return res; -} - -// Add a (chain of) buffers to the end of an existing queue. -static void http_buf_enqueue(struct http_buf **queue, struct http_buf *b) -{ - while (*queue) - queue = &(*queue)->next; - *queue = b; -} - -static struct http_buf *http_buf_bywrbuf(WRBUF wrbuf) -{ - // Heavens to Betsy (buf)! - return http_buf_bybuf(wrbuf_buf(wrbuf), wrbuf_len(wrbuf)); -} - -// Non-destructively collapse chain of buffers into a string (max *len) -// Return -static int http_buf_peek(struct http_buf *b, char *buf, int len) -{ - int rd = 0; - while (b && rd < len) - { - int toread = len - rd; - if (toread > b->len) - toread = b->len; - memcpy(buf + rd, b->buf + b->offset, toread); - rd += toread; - b = b->next; - } - buf[rd] = '\0'; - return rd; -} - -// Ddestructively munch up to len from head of queue. -static int http_buf_read(struct http_buf **b, char *buf, int len) -{ - int rd = 0; - while ((*b) && rd < len) - { - int toread = len - rd; - if (toread > (*b)->len) - toread = (*b)->len; - memcpy(buf + rd, (*b)->buf + (*b)->offset, toread); - rd += toread; - if (toread < (*b)->len) - { - (*b)->len -= toread; - (*b)->offset += toread; - break; - } - else - { - struct http_buf *n = (*b)->next; - http_buf_destroy(*b); - *b = n; - } - } - buf[rd] = '\0'; - return rd; -} - -void static urldecode(char *i, char *o) -{ - while (*i) - { - if (*i == '+') - { - *(o++) = ' '; - i++; - } - else if (*i == '%') - { - i++; - sscanf(i, "%2hhx", o); - i += 2; - o++; - } - else - *(o++) = *(i++); - } - *o = '\0'; -} - -void http_addheader(struct http_response *r, const char *name, const char *value) -{ - struct http_channel *c = r->channel; - struct http_header *h = nmem_malloc(c->nmem, sizeof *h); - h->name = nmem_strdup(c->nmem, name); - h->value = nmem_strdup(c->nmem, value); - h->next = r->headers; - r->headers = h; -} - -char *http_argbyname(struct http_request *r, char *name) -{ - struct http_argument *p; - if (!name) - return 0; - for (p = r->arguments; p; p = p->next) - if (!strcmp(p->name, name)) - return p->value; - return 0; -} - -char *http_headerbyname(struct http_request *r, char *name) -{ - struct http_header *p; - for (p = r->headers; p; p = p->next) - if (!strcmp(p->name, name)) - return p->value; - return 0; -} - -struct http_response *http_create_response(struct http_channel *c) -{ - struct http_response *r = nmem_malloc(c->nmem, sizeof(*r)); - strcpy(r->code, "200"); - r->msg = "OK"; - r->channel = c; - r->headers = 0; - r->payload = 0; - return r; -} - -// Check if we have a complete request. Return 0 or length (including trailing newline) -// FIXME: Does not deal gracefully with requests carrying payload -// but this is kind of OK since we will reject anything other than an empty GET -static int request_check(struct http_buf *queue) -{ - char tmp[4096]; - int len = 0; - char *buf = tmp; - - http_buf_peek(queue, tmp, 4096); - while (*buf) // Check if we have a sequence of lines terminated by an empty line - { - char *b = strstr(buf, "\r\n"); - - if (!b) - return 0; - - len += (b - buf) + 2; - if (b == buf) - return len; - buf = b + 2; - } - return 0; -} - -struct http_request *http_parse_request(struct http_channel *c, struct http_buf **queue, - int len) -{ - struct http_request *r = nmem_malloc(c->nmem, sizeof(*r)); - char *p, *p2; - char tmp[4096]; - char *buf = tmp; - - if (len > 4096) - return 0; - if (http_buf_read(queue, buf, len) < len) - return 0; - - r->channel = c; - r->arguments = 0; - r->headers = 0; - // Parse first line - for (p = buf, p2 = r->method; *p && *p != ' ' && p - buf < 19; p++) - *(p2++) = *p; - if (*p != ' ') - { - yaz_log(YLOG_WARN, "Unexpected HTTP method in request"); - return 0; - } - *p2 = '\0'; - - if (!(buf = strchr(buf, ' '))) - { - yaz_log(YLOG_WARN, "Syntax error in request (1)"); - return 0; - } - buf++; - if (!(p = strchr(buf, ' '))) - { - yaz_log(YLOG_WARN, "Syntax error in request (2)"); - return 0; - } - *(p++) = '\0'; - if ((p2 = strchr(buf, '?'))) // Do we have arguments? - *(p2++) = '\0'; - r->path = nmem_strdup(c->nmem, buf); - if (p2) - { - // Parse Arguments - while (*p2) - { - struct http_argument *a; - char *equal = strchr(p2, '='); - char *eoa = strchr(p2, '&'); - if (!equal) - { - yaz_log(YLOG_WARN, "Expected '=' in argument"); - return 0; - } - if (!eoa) - eoa = equal + strlen(equal); // last argument - else - *(eoa++) = '\0'; - a = nmem_malloc(c->nmem, sizeof(struct http_argument)); - *(equal++) = '\0'; - a->name = nmem_strdup(c->nmem, p2); - urldecode(equal, equal); - a->value = nmem_strdup(c->nmem, equal); - a->next = r->arguments; - r->arguments = a; - p2 = eoa; - } - } - buf = p; - - if (strncmp(buf, "HTTP/", 5)) - strcpy(r->http_version, "1.0"); - else - { - buf += 5; - if (!(p = strstr(buf, "\r\n"))) - return 0; - *(p++) = '\0'; - p++; - strcpy(r->http_version, buf); - buf = p; - } - strcpy(c->version, r->http_version); - - r->headers = 0; - while (*buf) - { - if (!(p = strstr(buf, "\r\n"))) - return 0; - if (p == buf) - break; - else - { - struct http_header *h = nmem_malloc(c->nmem, sizeof(*h)); - if (!(p2 = strchr(buf, ':'))) - return 0; - *(p2++) = '\0'; - h->name = nmem_strdup(c->nmem, buf); - while (isspace(*p2)) - p2++; - if (p2 >= p) // Empty header? - { - buf = p + 2; - continue; - } - *p = '\0'; - h->value = nmem_strdup(c->nmem, p2); - h->next = r->headers; - r->headers = h; - buf = p + 2; - } - } - - return r; -} - - -static struct http_buf *http_serialize_response(struct http_channel *c, - struct http_response *r) -{ - wrbuf_rewind(c->wrbuf); - struct http_header *h; - - wrbuf_printf(c->wrbuf, "HTTP/1.1 %s %s\r\n", r->code, r->msg); - for (h = r->headers; h; h = h->next) - wrbuf_printf(c->wrbuf, "%s: %s\r\n", h->name, h->value); - wrbuf_printf(c->wrbuf, "Content-length: %d\r\n", r->payload ? (int) strlen(r->payload) : 0); - wrbuf_printf(c->wrbuf, "Content-type: text/xml\r\n"); - wrbuf_puts(c->wrbuf, "\r\n"); - - if (r->payload) - wrbuf_puts(c->wrbuf, r->payload); - - return http_buf_bywrbuf(c->wrbuf); -} - -// Serialize a HTTP request -static struct http_buf *http_serialize_request(struct http_request *r) -{ - struct http_channel *c = r->channel; - wrbuf_rewind(c->wrbuf); - struct http_header *h; - struct http_argument *a; - - wrbuf_printf(c->wrbuf, "%s %s", r->method, r->path); - - if (r->arguments) - { - wrbuf_putc(c->wrbuf, '?'); - for (a = r->arguments; a; a = a->next) { - if (a != r->arguments) - wrbuf_putc(c->wrbuf, '&'); - wrbuf_printf(c->wrbuf, "%s=%s", a->name, a->value); - } - } - - wrbuf_printf(c->wrbuf, " HTTP/%s\r\n", r->http_version); - - for (h = r->headers; h; h = h->next) - wrbuf_printf(c->wrbuf, "%s: %s\r\n", h->name, h->value); - - wrbuf_puts(c->wrbuf, "\r\n"); - - return http_buf_bywrbuf(c->wrbuf); -} - - -static int http_weshouldproxy(struct http_request *rq) -{ - if (proxy_addr && !strstr(rq->path, "search.pz2")) - return 1; - return 0; -} - -static int http_proxy(struct http_request *rq) -{ - struct http_channel *c = rq->channel; - struct http_proxy *p = c->proxy; - struct http_header *hp; - struct http_buf *requestbuf; - - if (!p) // This is a new connection. Create a proxy channel - { - int sock; - struct protoent *pe; - int one = 1; - int flags; - - if (!(pe = getprotobyname("tcp"))) { - abort(); - } - if ((sock = socket(PF_INET, SOCK_STREAM, pe->p_proto)) < 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "socket"); - return -1; - } - if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char*) - &one, sizeof(one)) < 0) - abort(); - if ((flags = fcntl(sock, F_GETFL, 0)) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl"); - if (fcntl(sock, F_SETFL, flags | O_NONBLOCK) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2"); - if (connect(sock, (struct sockaddr *) proxy_addr, sizeof(*proxy_addr)) < 0) - if (errno != EINPROGRESS) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "Proxy connect"); - return -1; - } - - p = xmalloc(sizeof(struct http_proxy)); - p->oqueue = 0; - p->channel = c; - c->proxy = p; - // We will add EVENT_OUTPUT below - p->iochan = iochan_create(sock, proxy_io, EVENT_INPUT); - iochan_setdata(p->iochan, p); - p->iochan->next = channel_list; - channel_list = p->iochan; - } - - // Modify Host: header - for (hp = rq->headers; hp; hp = hp->next) - if (!strcmp(hp->name, "Host")) - break; - if (!hp) - { - yaz_log(YLOG_WARN, "Failed to find Host header in proxy"); - return -1; - } - hp->value = nmem_strdup(c->nmem, proxy_url); - requestbuf = http_serialize_request(rq); - http_buf_enqueue(&p->oqueue, requestbuf); - iochan_setflag(p->iochan, EVENT_OUTPUT); - return 0; -} - -void http_send_response(struct http_channel *ch) -{ - struct http_response *rs = ch->response; - assert(rs); - struct http_buf *hb = http_serialize_response(ch, rs); - if (!hb) - { - yaz_log(YLOG_WARN, "Failed to serialize HTTP response"); - http_destroy(ch->iochan); - } - else - { - http_buf_enqueue(&ch->oqueue, hb); - iochan_setflag(ch->iochan, EVENT_OUTPUT); - ch->state = Http_Idle; - } -} - -static void http_io(IOCHAN i, int event) -{ - struct http_channel *hc = iochan_getdata(i); - - switch (event) - { - int res, reqlen; - struct http_buf *htbuf; - - case EVENT_INPUT: - htbuf = http_buf_create(); - res = read(iochan_getfd(i), htbuf->buf, HTTP_BUF_SIZE -1); - if (res <= 0 && errno != EAGAIN) - { - http_buf_destroy(htbuf); - http_destroy(i); - return; - } - if (res > 0) - { - htbuf->buf[res] = '\0'; - htbuf->len = res; - http_buf_enqueue(&hc->iqueue, htbuf); - } - - if (hc->state == Http_Busy) - return; - - if ((reqlen = request_check(hc->iqueue)) <= 2) - return; - - nmem_reset(hc->nmem); - if (!(hc->request = http_parse_request(hc, &hc->iqueue, reqlen))) - { - yaz_log(YLOG_WARN, "Failed to parse request"); - http_destroy(i); - return; - } - hc->response = 0; - yaz_log(YLOG_LOG, "Request: %s %s v %s", hc->request->method, - hc->request->path, hc->request->http_version); - if (http_weshouldproxy(hc->request)) - http_proxy(hc->request); - else - { - // Execute our business logic! - hc->state = Http_Busy; - http_command(hc); - } - if (hc->iqueue) - { - yaz_log(YLOG_DEBUG, "We think we have more input to read. Forcing event"); - iochan_setevent(i, EVENT_INPUT); - } - - break; - - case EVENT_OUTPUT: - if (hc->oqueue) - { - struct http_buf *wb = hc->oqueue; - res = write(iochan_getfd(hc->iochan), wb->buf + wb->offset, wb->len); - if (res <= 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "write"); - http_destroy(i); - return; - } - if (res == wb->len) - { - hc->oqueue = hc->oqueue->next; - http_buf_destroy(wb); - } - else - { - wb->len -= res; - wb->offset += res; - } - if (!hc->oqueue) { - if (!strcmp(hc->version, "1.0")) - { - http_destroy(i); - return; - } - else - { - iochan_clearflag(i, EVENT_OUTPUT); - if (hc->iqueue) - iochan_setevent(hc->iochan, EVENT_INPUT); - } - } - } - - if (!hc->oqueue && hc->proxy && !hc->proxy->iochan) - http_destroy(i); // Server closed; we're done - break; - default: - yaz_log(YLOG_WARN, "Unexpected event on connection"); - http_destroy(i); - } -} - -// Handles I/O on a client connection to a backend web server (proxy mode) -static void proxy_io(IOCHAN pi, int event) -{ - struct http_proxy *pc = iochan_getdata(pi); - struct http_channel *hc = pc->channel; - - switch (event) - { - int res; - struct http_buf *htbuf; - - case EVENT_INPUT: - htbuf = http_buf_create(); - res = read(iochan_getfd(pi), htbuf->buf, HTTP_BUF_SIZE -1); - if (res == 0 || (res < 0 && errno != EINPROGRESS)) - { - if (hc->oqueue) - { - yaz_log(YLOG_WARN, "Proxy read came up short"); - // Close channel and alert client HTTP channel that we're gone - http_buf_destroy(htbuf); - close(iochan_getfd(pi)); - iochan_destroy(pi); - pc->iochan = 0; - } - else - { - http_destroy(hc->iochan); - return; - } - } - else - { - htbuf->buf[res] = '\0'; - htbuf->len = res; - http_buf_enqueue(&hc->oqueue, htbuf); - } - iochan_setflag(hc->iochan, EVENT_OUTPUT); - break; - case EVENT_OUTPUT: - if (!(htbuf = pc->oqueue)) - { - iochan_clearflag(pi, EVENT_OUTPUT); - return; - } - res = write(iochan_getfd(pi), htbuf->buf + htbuf->offset, htbuf->len); - if (res <= 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "write"); - http_destroy(hc->iochan); - return; - } - if (res == htbuf->len) - { - struct http_buf *np = htbuf->next; - http_buf_destroy(htbuf); - pc->oqueue = np; - } - else - { - htbuf->len -= res; - htbuf->offset += res; - } - - if (!pc->oqueue) { - iochan_setflags(pi, EVENT_INPUT); // Turns off output flag - } - break; - default: - yaz_log(YLOG_WARN, "Unexpected event on connection"); - http_destroy(hc->iochan); - } -} - -// Cleanup channel -static void http_destroy(IOCHAN i) -{ - struct http_channel *s = iochan_getdata(i); - - if (s->proxy) - { - if (s->proxy->iochan) - { - close(iochan_getfd(s->proxy->iochan)); - iochan_destroy(s->proxy->iochan); - } - http_buf_destroy_queue(s->proxy->oqueue); - xfree(s->proxy); - } - s->next = http_channel_freelist; - http_channel_freelist = s; - close(iochan_getfd(i)); - iochan_destroy(i); -} - -static struct http_channel *http_create(void) -{ - struct http_channel *r = http_channel_freelist; - - if (r) - { - http_channel_freelist = r->next; - nmem_reset(r->nmem); - wrbuf_rewind(r->wrbuf); - } - else - { - r = xmalloc(sizeof(struct http_channel)); - r->nmem = nmem_create(); - r->wrbuf = wrbuf_alloc(); - } - r->proxy = 0; - r->iochan = 0; - r->iqueue = r->oqueue = 0; - r->state = Http_Idle; - r->request = 0; - r->response = 0; - return r; -} - - -/* Accept a new command connection */ -static void http_accept(IOCHAN i, int event) -{ - struct sockaddr_in addr; - int fd = iochan_getfd(i); - socklen_t len; - int s; - IOCHAN c; - int flags; - struct http_channel *ch; - - len = sizeof addr; - if ((s = accept(fd, (struct sockaddr *) &addr, &len)) < 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "accept"); - return; - } - if ((flags = fcntl(s, F_GETFL, 0)) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl"); - if (fcntl(s, F_SETFL, flags | O_NONBLOCK) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2"); - - yaz_log(YLOG_LOG, "New command connection"); - c = iochan_create(s, http_io, EVENT_INPUT | EVENT_EXCEPT); - - ch = http_create(); - ch->iochan = c; - iochan_setdata(c, ch); - - c->next = channel_list; - channel_list = c; -} - -/* Create a http-channel listener, syntax [host:]port */ -void http_init(const char *addr) -{ - IOCHAN c; - int l; - struct protoent *p; - struct sockaddr_in myaddr; - int one = 1; - const char *pp; - int port; - - yaz_log(YLOG_LOG, "HTTP listener is %s", addr); - - bzero(&myaddr, sizeof myaddr); - myaddr.sin_family = AF_INET; - pp = strchr(addr, ':'); - if (pp) - { - int len = pp - addr; - char hostname[128]; - struct hostent *he; - - strncpy(hostname, addr, len); - hostname[len] = '\0'; - if (!(he = gethostbyname(hostname))) - { - yaz_log(YLOG_FATAL, "Unable to resolve '%s'", hostname); - exit(1); - } - memcpy(&myaddr.sin_addr.s_addr, he->h_addr_list[0], he->h_length); - port = atoi(pp + 1); - } - else - { - port = atoi(addr); - myaddr.sin_addr.s_addr = INADDR_ANY; - } - myaddr.sin_port = htons(port); - - if (!(p = getprotobyname("tcp"))) { - abort(); - } - if ((l = socket(PF_INET, SOCK_STREAM, p->p_proto)) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "socket"); - if (setsockopt(l, SOL_SOCKET, SO_REUSEADDR, (char*) - &one, sizeof(one)) < 0) - abort(); - - if (bind(l, (struct sockaddr *) &myaddr, sizeof myaddr) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "bind"); - if (listen(l, SOMAXCONN) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "listen"); - - c = iochan_create(l, http_accept, EVENT_INPUT | EVENT_EXCEPT); - c->next = channel_list; - channel_list = c; -} - -void http_set_proxyaddr(char *host) -{ - char *p; - int port; - struct hostent *he; - - strcpy(proxy_url, host); - p = strchr(host, ':'); - yaz_log(YLOG_DEBUG, "Proxying for %s", host); - if (p) { - port = atoi(p + 1); - *p = '\0'; - } - else - port = 80; - if (!(he = gethostbyname(host))) - { - fprintf(stderr, "Failed to lookup '%s'\n", host); - exit(1); - } - proxy_addr = xmalloc(sizeof(struct sockaddr_in)); - proxy_addr->sin_family = he->h_addrtype; - memcpy(&proxy_addr->sin_addr.s_addr, he->h_addr_list[0], he->h_length); - proxy_addr->sin_port = htons(port); -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/http.h b/http.h deleted file mode 100644 index 7501424..0000000 --- a/http.h +++ /dev/null @@ -1,88 +0,0 @@ -#ifndef HTTP_H -#define HTTP_H - -// Generic I/O buffer -struct http_buf -{ -#define HTTP_BUF_SIZE 4096 - char buf[4096]; - int offset; - int len; - struct http_buf *next; -}; - -struct http_channel -{ - IOCHAN iochan; - struct http_buf *iqueue; - struct http_buf *oqueue; - char version[10]; - struct http_proxy *proxy; - enum - { - Http_Idle, - Http_Busy // Don't process new HTTP requests while we're busy - } state; - NMEM nmem; - WRBUF wrbuf; - struct http_request *request; - struct http_response *response; - struct http_channel *next; // for freelist -}; - -struct http_proxy // attached to iochan for proxy connection -{ - IOCHAN iochan; - struct http_channel *channel; - struct http_buf *oqueue; -}; - -struct http_header -{ - char *name; - char *value; - struct http_header *next; -}; - -struct http_argument -{ - char *name; - char *value; - struct http_argument *next; -}; - -struct http_request -{ - struct http_channel *channel; - char http_version[20]; - char method[20]; - char *path; - struct http_header *headers; - struct http_argument *arguments; -}; - -struct http_response -{ - char code[4]; - char *msg; - struct http_channel *channel; - struct http_header *headers; - char *payload; -}; - -void http_set_proxyaddr(char *url); -void http_init(const char *addr); -void http_addheader(struct http_response *r, const char *name, const char *value); -char *http_argbyname(struct http_request *r, char *name); -char *http_headerbyname(struct http_request *r, char *name); -struct http_response *http_create_response(struct http_channel *c); -void http_send_response(struct http_channel *c); - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ -#endif diff --git a/http_command.c b/http_command.c deleted file mode 100644 index d3939dc..0000000 --- a/http_command.c +++ /dev/null @@ -1,403 +0,0 @@ -/* - * $Id: http_command.c,v 1.9 2006-12-17 13:42:47 quinn Exp $ - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "command.h" -#include "util.h" -#include "eventl.h" -#include "pazpar2.h" -#include "http.h" -#include "http_command.h" - -extern struct parameters global_parameters; -extern IOCHAN channel_list; - -struct http_session { - IOCHAN timeout_iochan; // NOTE: This is NOT associated with a socket - struct session *psession; - unsigned int session_id; - int timestamp; - struct http_session *next; -}; - -static struct http_session *session_list = 0; - -void http_session_destroy(struct http_session *s); - -static void session_timeout(IOCHAN i, int event) -{ - struct http_session *s = iochan_getdata(i); - http_session_destroy(s); -} - -struct http_session *http_session_create() -{ - struct http_session *r = xmalloc(sizeof(*r)); - r->psession = new_session(); - r->session_id = 0; - r->timestamp = 0; - r->next = session_list; - session_list = r; - r->timeout_iochan = iochan_create(-1, session_timeout, 0); - iochan_setdata(r->timeout_iochan, r); - iochan_settimeout(r->timeout_iochan, global_parameters.session_timeout); - r->timeout_iochan->next = channel_list; - channel_list = r->timeout_iochan; - return r; -} - -void http_session_destroy(struct http_session *s) -{ - struct http_session **p; - - for (p = &session_list; *p; p = &(*p)->next) - if (*p == s) - { - *p = (*p)->next; - break; - } - iochan_destroy(s->timeout_iochan); - destroy_session(s->psession); - xfree(s); -} - -static void error(struct http_response *rs, char *code, char *msg, char *txt) -{ - struct http_channel *c = rs->channel; - char tmp[1024]; - - if (!txt) - txt = msg; - rs->msg = nmem_strdup(c->nmem, msg); - strcpy(rs->code, code); - sprintf(tmp, "%s", txt); - rs->payload = nmem_strdup(c->nmem, tmp); - http_send_response(c); -} - -unsigned int make_sessionid() -{ - struct timeval t; - unsigned int res; - static int seq = 0; - - seq++; - if (gettimeofday(&t, 0) < 0) - abort(); - res = t.tv_sec; - res = ((res << 8) | (seq & 0xff)) & ((unsigned int) (1 << 31) - 1); - return res; -} - -static struct http_session *locate_session(struct http_request *rq, struct http_response *rs) -{ - struct http_session *p; - char *session = http_argbyname(rq, "session"); - unsigned int id; - - if (!session) - { - error(rs, "417", "Must supply session", 0); - return 0; - } - id = atoi(session); - for (p = session_list; p; p = p->next) - if (id == p->session_id) - { - iochan_activity(p->timeout_iochan); - return p; - } - error(rs, "417", "Session does not exist, or it has expired", 0); - return 0; -} - -static void cmd_exit(struct http_channel *c) -{ - yaz_log(YLOG_WARN, "exit"); - exit(0); -} - - -static void cmd_init(struct http_channel *c) -{ - unsigned int sesid; - char buf[1024]; - struct http_session *s = http_session_create(); - struct http_response *rs = c->response; - - yaz_log(YLOG_DEBUG, "HTTP Session init"); - sesid = make_sessionid(); - s->session_id = sesid; - sprintf(buf, "OK%u", sesid); - rs->payload = nmem_strdup(c->nmem, buf); - http_send_response(c); -} - -static void cmd_termlist(struct http_channel *c) -{ - struct http_response *rs = c->response; - struct http_request *rq = c->request; - struct http_session *s = locate_session(rq, rs); - struct termlist_score **p; - int len; - int i; - - if (!s) - return; - wrbuf_rewind(c->wrbuf); - - wrbuf_puts(c->wrbuf, ""); - p = termlist(s->psession, &len); - if (p) - for (i = 0; i < len; i++) - { - wrbuf_puts(c->wrbuf, "\n"); - wrbuf_printf(c->wrbuf, "%s", p[i]->term); - wrbuf_printf(c->wrbuf, "%d", p[i]->frequency); - wrbuf_puts(c->wrbuf, ""); - } - wrbuf_puts(c->wrbuf, ""); - rs->payload = nmem_strdup(rq->channel->nmem, wrbuf_buf(c->wrbuf)); - http_send_response(c); -} - - -static void cmd_bytarget(struct http_channel *c) -{ - struct http_response *rs = c->response; - struct http_request *rq = c->request; - struct http_session *s = locate_session(rq, rs); - struct hitsbytarget *ht; - int count, i; - - if (!s) - return; - if (!(ht = hitsbytarget(s->psession, &count))) - { - error(rs, "500", "Failed to retrieve hitcounts", 0); - return; - } - wrbuf_rewind(c->wrbuf); - wrbuf_puts(c->wrbuf, "OK"); - - for (i = 0; i < count; i++) - { - wrbuf_puts(c->wrbuf, "\n"); - wrbuf_printf(c->wrbuf, "%s\n", ht[i].id); - wrbuf_printf(c->wrbuf, "%d\n", ht[i].hits); - wrbuf_printf(c->wrbuf, "%d\n", ht[i].diagnostic); - wrbuf_printf(c->wrbuf, "%d\n", ht[i].records); - wrbuf_printf(c->wrbuf, "%s\n", ht[i].state); - wrbuf_puts(c->wrbuf, ""); - } - - wrbuf_puts(c->wrbuf, ""); - rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf)); - http_send_response(c); -} - -static void show_records(struct http_channel *c) -{ - struct http_request *rq = c->request; - struct http_response *rs = c->response; - struct http_session *s = locate_session(rq, rs); - struct record **rl; - char *start = http_argbyname(rq, "start"); - char *num = http_argbyname(rq, "num"); - int startn = 0; - int numn = 20; - int total; - int total_hits; - int i; - - if (!s) - return; - - if (start) - startn = atoi(start); - if (num) - numn = atoi(num); - - rl = show(s->psession, startn, &numn, &total, &total_hits); - - wrbuf_rewind(c->wrbuf); - wrbuf_puts(c->wrbuf, "\nOK\n"); - wrbuf_printf(c->wrbuf, "%d\n", total); - wrbuf_printf(c->wrbuf, "%d\n", total_hits); - wrbuf_printf(c->wrbuf, "%d\n", startn); - wrbuf_printf(c->wrbuf, "%d\n", numn); - - for (i = 0; i < numn; i++) - { - int ccount; - struct record *p; - - wrbuf_puts(c->wrbuf, "\n"); - wrbuf_printf(c->wrbuf, "%s\n", rl[i]->title); - for (ccount = 1, p = rl[i]->next_cluster; p; p = p->next_cluster, ccount++) - ; - if (ccount > 1) - wrbuf_printf(c->wrbuf, "%d\n", ccount); - wrbuf_puts(c->wrbuf, "\n"); - } - - wrbuf_puts(c->wrbuf, "\n"); - rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf)); - http_send_response(c); -} - -static void show_records_ready(void *data) -{ - struct http_channel *c = (struct http_channel *) data; - - show_records(c); -} - -static void cmd_show(struct http_channel *c) -{ - struct http_request *rq = c->request; - struct http_response *rs = c->response; - struct http_session *s = locate_session(rq, rs); - char *block = http_argbyname(rq, "block"); - - if (!s) - return; - - if (block) - { - if (!s->psession->reclist || !s->psession->reclist->num_records) - { - session_set_watch(s->psession, SESSION_WATCH_RECORDS, show_records_ready, c); - yaz_log(YLOG_DEBUG, "Blocking on cmd_show"); - return; - } - } - - show_records(c); -} - -static void cmd_ping(struct http_channel *c) -{ - struct http_request *rq = c->request; - struct http_response *rs = c->response; - struct http_session *s = locate_session(rq, rs); - if (!s) - return; - rs->payload = "OK"; - http_send_response(c); -} - -static void cmd_search(struct http_channel *c) -{ - struct http_request *rq = c->request; - struct http_response *rs = c->response; - struct http_session *s = locate_session(rq, rs); - char *query = http_argbyname(rq, "query"); - char *res; - - if (!s) - return; - if (!query) - { - error(rs, "417", "Must supply query", 0); - return; - } - res = search(s->psession, query); - if (res) - { - error(rs, "417", res, res); - return; - } - rs->payload = "OK"; - http_send_response(c); -} - - -static void cmd_stat(struct http_channel *c) -{ - struct http_request *rq = c->request; - struct http_response *rs = c->response; - struct http_session *s = locate_session(rq, rs); - struct statistics stat; - - if (!s) - return; - - statistics(s->psession, &stat); - - wrbuf_rewind(c->wrbuf); - wrbuf_puts(c->wrbuf, ""); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_hits); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_records); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_clients); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_no_connection); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_connecting); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_initializing); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_searching); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_presenting); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_idle); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_failed); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_error); - wrbuf_puts(c->wrbuf, ""); - rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf)); - http_send_response(c); -} - - -struct { - char *name; - void (*fun)(struct http_channel *c); -} commands[] = { - { "init", cmd_init }, - { "stat", cmd_stat }, - { "bytarget", cmd_bytarget }, - { "show", cmd_show }, - { "search", cmd_search }, - { "termlist", cmd_termlist }, - { "exit", cmd_exit }, - { "ping", cmd_ping }, - {0,0} -}; - -void http_command(struct http_channel *c) -{ - char *command = http_argbyname(c->request, "command"); - struct http_response *rs = http_create_response(c); - int i; - - c->response = rs; - if (!command) - { - error(rs, "417", "Must supply command", 0); - return; - } - for (i = 0; commands[i].name; i++) - if (!strcmp(commands[i].name, command)) - { - (*commands[i].fun)(c); - break; - } - if (!commands[i].name) - error(rs, "417", "Unknown command", 0); - - return; -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/http_command.h b/http_command.h deleted file mode 100644 index e127925..0000000 --- a/http_command.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef HTTP_COMMAND_H -#define HTTP_COMMAND - -#include "http.h" - -void http_command(struct http_channel *c); - -#endif diff --git a/many.pz b/many.pz deleted file mode 100644 index db1bd9c..0000000 --- a/many.pz +++ /dev/null @@ -1,299 +0,0 @@ -target 12.4.242.230:210/Unicorn -target 128.119.168.1:210/INNOPAC -target 128.119.168.2:210/INNOPAC -target 128.197.130.200:210/INNOPAC -target 128.218.15.173:210/INNOPAC -target 128.83.82.249:210/INNOPAC -target 129.234.4.6:210/INNOPAC -target 129.65.20.27:210/Innopac -target 129.78.72.7:210/INNOPAC -target 130.102.42.45:210/Innopac -target 130.111.64.52:210/INNOPAC -target 130.206.134.239:210/INNOPAC -target 130.212.18.200:210/Innopac -target 130.91.144.191:210/INNOPAC -target 131.165.177.76:2100/S -target 131.165.97.2:210/S -target 131.232.13.6:210/INNOPAC -target 134.115.152.130:210/Innopac -target 134.154.30.10:210/Innopac -target 137.187.166.250:210/INNOPAC -target 137.45.212.100:210/INNOPAC -target 141.215.16.4:210/INNOPAC -target 144.37.1.4:210/Innopac -target 147.143.2.5:210/INNOPAC -target 147.175.67.227:1111/DEFAULT -target 147.175.67.227:2222/DEFAULT -target 147.29.116.113:2100/DAB01 -target 147.29.116.113:2100/KGB01 -target 147.29.116.113:2100/PVS -target 147.29.116.113:2100/vej01 -target 149.130.90.2:210/INNOPAC -target 149.175.20.20:210/INNOPAC -target 161.210.213.100:210/INNOPAC -target 169.139.225.46:210/horizon -target 192.188.131.54:2200/UNICORN -target 192.197.190.4:210/Advance -target 192.65.218.23:210/DREWDB -target 193.162.157.130:2100/S -target 193.43.102.194:21210/ADVANCE -target 193.52.199.5:21210/ADVANCE -target 193.59.172.100:210/INNOPAC -target 194.182.134.04:210/S -target 194.239.156.133:2100/S -target 195.215.203.98:2100/S -target 195.249.206.204:210/Default -target 195.249.206.204:210/default -target 195.41.8.218:2100/S -target 195.50.60.15:2100/CB -target 195.50.60.15:2101/FB -target 195.50.60.15:2102/MB -target 195.50.60.15:2103/PB -target 196.6.221.16:210/INNOPAC -target 198.62.73.72:210/INNOPAC -target 199.184.19.10:210/innopac -target 199.88.71.7:210/INNOPAC -target 202.14.152.4:210/INNOPAC -target 202.40.151.2:210/INNOPAC -target 202.40.216.17:210/INNOPAC -target 206.48.3.167:7090/Voyager -target 207.194.254.193:210/advance -target Exlibris.albertslundbib.dk:2100/S -target Exlibris.daimi.au.dk:2104/FYS01 -target Exlibris.daimi.au.dk:2104/GEO01 -target Exlibris.daimi.au.dk:2104/KEM01 -target Exlibris.daimi.au.dk:2104/MAT01 -target Exlibris.daimi.au.dk:2104/VID01 -target Z3950.maribo.integrabib.dk:210/Default -target Z3950cat.bl.uk:9909/BLAC -target acorn.library.vanderbilt.edu:2200/ACORN -target advance.biblio.polymtl.ca:210/ADVANCE -target advance.lib.rmit.edu.au:21210/Advance -target albert.rit.edu:210/INNOPAC -target aleph.mcgill.ca:210/MUSE -target aleph.unibas.ch:9909/DSV01 -target alpha.svkol.cz:9909/SVK01 -target alpha.svkol.cz:9909/SVK02 -target atrium.bib.umontreal.ca:210/ADVANCE -target bagel.indexdata.dk:210/gils -target bagel.indexdata.dk:210/marc -target bcr1.larioja.org:210/AbsysE -target bib.gladsaxe.dk:2100/S -target bib.gladsaxe.dk:2101/H -target bib.gladsaxe.dk:2101/S -target biblio.crusca.fi.it:9909/ADC01 -target biblio.hec.ca:210/hec -target biblios.dic.uchile.cl:210/BELLO -target biblioteca.uc3m.es:2200/unicorn -target bobcat.nyu.edu:210/ADVANCE -target books.luther.edu:210/innopac -target brocar.unavarra.es:9999/libros -target brocar.unavarra.es:9999/revistas -target cat.cisti.nrc.ca:210/INNOPAC -target cat.lib.grin.edu:210/innopac -target catalog.bedfordlibrary.org:210/Innopac -target catalog.crl.edu:210/INNOPAC -target catalog.lib.jhu.edu:210/horizon -target cisne.sim.ucm.es:210/INNOPAC -target clavis.ucalgary.ca:2200/UNICORN -target cornelia.whoi.edu:7090/VOYAGER -target corpsgeo1.usace.army.mil:2210/geo1 -target csulib.ctstateu.edu:210/INNOPAC -target echea.ru.ac.za:210/INNOPAC -target edcsns17.cr.usgs.gov:6675/AVHRR -target edcsns17.cr.usgs.gov:6675/DOQ -target edcsns17.cr.usgs.gov:6675/HAZARDS -target edcsns17.cr.usgs.gov:6675/LANDSAT_MSS -target edcsns17.cr.usgs.gov:6675/LANDSAT_TM -target edcsns17.cr.usgs.gov:6675/NAPP -target edcsns17.cr.usgs.gov:6675/NHAP -target eleanor.lib.gla.ac.uk:210/INNOPAC -target eusa.library.net:5666/eusa -target explore.up.ac.za:210/INNOPAC -target gaudi.ua.es:2200/unicorn -target gb.glostrupbib.dk:2100/S -target hcb.bibnet.dk:2100/S -target helios.nlib.ee:210/INNOPAC -target hermes.lib.cbs.dk:2100/S -target hkbulib.hkbu.edu.hk:210/INNOPAC -target ikast.ikast.bibnet.dk:2100/S -target indexgeo.com.au:6668/dataset -target indexgeo.net:5511/act -target info.library.mun.ca:2200/UNICORN -target innopac.lib.kth.se:210/innopac -target innopac.wits.ac.za:210/INNOPAC -target itec.mty.itesm.mx:210/innopac -target jasper.acadiau.ca:2200/UNICORN -target jenda.lib.nccu.edu.tw:210/INNOPAC -target kat.vkol.cz:9909/SVK01 -target kat.vkol.cz:9909/SVK02 -target kraka.birkerod.bibnet.dk:2100/S -target ksclib.keene.edu:210/INNOPAC -target l1.uwaterloo.ca:7090/VOYAGER -target lance.missouri.edu:210/INNOPAC -target lib.leeds.ac.uk:210/INNOPAC -target lib.soas.ac.uk:210/innopac -target libadfa.adfa.edu.au:7090/VOYAGER -target libcat.qut.edu.au:210/INNOPAC -target liber.acadlib.lv:211/codex.previous -target liber.acadlib.lv:211/liber1 -target liber.acadlib.lv:212/liber1 -target library.anu.edu.au:210/INNOPAC -target library.ballarat.edu.au:210/INNOPAC -target library.bangor.ac.uk:210/INNOPAC -target library.bilgi.edu.tr:210/INNOPAC -target library.brad.ac.uk:210/xxDefault -target library.brunel.ac.uk:2200/UNICORN -target library.daemen.edu:210/innopac -target library.deakin.edu.au:210/INNOPAC -target library.gu.edu.au:21210/ADVANCE -target library.hud.ac.uk:210/HORIZON -target library.hull.ac.uk:210/INNOPAC -target library.latrobe.edu.au:210/INNOPAC -target library.lbc.edu:7090/voyager -target library.mdx.ac.uk:210/mdx -target library.newcastle.edu.au:210/INNOPAC -target library.ox.ac.uk:210/ADVANCE -target library.tcd.ie:210/advance -target library.ucc.ie:210/INNOPAC -target library.uh.edu:210/INNOPAC -target library.vu.edu.au:210/INNOPAC -target library2.open.ac.uk:7090/voyager -target libsys.lib.hull.ac.uk:210/INNOPAC -target libuni01.ccc.govt.nz:2200/unicorn -target libuni01.ccc.govt.nz:2220/cinch -target libuni01.ccc.govt.nz:2220/papers -target libunix.ku.edu.tr:210/INNOPAC -target linc.nus.edu.sg:210/innopac -target lion.swem.wm.edu:2200/unicorn -target loke.dcbib.dk:2100/S -target lrpapp.cc.umanitoba.ca:2200/unicorn -target malad2.mala.bc.ca:2200/UNICORN -target malad2.mala.bc.ca:2200/unicorn -target marte.biblioteca.upm.es:2200/unicorn -target medupe.ub.bw:210/INNOPAC -target melvyl.cdlib.org:210/CDL90 -target mercury.concordia.ca:210/Innopac -target merihobu.utlib.ee:210/INNOPAC -target merlinweb.ville.montreal.qc.ca:2100/Z3950S -target nbinet.ncl.edu.tw:210/INNOPAC -target ncsulib.lib.ncsu.edu:210/MARION -target newlib.ci.lubbock.tx.us:2200/unicorn -target nobis.njl.dk:210/S -target nrhcat.library.nrhtx.com:210/INNOPAC -target oda.fynbib.dk:2100/S -target odin2.bib.sdu.dk:210/Horizon -target odin2.bib.sdu.dk:210/otb -target opac.nebis.ch:9909/NEBIS -target opac.sbn.it:2100/nopac -target opac.sbn.it:3950/nopac -target opac.shu.ac.uk:210/INNOPAC -target opac.unifi.it:210/OPAC -target opac.utmem.edu:210/INNOPAC -target orac.lib.uts.edu.au:210/INNOPAC -target pollux.dslak.co.nz:210/MARNSL -target prodorbis.library.yale.edu:7090/voyager -target quest.unb.ca:2200/unicorn -target rebiun.crue.org:210/absysREBIUN -target roble.unizar.es:210/INNOPAC -target rs6000.nshpl.library.ns.ca:210/AVR -target rs6000.nshpl.library.ns.ca:210/CBR -target rs6000.nshpl.library.ns.ca:210/CEH -target rs6000.nshpl.library.ns.ca:210/CUR -target rs6000.nshpl.library.ns.ca:210/ECR -target rs6000.nshpl.library.ns.ca:210/NSP -target rs6000.nshpl.library.ns.ca:210/PAR -target rs6000.nshpl.library.ns.ca:210/SSR -target rs6000.nshpl.library.ns.ca:210/WCR -target sabio.library.arizona.edu:210/innopac -target salty.slcpl.lib.ut.us:210/INNOPAC -target scotty.mhsl.uab.edu:7090/VOYAGER -target serapis.leedsmet.ac.uk:2200/unicorn -target serapis.lmu.ac.uk:2200/unicorn -target sflwww.er.usgs.gov:251/sflwwwmeta -target silkbib.bib.dk:2100/S -target sirsi.library.utoronto.ca:2200/UNICORN -target star.tsl.state.tx.us:2200/unicorn -target strife.library.uwa.edu.au:210/INNOPAC -target strife.library.uwa.edu.au:210/innopac -target sundog.usask.ca:210/INNOPAC -target tegument.nlm.nih.gov:7090/VOYAGER -target titus.folger.edu:7090/VOYAGER -target tora.htk.dk:2100/S -target troy.lib.sfu.ca:210/INNOPAC -target unicorn.lib.ic.ac.uk:2200/IC -target unicorn.qmced.ac.uk:2200/unicorn -target unicornio.umb.edu.co:2200/Unicorn -target ustlib.ust.hk:210/INNOPAC -target vax.lib.state.ca.us:210/marion -target vlsirsi.rdg.ac.uk:2200/UNICORN -target voyager.its.csiro.au:7090/VOYAGER -target voyager.tcs.tulane.edu:7090/VOYAGER -target voyager.wrlc.org:7090/VOYAGER -target www.agralin.nl:210/clcz3950 -target www.bibliotek.taarnby.dk:210/S -target www.congreso.es:2100/ABSYSBCD -target www.csc.noaa.gov:2210/CSC_Products -target www.eevl.ac.uk:2100/eevlacuk -target www.grimes.lib.ia.us:210/main -target www.knihovna.cz:8888/un_cat -target www.library.nps.gov:7090/VOYAGER -target www.sbn.it:2100/nopac -target www.scran.ac.uk:3950/default -target yulib001.mc.yu.edu:1111/DEFAULT -target z39.libis.lt:210/KNYGOS -target z3950.ahds.ac.uk:210/CMF -target z3950.ahds.ac.uk:210/DC -target z3950.bcl.jcyl.es:210/AbsysBCL -target z3950.bcl.jcyl.es:210/AbsysCCFL -target z3950.bergen.folkebibl.no:210/data -target z3950.biblos.pk.edu.pl:4210/books -target z3950.bibsys.no:2100/BIBSYS -target z3950.bibsys.no:2100/PERI -target z3950.copac.ac.uk:210/copac -target z3950.copac.ac.uk:2100/COPAC -target z3950.deich.folkebibl.no:210/data -target z3950.dragsholm.integrabib.dk:210/default -target z3950.fcla.edu:210/CF -target z3950.fcla.edu:210/FA -target z3950.fcla.edu:210/FI -target z3950.fcla.edu:210/FO -target z3950.fcla.edu:210/QB -target z3950.fcla.edu:210/RF -target z3950.fcla.edu:210/SR -target z3950.fcla.edu:210/ST -target z3950.gbv.de:20010/GVK -target z3950.gbv.de:20011/GVK -target z3950.gbv.de:20011/GVK -target z3950.gbv.de:210/GVK -target z3950.gbv.de:210/GVK -target z3950.haderslev.integrabib.dk:210/default -target z3950.haslev.integrabib.dk:210/Default -target z3950.hoerning.integrabib.dk:210/default -target z3950.kb.dk:2100/BGF01 -target z3950.kb.dk:2100/BHS01 -target z3950.kb.dk:2100/DRA01 -target z3950.kb.dk:2100/KBB01 -target z3950.kb.dk:2100/KBH01 -target z3950.kb.dk:2100/KGL01 -target z3950.libh.uoc.gr:210/default -target z3950.libr.uoc.gr:210/logios -target z3950.libr.uoc.gr:210/ptolemeos_ii -target z3950.library.wisc.edu:210/madison -target z3950.libris.kb.se:210/libr -target z3950.lillehammer.folkebibl.no:210/data -target z3950.loc.gov:7090/voyager -target z3950.nakskov.integrabib.dk:210/default -target z3950.nb.no:2100/Norbok -target z3950.nb.no:2100/Norper -target z3950.nb.no:2100/Sambok -target z3950.nls.uk:7290/voyager -target z3950.ovid.com:2213/eric -target z3950.ovid.com:2213/pmed -target z3950.rdn.ac.uk:210/xxdefault -target z3950.ringsted.integrabib.dk:210/Default -target z3950.skagen.integrabib.dk:210/default -target z3950.sydfalster.integrabib.dk:210/default -target z3950.trondheim.folkebibl.no:210/data -target z3950.vallensbaek.integrabib.dk:210/default -target z3950.vejen.integrabib.dk:210/default diff --git a/pazpar2.c b/pazpar2.c deleted file mode 100644 index 4105ba2..0000000 --- a/pazpar2.c +++ /dev/null @@ -1,1395 +0,0 @@ -/* $Id: pazpar2.c,v 1.17 2006-12-19 04:49:34 quinn Exp $ */; - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "pazpar2.h" -#include "eventl.h" -#include "command.h" -#include "http.h" -#include "termlists.h" -#include "reclists.h" -#include "relevance.h" - -#define PAZPAR2_VERSION "0.1" -#define MAX_CHUNK 15 - -static void client_fatal(struct client *cl); -static void connection_destroy(struct connection *co); -static int client_prep_connection(struct client *cl); -static void ingest_records(struct client *cl, Z_Records *r); -void session_alert_watch(struct session *s, int what); - -IOCHAN channel_list = 0; // Master list of connections we're handling events to - -static struct connection *connection_freelist = 0; -static struct client *client_freelist = 0; - -static struct host *hosts = 0; // The hosts we know about -static struct database *databases = 0; // The databases we know about - -static char *client_states[] = { - "Client_Connecting", - "Client_Connected", - "Client_Idle", - "Client_Initializing", - "Client_Searching", - "Client_Presenting", - "Client_Error", - "Client_Failed", - "Client_Disconnected", - "Client_Stopped" -}; - -struct parameters global_parameters = -{ - 30, - "81", - "Index Data PazPar2 (MasterKey)", - PAZPAR2_VERSION, - 600, // 10 minutes - 60, - 100, - MAX_CHUNK, - 0, - 0, - 0, - 0 -}; - - -static int send_apdu(struct client *c, Z_APDU *a) -{ - struct connection *co = c->connection; - char *buf; - int len, r; - - if (!z_APDU(global_parameters.odr_out, &a, 0, 0)) - { - odr_perror(global_parameters.odr_out, "Encoding APDU"); - abort(); - } - buf = odr_getbuf(global_parameters.odr_out, &len, 0); - r = cs_put(co->link, buf, len); - if (r < 0) - { - yaz_log(YLOG_WARN, "cs_put: %s", cs_errmsg(cs_errno(co->link))); - return -1; - } - else if (r == 1) - { - fprintf(stderr, "cs_put incomplete (ParaZ does not handle that)\n"); - exit(1); - } - odr_reset(global_parameters.odr_out); /* release the APDU structure */ - co->state = Conn_Waiting; - return 0; -} - - -static void send_init(IOCHAN i) -{ - struct connection *co = iochan_getdata(i); - struct client *cl = co->client; - Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_initRequest); - - a->u.initRequest->implementationId = global_parameters.implementationId; - a->u.initRequest->implementationName = global_parameters.implementationName; - a->u.initRequest->implementationVersion = - global_parameters.implementationVersion; - ODR_MASK_SET(a->u.initRequest->options, Z_Options_search); - ODR_MASK_SET(a->u.initRequest->options, Z_Options_present); - ODR_MASK_SET(a->u.initRequest->options, Z_Options_namedResultSets); - - ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_1); - ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_2); - ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_3); - if (send_apdu(cl, a) >= 0) - { - iochan_setflags(i, EVENT_INPUT); - cl->state = Client_Initializing; - } - else - cl->state = Client_Error; - odr_reset(global_parameters.odr_out); -} - -static void send_search(IOCHAN i) -{ - struct connection *co = iochan_getdata(i); - struct client *cl = co->client; - struct session *se = cl->session; - struct database *db = cl->database; - Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_searchRequest); - int ndb, cerror, cpos; - char **databaselist; - Z_Query *zquery; - struct ccl_rpn_node *cn; - int ssub = 0, lslb = 100000, mspn = 10; - - yaz_log(YLOG_DEBUG, "Sending search"); - - cn = ccl_find_str(global_parameters.ccl_filter, se->query, &cerror, &cpos); - if (!cn) - return; - a->u.searchRequest->query = zquery = odr_malloc(global_parameters.odr_out, - sizeof(Z_Query)); - zquery->which = Z_Query_type_1; - zquery->u.type_1 = ccl_rpn_query(global_parameters.odr_out, cn); - ccl_rpn_delete(cn); - - for (ndb = 0; *db->databases[ndb]; ndb++) - ; - databaselist = odr_malloc(global_parameters.odr_out, sizeof(char*) * ndb); - for (ndb = 0; *db->databases[ndb]; ndb++) - databaselist[ndb] = db->databases[ndb]; - - a->u.presentRequest->preferredRecordSyntax = - yaz_oidval_to_z3950oid(global_parameters.odr_out, - CLASS_RECSYN, VAL_USMARC); - a->u.searchRequest->smallSetUpperBound = &ssub; - a->u.searchRequest->largeSetLowerBound = &lslb; - a->u.searchRequest->mediumSetPresentNumber = &mspn; - a->u.searchRequest->resultSetName = "Default"; - a->u.searchRequest->databaseNames = databaselist; - a->u.searchRequest->num_databaseNames = ndb; - - if (send_apdu(cl, a) >= 0) - { - iochan_setflags(i, EVENT_INPUT); - cl->state = Client_Searching; - cl->requestid = se->requestid; - } - else - cl->state = Client_Error; - - odr_reset(global_parameters.odr_out); -} - -static void send_present(IOCHAN i) -{ - struct connection *co = iochan_getdata(i); - struct client *cl = co->client; - Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_presentRequest); - int toget; - int start = cl->records + 1; - - toget = global_parameters.chunk; - if (toget > cl->hits - cl->records) - toget = cl->hits - cl->records; - - yaz_log(YLOG_DEBUG, "Trying to present %d records\n", toget); - - a->u.presentRequest->resultSetStartPoint = &start; - a->u.presentRequest->numberOfRecordsRequested = &toget; - - a->u.presentRequest->resultSetId = "Default"; - - a->u.presentRequest->preferredRecordSyntax = - yaz_oidval_to_z3950oid(global_parameters.odr_out, - CLASS_RECSYN, VAL_USMARC); - - if (send_apdu(cl, a) >= 0) - { - iochan_setflags(i, EVENT_INPUT); - cl->state = Client_Presenting; - } - else - cl->state = Client_Error; - odr_reset(global_parameters.odr_out); -} - -static void do_initResponse(IOCHAN i, Z_APDU *a) -{ - struct connection *co = iochan_getdata(i); - struct client *cl = co->client; - Z_InitResponse *r = a->u.initResponse; - - yaz_log(YLOG_DEBUG, "Received init response"); - - if (*r->result) - { - cl->state = Client_Idle; - } - else - cl->state = Client_Failed; // FIXME need to do something to the connection -} - -static void do_searchResponse(IOCHAN i, Z_APDU *a) -{ - struct connection *co = iochan_getdata(i); - struct client *cl = co->client; - struct session *se = cl->session; - Z_SearchResponse *r = a->u.searchResponse; - - yaz_log(YLOG_DEBUG, "Searchresponse (status=%d)", *r->searchStatus); - - if (*r->searchStatus) - { - cl->hits = *r->resultCount; - se->total_hits += cl->hits; - if (r->presentStatus && !*r->presentStatus && r->records) - { - yaz_log(YLOG_DEBUG, "Records in search response"); - cl->records += *r->numberOfRecordsReturned; - ingest_records(cl, r->records); - } - cl->state = Client_Idle; - } - else - { /*"FAILED"*/ - cl->hits = 0; - cl->state = Client_Error; - if (r->records) { - Z_Records *recs = r->records; - if (recs->which == Z_Records_NSD) - { - yaz_log(YLOG_WARN, "Non-surrogate diagnostic"); - cl->diagnostic = *recs->u.nonSurrogateDiagnostic->condition; - cl->state = Client_Error; - } - } - } -} - -const char *find_field(const char *rec, const char *field) -{ - char lbuf[5]; - char *line; - - lbuf[0] = '\n'; - strcpy(lbuf + 1, field); - - if ((line = strstr(rec, lbuf))) - return ++line; - else - return 0; -} - -const char *find_subfield(const char *field, char subfield) -{ - const char *p = field; - - while (*p && *p != '\n') - { - while (*p != '\n' && *p != '\t') - p++; - if (*p == '\t' && *(++p) == subfield) { - if (*(++p) == ' ') - { - while (isspace(*p)) - p++; - return p; - } - } - } - return 0; -} - -// Extract 245 $a $b 100 $a -char *extract_title(struct session *s, const char *rec) -{ - const char *field, *subfield; - char *e, *ef; - unsigned char *obuf, *p; - - wrbuf_rewind(s->wrbuf); - - if (!(field = find_field(rec, "245"))) - return 0; - if (!(subfield = find_subfield(field, 'a'))) - return 0; - ef = index(subfield, '\n'); - if ((e = index(subfield, '\t')) && e < ef) - ef = e; - if (ef) - { - wrbuf_write(s->wrbuf, subfield, ef - subfield); - if ((subfield = find_subfield(field, 'b'))) - { - ef = index(subfield, '\n'); - if ((e = index(subfield, '\t')) && e < ef) - ef = e; - if (ef) - { - wrbuf_putc(s->wrbuf, ' '); - wrbuf_write(s->wrbuf, subfield, ef - subfield); - } - } - } - if ((field = find_field(rec, "100"))) - { - if ((subfield = find_subfield(field, 'a'))) - { - ef = index(subfield, '\n'); - if ((e = index(subfield, '\t')) && e < ef) - ef = e; - if (ef) - { - wrbuf_puts(s->wrbuf, ", by "); - wrbuf_write(s->wrbuf, subfield, ef - subfield); - } - } - } - wrbuf_putc(s->wrbuf, '\0'); - obuf = (unsigned char*) nmem_strdup(s->nmem, wrbuf_buf(s->wrbuf)); - for (p = obuf; *p; p++) - if (*p == '&' || *p == '<' || *p > 122 || *p < ' ') - *p = ' '; - return (char*) obuf; -} - -// Extract 245 $a $b 100 $a -char *extract_mergekey(struct session *s, const char *rec) -{ - const char *field, *subfield; - char *e, *ef; - char *out, *p, *pout; - - wrbuf_rewind(s->wrbuf); - - if (!(field = find_field(rec, "245"))) - return 0; - if (!(subfield = find_subfield(field, 'a'))) - return 0; - ef = index(subfield, '\n'); - if ((e = index(subfield, '\t')) && e < ef) - ef = e; - if (ef) - { - wrbuf_write(s->wrbuf, subfield, ef - subfield); - if ((subfield = find_subfield(field, 'b'))) - { - ef = index(subfield, '\n'); - if ((e = index(subfield, '\t')) && e < ef) - ef = e; - if (ef) - { - wrbuf_puts(s->wrbuf, " field "); - wrbuf_write(s->wrbuf, subfield, ef - subfield); - } - } - } - if ((field = find_field(rec, "100"))) - { - if ((subfield = find_subfield(field, 'a'))) - { - ef = index(subfield, '\n'); - if ((e = index(subfield, '\t')) && e < ef) - ef = e; - if (ef) - { - wrbuf_puts(s->wrbuf, " field "); - wrbuf_write(s->wrbuf, subfield, ef - subfield); - } - } - } - wrbuf_putc(s->wrbuf, '\0'); - p = wrbuf_buf(s->wrbuf); - out = pout = nmem_malloc(s->nmem, strlen(p) + 1); - - while (*p) - { - while (isalnum(*p)) - *(pout++) = tolower(*(p++)); - while (*p && !isalnum(*p)) - p++; - *(pout++) = ' '; - } - if (out != pout) - *(--pout) = '\0'; - - return out; -} - -#ifdef RECHEAP -static void push_record(struct session *s, struct record *r) -{ - int p; - assert(s->recheap_max + 1 < s->recheap_size); - - s->recheap[p = ++s->recheap_max] = r; - while (p > 0) - { - int parent = (p - 1) >> 1; - if (strcmp(s->recheap[p]->merge_key, s->recheap[parent]->merge_key) < 0) - { - struct record *tmp; - tmp = s->recheap[parent]; - s->recheap[parent] = s->recheap[p]; - s->recheap[p] = tmp; - p = parent; - } - else - break; - } -} - -static struct record *top_record(struct session *s) -{ - return s-> recheap_max >= 0 ? s->recheap[0] : 0; -} - -static struct record *pop_record(struct session *s) -{ - struct record *res; - int p = 0; - int lastnonleaf = (s->recheap_max - 1) >> 1; - - if (s->recheap_max < 0) - return 0; - - res = s->recheap[0]; - - s->recheap[p] = s->recheap[s->recheap_max--]; - - while (p <= lastnonleaf) - { - int right = (p + 1) << 1; - int left = right - 1; - int min = left; - - if (right < s->recheap_max && - strcmp(s->recheap[right]->merge_key, s->recheap[left]->merge_key) < 0) - min = right; - if (strcmp(s->recheap[min]->merge_key, s->recheap[p]->merge_key) < 0) - { - struct record *tmp = s->recheap[min]; - s->recheap[min] = s->recheap[p]; - s->recheap[p] = tmp; - p = min; - } - else - break; - } - return res; -} - -// Like pop_record but collapses identical (merge_key) records -// The heap will contain multiple independent matching records and possibly -// one cluster, created the last time the list was scanned -static struct record *pop_mrecord(struct session *s) -{ - struct record *this; - struct record *next; - - if (!(this = pop_record(s))) - return 0; - - // Collapse identical records - while ((next = top_record(s))) - { - struct record *p, *tmpnext; - if (strcmp(this->merge_key, next->merge_key)) - break; - // Absorb record (and clustersiblings) into a supercluster - for (p = next; p; p = tmpnext) { - tmpnext = p->next_cluster; - p->next_cluster = this->next_cluster; - this->next_cluster = p; - } - - pop_record(s); - } - return this; -} - -// Reads records in sort order. Store records in top of heapspace until rewind is called. -static struct record *read_recheap(struct session *s) -{ - struct record *r = pop_mrecord(s); - - if (r) - { - if (s->recheap_scratch < 0) - s->recheap_scratch = s->recheap_size; - s->recheap[--s->recheap_scratch] = r; - } - - return r; -} - -// Return records to heap after read -static void rewind_recheap(struct session *s) -{ - while (s->recheap_scratch >= 0) { - push_record(s, s->recheap[s->recheap_scratch++]); - if (s->recheap_scratch >= s->recheap_size) - s->recheap_scratch = -1; - } -} - -#endif - -// FIXME needs to be generalized. Should flexibly generate X lists per search -static void extract_subject(struct session *s, const char *rec) -{ - const char *field, *subfield; - - while ((field = find_field(rec, "650"))) - { - rec = field; - if ((subfield = find_subfield(field, 'a'))) - { - char *e, *ef; - char buf[1024]; - int len; - - ef = index(subfield, '\n'); - if (!ef) - return; - if ((e = index(subfield, '\t')) && e < ef) - ef = e; - while (ef > subfield && !isalpha(*(ef - 1)) && *(ef - 1) != ')') - ef--; - len = ef - subfield; - assert(len < 1023); - memcpy(buf, subfield, len); - buf[len] = '\0'; - if (*buf) - termlist_insert(s->termlist, buf); - } - } -} - -static void pull_relevance_field(struct session *s, struct record *head, const char *rec, - char *field, int mult) -{ - const char *fb; - while ((fb = find_field(rec, field))) - { - char *ffield = strchr(fb, '\t'); - if (!ffield) - return; - char *eol = strchr(ffield, '\n'); - if (!eol) - return; - relevance_countwords(s->relevance, head, ffield, eol - ffield, mult); - rec = field + 1; // Crude way to cause a loop through repeating fields - } -} - -static void pull_relevance_keys(struct session *s, struct record *head, struct record *rec) -{ - relevance_newrec(s->relevance, head); - pull_relevance_field(s, head, rec->buf, "100", 2); - pull_relevance_field(s, head, rec->buf, "245", 4); - //pull_relevance_field(s, head, rec->buf, "530", 1); - pull_relevance_field(s, head, rec->buf, "630", 1); - pull_relevance_field(s, head, rec->buf, "650", 1); - pull_relevance_field(s, head, rec->buf, "700", 1); - relevance_donerecord(s->relevance, head); -} - -static struct record *ingest_record(struct client *cl, char *buf, int len) -{ - struct session *se = cl->session; - struct record *res; - struct record *head; - const char *recbuf; - - wrbuf_rewind(se->wrbuf); - yaz_marc_xml(global_parameters.yaz_marc, YAZ_MARC_LINE); - if (yaz_marc_decode_wrbuf(global_parameters.yaz_marc, buf, len, se->wrbuf) < 0) - { - yaz_log(YLOG_WARN, "Failed to decode MARC record"); - return 0; - } - wrbuf_putc(se->wrbuf, '\0'); - recbuf = wrbuf_buf(se->wrbuf); - - res = nmem_malloc(se->nmem, sizeof(struct record)); - res->buf = nmem_strdup(se->nmem, recbuf); - - extract_subject(se, res->buf); - - res->title = extract_title(se, res->buf); - res->merge_key = extract_mergekey(se, res->buf); - if (!res->merge_key) - return 0; - res->client = cl; - res->next_cluster = 0; - res->target_offset = -1; - res->term_frequency_vec = 0; - - head = reclist_insert(se->reclist, res); - - pull_relevance_keys(se, head, res); - - se->total_records++; - - return res; -} - -static void ingest_records(struct client *cl, Z_Records *r) -{ - struct record *rec; - struct session *s = cl->session; - Z_NamePlusRecordList *rlist; - int i; - - if (r->which != Z_Records_DBOSD) - return; - rlist = r->u.databaseOrSurDiagnostics; - for (i = 0; i < rlist->num_records; i++) - { - Z_NamePlusRecord *npr = rlist->records[i]; - Z_External *e; - char *buf; - int len; - - if (npr->which != Z_NamePlusRecord_databaseRecord) - { - yaz_log(YLOG_WARN, "Unexpected record type, probably diagnostic"); - continue; - } - e = npr->u.databaseRecord; - if (e->which != Z_External_octet) - { - yaz_log(YLOG_WARN, "Unexpected external branch, probably BER"); - continue; - } - buf = (char*) e->u.octet_aligned->buf; - len = e->u.octet_aligned->len; - - rec = ingest_record(cl, buf, len); - if (!rec) - continue; - } - if (s->watchlist[SESSION_WATCH_RECORDS].fun && rlist->num_records) - session_alert_watch(s, SESSION_WATCH_RECORDS); -} - -static void do_presentResponse(IOCHAN i, Z_APDU *a) -{ - struct connection *co = iochan_getdata(i); - struct client *cl = co->client; - Z_PresentResponse *r = a->u.presentResponse; - - if (r->records) { - Z_Records *recs = r->records; - if (recs->which == Z_Records_NSD) - { - yaz_log(YLOG_WARN, "Non-surrogate diagnostic"); - cl->diagnostic = *recs->u.nonSurrogateDiagnostic->condition; - cl->state = Client_Error; - } - } - - if (!*r->presentStatus && cl->state != Client_Error) - { - yaz_log(YLOG_DEBUG, "Good Present response"); - cl->records += *r->numberOfRecordsReturned; - ingest_records(cl, r->records); - cl->state = Client_Idle; - } - else if (*r->presentStatus) - { - yaz_log(YLOG_WARN, "Bad Present response"); - cl->state = Client_Error; - } -} - -static void handler(IOCHAN i, int event) -{ - struct connection *co = iochan_getdata(i); - struct client *cl = co->client; - struct session *se = 0; - - if (cl) - se = cl->session; - else - { - yaz_log(YLOG_WARN, "Destroying orphan connection (fix me?)"); - connection_destroy(co); - return; - } - - if (co->state == Conn_Connecting && event & EVENT_OUTPUT) - { - int errcode; - socklen_t errlen = sizeof(errcode); - - if (getsockopt(cs_fileno(co->link), SOL_SOCKET, SO_ERROR, &errcode, - &errlen) < 0 || errcode != 0) - { - client_fatal(cl); - return; - } - else - { - yaz_log(YLOG_DEBUG, "Connect OK"); - co->state = Conn_Open; - if (cl) - cl->state = Client_Connected; - } - } - - else if (event & EVENT_INPUT) - { - int len = cs_get(co->link, &co->ibuf, &co->ibufsize); - - if (len < 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "Error reading from Z server"); - connection_destroy(co); - return; - } - else if (len == 0) - { - yaz_log(YLOG_WARN, "EOF reading from Z server"); - connection_destroy(co); - return; - } - else if (len > 1) // We discard input if we have no connection - { - co->state = Conn_Open; - - if (cl && (cl->requestid == se->requestid || cl->state == Client_Initializing)) - { - Z_APDU *a; - - odr_reset(global_parameters.odr_in); - odr_setbuf(global_parameters.odr_in, co->ibuf, len, 0); - if (!z_APDU(global_parameters.odr_in, &a, 0, 0)) - { - client_fatal(cl); - return; - } - switch (a->which) - { - case Z_APDU_initResponse: - do_initResponse(i, a); - break; - case Z_APDU_searchResponse: - do_searchResponse(i, a); - break; - case Z_APDU_presentResponse: - do_presentResponse(i, a); - break; - default: - yaz_log(YLOG_WARN, "Unexpected result from server"); - client_fatal(cl); - return; - } - // We aren't expecting staggered output from target - // if (cs_more(t->link)) - // iochan_setevent(i, EVENT_INPUT); - } - else // we throw away response and go to idle mode - { - yaz_log(YLOG_DEBUG, "Ignoring result of expired operation"); - cl->state = Client_Idle; - } - } - /* if len==1 we do nothing but wait for more input */ - } - - if (cl->state == Client_Connected) { - send_init(i); - } - - if (cl->state == Client_Idle) - { - if (cl->requestid != se->requestid && *se->query) { - send_search(i); - } - else if (cl->hits > 0 && cl->records < global_parameters.toget && - cl->records < cl->hits) { - send_present(i); - } - } -} - -// Disassociate connection from client -static void connection_release(struct connection *co) -{ - struct client *cl = co->client; - - yaz_log(YLOG_DEBUG, "Connection release %s", co->host->hostport); - if (!cl) - return; - cl->connection = 0; - co->client = 0; -} - -// Close connection and recycle structure -static void connection_destroy(struct connection *co) -{ - struct host *h = co->host; - cs_close(co->link); - iochan_destroy(co->iochan); - - yaz_log(YLOG_DEBUG, "Connection destroy %s", co->host->hostport); - if (h->connections == co) - h->connections = co->next; - else - { - struct connection *pco; - for (pco = h->connections; pco && pco->next != co; pco = pco->next) - ; - if (pco) - pco->next = co->next; - else - abort(); - } - if (co->client) - { - if (co->client->state != Client_Idle) - co->client->state = Client_Disconnected; - co->client->connection = 0; - } - co->next = connection_freelist; - connection_freelist = co; -} - -// Creates a new connection for client, associated with the host of -// client's database -static struct connection *connection_create(struct client *cl) -{ - struct connection *new; - COMSTACK link; - int res; - void *addr; - - yaz_log(YLOG_DEBUG, "Connection create %s", cl->database->url); - if (!(link = cs_create(tcpip_type, 0, PROTO_Z3950))) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "Failed to create comstack"); - exit(1); - } - - if (!(addr = cs_straddr(link, cl->database->host->ipport))) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "Lookup of IP address failed?"); - return 0; - } - - res = cs_connect(link, addr); - if (res < 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "cs_connect %s", cl->database->url); - return 0; - } - - if ((new = connection_freelist)) - connection_freelist = new->next; - else - { - new = xmalloc(sizeof (struct connection)); - new->ibuf = 0; - new->ibufsize = 0; - } - new->state = Conn_Connecting; - new->host = cl->database->host; - new->next = new->host->connections; - new->host->connections = new; - new->client = cl; - cl->connection = new; - new->link = link; - - new->iochan = iochan_create(cs_fileno(link), handler, 0); - iochan_setdata(new->iochan, new); - new->iochan->next = channel_list; - channel_list = new->iochan; - return new; -} - -// Close connection and set state to error -static void client_fatal(struct client *cl) -{ - yaz_log(YLOG_WARN, "Fatal error from %s", cl->database->url); - connection_destroy(cl->connection); - cl->state = Client_Error; -} - -// Ensure that client has a connection associated -static int client_prep_connection(struct client *cl) -{ - struct connection *co; - struct session *se = cl->session; - struct host *host = cl->database->host; - - co = cl->connection; - - yaz_log(YLOG_DEBUG, "Client prep %s", cl->database->url); - - if (!co) - { - // See if someone else has an idle connection - // We should look at timestamps here to select the longest-idle connection - for (co = host->connections; co; co = co->next) - if (co->state == Conn_Open && (!co->client || co->client->session != se)) - break; - if (co) - { - connection_release(co); - cl->connection = co; - co->client = cl; - } - else - co = connection_create(cl); - } - if (co) - { - if (co->state == Conn_Connecting) - cl->state = Client_Connecting; - else if (co->state == Conn_Open) - { - if (cl->state == Client_Error || cl->state == Client_Disconnected) - cl->state = Client_Idle; - } - iochan_setflag(co->iochan, EVENT_OUTPUT); - return 1; - } - else - return 0; -} - -void load_simpletargets(const char *fn) -{ - FILE *f = fopen(fn, "r"); - char line[256]; - - if (!f) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "open %s", fn); - exit(1); - } - - while (fgets(line, 255, f)) - { - char *url, *db; - struct host *host; - struct database *database; - - if (strncmp(line, "target ", 7)) - continue; - url = line + 7; - url[strlen(url) - 1] = '\0'; - yaz_log(YLOG_DEBUG, "Target: %s", url); - if ((db = strchr(url, '/'))) - *(db++) = '\0'; - else - db = "Default"; - - for (host = hosts; host; host = host->next) - if (!strcmp(url, host->hostport)) - break; - if (!host) - { - struct addrinfo *addrinfo, hints; - char *port; - char ipport[128]; - unsigned char addrbuf[4]; - int res; - - host = xmalloc(sizeof(struct host)); - host->hostport = xstrdup(url); - host->connections = 0; - - if ((port = strchr(url, ':'))) - *(port++) = '\0'; - else - port = "210"; - - hints.ai_flags = 0; - hints.ai_family = PF_INET; - hints.ai_socktype = SOCK_STREAM; - hints.ai_protocol = IPPROTO_TCP; - hints.ai_addrlen = 0; - hints.ai_addr = 0; - hints.ai_canonname = 0; - hints.ai_next = 0; - // This is not robust code. It assumes that getaddrinfo returns AF_INET - // address. - if ((res = getaddrinfo(url, port, &hints, &addrinfo))) - { - yaz_log(YLOG_WARN, "Failed to resolve %s: %s", url, gai_strerror(res)); - continue; - } - assert(addrinfo->ai_family == PF_INET); - memcpy(addrbuf, &((struct sockaddr_in*)addrinfo->ai_addr)->sin_addr.s_addr, 4); - sprintf(ipport, "%hhd.%hhd.%hhd.%hhd:%s", - addrbuf[0], addrbuf[1], addrbuf[2], addrbuf[3], port); - host->ipport = xstrdup(ipport); - freeaddrinfo(addrinfo); - host->next = hosts; - hosts = host; - } - database = xmalloc(sizeof(struct database)); - database->host = host; - database->url = xmalloc(strlen(url) + strlen(db) + 2); - strcpy(database->url, url); - strcat(database->url, "/"); - strcat(database->url, db); - strcpy(database->databases[0], db); - *database->databases[1] = '\0'; - database->errors = 0; - database->next = databases; - databases = database; - - } - fclose(f); -} - -static void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num) -{ - switch (n->kind) - { - case CCL_RPN_AND: - case CCL_RPN_OR: - case CCL_RPN_NOT: - case CCL_RPN_PROX: - pull_terms(nmem, n->u.p[0], termlist, num); - pull_terms(nmem, n->u.p[1], termlist, num); - break; - case CCL_RPN_TERM: - termlist[(*num)++] = nmem_strdup(nmem, n->u.t.term); - break; - default: // NOOP - break; - } -} - -// Extract terms from query into null-terminated termlist -static int extract_terms(NMEM nmem, char *query, char **termlist) -{ - int error, pos; - struct ccl_rpn_node *n; - int num = 0; - - n = ccl_find_str(global_parameters.ccl_filter, query, &error, &pos); - if (!n) - return -1; - pull_terms(nmem, n, termlist, &num); - termlist[num] = 0; - ccl_rpn_delete(n); - return 0; -} - -static struct client *client_create(void) -{ - struct client *r; - if (client_freelist) - { - r = client_freelist; - client_freelist = client_freelist->next; - } - else - r = xmalloc(sizeof(struct client)); - r->database = 0; - r->connection = 0; - r->session = 0; - r->hits = 0; - r->records = 0; - r->setno = 0; - r->requestid = -1; - r->diagnostic = 0; - r->state = Client_Disconnected; - r->next = 0; - return r; -} - -void client_destroy(struct client *c) -{ - struct session *se = c->session; - if (c == se->clients) - se->clients = c->next; - else - { - struct client *cc; - for (cc = se->clients; cc && cc->next != c; cc = cc->next) - ; - if (cc) - cc->next = c->next; - } - if (c->connection) - connection_release(c->connection); - c->next = client_freelist; - client_freelist = c; -} - -void session_set_watch(struct session *s, int what, session_watchfun fun, void *data) -{ - s->watchlist[what].fun = fun; - s->watchlist[what].data = data; -} - -void session_alert_watch(struct session *s, int what) -{ - if (!s->watchlist[what].fun) - return; - (*s->watchlist[what].fun)(s->watchlist[what].data); - s->watchlist[what].fun = 0; - s->watchlist[what].data = 0; -} - -// This should be extended with parameters to control selection criteria -// Associates a set of clients with a session; -int select_targets(struct session *se) -{ - struct database *db; - int c = 0; - - while (se->clients) - client_destroy(se->clients); - for (db = databases; db; db = db->next) - { - struct client *cl = client_create(); - cl->database = db; - cl->session = se; - cl->next = se->clients; - se->clients = cl; - c++; - } - return c; -} - -char *search(struct session *se, char *query) -{ - int live_channels = 0; - struct client *cl; - - yaz_log(YLOG_DEBUG, "Search"); - - strcpy(se->query, query); - se->requestid++; - nmem_reset(se->nmem); - for (cl = se->clients; cl; cl = cl->next) - { - cl->hits = -1; - cl->records = 0; - cl->diagnostic = 0; - - if (client_prep_connection(cl)) - live_channels++; - } - if (live_channels) - { - char *p[512]; - int maxrecs = live_channels * global_parameters.toget; - se->termlist = termlist_create(se->nmem, maxrecs, 15); - se->reclist = reclist_create(se->nmem, maxrecs); - extract_terms(se->nmem, query, p); - se->relevance = relevance_create(se->nmem, (const char **) p, maxrecs); - se->total_records = se->total_hits = 0; - } - else - return "NOTARGETS"; - - return 0; -} - -void destroy_session(struct session *s) -{ - yaz_log(YLOG_LOG, "Destroying session"); - while (s->clients) - client_destroy(s->clients); - nmem_destroy(s->nmem); - wrbuf_free(s->wrbuf, 1); -} - -struct session *new_session() -{ - int i; - struct session *session = xmalloc(sizeof(*session)); - - yaz_log(YLOG_DEBUG, "New pazpar2 session"); - - session->total_hits = 0; - session->total_records = 0; - session->termlist = 0; - session->reclist = 0; - session->requestid = -1; - session->clients = 0; - session->query[0] = '\0'; - session->nmem = nmem_create(); - session->wrbuf = wrbuf_alloc(); - for (i = 0; i <= SESSION_WATCH_MAX; i++) - { - session->watchlist[i].data = 0; - session->watchlist[i].fun = 0; - } - - select_targets(session); - - return session; -} - -struct hitsbytarget *hitsbytarget(struct session *se, int *count) -{ - static struct hitsbytarget res[1000]; // FIXME MM - struct client *cl; - - *count = 0; - for (cl = se->clients; cl; cl = cl->next) - { - strcpy(res[*count].id, cl->database->host->hostport); - res[*count].hits = cl->hits; - res[*count].records = cl->records; - res[*count].diagnostic = cl->diagnostic; - res[*count].state = client_states[cl->state]; - res[*count].connected = cl->connection ? 1 : 0; - (*count)++; - } - - return res; -} - -struct termlist_score **termlist(struct session *s, int *num) -{ - return termlist_highscore(s->termlist, num); -} - -struct record **show(struct session *s, int start, int *num, int *total, int *sumhits) -{ - struct record **recs = nmem_malloc(s->nmem, *num * sizeof(struct record *)); - int i; - - relevance_prepare_read(s->relevance, s->reclist); - - *total = s->reclist->num_records; - *sumhits = s->total_hits; - - for (i = 0; i < start; i++) - if (!reclist_read_record(s->reclist)) - { - *num = 0; - return 0; - } - - for (i = 0; i < *num; i++) - { - struct record *r = reclist_read_record(s->reclist); - if (!r) - { - *num = i; - break; - } - recs[i] = r; - } - return recs; -} - -void statistics(struct session *se, struct statistics *stat) -{ - struct client *cl; - int count = 0; - - bzero(stat, sizeof(*stat)); - for (cl = se->clients; cl; cl = cl->next) - { - if (!cl->connection) - stat->num_no_connection++; - switch (cl->state) - { - case Client_Connecting: stat->num_connecting++; break; - case Client_Initializing: stat->num_initializing++; break; - case Client_Searching: stat->num_searching++; break; - case Client_Presenting: stat->num_presenting++; break; - case Client_Idle: stat->num_idle++; break; - case Client_Failed: stat->num_failed++; break; - case Client_Error: stat->num_error++; break; - default: break; - } - count++; - } - stat->num_hits = se->total_hits; - stat->num_records = se->total_records; - - stat->num_clients = count; -} - -static CCL_bibset load_cclfile(const char *fn) -{ - CCL_bibset res = ccl_qual_mk(); - if (ccl_qual_fname(res, fn) < 0) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "%s", fn); - exit(1); - } - return res; -} - -int main(int argc, char **argv) -{ - int ret; - char *arg; - int setport = 0; - - if (signal(SIGPIPE, SIG_IGN) < 0) - yaz_log(YLOG_WARN|YLOG_ERRNO, "signal"); - - yaz_log_init(YLOG_DEFAULT_LEVEL, "pazpar2", 0); - - while ((ret = options("c:h:p:C:s:", argv, argc, &arg)) != -2) - { - switch (ret) { - case 'c': - command_init(atoi(arg)); - setport++; - break; - case 'h': - http_init(arg); - setport++; - break; - case 'C': - global_parameters.ccl_filter = load_cclfile(arg); - break; - case 'p': - http_set_proxyaddr(arg); - break; - case 's': - load_simpletargets(arg); - break; - default: - fprintf(stderr, "Usage: pazpar2\n" - " -h [host:]port (REST protocol listener)\n" - " -c cmdport (telnet-style)\n" - " -C cclconfig\n" - " -s simpletargetfile\n" - " -p hostname[:portno] (HTTP proxy)\n"); - exit(1); - } - } - - if (!setport) - { - fprintf(stderr, "Set command port with -h or -c\n"); - exit(1); - } - - global_parameters.ccl_filter = load_cclfile("default.bib"); - global_parameters.yaz_marc = yaz_marc_create(); - yaz_marc_subfield_str(global_parameters.yaz_marc, "\t"); - global_parameters.odr_in = odr_createmem(ODR_DECODE); - global_parameters.odr_out = odr_createmem(ODR_ENCODE); - - event_loop(&channel_list); - - return 0; -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/pazpar2.h b/pazpar2.h deleted file mode 100644 index 8de4b91..0000000 --- a/pazpar2.h +++ /dev/null @@ -1,172 +0,0 @@ -#ifndef PAZPAR2_H -#define PAZPAR2_H - -struct record; - -#include - -#include -#include -#include -#include -#include "termlists.h" -#include "relevance.h" -#include "eventl.h" - -#define MAX_DATABASES 512 - -struct record { - struct client *client; - int target_offset; - char *buf; - char *merge_key; - char *title; - int relevance; - int *term_frequency_vec; - struct record *next_cluster; -}; - -struct connection; - -// Represents a host (irrespective of databases) -struct host { - char *hostport; - char *ipport; - struct connection *connections; // All connections to this - struct host *next; -}; - -// Represents a (virtual) database on a host -struct database { - struct host *host; - char *url; - char databases[MAX_DATABASES][128]; - int errors; - struct database *next; -}; - -struct client; - -// Represents a physical, reusable connection to a remote Z39.50 host -struct connection { - IOCHAN iochan; - COMSTACK link; - struct host *host; - struct client *client; - char *ibuf; - int ibufsize; - enum { - Conn_Connecting, - Conn_Open, - Conn_Waiting, - } state; - struct connection *next; -}; - -// Represents client state for a connection to one search target -struct client { - struct database *database; - struct connection *connection; - struct session *session; - int hits; - int records; - int setno; - int requestid; // ID of current outstanding request - int diagnostic; - enum client_state - { - Client_Connecting, - Client_Connected, - Client_Idle, - Client_Initializing, - Client_Searching, - Client_Presenting, - Client_Error, - Client_Failed, - Client_Disconnected, - Client_Stopped - } state; - struct client *next; -}; - -#define SESSION_WATCH_RECORDS 0 -#define SESSION_WATCH_MAX 0 - -typedef void (*session_watchfun)(void *data); - -// End-user session -struct session { - struct client *clients; - int requestid; - char query[1024]; - NMEM nmem; // Nmem for each operation (i.e. search) - WRBUF wrbuf; // Wrbuf for scratch(i.e. search) - struct termlist *termlist; - struct relevance *relevance; - struct reclist *reclist; - struct { - void *data; - session_watchfun fun; - } watchlist[SESSION_WATCH_MAX + 1]; - int total_hits; - int total_records; -}; - -struct statistics { - int num_clients; - int num_no_connection; - int num_connecting; - int num_initializing; - int num_searching; - int num_presenting; - int num_idle; - int num_failed; - int num_error; - int num_hits; - int num_records; -}; - -struct hitsbytarget { - char id[256]; - int hits; - int diagnostic; - int records; - char* state; - int connected; -}; - -struct parameters { - int timeout; /* operations timeout, in seconds */ - char implementationId[128]; - char implementationName[128]; - char implementationVersion[128]; - int target_timeout; // seconds - int session_timeout; - int toget; - int chunk; - CCL_bibset ccl_filter; - yaz_marc_t yaz_marc; - ODR odr_out; - ODR odr_in; -}; - -struct hitsbytarget *hitsbytarget(struct session *s, int *count); -int select_targets(struct session *se); -struct session *new_session(); -void destroy_session(struct session *s); -int load_targets(struct session *s, const char *fn); -void statistics(struct session *s, struct statistics *stat); -char *search(struct session *s, char *query); -struct record **show(struct session *s, int start, int *num, int *total, int *sumhits); -struct termlist_score **termlist(struct session *s, int *num); -void session_set_watch(struct session *s, int what, session_watchfun fun, void *data); - -#endif - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/reclists.c b/reclists.c deleted file mode 100644 index 579e34b..0000000 --- a/reclists.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * $Id: reclists.c,v 1.4 2006-12-08 21:40:58 quinn Exp $ - */ - -#include - -#include - -#include "pazpar2.h" -#include "reclists.h" - -struct reclist_bucket -{ - struct record *record; - struct reclist_bucket *next; -}; - -struct record *reclist_read_record(struct reclist *l) -{ - if (l->pointer < l->num_records) - return l->flatlist[l->pointer++]; - else - return 0; -} - -void reclist_rewind(struct reclist *l) -{ - l->pointer = 0; -} - -// Jenkins one-at-a-time hash (from wikipedia) -static unsigned int hash(const unsigned char *key) -{ - unsigned int hash = 0; - - while (*key) - { - hash += *(key++); - hash += (hash << 10); - hash ^= (hash >> 6); - } - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - return hash; -} - -struct reclist *reclist_create(NMEM nmem, int numrecs) -{ - int hashsize = 1; - struct reclist *res; - - assert(numrecs); - while (hashsize < numrecs) - hashsize <<= 1; - res = nmem_malloc(nmem, sizeof(struct reclist)); - res->hashtable = nmem_malloc(nmem, hashsize * sizeof(struct reclist_bucket*)); - bzero(res->hashtable, hashsize * sizeof(struct reclist_bucket*)); - res->hashtable_size = hashsize; - res->nmem = nmem; - res->hashmask = hashsize - 1; // Creates a bitmask - - res->num_records = 0; - res->flatlist = nmem_malloc(nmem, numrecs * sizeof(struct record*)); - res->flatlist_size = numrecs; - - return res; -} - -struct record *reclist_insert(struct reclist *l, struct record *record) -{ - unsigned int bucket; - struct reclist_bucket **p; - struct record *head; - - bucket = hash((unsigned char*) record->merge_key) & l->hashmask; - for (p = &l->hashtable[bucket]; *p; p = &(*p)->next) - { - // We found a matching record. Merge them - if (!strcmp(record->merge_key, (*p)->record->merge_key)) - { - struct record *existing = (*p)->record; - record->next_cluster = existing->next_cluster; - existing->next_cluster = record; - head = existing; - break; - } - } - if (!*p) // We made it to the end of the bucket without finding match - { - struct reclist_bucket *new = nmem_malloc(l->nmem, - sizeof(struct reclist_bucket)); - new->record = record; - record->next_cluster = 0; - new->next = 0; - *p = new; - l->flatlist[l->num_records++] = record; - head = record; - } - return head; -} - - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/reclists.h b/reclists.h deleted file mode 100644 index f9d38c3..0000000 --- a/reclists.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef RECLISTS_H -#define RECLISTS_H - -struct reclist -{ - struct reclist_bucket **hashtable; - int hashtable_size; - int hashmask; - - struct record **flatlist; - int flatlist_size; - int num_records; - int pointer; - - NMEM nmem; -}; - -struct reclist *reclist_create(NMEM, int numrecs); -struct record * reclist_insert(struct reclist *tl, struct record *record); -struct record *reclist_read_record(struct reclist *l); -void reclist_rewind(struct reclist *l); - -#endif diff --git a/relevance.c b/relevance.c deleted file mode 100644 index c627a98..0000000 --- a/relevance.c +++ /dev/null @@ -1,247 +0,0 @@ -/* - * $Id: relevance.c,v 1.3 2006-11-27 14:35:15 quinn Exp $ - */ - -#include -#include -#include - -#include "relevance.h" -#include "pazpar2.h" - -struct relevance -{ - int *doc_frequency_vec; - int vec_len; - struct word_trie *wt; - NMEM nmem; -}; - -// We use this data structure to recognize terms in input records, -// and map them to record term vectors for counting. -struct word_trie -{ - struct - { - struct word_trie *child; - int termno; - } list[26]; -}; - -static struct word_trie *create_word_trie_node(NMEM nmem) -{ - struct word_trie *res = nmem_malloc(nmem, sizeof(struct word_trie)); - int i; - for (i = 0; i < 26; i++) - { - res->list[i].child = 0; - res->list[i].termno = -1; - } - return res; -} - -static void word_trie_addterm(NMEM nmem, struct word_trie *n, const char *term, int num) -{ - while (*term) { - int c = tolower(*term); - if (c < 'a' || c > 'z') - term++; - else - { - c -= 'a'; - if (!*(++term)) - n->list[c].termno = num; - else - { - if (!n->list[c].child) - { - struct word_trie *new = create_word_trie_node(nmem); - n->list[c].child = new; - } - word_trie_addterm(nmem, n->list[c].child, term, num); - } - break; - } - } -} - -#define raw_char(c) (((c) >= 'a' && (c) <= 'z') ? (c) - 'a' : -1) - -static int word_trie_match(struct word_trie *t, const char *word, int len, int *skipped) -{ - int c = raw_char(tolower(*word)); - - if (!len) - return 0; - - word++; len--; - (*skipped)++; - if (!len || raw_char(*word) < 0) - { - if (t->list[c].termno > 0) - return t->list[c].termno; - else - return 0; - } - else - { - if (t->list[c].child) - { - return word_trie_match(t->list[c].child, word, len, skipped); - } - else - return 0; - } - -} - - -static struct word_trie *build_word_trie(NMEM nmem, const char **terms) -{ - struct word_trie *res = create_word_trie_node(nmem); - const char **p; - int i; - - for (i = 1, p = terms; *p; p++, i++) - word_trie_addterm(nmem, res, *p, i); - return res; -} - -struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs) -{ - struct relevance *res = nmem_malloc(nmem, sizeof(struct relevance)); - const char **p; - int i; - - for (p = terms, i = 0; *p; p++, i++) - ; - res->vec_len = ++i; - res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int)); - bzero(res->doc_frequency_vec, res->vec_len * sizeof(int)); - res->nmem = nmem; - res->wt = build_word_trie(nmem, terms); - return res; -} - -void relevance_newrec(struct relevance *r, struct record *rec) -{ - if (!rec->term_frequency_vec) - { - rec->term_frequency_vec = nmem_malloc(r->nmem, r->vec_len * sizeof(int)); - bzero(rec->term_frequency_vec, r->vec_len * sizeof(int)); - } -} - - -// FIXME. The definition of a word is crude here.. should support -// some form of localization mechanism? -void relevance_countwords(struct relevance *r, struct record *head, - const char *words, int len, int multiplier) -{ - while (len) - { - char c; - int res; - int skipped; - while (len && (c = raw_char(tolower(*words))) < 0) - { - words++; - len--; - } - if (!len) - return; - skipped = 0; - if ((res = word_trie_match(r->wt, words, len, &skipped))) - { - words += skipped; - len -= skipped; - head->term_frequency_vec[res] += multiplier; - } - else - { - while (len && (c = raw_char(tolower(*words))) >= 0) - { - words++; - len--; - } - } - head->term_frequency_vec[0]++; - } -} - -void relevance_donerecord(struct relevance *r, struct record *head) -{ - int i; - - for (i = 1; i < r->vec_len; i++) - if (head->term_frequency_vec[i] > 0) - r->doc_frequency_vec[i]++; - - r->doc_frequency_vec[0]++; -} - -#ifdef FLOAT_REL -static int comp(const void *p1, const void *p2) -{ - float res; - struct record **r1 = (struct record **) p1; - struct record **r2 = (struct record **) p2; - res = (*r2)->relevance - (*r1)->relevance; - if (res > 0) - return 1; - else if (res < 0) - return -1; - else - return 0; -} -#else -static int comp(const void *p1, const void *p2) -{ - struct record **r1 = (struct record **) p1; - struct record **r2 = (struct record **) p2; - return (*r2)->relevance - (*r1)->relevance; -} -#endif - -// Prepare for a relevance-sorted read of up to num entries -void relevance_prepare_read(struct relevance *rel, struct reclist *reclist) -{ - int i; - float *idfvec = xmalloc(rel->vec_len * sizeof(float)); - - // Calculate document frequency vector for each term. - for (i = 1; i < rel->vec_len; i++) - { - if (!rel->doc_frequency_vec[i]) - idfvec[i] = 0; - else - idfvec[i] = log((float) rel->doc_frequency_vec[0] / rel->doc_frequency_vec[i]); - } - // Calculate relevance for each document - for (i = 0; i < reclist->num_records; i++) - { - int t; - struct record *rec = reclist->flatlist[i]; - float relevance; - relevance = 0; - for (t = 1; t < rel->vec_len; t++) - { - float termfreq; - if (!rec->term_frequency_vec[0]) - break; - termfreq = (float) rec->term_frequency_vec[t] / rec->term_frequency_vec[0]; - relevance += termfreq * idfvec[t]; - } - rec->relevance = (int) (relevance * 100000); - } - qsort(reclist->flatlist, reclist->num_records, sizeof(struct record*), comp); - reclist->pointer = 0; -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/relevance.h b/relevance.h deleted file mode 100644 index 38c3d9c..0000000 --- a/relevance.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef RELEVANCE_H -#define RELEVANCE_H - -#include - -#include "pazpar2.h" -#include "reclists.h" - -struct relevance; - -struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs); -void relevance_newrec(struct relevance *r, struct record *rec); -void relevance_countwords(struct relevance *r, struct record *rec, - const char *words, int len, int multiplier); -void relevance_donerecord(struct relevance *r, struct record *rec); - -void relevance_prepare_read(struct relevance *rel, struct reclist *rec); - -#endif - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..52e23c1 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,45 @@ +# ParaZ. Copyright (C) 2000-2004, Index Data ApS +# All rights reserved. +# $Id: Makefile,v 1.1 2006-12-20 20:47:16 quinn Exp $ + +SHELL=/bin/sh + +CC=gcc + +YAZCONF=yaz-config +YAZLIBS=`$(YAZCONF) --libs` +YAZCFLAGS=`$(YAZCONF) --cflags` + +PROG=pazpar2 +PROGO=pazpar2.o eventl.o util.o command.o http.o http_command.o termlists.o \ + reclists.o relevance.o + +all: $(PROG) + +$(PROG): $(PROGO) + $(CC) $(CFLAGS) $(YAZCFLAGS) -o $(PROG) $(PROGO) $(YAZLIBS) + +.c.o: + $(CC) -c $(CFLAGS) -I. $(YAZCFLAGS) $< + +clean: + rm -f *.[oa] test core mon.out gmon.out errlist $(PROG) + + +## Dependencies go below + +command.o: command.c command.h util.h eventl.h pazpar2.h termlists.h \ + relevance.h reclists.h +eventl.o: eventl.c eventl.h +http.o: http.c command.h util.h eventl.h pazpar2.h termlists.h \ + relevance.h reclists.h http.h http_command.h +http_command.o: http_command.c command.h util.h eventl.h pazpar2.h \ + termlists.h relevance.h reclists.h http.h http_command.h +pazpar2.o: pazpar2.c pazpar2.h termlists.h relevance.h reclists.h \ + eventl.h command.h http.h +reclists.o: reclists.c pazpar2.h termlists.h relevance.h reclists.h \ + eventl.h +relevance.o: relevance.c relevance.h pazpar2.h termlists.h eventl.h \ + reclists.h +termlists.o: termlists.c termlists.h +util.o: util.c diff --git a/src/command.c b/src/command.c new file mode 100644 index 0000000..5411b39 --- /dev/null +++ b/src/command.c @@ -0,0 +1,392 @@ +/* $Id: command.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "command.h" +#include "util.h" +#include "eventl.h" +#include "pazpar2.h" + +extern IOCHAN channel_list; + +struct command_session { + IOCHAN channel; + char *outbuf; + + int outbuflen; + int outbufwrit; + + struct session *psession; +}; + +void command_destroy(struct command_session *s); +void command_prompt(struct command_session *s); +void command_puts(struct command_session *s, const char *buf); + +static int cmd_quit(struct command_session *s, char **argv, int argc) +{ + IOCHAN i = s->channel; + close(iochan_getfd(i)); + iochan_destroy(i); + command_destroy(s); + return 0; +} + +#ifdef GAGA +static int cmd_load(struct command_session *s, char **argv, int argc) +{ + if (argc != 2) { + command_puts(s, "Usage: load filename\n"); + } + if (load_targets(s->psession, argv[1]) < 0) + command_puts(s, "Failed to open file\n"); + return 1; +} +#endif + +static int cmd_search(struct command_session *s, char **argv, int argc) +{ + if (argc != 2) + { + command_puts(s, "Usage: search word\n"); + return 1; + } + search(s->psession, argv[1]); + return 1; +} + +static int cmd_hitsbytarget(struct command_session *s, char **argv, int argc) +{ + int count; + int i; + + struct hitsbytarget *ht = hitsbytarget(s->psession, &count); + for (i = 0; i < count; i++) + { + char buf[1024]; + + sprintf(buf, "%s: %d (%d records, diag=%d, state=%s conn=%d)\n", ht[i].id, ht[i].hits, + ht[i].records, ht[i].diagnostic, ht[i].state, ht[i].connected); + command_puts(s, buf); + } + return 1; +} + +static int cmd_show(struct command_session *s, char **argv, int argc) +{ + struct record **recs; + int num = 10; + int merged, total; + int i; + + if (argc == 2) + num = atoi(argv[1]); + + recs = show(s->psession, 0, &num, &merged, &total); + + for (i = 0; i < num; i++) + { + int rc; + struct record *cnode; + struct record *r = recs[i]; + + command_puts(s, r->merge_key); + for (rc = 1, cnode = r->next_cluster; cnode; cnode = cnode->next_cluster, rc++) + ; + if (rc > 1) + { + char buf[256]; + sprintf(buf, " (%d records)", rc); + command_puts(s, buf); + } + command_puts(s, "\n"); + } + return 1; +} + +static int cmd_stat(struct command_session *s, char **argv, int argc) +{ + char buf[1024]; + struct statistics stat; + + statistics(s->psession, &stat); + sprintf(buf, "Number of connections: %d\n", stat.num_clients); + command_puts(s, buf); + if (stat.num_no_connection) + { + sprintf(buf, "#No_connection: %d\n", stat.num_no_connection); + command_puts(s, buf); + } + if (stat.num_connecting) + { + sprintf(buf, "#Connecting: %d\n", stat.num_connecting); + command_puts(s, buf); + } + if (stat.num_initializing) + { + sprintf(buf, "#Initializing: %d\n", stat.num_initializing); + command_puts(s, buf); + } + if (stat.num_searching) + { + sprintf(buf, "#Searching: %d\n", stat.num_searching); + command_puts(s, buf); + } + if (stat.num_presenting) + { + sprintf(buf, "#Ppresenting: %d\n", stat.num_presenting); + command_puts(s, buf); + } + if (stat.num_idle) + { + sprintf(buf, "#Idle: %d\n", stat.num_idle); + command_puts(s, buf); + } + if (stat.num_failed) + { + sprintf(buf, "#Failed: %d\n", stat.num_failed); + command_puts(s, buf); + } + if (stat.num_error) + { + sprintf(buf, "#Error: %d\n", stat.num_error); + command_puts(s, buf); + } + return 1; +} + +static struct { + char *cmd; + int (*fun)(struct command_session *s, char *argv[], int argc); +} cmd_array[] = { + {"quit", cmd_quit}, +#ifdef GAGA + {"load", cmd_load}, +#endif + {"find", cmd_search}, + {"ht", cmd_hitsbytarget}, + {"stat", cmd_stat}, + {"show", cmd_show}, + {0,0} +}; + +void command_command(struct command_session *s, char *command) +{ + char *p; + char *argv[20]; + int argc = 0; + int i; + int res = -1; + + p = command; + while (*p) + { + while (isspace(*p)) + p++; + if (!*p) + break; + argv[argc++] = p; + while (*p && !isspace(*p)) + p++; + if (!*p) + break; + *(p++) = '\0'; + } + if (argc) { + for (i = 0; cmd_array[i].cmd; i++) + { + if (!strcmp(cmd_array[i].cmd, argv[0])) { + res = (cmd_array[i].fun)(s, argv, argc); + + break; + } + } + if (res < 0) { + command_puts(s, "Unknown command.\n"); + command_prompt(s); + } + else if (res == 1) { + command_prompt(s); + } + } + else + command_prompt(s); + +} + + +static void command_io(IOCHAN i, int event) +{ + int res; + char buf[1024]; + struct command_session *s; + + s = iochan_getdata(i); + + + switch (event) + { + case EVENT_INPUT: + res = read(iochan_getfd(i), buf, 1024); + if (res <= 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "read command"); + close(iochan_getfd(i)); + iochan_destroy(i); + command_destroy(s); + return; + } + if (!index(buf, '\n')) { + yaz_log(YLOG_WARN|YLOG_ERRNO, "Did not receive complete command"); + close(iochan_getfd(i)); + iochan_destroy(i); + command_destroy(s); + return; + } + buf[res] = '\0'; + command_command(s, buf); + break; + case EVENT_OUTPUT: + if (!s->outbuflen || s->outbufwrit < 0) + { + yaz_log(YLOG_WARN, "Called with outevent but no data"); + iochan_clearflag(i, EVENT_OUTPUT); + } + else + { + res = write(iochan_getfd(i), s->outbuf + s->outbufwrit, s->outbuflen - + s->outbufwrit); + if (res < 0) { + yaz_log(YLOG_WARN|YLOG_ERRNO, "write command"); + close(iochan_getfd(i)); + iochan_destroy(i); + command_destroy(s); + } + else + { + s->outbufwrit += res; + if (s->outbufwrit >= s->outbuflen) + { + s->outbuflen = s->outbufwrit = 0; + iochan_clearflag(i, EVENT_OUTPUT); + } + } + } + break; + default: + yaz_log(YLOG_WARN, "Bad voodoo on socket"); + } +} + +void command_puts(struct command_session *s, const char *buf) +{ + int len = strlen(buf); + memcpy(s->outbuf + s->outbuflen, buf, len); + s->outbuflen += len; + iochan_setflag(s->channel, EVENT_OUTPUT); +} + +void command_prompt(struct command_session *s) +{ + command_puts(s, "Pazpar2> "); +} + + +/* Accept a new command connection */ +static void command_accept(IOCHAN i, int event) +{ + struct sockaddr_in addr; + int fd = iochan_getfd(i); + socklen_t len; + int s; + IOCHAN c; + struct command_session *ses; + int flags; + + len = sizeof addr; + if ((s = accept(fd, (struct sockaddr *) &addr, &len)) < 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "accept"); + return; + } + if ((flags = fcntl(s, F_GETFL, 0)) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl"); + if (fcntl(s, F_SETFL, flags | O_NONBLOCK) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2"); + + yaz_log(YLOG_LOG, "New command connection"); + c = iochan_create(s, command_io, EVENT_INPUT | EVENT_EXCEPT); + + ses = xmalloc(sizeof(*ses)); + ses->outbuf = xmalloc(50000); + ses->outbuflen = 0; + ses->outbufwrit = 0; + ses->channel = c; + ses->psession = new_session(); + iochan_setdata(c, ses); + + command_puts(ses, "Welcome to pazpar2\n\n"); + command_prompt(ses); + + c->next = channel_list; + channel_list = c; +} + +void command_destroy(struct command_session *s) { + xfree(s->outbuf); + xfree(s); +} + +/* Create a command-channel listener */ +void command_init(int port) +{ + IOCHAN c; + int l; + struct protoent *p; + struct sockaddr_in myaddr; + int one = 1; + + yaz_log(YLOG_LOG, "Command port is %d", port); + if (!(p = getprotobyname("tcp"))) { + abort(); + } + if ((l = socket(PF_INET, SOCK_STREAM, p->p_proto)) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "socket"); + if (setsockopt(l, SOL_SOCKET, SO_REUSEADDR, (char*) + &one, sizeof(one)) < 0) + abort(); + + bzero(&myaddr, sizeof myaddr); + myaddr.sin_family = AF_INET; + myaddr.sin_addr.s_addr = INADDR_ANY; + myaddr.sin_port = htons(port); + if (bind(l, (struct sockaddr *) &myaddr, sizeof myaddr) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "bind"); + if (listen(l, SOMAXCONN) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "listen"); + + c = iochan_create(l, command_accept, EVENT_INPUT | EVENT_EXCEPT); + //iochan_setdata(c, &l); + c->next = channel_list; + channel_list = c; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/command.h b/src/command.h new file mode 100644 index 0000000..25e5b99 --- /dev/null +++ b/src/command.h @@ -0,0 +1,6 @@ +#ifndef COMMAND_H +#define COMMAND_H + +void command_init(int port); + +#endif diff --git a/src/eventl.c b/src/eventl.c new file mode 100644 index 0000000..324dca1 --- /dev/null +++ b/src/eventl.c @@ -0,0 +1,154 @@ +/* + * ParaZ - a simple tool for harvesting performance data for parallel + * operations using Z39.50. + * Copyright (c) 2000-2004 Index Data ApS + * See LICENSE file for details. + */ + +/* + * $Id: eventl.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ + * Based on revision YAZ' server/eventl.c 1.29. + */ + +#include +#include +#ifdef WIN32 +#include +#else +#include +#endif +#include +#include +#include + +#include +#include +#include +#include +#include "eventl.h" +#include + +IOCHAN iochan_create(int fd, IOC_CALLBACK cb, int flags) +{ + IOCHAN new_iochan; + + if (!(new_iochan = (IOCHAN)xmalloc(sizeof(*new_iochan)))) + return 0; + new_iochan->destroyed = 0; + new_iochan->fd = fd; + new_iochan->flags = flags; + new_iochan->fun = cb; + new_iochan->force_event = 0; + new_iochan->last_event = new_iochan->max_idle = 0; + new_iochan->next = NULL; + return new_iochan; +} + +int event_loop(IOCHAN *iochans) +{ + do /* loop as long as there are active associations to process */ + { + IOCHAN p, nextp; + fd_set in, out, except; + int res, max; + static struct timeval nullto = {0, 0}, to; + struct timeval *timeout; + + FD_ZERO(&in); + FD_ZERO(&out); + FD_ZERO(&except); + timeout = &to; /* hang on select */ + to.tv_sec = 30; + to.tv_usec = 0; + max = 0; + for (p = *iochans; p; p = p->next) + { + if (p->fd < 0) + continue; + if (p->force_event) + timeout = &nullto; /* polling select */ + if (p->flags & EVENT_INPUT) + FD_SET(p->fd, &in); + if (p->flags & EVENT_OUTPUT) + FD_SET(p->fd, &out); + if (p->flags & EVENT_EXCEPT) + FD_SET(p->fd, &except); + if (p->fd > max) + max = p->fd; + } + if ((res = select(max + 1, &in, &out, &except, timeout)) < 0) + { + if (errno == EINTR) + continue; + else + abort(); + } + for (p = *iochans; p; p = p->next) + { + int force_event = p->force_event; + time_t now = time(0); + + p->force_event = 0; + if (!p->destroyed && ((p->max_idle && now - p->last_event > + p->max_idle) || force_event == EVENT_TIMEOUT)) + { + p->last_event = now; + (*p->fun)(p, EVENT_TIMEOUT); + } + if (p->fd < 0) + continue; + if (!p->destroyed && (FD_ISSET(p->fd, &in) || + force_event == EVENT_INPUT)) + { + p->last_event = now; + (*p->fun)(p, EVENT_INPUT); + } + if (!p->destroyed && (FD_ISSET(p->fd, &out) || + force_event == EVENT_OUTPUT)) + { + p->last_event = now; + (*p->fun)(p, EVENT_OUTPUT); + } + if (!p->destroyed && (FD_ISSET(p->fd, &except) || + force_event == EVENT_EXCEPT)) + { + p->last_event = now; + (*p->fun)(p, EVENT_EXCEPT); + } + } + for (p = *iochans; p; p = nextp) + { + nextp = p->next; + + if (p->destroyed) + { + IOCHAN tmp = p, pr; + + /* Now reset the pointers */ + if (p == *iochans) + *iochans = p->next; + else + { + for (pr = *iochans; pr; pr = pr->next) + if (pr->next == p) + break; + assert(pr); /* grave error if it weren't there */ + pr->next = p->next; + } + if (nextp == p) + nextp = p->next; + xfree(tmp); + } + } + } + while (*iochans); + return 0; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/eventl.h b/src/eventl.h new file mode 100644 index 0000000..199d944 --- /dev/null +++ b/src/eventl.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 1995-1999, Index Data + * See the file LICENSE for details. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: eventl.h,v $ + * Revision 1.1 2006-12-20 20:47:16 quinn + * Reorganized source tree + * + * Revision 1.3 2006/12/12 02:36:24 quinn + * Implemented session timeout; ping command + * + * Revision 1.2 2006/11/18 05:00:38 quinn + * Added record retrieval, etc. + * + * Revision 1.1.1.1 2006/11/14 20:44:38 quinn + * PazPar2 + * + * Revision 1.1.1.1 2000/02/23 14:40:18 heikki + * Original import to cvs + * + * Revision 1.11 1999/04/20 09:56:48 adam + * Added 'name' paramter to encoder/decoder routines (typedef Odr_fun). + * Modified all encoders/decoders to reflect this change. + * + * Revision 1.10 1998/01/29 13:30:23 adam + * Better event handle system for NT/Unix. + * + * Revision 1.9 1997/09/01 09:31:48 adam + * Removed definition statserv_remove from statserv.h to eventl.h. + * + * Revision 1.8 1995/06/19 12:39:09 quinn + * Fixed bug in timeout code. Added BER dumper. + * + * Revision 1.7 1995/06/16 10:31:34 quinn + * Added session timeout. + * + * Revision 1.6 1995/05/16 08:51:02 quinn + * License, documentation, and memory fixes + * + * Revision 1.5 1995/05/15 11:56:37 quinn + * Asynchronous facilities. Restructuring of seshigh code. + * + * Revision 1.4 1995/03/27 08:34:23 quinn + * Added dynamic server functionality. + * Released bindings to session.c (is now redundant) + * + * Revision 1.3 1995/03/15 08:37:42 quinn + * Now we're pretty much set for nonblocking I/O. + * + * Revision 1.2 1995/03/14 10:28:00 quinn + * More work on demo server. + * + * Revision 1.1 1995/03/10 18:22:45 quinn + * The rudiments of an asynchronous server. + * + */ + +#ifndef EVENTL_H +#define EVENTL_H + +#include + +struct iochan; + +typedef void (*IOC_CALLBACK)(struct iochan *i, int event); + +typedef struct iochan +{ + int fd; + int flags; +#define EVENT_INPUT 0x01 +#define EVENT_OUTPUT 0x02 +#define EVENT_EXCEPT 0x04 +#define EVENT_TIMEOUT 0x08 +#define EVENT_WORK 0x10 + int force_event; + IOC_CALLBACK fun; + void *data; + int destroyed; + time_t last_event; + time_t max_idle; + + struct iochan *next; +} *IOCHAN; + +#define iochan_destroy(i) (void)((i)->destroyed = 1) +#define iochan_getfd(i) ((i)->fd) +#define iochan_setfd(i, f) ((i)->fd = (f)) +#define iochan_getdata(i) ((i)->data) +#define iochan_setdata(i, d) ((i)->data = d) +#define iochan_getflags(i) ((i)->flags) +#define iochan_setflags(i, d) ((i)->flags = d) +#define iochan_setflag(i, d) ((i)->flags |= d) +#define iochan_clearflag(i, d) ((i)->flags &= ~(d)) +#define iochan_getflag(i, d) ((i)->flags & d ? 1 : 0) +#define iochan_getfun(i) ((i)->fun) +#define iochan_setfun(i, d) ((i)->fun = d) +#define iochan_setevent(i, e) ((i)->force_event = (e)) +#define iochan_getnext(i) ((i)->next) +#define iochan_settimeout(i, t) ((i)->max_idle = (t), (i)->last_event = time(0)) +#define iochan_activity(i) ((i)->last_event = time(0)) + +IOCHAN iochan_create(int fd, IOC_CALLBACK cb, int flags); +int event_loop(IOCHAN *iochans); + +#endif diff --git a/src/http.c b/src/http.c new file mode 100644 index 0000000..401f5ae --- /dev/null +++ b/src/http.c @@ -0,0 +1,854 @@ +/* + * $Id: http.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "command.h" +#include "util.h" +#include "eventl.h" +#include "pazpar2.h" +#include "http.h" +#include "http_command.h" + +static void proxy_io(IOCHAN i, int event); +static struct http_channel *http_create(void); +static void http_destroy(IOCHAN i); + +extern IOCHAN channel_list; + +static struct sockaddr_in *proxy_addr = 0; // If this is set, we proxy normal HTTP requests +static char proxy_url[256] = ""; +static struct http_buf *http_buf_freelist = 0; +static struct http_channel *http_channel_freelist = 0; + +static struct http_buf *http_buf_create() +{ + struct http_buf *r; + + if (http_buf_freelist) + { + r = http_buf_freelist; + http_buf_freelist = http_buf_freelist->next; + } + else + r = xmalloc(sizeof(struct http_buf)); + r->offset = 0; + r->len = 0; + r->next = 0; + return r; +} + +static void http_buf_destroy(struct http_buf *b) +{ + b->next = http_buf_freelist; + http_buf_freelist = b; +} + +static void http_buf_destroy_queue(struct http_buf *b) +{ + struct http_buf *p; + while (b) + { + p = b->next; + http_buf_destroy(b); + b = p; + } +} + +#ifdef GAGA +// Calculate length of chain +static int http_buf_len(struct http_buf *b) +{ + int sum = 0; + for (; b; b = b->next) + sum += b->len; + return sum; +} +#endif + +static struct http_buf *http_buf_bybuf(char *b, int len) +{ + struct http_buf *res = 0; + struct http_buf **p = &res; + + while (len) + { + *p = http_buf_create(); + int tocopy = len; + if (tocopy > HTTP_BUF_SIZE) + tocopy = HTTP_BUF_SIZE; + memcpy((*p)->buf, b, tocopy); + (*p)->len = tocopy; + len -= tocopy; + b += tocopy; + p = &(*p)->next; + } + return res; +} + +// Add a (chain of) buffers to the end of an existing queue. +static void http_buf_enqueue(struct http_buf **queue, struct http_buf *b) +{ + while (*queue) + queue = &(*queue)->next; + *queue = b; +} + +static struct http_buf *http_buf_bywrbuf(WRBUF wrbuf) +{ + // Heavens to Betsy (buf)! + return http_buf_bybuf(wrbuf_buf(wrbuf), wrbuf_len(wrbuf)); +} + +// Non-destructively collapse chain of buffers into a string (max *len) +// Return +static int http_buf_peek(struct http_buf *b, char *buf, int len) +{ + int rd = 0; + while (b && rd < len) + { + int toread = len - rd; + if (toread > b->len) + toread = b->len; + memcpy(buf + rd, b->buf + b->offset, toread); + rd += toread; + b = b->next; + } + buf[rd] = '\0'; + return rd; +} + +// Ddestructively munch up to len from head of queue. +static int http_buf_read(struct http_buf **b, char *buf, int len) +{ + int rd = 0; + while ((*b) && rd < len) + { + int toread = len - rd; + if (toread > (*b)->len) + toread = (*b)->len; + memcpy(buf + rd, (*b)->buf + (*b)->offset, toread); + rd += toread; + if (toread < (*b)->len) + { + (*b)->len -= toread; + (*b)->offset += toread; + break; + } + else + { + struct http_buf *n = (*b)->next; + http_buf_destroy(*b); + *b = n; + } + } + buf[rd] = '\0'; + return rd; +} + +void static urldecode(char *i, char *o) +{ + while (*i) + { + if (*i == '+') + { + *(o++) = ' '; + i++; + } + else if (*i == '%') + { + i++; + sscanf(i, "%2hhx", o); + i += 2; + o++; + } + else + *(o++) = *(i++); + } + *o = '\0'; +} + +void http_addheader(struct http_response *r, const char *name, const char *value) +{ + struct http_channel *c = r->channel; + struct http_header *h = nmem_malloc(c->nmem, sizeof *h); + h->name = nmem_strdup(c->nmem, name); + h->value = nmem_strdup(c->nmem, value); + h->next = r->headers; + r->headers = h; +} + +char *http_argbyname(struct http_request *r, char *name) +{ + struct http_argument *p; + if (!name) + return 0; + for (p = r->arguments; p; p = p->next) + if (!strcmp(p->name, name)) + return p->value; + return 0; +} + +char *http_headerbyname(struct http_request *r, char *name) +{ + struct http_header *p; + for (p = r->headers; p; p = p->next) + if (!strcmp(p->name, name)) + return p->value; + return 0; +} + +struct http_response *http_create_response(struct http_channel *c) +{ + struct http_response *r = nmem_malloc(c->nmem, sizeof(*r)); + strcpy(r->code, "200"); + r->msg = "OK"; + r->channel = c; + r->headers = 0; + r->payload = 0; + return r; +} + +// Check if we have a complete request. Return 0 or length (including trailing newline) +// FIXME: Does not deal gracefully with requests carrying payload +// but this is kind of OK since we will reject anything other than an empty GET +static int request_check(struct http_buf *queue) +{ + char tmp[4096]; + int len = 0; + char *buf = tmp; + + http_buf_peek(queue, tmp, 4096); + while (*buf) // Check if we have a sequence of lines terminated by an empty line + { + char *b = strstr(buf, "\r\n"); + + if (!b) + return 0; + + len += (b - buf) + 2; + if (b == buf) + return len; + buf = b + 2; + } + return 0; +} + +struct http_request *http_parse_request(struct http_channel *c, struct http_buf **queue, + int len) +{ + struct http_request *r = nmem_malloc(c->nmem, sizeof(*r)); + char *p, *p2; + char tmp[4096]; + char *buf = tmp; + + if (len > 4096) + return 0; + if (http_buf_read(queue, buf, len) < len) + return 0; + + r->channel = c; + r->arguments = 0; + r->headers = 0; + // Parse first line + for (p = buf, p2 = r->method; *p && *p != ' ' && p - buf < 19; p++) + *(p2++) = *p; + if (*p != ' ') + { + yaz_log(YLOG_WARN, "Unexpected HTTP method in request"); + return 0; + } + *p2 = '\0'; + + if (!(buf = strchr(buf, ' '))) + { + yaz_log(YLOG_WARN, "Syntax error in request (1)"); + return 0; + } + buf++; + if (!(p = strchr(buf, ' '))) + { + yaz_log(YLOG_WARN, "Syntax error in request (2)"); + return 0; + } + *(p++) = '\0'; + if ((p2 = strchr(buf, '?'))) // Do we have arguments? + *(p2++) = '\0'; + r->path = nmem_strdup(c->nmem, buf); + if (p2) + { + // Parse Arguments + while (*p2) + { + struct http_argument *a; + char *equal = strchr(p2, '='); + char *eoa = strchr(p2, '&'); + if (!equal) + { + yaz_log(YLOG_WARN, "Expected '=' in argument"); + return 0; + } + if (!eoa) + eoa = equal + strlen(equal); // last argument + else + *(eoa++) = '\0'; + a = nmem_malloc(c->nmem, sizeof(struct http_argument)); + *(equal++) = '\0'; + a->name = nmem_strdup(c->nmem, p2); + urldecode(equal, equal); + a->value = nmem_strdup(c->nmem, equal); + a->next = r->arguments; + r->arguments = a; + p2 = eoa; + } + } + buf = p; + + if (strncmp(buf, "HTTP/", 5)) + strcpy(r->http_version, "1.0"); + else + { + buf += 5; + if (!(p = strstr(buf, "\r\n"))) + return 0; + *(p++) = '\0'; + p++; + strcpy(r->http_version, buf); + buf = p; + } + strcpy(c->version, r->http_version); + + r->headers = 0; + while (*buf) + { + if (!(p = strstr(buf, "\r\n"))) + return 0; + if (p == buf) + break; + else + { + struct http_header *h = nmem_malloc(c->nmem, sizeof(*h)); + if (!(p2 = strchr(buf, ':'))) + return 0; + *(p2++) = '\0'; + h->name = nmem_strdup(c->nmem, buf); + while (isspace(*p2)) + p2++; + if (p2 >= p) // Empty header? + { + buf = p + 2; + continue; + } + *p = '\0'; + h->value = nmem_strdup(c->nmem, p2); + h->next = r->headers; + r->headers = h; + buf = p + 2; + } + } + + return r; +} + + +static struct http_buf *http_serialize_response(struct http_channel *c, + struct http_response *r) +{ + wrbuf_rewind(c->wrbuf); + struct http_header *h; + + wrbuf_printf(c->wrbuf, "HTTP/1.1 %s %s\r\n", r->code, r->msg); + for (h = r->headers; h; h = h->next) + wrbuf_printf(c->wrbuf, "%s: %s\r\n", h->name, h->value); + wrbuf_printf(c->wrbuf, "Content-length: %d\r\n", r->payload ? (int) strlen(r->payload) : 0); + wrbuf_printf(c->wrbuf, "Content-type: text/xml\r\n"); + wrbuf_puts(c->wrbuf, "\r\n"); + + if (r->payload) + wrbuf_puts(c->wrbuf, r->payload); + + return http_buf_bywrbuf(c->wrbuf); +} + +// Serialize a HTTP request +static struct http_buf *http_serialize_request(struct http_request *r) +{ + struct http_channel *c = r->channel; + wrbuf_rewind(c->wrbuf); + struct http_header *h; + struct http_argument *a; + + wrbuf_printf(c->wrbuf, "%s %s", r->method, r->path); + + if (r->arguments) + { + wrbuf_putc(c->wrbuf, '?'); + for (a = r->arguments; a; a = a->next) { + if (a != r->arguments) + wrbuf_putc(c->wrbuf, '&'); + wrbuf_printf(c->wrbuf, "%s=%s", a->name, a->value); + } + } + + wrbuf_printf(c->wrbuf, " HTTP/%s\r\n", r->http_version); + + for (h = r->headers; h; h = h->next) + wrbuf_printf(c->wrbuf, "%s: %s\r\n", h->name, h->value); + + wrbuf_puts(c->wrbuf, "\r\n"); + + return http_buf_bywrbuf(c->wrbuf); +} + + +static int http_weshouldproxy(struct http_request *rq) +{ + if (proxy_addr && !strstr(rq->path, "search.pz2")) + return 1; + return 0; +} + +static int http_proxy(struct http_request *rq) +{ + struct http_channel *c = rq->channel; + struct http_proxy *p = c->proxy; + struct http_header *hp; + struct http_buf *requestbuf; + + if (!p) // This is a new connection. Create a proxy channel + { + int sock; + struct protoent *pe; + int one = 1; + int flags; + + if (!(pe = getprotobyname("tcp"))) { + abort(); + } + if ((sock = socket(PF_INET, SOCK_STREAM, pe->p_proto)) < 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "socket"); + return -1; + } + if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char*) + &one, sizeof(one)) < 0) + abort(); + if ((flags = fcntl(sock, F_GETFL, 0)) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl"); + if (fcntl(sock, F_SETFL, flags | O_NONBLOCK) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2"); + if (connect(sock, (struct sockaddr *) proxy_addr, sizeof(*proxy_addr)) < 0) + if (errno != EINPROGRESS) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "Proxy connect"); + return -1; + } + + p = xmalloc(sizeof(struct http_proxy)); + p->oqueue = 0; + p->channel = c; + c->proxy = p; + // We will add EVENT_OUTPUT below + p->iochan = iochan_create(sock, proxy_io, EVENT_INPUT); + iochan_setdata(p->iochan, p); + p->iochan->next = channel_list; + channel_list = p->iochan; + } + + // Modify Host: header + for (hp = rq->headers; hp; hp = hp->next) + if (!strcmp(hp->name, "Host")) + break; + if (!hp) + { + yaz_log(YLOG_WARN, "Failed to find Host header in proxy"); + return -1; + } + hp->value = nmem_strdup(c->nmem, proxy_url); + requestbuf = http_serialize_request(rq); + http_buf_enqueue(&p->oqueue, requestbuf); + iochan_setflag(p->iochan, EVENT_OUTPUT); + return 0; +} + +void http_send_response(struct http_channel *ch) +{ + struct http_response *rs = ch->response; + assert(rs); + struct http_buf *hb = http_serialize_response(ch, rs); + if (!hb) + { + yaz_log(YLOG_WARN, "Failed to serialize HTTP response"); + http_destroy(ch->iochan); + } + else + { + http_buf_enqueue(&ch->oqueue, hb); + iochan_setflag(ch->iochan, EVENT_OUTPUT); + ch->state = Http_Idle; + } +} + +static void http_io(IOCHAN i, int event) +{ + struct http_channel *hc = iochan_getdata(i); + + switch (event) + { + int res, reqlen; + struct http_buf *htbuf; + + case EVENT_INPUT: + htbuf = http_buf_create(); + res = read(iochan_getfd(i), htbuf->buf, HTTP_BUF_SIZE -1); + if (res <= 0 && errno != EAGAIN) + { + http_buf_destroy(htbuf); + http_destroy(i); + return; + } + if (res > 0) + { + htbuf->buf[res] = '\0'; + htbuf->len = res; + http_buf_enqueue(&hc->iqueue, htbuf); + } + + if (hc->state == Http_Busy) + return; + + if ((reqlen = request_check(hc->iqueue)) <= 2) + return; + + nmem_reset(hc->nmem); + if (!(hc->request = http_parse_request(hc, &hc->iqueue, reqlen))) + { + yaz_log(YLOG_WARN, "Failed to parse request"); + http_destroy(i); + return; + } + hc->response = 0; + yaz_log(YLOG_LOG, "Request: %s %s v %s", hc->request->method, + hc->request->path, hc->request->http_version); + if (http_weshouldproxy(hc->request)) + http_proxy(hc->request); + else + { + // Execute our business logic! + hc->state = Http_Busy; + http_command(hc); + } + if (hc->iqueue) + { + yaz_log(YLOG_DEBUG, "We think we have more input to read. Forcing event"); + iochan_setevent(i, EVENT_INPUT); + } + + break; + + case EVENT_OUTPUT: + if (hc->oqueue) + { + struct http_buf *wb = hc->oqueue; + res = write(iochan_getfd(hc->iochan), wb->buf + wb->offset, wb->len); + if (res <= 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "write"); + http_destroy(i); + return; + } + if (res == wb->len) + { + hc->oqueue = hc->oqueue->next; + http_buf_destroy(wb); + } + else + { + wb->len -= res; + wb->offset += res; + } + if (!hc->oqueue) { + if (!strcmp(hc->version, "1.0")) + { + http_destroy(i); + return; + } + else + { + iochan_clearflag(i, EVENT_OUTPUT); + if (hc->iqueue) + iochan_setevent(hc->iochan, EVENT_INPUT); + } + } + } + + if (!hc->oqueue && hc->proxy && !hc->proxy->iochan) + http_destroy(i); // Server closed; we're done + break; + default: + yaz_log(YLOG_WARN, "Unexpected event on connection"); + http_destroy(i); + } +} + +// Handles I/O on a client connection to a backend web server (proxy mode) +static void proxy_io(IOCHAN pi, int event) +{ + struct http_proxy *pc = iochan_getdata(pi); + struct http_channel *hc = pc->channel; + + switch (event) + { + int res; + struct http_buf *htbuf; + + case EVENT_INPUT: + htbuf = http_buf_create(); + res = read(iochan_getfd(pi), htbuf->buf, HTTP_BUF_SIZE -1); + if (res == 0 || (res < 0 && errno != EINPROGRESS)) + { + if (hc->oqueue) + { + yaz_log(YLOG_WARN, "Proxy read came up short"); + // Close channel and alert client HTTP channel that we're gone + http_buf_destroy(htbuf); + close(iochan_getfd(pi)); + iochan_destroy(pi); + pc->iochan = 0; + } + else + { + http_destroy(hc->iochan); + return; + } + } + else + { + htbuf->buf[res] = '\0'; + htbuf->len = res; + http_buf_enqueue(&hc->oqueue, htbuf); + } + iochan_setflag(hc->iochan, EVENT_OUTPUT); + break; + case EVENT_OUTPUT: + if (!(htbuf = pc->oqueue)) + { + iochan_clearflag(pi, EVENT_OUTPUT); + return; + } + res = write(iochan_getfd(pi), htbuf->buf + htbuf->offset, htbuf->len); + if (res <= 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "write"); + http_destroy(hc->iochan); + return; + } + if (res == htbuf->len) + { + struct http_buf *np = htbuf->next; + http_buf_destroy(htbuf); + pc->oqueue = np; + } + else + { + htbuf->len -= res; + htbuf->offset += res; + } + + if (!pc->oqueue) { + iochan_setflags(pi, EVENT_INPUT); // Turns off output flag + } + break; + default: + yaz_log(YLOG_WARN, "Unexpected event on connection"); + http_destroy(hc->iochan); + } +} + +// Cleanup channel +static void http_destroy(IOCHAN i) +{ + struct http_channel *s = iochan_getdata(i); + + if (s->proxy) + { + if (s->proxy->iochan) + { + close(iochan_getfd(s->proxy->iochan)); + iochan_destroy(s->proxy->iochan); + } + http_buf_destroy_queue(s->proxy->oqueue); + xfree(s->proxy); + } + s->next = http_channel_freelist; + http_channel_freelist = s; + close(iochan_getfd(i)); + iochan_destroy(i); +} + +static struct http_channel *http_create(void) +{ + struct http_channel *r = http_channel_freelist; + + if (r) + { + http_channel_freelist = r->next; + nmem_reset(r->nmem); + wrbuf_rewind(r->wrbuf); + } + else + { + r = xmalloc(sizeof(struct http_channel)); + r->nmem = nmem_create(); + r->wrbuf = wrbuf_alloc(); + } + r->proxy = 0; + r->iochan = 0; + r->iqueue = r->oqueue = 0; + r->state = Http_Idle; + r->request = 0; + r->response = 0; + return r; +} + + +/* Accept a new command connection */ +static void http_accept(IOCHAN i, int event) +{ + struct sockaddr_in addr; + int fd = iochan_getfd(i); + socklen_t len; + int s; + IOCHAN c; + int flags; + struct http_channel *ch; + + len = sizeof addr; + if ((s = accept(fd, (struct sockaddr *) &addr, &len)) < 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "accept"); + return; + } + if ((flags = fcntl(s, F_GETFL, 0)) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl"); + if (fcntl(s, F_SETFL, flags | O_NONBLOCK) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2"); + + yaz_log(YLOG_LOG, "New command connection"); + c = iochan_create(s, http_io, EVENT_INPUT | EVENT_EXCEPT); + + ch = http_create(); + ch->iochan = c; + iochan_setdata(c, ch); + + c->next = channel_list; + channel_list = c; +} + +/* Create a http-channel listener, syntax [host:]port */ +void http_init(const char *addr) +{ + IOCHAN c; + int l; + struct protoent *p; + struct sockaddr_in myaddr; + int one = 1; + const char *pp; + int port; + + yaz_log(YLOG_LOG, "HTTP listener is %s", addr); + + bzero(&myaddr, sizeof myaddr); + myaddr.sin_family = AF_INET; + pp = strchr(addr, ':'); + if (pp) + { + int len = pp - addr; + char hostname[128]; + struct hostent *he; + + strncpy(hostname, addr, len); + hostname[len] = '\0'; + if (!(he = gethostbyname(hostname))) + { + yaz_log(YLOG_FATAL, "Unable to resolve '%s'", hostname); + exit(1); + } + memcpy(&myaddr.sin_addr.s_addr, he->h_addr_list[0], he->h_length); + port = atoi(pp + 1); + } + else + { + port = atoi(addr); + myaddr.sin_addr.s_addr = INADDR_ANY; + } + myaddr.sin_port = htons(port); + + if (!(p = getprotobyname("tcp"))) { + abort(); + } + if ((l = socket(PF_INET, SOCK_STREAM, p->p_proto)) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "socket"); + if (setsockopt(l, SOL_SOCKET, SO_REUSEADDR, (char*) + &one, sizeof(one)) < 0) + abort(); + + if (bind(l, (struct sockaddr *) &myaddr, sizeof myaddr) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "bind"); + if (listen(l, SOMAXCONN) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "listen"); + + c = iochan_create(l, http_accept, EVENT_INPUT | EVENT_EXCEPT); + c->next = channel_list; + channel_list = c; +} + +void http_set_proxyaddr(char *host) +{ + char *p; + int port; + struct hostent *he; + + strcpy(proxy_url, host); + p = strchr(host, ':'); + yaz_log(YLOG_DEBUG, "Proxying for %s", host); + if (p) { + port = atoi(p + 1); + *p = '\0'; + } + else + port = 80; + if (!(he = gethostbyname(host))) + { + fprintf(stderr, "Failed to lookup '%s'\n", host); + exit(1); + } + proxy_addr = xmalloc(sizeof(struct sockaddr_in)); + proxy_addr->sin_family = he->h_addrtype; + memcpy(&proxy_addr->sin_addr.s_addr, he->h_addr_list[0], he->h_length); + proxy_addr->sin_port = htons(port); +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/http.h b/src/http.h new file mode 100644 index 0000000..7501424 --- /dev/null +++ b/src/http.h @@ -0,0 +1,88 @@ +#ifndef HTTP_H +#define HTTP_H + +// Generic I/O buffer +struct http_buf +{ +#define HTTP_BUF_SIZE 4096 + char buf[4096]; + int offset; + int len; + struct http_buf *next; +}; + +struct http_channel +{ + IOCHAN iochan; + struct http_buf *iqueue; + struct http_buf *oqueue; + char version[10]; + struct http_proxy *proxy; + enum + { + Http_Idle, + Http_Busy // Don't process new HTTP requests while we're busy + } state; + NMEM nmem; + WRBUF wrbuf; + struct http_request *request; + struct http_response *response; + struct http_channel *next; // for freelist +}; + +struct http_proxy // attached to iochan for proxy connection +{ + IOCHAN iochan; + struct http_channel *channel; + struct http_buf *oqueue; +}; + +struct http_header +{ + char *name; + char *value; + struct http_header *next; +}; + +struct http_argument +{ + char *name; + char *value; + struct http_argument *next; +}; + +struct http_request +{ + struct http_channel *channel; + char http_version[20]; + char method[20]; + char *path; + struct http_header *headers; + struct http_argument *arguments; +}; + +struct http_response +{ + char code[4]; + char *msg; + struct http_channel *channel; + struct http_header *headers; + char *payload; +}; + +void http_set_proxyaddr(char *url); +void http_init(const char *addr); +void http_addheader(struct http_response *r, const char *name, const char *value); +char *http_argbyname(struct http_request *r, char *name); +char *http_headerbyname(struct http_request *r, char *name); +struct http_response *http_create_response(struct http_channel *c); +void http_send_response(struct http_channel *c); + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +#endif diff --git a/src/http_command.c b/src/http_command.c new file mode 100644 index 0000000..6c99232 --- /dev/null +++ b/src/http_command.c @@ -0,0 +1,403 @@ +/* + * $Id: http_command.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "command.h" +#include "util.h" +#include "eventl.h" +#include "pazpar2.h" +#include "http.h" +#include "http_command.h" + +extern struct parameters global_parameters; +extern IOCHAN channel_list; + +struct http_session { + IOCHAN timeout_iochan; // NOTE: This is NOT associated with a socket + struct session *psession; + unsigned int session_id; + int timestamp; + struct http_session *next; +}; + +static struct http_session *session_list = 0; + +void http_session_destroy(struct http_session *s); + +static void session_timeout(IOCHAN i, int event) +{ + struct http_session *s = iochan_getdata(i); + http_session_destroy(s); +} + +struct http_session *http_session_create() +{ + struct http_session *r = xmalloc(sizeof(*r)); + r->psession = new_session(); + r->session_id = 0; + r->timestamp = 0; + r->next = session_list; + session_list = r; + r->timeout_iochan = iochan_create(-1, session_timeout, 0); + iochan_setdata(r->timeout_iochan, r); + iochan_settimeout(r->timeout_iochan, global_parameters.session_timeout); + r->timeout_iochan->next = channel_list; + channel_list = r->timeout_iochan; + return r; +} + +void http_session_destroy(struct http_session *s) +{ + struct http_session **p; + + for (p = &session_list; *p; p = &(*p)->next) + if (*p == s) + { + *p = (*p)->next; + break; + } + iochan_destroy(s->timeout_iochan); + destroy_session(s->psession); + xfree(s); +} + +static void error(struct http_response *rs, char *code, char *msg, char *txt) +{ + struct http_channel *c = rs->channel; + char tmp[1024]; + + if (!txt) + txt = msg; + rs->msg = nmem_strdup(c->nmem, msg); + strcpy(rs->code, code); + sprintf(tmp, "%s", txt); + rs->payload = nmem_strdup(c->nmem, tmp); + http_send_response(c); +} + +unsigned int make_sessionid() +{ + struct timeval t; + unsigned int res; + static int seq = 0; + + seq++; + if (gettimeofday(&t, 0) < 0) + abort(); + res = t.tv_sec; + res = ((res << 8) | (seq & 0xff)) & ((unsigned int) (1 << 31) - 1); + return res; +} + +static struct http_session *locate_session(struct http_request *rq, struct http_response *rs) +{ + struct http_session *p; + char *session = http_argbyname(rq, "session"); + unsigned int id; + + if (!session) + { + error(rs, "417", "Must supply session", 0); + return 0; + } + id = atoi(session); + for (p = session_list; p; p = p->next) + if (id == p->session_id) + { + iochan_activity(p->timeout_iochan); + return p; + } + error(rs, "417", "Session does not exist, or it has expired", 0); + return 0; +} + +static void cmd_exit(struct http_channel *c) +{ + yaz_log(YLOG_WARN, "exit"); + exit(0); +} + + +static void cmd_init(struct http_channel *c) +{ + unsigned int sesid; + char buf[1024]; + struct http_session *s = http_session_create(); + struct http_response *rs = c->response; + + yaz_log(YLOG_DEBUG, "HTTP Session init"); + sesid = make_sessionid(); + s->session_id = sesid; + sprintf(buf, "OK%u", sesid); + rs->payload = nmem_strdup(c->nmem, buf); + http_send_response(c); +} + +static void cmd_termlist(struct http_channel *c) +{ + struct http_response *rs = c->response; + struct http_request *rq = c->request; + struct http_session *s = locate_session(rq, rs); + struct termlist_score **p; + int len; + int i; + + if (!s) + return; + wrbuf_rewind(c->wrbuf); + + wrbuf_puts(c->wrbuf, ""); + p = termlist(s->psession, &len); + if (p) + for (i = 0; i < len; i++) + { + wrbuf_puts(c->wrbuf, "\n"); + wrbuf_printf(c->wrbuf, "%s", p[i]->term); + wrbuf_printf(c->wrbuf, "%d", p[i]->frequency); + wrbuf_puts(c->wrbuf, ""); + } + wrbuf_puts(c->wrbuf, ""); + rs->payload = nmem_strdup(rq->channel->nmem, wrbuf_buf(c->wrbuf)); + http_send_response(c); +} + + +static void cmd_bytarget(struct http_channel *c) +{ + struct http_response *rs = c->response; + struct http_request *rq = c->request; + struct http_session *s = locate_session(rq, rs); + struct hitsbytarget *ht; + int count, i; + + if (!s) + return; + if (!(ht = hitsbytarget(s->psession, &count))) + { + error(rs, "500", "Failed to retrieve hitcounts", 0); + return; + } + wrbuf_rewind(c->wrbuf); + wrbuf_puts(c->wrbuf, "OK"); + + for (i = 0; i < count; i++) + { + wrbuf_puts(c->wrbuf, "\n"); + wrbuf_printf(c->wrbuf, "%s\n", ht[i].id); + wrbuf_printf(c->wrbuf, "%d\n", ht[i].hits); + wrbuf_printf(c->wrbuf, "%d\n", ht[i].diagnostic); + wrbuf_printf(c->wrbuf, "%d\n", ht[i].records); + wrbuf_printf(c->wrbuf, "%s\n", ht[i].state); + wrbuf_puts(c->wrbuf, ""); + } + + wrbuf_puts(c->wrbuf, ""); + rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf)); + http_send_response(c); +} + +static void show_records(struct http_channel *c) +{ + struct http_request *rq = c->request; + struct http_response *rs = c->response; + struct http_session *s = locate_session(rq, rs); + struct record **rl; + char *start = http_argbyname(rq, "start"); + char *num = http_argbyname(rq, "num"); + int startn = 0; + int numn = 20; + int total; + int total_hits; + int i; + + if (!s) + return; + + if (start) + startn = atoi(start); + if (num) + numn = atoi(num); + + rl = show(s->psession, startn, &numn, &total, &total_hits); + + wrbuf_rewind(c->wrbuf); + wrbuf_puts(c->wrbuf, "\nOK\n"); + wrbuf_printf(c->wrbuf, "%d\n", total); + wrbuf_printf(c->wrbuf, "%d\n", total_hits); + wrbuf_printf(c->wrbuf, "%d\n", startn); + wrbuf_printf(c->wrbuf, "%d\n", numn); + + for (i = 0; i < numn; i++) + { + int ccount; + struct record *p; + + wrbuf_puts(c->wrbuf, "\n"); + wrbuf_printf(c->wrbuf, "%s\n", rl[i]->title); + for (ccount = 1, p = rl[i]->next_cluster; p; p = p->next_cluster, ccount++) + ; + if (ccount > 1) + wrbuf_printf(c->wrbuf, "%d\n", ccount); + wrbuf_puts(c->wrbuf, "\n"); + } + + wrbuf_puts(c->wrbuf, "\n"); + rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf)); + http_send_response(c); +} + +static void show_records_ready(void *data) +{ + struct http_channel *c = (struct http_channel *) data; + + show_records(c); +} + +static void cmd_show(struct http_channel *c) +{ + struct http_request *rq = c->request; + struct http_response *rs = c->response; + struct http_session *s = locate_session(rq, rs); + char *block = http_argbyname(rq, "block"); + + if (!s) + return; + + if (block) + { + if (!s->psession->reclist || !s->psession->reclist->num_records) + { + session_set_watch(s->psession, SESSION_WATCH_RECORDS, show_records_ready, c); + yaz_log(YLOG_DEBUG, "Blocking on cmd_show"); + return; + } + } + + show_records(c); +} + +static void cmd_ping(struct http_channel *c) +{ + struct http_request *rq = c->request; + struct http_response *rs = c->response; + struct http_session *s = locate_session(rq, rs); + if (!s) + return; + rs->payload = "OK"; + http_send_response(c); +} + +static void cmd_search(struct http_channel *c) +{ + struct http_request *rq = c->request; + struct http_response *rs = c->response; + struct http_session *s = locate_session(rq, rs); + char *query = http_argbyname(rq, "query"); + char *res; + + if (!s) + return; + if (!query) + { + error(rs, "417", "Must supply query", 0); + return; + } + res = search(s->psession, query); + if (res) + { + error(rs, "417", res, res); + return; + } + rs->payload = "OK"; + http_send_response(c); +} + + +static void cmd_stat(struct http_channel *c) +{ + struct http_request *rq = c->request; + struct http_response *rs = c->response; + struct http_session *s = locate_session(rq, rs); + struct statistics stat; + + if (!s) + return; + + statistics(s->psession, &stat); + + wrbuf_rewind(c->wrbuf); + wrbuf_puts(c->wrbuf, ""); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_hits); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_records); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_clients); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_no_connection); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_connecting); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_initializing); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_searching); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_presenting); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_idle); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_failed); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_error); + wrbuf_puts(c->wrbuf, ""); + rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf)); + http_send_response(c); +} + + +struct { + char *name; + void (*fun)(struct http_channel *c); +} commands[] = { + { "init", cmd_init }, + { "stat", cmd_stat }, + { "bytarget", cmd_bytarget }, + { "show", cmd_show }, + { "search", cmd_search }, + { "termlist", cmd_termlist }, + { "exit", cmd_exit }, + { "ping", cmd_ping }, + {0,0} +}; + +void http_command(struct http_channel *c) +{ + char *command = http_argbyname(c->request, "command"); + struct http_response *rs = http_create_response(c); + int i; + + c->response = rs; + if (!command) + { + error(rs, "417", "Must supply command", 0); + return; + } + for (i = 0; commands[i].name; i++) + if (!strcmp(commands[i].name, command)) + { + (*commands[i].fun)(c); + break; + } + if (!commands[i].name) + error(rs, "417", "Unknown command", 0); + + return; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/http_command.h b/src/http_command.h new file mode 100644 index 0000000..e127925 --- /dev/null +++ b/src/http_command.h @@ -0,0 +1,8 @@ +#ifndef HTTP_COMMAND_H +#define HTTP_COMMAND + +#include "http.h" + +void http_command(struct http_channel *c); + +#endif diff --git a/src/pazpar2.c b/src/pazpar2.c new file mode 100644 index 0000000..c95ce3a --- /dev/null +++ b/src/pazpar2.c @@ -0,0 +1,1395 @@ +/* $Id: pazpar2.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ */; + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "pazpar2.h" +#include "eventl.h" +#include "command.h" +#include "http.h" +#include "termlists.h" +#include "reclists.h" +#include "relevance.h" + +#define PAZPAR2_VERSION "0.1" +#define MAX_CHUNK 15 + +static void client_fatal(struct client *cl); +static void connection_destroy(struct connection *co); +static int client_prep_connection(struct client *cl); +static void ingest_records(struct client *cl, Z_Records *r); +void session_alert_watch(struct session *s, int what); + +IOCHAN channel_list = 0; // Master list of connections we're handling events to + +static struct connection *connection_freelist = 0; +static struct client *client_freelist = 0; + +static struct host *hosts = 0; // The hosts we know about +static struct database *databases = 0; // The databases we know about + +static char *client_states[] = { + "Client_Connecting", + "Client_Connected", + "Client_Idle", + "Client_Initializing", + "Client_Searching", + "Client_Presenting", + "Client_Error", + "Client_Failed", + "Client_Disconnected", + "Client_Stopped" +}; + +struct parameters global_parameters = +{ + 30, + "81", + "Index Data PazPar2 (MasterKey)", + PAZPAR2_VERSION, + 600, // 10 minutes + 60, + 100, + MAX_CHUNK, + 0, + 0, + 0, + 0 +}; + + +static int send_apdu(struct client *c, Z_APDU *a) +{ + struct connection *co = c->connection; + char *buf; + int len, r; + + if (!z_APDU(global_parameters.odr_out, &a, 0, 0)) + { + odr_perror(global_parameters.odr_out, "Encoding APDU"); + abort(); + } + buf = odr_getbuf(global_parameters.odr_out, &len, 0); + r = cs_put(co->link, buf, len); + if (r < 0) + { + yaz_log(YLOG_WARN, "cs_put: %s", cs_errmsg(cs_errno(co->link))); + return -1; + } + else if (r == 1) + { + fprintf(stderr, "cs_put incomplete (ParaZ does not handle that)\n"); + exit(1); + } + odr_reset(global_parameters.odr_out); /* release the APDU structure */ + co->state = Conn_Waiting; + return 0; +} + + +static void send_init(IOCHAN i) +{ + struct connection *co = iochan_getdata(i); + struct client *cl = co->client; + Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_initRequest); + + a->u.initRequest->implementationId = global_parameters.implementationId; + a->u.initRequest->implementationName = global_parameters.implementationName; + a->u.initRequest->implementationVersion = + global_parameters.implementationVersion; + ODR_MASK_SET(a->u.initRequest->options, Z_Options_search); + ODR_MASK_SET(a->u.initRequest->options, Z_Options_present); + ODR_MASK_SET(a->u.initRequest->options, Z_Options_namedResultSets); + + ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_1); + ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_2); + ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_3); + if (send_apdu(cl, a) >= 0) + { + iochan_setflags(i, EVENT_INPUT); + cl->state = Client_Initializing; + } + else + cl->state = Client_Error; + odr_reset(global_parameters.odr_out); +} + +static void send_search(IOCHAN i) +{ + struct connection *co = iochan_getdata(i); + struct client *cl = co->client; + struct session *se = cl->session; + struct database *db = cl->database; + Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_searchRequest); + int ndb, cerror, cpos; + char **databaselist; + Z_Query *zquery; + struct ccl_rpn_node *cn; + int ssub = 0, lslb = 100000, mspn = 10; + + yaz_log(YLOG_DEBUG, "Sending search"); + + cn = ccl_find_str(global_parameters.ccl_filter, se->query, &cerror, &cpos); + if (!cn) + return; + a->u.searchRequest->query = zquery = odr_malloc(global_parameters.odr_out, + sizeof(Z_Query)); + zquery->which = Z_Query_type_1; + zquery->u.type_1 = ccl_rpn_query(global_parameters.odr_out, cn); + ccl_rpn_delete(cn); + + for (ndb = 0; *db->databases[ndb]; ndb++) + ; + databaselist = odr_malloc(global_parameters.odr_out, sizeof(char*) * ndb); + for (ndb = 0; *db->databases[ndb]; ndb++) + databaselist[ndb] = db->databases[ndb]; + + a->u.presentRequest->preferredRecordSyntax = + yaz_oidval_to_z3950oid(global_parameters.odr_out, + CLASS_RECSYN, VAL_USMARC); + a->u.searchRequest->smallSetUpperBound = &ssub; + a->u.searchRequest->largeSetLowerBound = &lslb; + a->u.searchRequest->mediumSetPresentNumber = &mspn; + a->u.searchRequest->resultSetName = "Default"; + a->u.searchRequest->databaseNames = databaselist; + a->u.searchRequest->num_databaseNames = ndb; + + if (send_apdu(cl, a) >= 0) + { + iochan_setflags(i, EVENT_INPUT); + cl->state = Client_Searching; + cl->requestid = se->requestid; + } + else + cl->state = Client_Error; + + odr_reset(global_parameters.odr_out); +} + +static void send_present(IOCHAN i) +{ + struct connection *co = iochan_getdata(i); + struct client *cl = co->client; + Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_presentRequest); + int toget; + int start = cl->records + 1; + + toget = global_parameters.chunk; + if (toget > cl->hits - cl->records) + toget = cl->hits - cl->records; + + yaz_log(YLOG_DEBUG, "Trying to present %d records\n", toget); + + a->u.presentRequest->resultSetStartPoint = &start; + a->u.presentRequest->numberOfRecordsRequested = &toget; + + a->u.presentRequest->resultSetId = "Default"; + + a->u.presentRequest->preferredRecordSyntax = + yaz_oidval_to_z3950oid(global_parameters.odr_out, + CLASS_RECSYN, VAL_USMARC); + + if (send_apdu(cl, a) >= 0) + { + iochan_setflags(i, EVENT_INPUT); + cl->state = Client_Presenting; + } + else + cl->state = Client_Error; + odr_reset(global_parameters.odr_out); +} + +static void do_initResponse(IOCHAN i, Z_APDU *a) +{ + struct connection *co = iochan_getdata(i); + struct client *cl = co->client; + Z_InitResponse *r = a->u.initResponse; + + yaz_log(YLOG_DEBUG, "Received init response"); + + if (*r->result) + { + cl->state = Client_Idle; + } + else + cl->state = Client_Failed; // FIXME need to do something to the connection +} + +static void do_searchResponse(IOCHAN i, Z_APDU *a) +{ + struct connection *co = iochan_getdata(i); + struct client *cl = co->client; + struct session *se = cl->session; + Z_SearchResponse *r = a->u.searchResponse; + + yaz_log(YLOG_DEBUG, "Searchresponse (status=%d)", *r->searchStatus); + + if (*r->searchStatus) + { + cl->hits = *r->resultCount; + se->total_hits += cl->hits; + if (r->presentStatus && !*r->presentStatus && r->records) + { + yaz_log(YLOG_DEBUG, "Records in search response"); + cl->records += *r->numberOfRecordsReturned; + ingest_records(cl, r->records); + } + cl->state = Client_Idle; + } + else + { /*"FAILED"*/ + cl->hits = 0; + cl->state = Client_Error; + if (r->records) { + Z_Records *recs = r->records; + if (recs->which == Z_Records_NSD) + { + yaz_log(YLOG_WARN, "Non-surrogate diagnostic"); + cl->diagnostic = *recs->u.nonSurrogateDiagnostic->condition; + cl->state = Client_Error; + } + } + } +} + +const char *find_field(const char *rec, const char *field) +{ + char lbuf[5]; + char *line; + + lbuf[0] = '\n'; + strcpy(lbuf + 1, field); + + if ((line = strstr(rec, lbuf))) + return ++line; + else + return 0; +} + +const char *find_subfield(const char *field, char subfield) +{ + const char *p = field; + + while (*p && *p != '\n') + { + while (*p != '\n' && *p != '\t') + p++; + if (*p == '\t' && *(++p) == subfield) { + if (*(++p) == ' ') + { + while (isspace(*p)) + p++; + return p; + } + } + } + return 0; +} + +// Extract 245 $a $b 100 $a +char *extract_title(struct session *s, const char *rec) +{ + const char *field, *subfield; + char *e, *ef; + unsigned char *obuf, *p; + + wrbuf_rewind(s->wrbuf); + + if (!(field = find_field(rec, "245"))) + return 0; + if (!(subfield = find_subfield(field, 'a'))) + return 0; + ef = index(subfield, '\n'); + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + if (ef) + { + wrbuf_write(s->wrbuf, subfield, ef - subfield); + if ((subfield = find_subfield(field, 'b'))) + { + ef = index(subfield, '\n'); + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + if (ef) + { + wrbuf_putc(s->wrbuf, ' '); + wrbuf_write(s->wrbuf, subfield, ef - subfield); + } + } + } + if ((field = find_field(rec, "100"))) + { + if ((subfield = find_subfield(field, 'a'))) + { + ef = index(subfield, '\n'); + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + if (ef) + { + wrbuf_puts(s->wrbuf, ", by "); + wrbuf_write(s->wrbuf, subfield, ef - subfield); + } + } + } + wrbuf_putc(s->wrbuf, '\0'); + obuf = (unsigned char*) nmem_strdup(s->nmem, wrbuf_buf(s->wrbuf)); + for (p = obuf; *p; p++) + if (*p == '&' || *p == '<' || *p > 122 || *p < ' ') + *p = ' '; + return (char*) obuf; +} + +// Extract 245 $a $b 100 $a +char *extract_mergekey(struct session *s, const char *rec) +{ + const char *field, *subfield; + char *e, *ef; + char *out, *p, *pout; + + wrbuf_rewind(s->wrbuf); + + if (!(field = find_field(rec, "245"))) + return 0; + if (!(subfield = find_subfield(field, 'a'))) + return 0; + ef = index(subfield, '\n'); + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + if (ef) + { + wrbuf_write(s->wrbuf, subfield, ef - subfield); + if ((subfield = find_subfield(field, 'b'))) + { + ef = index(subfield, '\n'); + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + if (ef) + { + wrbuf_puts(s->wrbuf, " field "); + wrbuf_write(s->wrbuf, subfield, ef - subfield); + } + } + } + if ((field = find_field(rec, "100"))) + { + if ((subfield = find_subfield(field, 'a'))) + { + ef = index(subfield, '\n'); + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + if (ef) + { + wrbuf_puts(s->wrbuf, " field "); + wrbuf_write(s->wrbuf, subfield, ef - subfield); + } + } + } + wrbuf_putc(s->wrbuf, '\0'); + p = wrbuf_buf(s->wrbuf); + out = pout = nmem_malloc(s->nmem, strlen(p) + 1); + + while (*p) + { + while (isalnum(*p)) + *(pout++) = tolower(*(p++)); + while (*p && !isalnum(*p)) + p++; + *(pout++) = ' '; + } + if (out != pout) + *(--pout) = '\0'; + + return out; +} + +#ifdef RECHEAP +static void push_record(struct session *s, struct record *r) +{ + int p; + assert(s->recheap_max + 1 < s->recheap_size); + + s->recheap[p = ++s->recheap_max] = r; + while (p > 0) + { + int parent = (p - 1) >> 1; + if (strcmp(s->recheap[p]->merge_key, s->recheap[parent]->merge_key) < 0) + { + struct record *tmp; + tmp = s->recheap[parent]; + s->recheap[parent] = s->recheap[p]; + s->recheap[p] = tmp; + p = parent; + } + else + break; + } +} + +static struct record *top_record(struct session *s) +{ + return s-> recheap_max >= 0 ? s->recheap[0] : 0; +} + +static struct record *pop_record(struct session *s) +{ + struct record *res; + int p = 0; + int lastnonleaf = (s->recheap_max - 1) >> 1; + + if (s->recheap_max < 0) + return 0; + + res = s->recheap[0]; + + s->recheap[p] = s->recheap[s->recheap_max--]; + + while (p <= lastnonleaf) + { + int right = (p + 1) << 1; + int left = right - 1; + int min = left; + + if (right < s->recheap_max && + strcmp(s->recheap[right]->merge_key, s->recheap[left]->merge_key) < 0) + min = right; + if (strcmp(s->recheap[min]->merge_key, s->recheap[p]->merge_key) < 0) + { + struct record *tmp = s->recheap[min]; + s->recheap[min] = s->recheap[p]; + s->recheap[p] = tmp; + p = min; + } + else + break; + } + return res; +} + +// Like pop_record but collapses identical (merge_key) records +// The heap will contain multiple independent matching records and possibly +// one cluster, created the last time the list was scanned +static struct record *pop_mrecord(struct session *s) +{ + struct record *this; + struct record *next; + + if (!(this = pop_record(s))) + return 0; + + // Collapse identical records + while ((next = top_record(s))) + { + struct record *p, *tmpnext; + if (strcmp(this->merge_key, next->merge_key)) + break; + // Absorb record (and clustersiblings) into a supercluster + for (p = next; p; p = tmpnext) { + tmpnext = p->next_cluster; + p->next_cluster = this->next_cluster; + this->next_cluster = p; + } + + pop_record(s); + } + return this; +} + +// Reads records in sort order. Store records in top of heapspace until rewind is called. +static struct record *read_recheap(struct session *s) +{ + struct record *r = pop_mrecord(s); + + if (r) + { + if (s->recheap_scratch < 0) + s->recheap_scratch = s->recheap_size; + s->recheap[--s->recheap_scratch] = r; + } + + return r; +} + +// Return records to heap after read +static void rewind_recheap(struct session *s) +{ + while (s->recheap_scratch >= 0) { + push_record(s, s->recheap[s->recheap_scratch++]); + if (s->recheap_scratch >= s->recheap_size) + s->recheap_scratch = -1; + } +} + +#endif + +// FIXME needs to be generalized. Should flexibly generate X lists per search +static void extract_subject(struct session *s, const char *rec) +{ + const char *field, *subfield; + + while ((field = find_field(rec, "650"))) + { + rec = field; + if ((subfield = find_subfield(field, 'a'))) + { + char *e, *ef; + char buf[1024]; + int len; + + ef = index(subfield, '\n'); + if (!ef) + return; + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + while (ef > subfield && !isalpha(*(ef - 1)) && *(ef - 1) != ')') + ef--; + len = ef - subfield; + assert(len < 1023); + memcpy(buf, subfield, len); + buf[len] = '\0'; + if (*buf) + termlist_insert(s->termlist, buf); + } + } +} + +static void pull_relevance_field(struct session *s, struct record *head, const char *rec, + char *field, int mult) +{ + const char *fb; + while ((fb = find_field(rec, field))) + { + char *ffield = strchr(fb, '\t'); + if (!ffield) + return; + char *eol = strchr(ffield, '\n'); + if (!eol) + return; + relevance_countwords(s->relevance, head, ffield, eol - ffield, mult); + rec = field + 1; // Crude way to cause a loop through repeating fields + } +} + +static void pull_relevance_keys(struct session *s, struct record *head, struct record *rec) +{ + relevance_newrec(s->relevance, head); + pull_relevance_field(s, head, rec->buf, "100", 2); + pull_relevance_field(s, head, rec->buf, "245", 4); + //pull_relevance_field(s, head, rec->buf, "530", 1); + pull_relevance_field(s, head, rec->buf, "630", 1); + pull_relevance_field(s, head, rec->buf, "650", 1); + pull_relevance_field(s, head, rec->buf, "700", 1); + relevance_donerecord(s->relevance, head); +} + +static struct record *ingest_record(struct client *cl, char *buf, int len) +{ + struct session *se = cl->session; + struct record *res; + struct record *head; + const char *recbuf; + + wrbuf_rewind(se->wrbuf); + yaz_marc_xml(global_parameters.yaz_marc, YAZ_MARC_LINE); + if (yaz_marc_decode_wrbuf(global_parameters.yaz_marc, buf, len, se->wrbuf) < 0) + { + yaz_log(YLOG_WARN, "Failed to decode MARC record"); + return 0; + } + wrbuf_putc(se->wrbuf, '\0'); + recbuf = wrbuf_buf(se->wrbuf); + + res = nmem_malloc(se->nmem, sizeof(struct record)); + res->buf = nmem_strdup(se->nmem, recbuf); + + extract_subject(se, res->buf); + + res->title = extract_title(se, res->buf); + res->merge_key = extract_mergekey(se, res->buf); + if (!res->merge_key) + return 0; + res->client = cl; + res->next_cluster = 0; + res->target_offset = -1; + res->term_frequency_vec = 0; + + head = reclist_insert(se->reclist, res); + + pull_relevance_keys(se, head, res); + + se->total_records++; + + return res; +} + +static void ingest_records(struct client *cl, Z_Records *r) +{ + struct record *rec; + struct session *s = cl->session; + Z_NamePlusRecordList *rlist; + int i; + + if (r->which != Z_Records_DBOSD) + return; + rlist = r->u.databaseOrSurDiagnostics; + for (i = 0; i < rlist->num_records; i++) + { + Z_NamePlusRecord *npr = rlist->records[i]; + Z_External *e; + char *buf; + int len; + + if (npr->which != Z_NamePlusRecord_databaseRecord) + { + yaz_log(YLOG_WARN, "Unexpected record type, probably diagnostic"); + continue; + } + e = npr->u.databaseRecord; + if (e->which != Z_External_octet) + { + yaz_log(YLOG_WARN, "Unexpected external branch, probably BER"); + continue; + } + buf = (char*) e->u.octet_aligned->buf; + len = e->u.octet_aligned->len; + + rec = ingest_record(cl, buf, len); + if (!rec) + continue; + } + if (s->watchlist[SESSION_WATCH_RECORDS].fun && rlist->num_records) + session_alert_watch(s, SESSION_WATCH_RECORDS); +} + +static void do_presentResponse(IOCHAN i, Z_APDU *a) +{ + struct connection *co = iochan_getdata(i); + struct client *cl = co->client; + Z_PresentResponse *r = a->u.presentResponse; + + if (r->records) { + Z_Records *recs = r->records; + if (recs->which == Z_Records_NSD) + { + yaz_log(YLOG_WARN, "Non-surrogate diagnostic"); + cl->diagnostic = *recs->u.nonSurrogateDiagnostic->condition; + cl->state = Client_Error; + } + } + + if (!*r->presentStatus && cl->state != Client_Error) + { + yaz_log(YLOG_DEBUG, "Good Present response"); + cl->records += *r->numberOfRecordsReturned; + ingest_records(cl, r->records); + cl->state = Client_Idle; + } + else if (*r->presentStatus) + { + yaz_log(YLOG_WARN, "Bad Present response"); + cl->state = Client_Error; + } +} + +static void handler(IOCHAN i, int event) +{ + struct connection *co = iochan_getdata(i); + struct client *cl = co->client; + struct session *se = 0; + + if (cl) + se = cl->session; + else + { + yaz_log(YLOG_WARN, "Destroying orphan connection (fix me?)"); + connection_destroy(co); + return; + } + + if (co->state == Conn_Connecting && event & EVENT_OUTPUT) + { + int errcode; + socklen_t errlen = sizeof(errcode); + + if (getsockopt(cs_fileno(co->link), SOL_SOCKET, SO_ERROR, &errcode, + &errlen) < 0 || errcode != 0) + { + client_fatal(cl); + return; + } + else + { + yaz_log(YLOG_DEBUG, "Connect OK"); + co->state = Conn_Open; + if (cl) + cl->state = Client_Connected; + } + } + + else if (event & EVENT_INPUT) + { + int len = cs_get(co->link, &co->ibuf, &co->ibufsize); + + if (len < 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "Error reading from Z server"); + connection_destroy(co); + return; + } + else if (len == 0) + { + yaz_log(YLOG_WARN, "EOF reading from Z server"); + connection_destroy(co); + return; + } + else if (len > 1) // We discard input if we have no connection + { + co->state = Conn_Open; + + if (cl && (cl->requestid == se->requestid || cl->state == Client_Initializing)) + { + Z_APDU *a; + + odr_reset(global_parameters.odr_in); + odr_setbuf(global_parameters.odr_in, co->ibuf, len, 0); + if (!z_APDU(global_parameters.odr_in, &a, 0, 0)) + { + client_fatal(cl); + return; + } + switch (a->which) + { + case Z_APDU_initResponse: + do_initResponse(i, a); + break; + case Z_APDU_searchResponse: + do_searchResponse(i, a); + break; + case Z_APDU_presentResponse: + do_presentResponse(i, a); + break; + default: + yaz_log(YLOG_WARN, "Unexpected result from server"); + client_fatal(cl); + return; + } + // We aren't expecting staggered output from target + // if (cs_more(t->link)) + // iochan_setevent(i, EVENT_INPUT); + } + else // we throw away response and go to idle mode + { + yaz_log(YLOG_DEBUG, "Ignoring result of expired operation"); + cl->state = Client_Idle; + } + } + /* if len==1 we do nothing but wait for more input */ + } + + if (cl->state == Client_Connected) { + send_init(i); + } + + if (cl->state == Client_Idle) + { + if (cl->requestid != se->requestid && *se->query) { + send_search(i); + } + else if (cl->hits > 0 && cl->records < global_parameters.toget && + cl->records < cl->hits) { + send_present(i); + } + } +} + +// Disassociate connection from client +static void connection_release(struct connection *co) +{ + struct client *cl = co->client; + + yaz_log(YLOG_DEBUG, "Connection release %s", co->host->hostport); + if (!cl) + return; + cl->connection = 0; + co->client = 0; +} + +// Close connection and recycle structure +static void connection_destroy(struct connection *co) +{ + struct host *h = co->host; + cs_close(co->link); + iochan_destroy(co->iochan); + + yaz_log(YLOG_DEBUG, "Connection destroy %s", co->host->hostport); + if (h->connections == co) + h->connections = co->next; + else + { + struct connection *pco; + for (pco = h->connections; pco && pco->next != co; pco = pco->next) + ; + if (pco) + pco->next = co->next; + else + abort(); + } + if (co->client) + { + if (co->client->state != Client_Idle) + co->client->state = Client_Disconnected; + co->client->connection = 0; + } + co->next = connection_freelist; + connection_freelist = co; +} + +// Creates a new connection for client, associated with the host of +// client's database +static struct connection *connection_create(struct client *cl) +{ + struct connection *new; + COMSTACK link; + int res; + void *addr; + + yaz_log(YLOG_DEBUG, "Connection create %s", cl->database->url); + if (!(link = cs_create(tcpip_type, 0, PROTO_Z3950))) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "Failed to create comstack"); + exit(1); + } + + if (!(addr = cs_straddr(link, cl->database->host->ipport))) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "Lookup of IP address failed?"); + return 0; + } + + res = cs_connect(link, addr); + if (res < 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "cs_connect %s", cl->database->url); + return 0; + } + + if ((new = connection_freelist)) + connection_freelist = new->next; + else + { + new = xmalloc(sizeof (struct connection)); + new->ibuf = 0; + new->ibufsize = 0; + } + new->state = Conn_Connecting; + new->host = cl->database->host; + new->next = new->host->connections; + new->host->connections = new; + new->client = cl; + cl->connection = new; + new->link = link; + + new->iochan = iochan_create(cs_fileno(link), handler, 0); + iochan_setdata(new->iochan, new); + new->iochan->next = channel_list; + channel_list = new->iochan; + return new; +} + +// Close connection and set state to error +static void client_fatal(struct client *cl) +{ + yaz_log(YLOG_WARN, "Fatal error from %s", cl->database->url); + connection_destroy(cl->connection); + cl->state = Client_Error; +} + +// Ensure that client has a connection associated +static int client_prep_connection(struct client *cl) +{ + struct connection *co; + struct session *se = cl->session; + struct host *host = cl->database->host; + + co = cl->connection; + + yaz_log(YLOG_DEBUG, "Client prep %s", cl->database->url); + + if (!co) + { + // See if someone else has an idle connection + // We should look at timestamps here to select the longest-idle connection + for (co = host->connections; co; co = co->next) + if (co->state == Conn_Open && (!co->client || co->client->session != se)) + break; + if (co) + { + connection_release(co); + cl->connection = co; + co->client = cl; + } + else + co = connection_create(cl); + } + if (co) + { + if (co->state == Conn_Connecting) + cl->state = Client_Connecting; + else if (co->state == Conn_Open) + { + if (cl->state == Client_Error || cl->state == Client_Disconnected) + cl->state = Client_Idle; + } + iochan_setflag(co->iochan, EVENT_OUTPUT); + return 1; + } + else + return 0; +} + +void load_simpletargets(const char *fn) +{ + FILE *f = fopen(fn, "r"); + char line[256]; + + if (!f) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "open %s", fn); + exit(1); + } + + while (fgets(line, 255, f)) + { + char *url, *db; + struct host *host; + struct database *database; + + if (strncmp(line, "target ", 7)) + continue; + url = line + 7; + url[strlen(url) - 1] = '\0'; + yaz_log(YLOG_DEBUG, "Target: %s", url); + if ((db = strchr(url, '/'))) + *(db++) = '\0'; + else + db = "Default"; + + for (host = hosts; host; host = host->next) + if (!strcmp(url, host->hostport)) + break; + if (!host) + { + struct addrinfo *addrinfo, hints; + char *port; + char ipport[128]; + unsigned char addrbuf[4]; + int res; + + host = xmalloc(sizeof(struct host)); + host->hostport = xstrdup(url); + host->connections = 0; + + if ((port = strchr(url, ':'))) + *(port++) = '\0'; + else + port = "210"; + + hints.ai_flags = 0; + hints.ai_family = PF_INET; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + hints.ai_addrlen = 0; + hints.ai_addr = 0; + hints.ai_canonname = 0; + hints.ai_next = 0; + // This is not robust code. It assumes that getaddrinfo returns AF_INET + // address. + if ((res = getaddrinfo(url, port, &hints, &addrinfo))) + { + yaz_log(YLOG_WARN, "Failed to resolve %s: %s", url, gai_strerror(res)); + continue; + } + assert(addrinfo->ai_family == PF_INET); + memcpy(addrbuf, &((struct sockaddr_in*)addrinfo->ai_addr)->sin_addr.s_addr, 4); + sprintf(ipport, "%hhd.%hhd.%hhd.%hhd:%s", + addrbuf[0], addrbuf[1], addrbuf[2], addrbuf[3], port); + host->ipport = xstrdup(ipport); + freeaddrinfo(addrinfo); + host->next = hosts; + hosts = host; + } + database = xmalloc(sizeof(struct database)); + database->host = host; + database->url = xmalloc(strlen(url) + strlen(db) + 2); + strcpy(database->url, url); + strcat(database->url, "/"); + strcat(database->url, db); + strcpy(database->databases[0], db); + *database->databases[1] = '\0'; + database->errors = 0; + database->next = databases; + databases = database; + + } + fclose(f); +} + +static void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num) +{ + switch (n->kind) + { + case CCL_RPN_AND: + case CCL_RPN_OR: + case CCL_RPN_NOT: + case CCL_RPN_PROX: + pull_terms(nmem, n->u.p[0], termlist, num); + pull_terms(nmem, n->u.p[1], termlist, num); + break; + case CCL_RPN_TERM: + termlist[(*num)++] = nmem_strdup(nmem, n->u.t.term); + break; + default: // NOOP + break; + } +} + +// Extract terms from query into null-terminated termlist +static int extract_terms(NMEM nmem, char *query, char **termlist) +{ + int error, pos; + struct ccl_rpn_node *n; + int num = 0; + + n = ccl_find_str(global_parameters.ccl_filter, query, &error, &pos); + if (!n) + return -1; + pull_terms(nmem, n, termlist, &num); + termlist[num] = 0; + ccl_rpn_delete(n); + return 0; +} + +static struct client *client_create(void) +{ + struct client *r; + if (client_freelist) + { + r = client_freelist; + client_freelist = client_freelist->next; + } + else + r = xmalloc(sizeof(struct client)); + r->database = 0; + r->connection = 0; + r->session = 0; + r->hits = 0; + r->records = 0; + r->setno = 0; + r->requestid = -1; + r->diagnostic = 0; + r->state = Client_Disconnected; + r->next = 0; + return r; +} + +void client_destroy(struct client *c) +{ + struct session *se = c->session; + if (c == se->clients) + se->clients = c->next; + else + { + struct client *cc; + for (cc = se->clients; cc && cc->next != c; cc = cc->next) + ; + if (cc) + cc->next = c->next; + } + if (c->connection) + connection_release(c->connection); + c->next = client_freelist; + client_freelist = c; +} + +void session_set_watch(struct session *s, int what, session_watchfun fun, void *data) +{ + s->watchlist[what].fun = fun; + s->watchlist[what].data = data; +} + +void session_alert_watch(struct session *s, int what) +{ + if (!s->watchlist[what].fun) + return; + (*s->watchlist[what].fun)(s->watchlist[what].data); + s->watchlist[what].fun = 0; + s->watchlist[what].data = 0; +} + +// This should be extended with parameters to control selection criteria +// Associates a set of clients with a session; +int select_targets(struct session *se) +{ + struct database *db; + int c = 0; + + while (se->clients) + client_destroy(se->clients); + for (db = databases; db; db = db->next) + { + struct client *cl = client_create(); + cl->database = db; + cl->session = se; + cl->next = se->clients; + se->clients = cl; + c++; + } + return c; +} + +char *search(struct session *se, char *query) +{ + int live_channels = 0; + struct client *cl; + + yaz_log(YLOG_DEBUG, "Search"); + + strcpy(se->query, query); + se->requestid++; + nmem_reset(se->nmem); + for (cl = se->clients; cl; cl = cl->next) + { + cl->hits = -1; + cl->records = 0; + cl->diagnostic = 0; + + if (client_prep_connection(cl)) + live_channels++; + } + if (live_channels) + { + char *p[512]; + int maxrecs = live_channels * global_parameters.toget; + se->termlist = termlist_create(se->nmem, maxrecs, 15); + se->reclist = reclist_create(se->nmem, maxrecs); + extract_terms(se->nmem, query, p); + se->relevance = relevance_create(se->nmem, (const char **) p, maxrecs); + se->total_records = se->total_hits = 0; + } + else + return "NOTARGETS"; + + return 0; +} + +void destroy_session(struct session *s) +{ + yaz_log(YLOG_LOG, "Destroying session"); + while (s->clients) + client_destroy(s->clients); + nmem_destroy(s->nmem); + wrbuf_free(s->wrbuf, 1); +} + +struct session *new_session() +{ + int i; + struct session *session = xmalloc(sizeof(*session)); + + yaz_log(YLOG_DEBUG, "New pazpar2 session"); + + session->total_hits = 0; + session->total_records = 0; + session->termlist = 0; + session->reclist = 0; + session->requestid = -1; + session->clients = 0; + session->query[0] = '\0'; + session->nmem = nmem_create(); + session->wrbuf = wrbuf_alloc(); + for (i = 0; i <= SESSION_WATCH_MAX; i++) + { + session->watchlist[i].data = 0; + session->watchlist[i].fun = 0; + } + + select_targets(session); + + return session; +} + +struct hitsbytarget *hitsbytarget(struct session *se, int *count) +{ + static struct hitsbytarget res[1000]; // FIXME MM + struct client *cl; + + *count = 0; + for (cl = se->clients; cl; cl = cl->next) + { + strcpy(res[*count].id, cl->database->host->hostport); + res[*count].hits = cl->hits; + res[*count].records = cl->records; + res[*count].diagnostic = cl->diagnostic; + res[*count].state = client_states[cl->state]; + res[*count].connected = cl->connection ? 1 : 0; + (*count)++; + } + + return res; +} + +struct termlist_score **termlist(struct session *s, int *num) +{ + return termlist_highscore(s->termlist, num); +} + +struct record **show(struct session *s, int start, int *num, int *total, int *sumhits) +{ + struct record **recs = nmem_malloc(s->nmem, *num * sizeof(struct record *)); + int i; + + relevance_prepare_read(s->relevance, s->reclist); + + *total = s->reclist->num_records; + *sumhits = s->total_hits; + + for (i = 0; i < start; i++) + if (!reclist_read_record(s->reclist)) + { + *num = 0; + return 0; + } + + for (i = 0; i < *num; i++) + { + struct record *r = reclist_read_record(s->reclist); + if (!r) + { + *num = i; + break; + } + recs[i] = r; + } + return recs; +} + +void statistics(struct session *se, struct statistics *stat) +{ + struct client *cl; + int count = 0; + + bzero(stat, sizeof(*stat)); + for (cl = se->clients; cl; cl = cl->next) + { + if (!cl->connection) + stat->num_no_connection++; + switch (cl->state) + { + case Client_Connecting: stat->num_connecting++; break; + case Client_Initializing: stat->num_initializing++; break; + case Client_Searching: stat->num_searching++; break; + case Client_Presenting: stat->num_presenting++; break; + case Client_Idle: stat->num_idle++; break; + case Client_Failed: stat->num_failed++; break; + case Client_Error: stat->num_error++; break; + default: break; + } + count++; + } + stat->num_hits = se->total_hits; + stat->num_records = se->total_records; + + stat->num_clients = count; +} + +static CCL_bibset load_cclfile(const char *fn) +{ + CCL_bibset res = ccl_qual_mk(); + if (ccl_qual_fname(res, fn) < 0) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "%s", fn); + exit(1); + } + return res; +} + +int main(int argc, char **argv) +{ + int ret; + char *arg; + int setport = 0; + + if (signal(SIGPIPE, SIG_IGN) < 0) + yaz_log(YLOG_WARN|YLOG_ERRNO, "signal"); + + yaz_log_init(YLOG_DEFAULT_LEVEL, "pazpar2", 0); + + while ((ret = options("c:h:p:C:s:", argv, argc, &arg)) != -2) + { + switch (ret) { + case 'c': + command_init(atoi(arg)); + setport++; + break; + case 'h': + http_init(arg); + setport++; + break; + case 'C': + global_parameters.ccl_filter = load_cclfile(arg); + break; + case 'p': + http_set_proxyaddr(arg); + break; + case 's': + load_simpletargets(arg); + break; + default: + fprintf(stderr, "Usage: pazpar2\n" + " -h [host:]port (REST protocol listener)\n" + " -c cmdport (telnet-style)\n" + " -C cclconfig\n" + " -s simpletargetfile\n" + " -p hostname[:portno] (HTTP proxy)\n"); + exit(1); + } + } + + if (!setport) + { + fprintf(stderr, "Set command port with -h or -c\n"); + exit(1); + } + + global_parameters.ccl_filter = load_cclfile("default.bib"); + global_parameters.yaz_marc = yaz_marc_create(); + yaz_marc_subfield_str(global_parameters.yaz_marc, "\t"); + global_parameters.odr_in = odr_createmem(ODR_DECODE); + global_parameters.odr_out = odr_createmem(ODR_ENCODE); + + event_loop(&channel_list); + + return 0; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/pazpar2.h b/src/pazpar2.h new file mode 100644 index 0000000..8de4b91 --- /dev/null +++ b/src/pazpar2.h @@ -0,0 +1,172 @@ +#ifndef PAZPAR2_H +#define PAZPAR2_H + +struct record; + +#include + +#include +#include +#include +#include +#include "termlists.h" +#include "relevance.h" +#include "eventl.h" + +#define MAX_DATABASES 512 + +struct record { + struct client *client; + int target_offset; + char *buf; + char *merge_key; + char *title; + int relevance; + int *term_frequency_vec; + struct record *next_cluster; +}; + +struct connection; + +// Represents a host (irrespective of databases) +struct host { + char *hostport; + char *ipport; + struct connection *connections; // All connections to this + struct host *next; +}; + +// Represents a (virtual) database on a host +struct database { + struct host *host; + char *url; + char databases[MAX_DATABASES][128]; + int errors; + struct database *next; +}; + +struct client; + +// Represents a physical, reusable connection to a remote Z39.50 host +struct connection { + IOCHAN iochan; + COMSTACK link; + struct host *host; + struct client *client; + char *ibuf; + int ibufsize; + enum { + Conn_Connecting, + Conn_Open, + Conn_Waiting, + } state; + struct connection *next; +}; + +// Represents client state for a connection to one search target +struct client { + struct database *database; + struct connection *connection; + struct session *session; + int hits; + int records; + int setno; + int requestid; // ID of current outstanding request + int diagnostic; + enum client_state + { + Client_Connecting, + Client_Connected, + Client_Idle, + Client_Initializing, + Client_Searching, + Client_Presenting, + Client_Error, + Client_Failed, + Client_Disconnected, + Client_Stopped + } state; + struct client *next; +}; + +#define SESSION_WATCH_RECORDS 0 +#define SESSION_WATCH_MAX 0 + +typedef void (*session_watchfun)(void *data); + +// End-user session +struct session { + struct client *clients; + int requestid; + char query[1024]; + NMEM nmem; // Nmem for each operation (i.e. search) + WRBUF wrbuf; // Wrbuf for scratch(i.e. search) + struct termlist *termlist; + struct relevance *relevance; + struct reclist *reclist; + struct { + void *data; + session_watchfun fun; + } watchlist[SESSION_WATCH_MAX + 1]; + int total_hits; + int total_records; +}; + +struct statistics { + int num_clients; + int num_no_connection; + int num_connecting; + int num_initializing; + int num_searching; + int num_presenting; + int num_idle; + int num_failed; + int num_error; + int num_hits; + int num_records; +}; + +struct hitsbytarget { + char id[256]; + int hits; + int diagnostic; + int records; + char* state; + int connected; +}; + +struct parameters { + int timeout; /* operations timeout, in seconds */ + char implementationId[128]; + char implementationName[128]; + char implementationVersion[128]; + int target_timeout; // seconds + int session_timeout; + int toget; + int chunk; + CCL_bibset ccl_filter; + yaz_marc_t yaz_marc; + ODR odr_out; + ODR odr_in; +}; + +struct hitsbytarget *hitsbytarget(struct session *s, int *count); +int select_targets(struct session *se); +struct session *new_session(); +void destroy_session(struct session *s); +int load_targets(struct session *s, const char *fn); +void statistics(struct session *s, struct statistics *stat); +char *search(struct session *s, char *query); +struct record **show(struct session *s, int start, int *num, int *total, int *sumhits); +struct termlist_score **termlist(struct session *s, int *num); +void session_set_watch(struct session *s, int what, session_watchfun fun, void *data); + +#endif + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/reclists.c b/src/reclists.c new file mode 100644 index 0000000..40f1d76 --- /dev/null +++ b/src/reclists.c @@ -0,0 +1,110 @@ +/* + * $Id: reclists.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ + */ + +#include + +#include + +#include "pazpar2.h" +#include "reclists.h" + +struct reclist_bucket +{ + struct record *record; + struct reclist_bucket *next; +}; + +struct record *reclist_read_record(struct reclist *l) +{ + if (l->pointer < l->num_records) + return l->flatlist[l->pointer++]; + else + return 0; +} + +void reclist_rewind(struct reclist *l) +{ + l->pointer = 0; +} + +// Jenkins one-at-a-time hash (from wikipedia) +static unsigned int hash(const unsigned char *key) +{ + unsigned int hash = 0; + + while (*key) + { + hash += *(key++); + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +struct reclist *reclist_create(NMEM nmem, int numrecs) +{ + int hashsize = 1; + struct reclist *res; + + assert(numrecs); + while (hashsize < numrecs) + hashsize <<= 1; + res = nmem_malloc(nmem, sizeof(struct reclist)); + res->hashtable = nmem_malloc(nmem, hashsize * sizeof(struct reclist_bucket*)); + bzero(res->hashtable, hashsize * sizeof(struct reclist_bucket*)); + res->hashtable_size = hashsize; + res->nmem = nmem; + res->hashmask = hashsize - 1; // Creates a bitmask + + res->num_records = 0; + res->flatlist = nmem_malloc(nmem, numrecs * sizeof(struct record*)); + res->flatlist_size = numrecs; + + return res; +} + +struct record *reclist_insert(struct reclist *l, struct record *record) +{ + unsigned int bucket; + struct reclist_bucket **p; + struct record *head; + + bucket = hash((unsigned char*) record->merge_key) & l->hashmask; + for (p = &l->hashtable[bucket]; *p; p = &(*p)->next) + { + // We found a matching record. Merge them + if (!strcmp(record->merge_key, (*p)->record->merge_key)) + { + struct record *existing = (*p)->record; + record->next_cluster = existing->next_cluster; + existing->next_cluster = record; + head = existing; + break; + } + } + if (!*p) // We made it to the end of the bucket without finding match + { + struct reclist_bucket *new = nmem_malloc(l->nmem, + sizeof(struct reclist_bucket)); + new->record = record; + record->next_cluster = 0; + new->next = 0; + *p = new; + l->flatlist[l->num_records++] = record; + head = record; + } + return head; +} + + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/reclists.h b/src/reclists.h new file mode 100644 index 0000000..f9d38c3 --- /dev/null +++ b/src/reclists.h @@ -0,0 +1,23 @@ +#ifndef RECLISTS_H +#define RECLISTS_H + +struct reclist +{ + struct reclist_bucket **hashtable; + int hashtable_size; + int hashmask; + + struct record **flatlist; + int flatlist_size; + int num_records; + int pointer; + + NMEM nmem; +}; + +struct reclist *reclist_create(NMEM, int numrecs); +struct record * reclist_insert(struct reclist *tl, struct record *record); +struct record *reclist_read_record(struct reclist *l); +void reclist_rewind(struct reclist *l); + +#endif diff --git a/src/relevance.c b/src/relevance.c new file mode 100644 index 0000000..2823eff --- /dev/null +++ b/src/relevance.c @@ -0,0 +1,247 @@ +/* + * $Id: relevance.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ + */ + +#include +#include +#include + +#include "relevance.h" +#include "pazpar2.h" + +struct relevance +{ + int *doc_frequency_vec; + int vec_len; + struct word_trie *wt; + NMEM nmem; +}; + +// We use this data structure to recognize terms in input records, +// and map them to record term vectors for counting. +struct word_trie +{ + struct + { + struct word_trie *child; + int termno; + } list[26]; +}; + +static struct word_trie *create_word_trie_node(NMEM nmem) +{ + struct word_trie *res = nmem_malloc(nmem, sizeof(struct word_trie)); + int i; + for (i = 0; i < 26; i++) + { + res->list[i].child = 0; + res->list[i].termno = -1; + } + return res; +} + +static void word_trie_addterm(NMEM nmem, struct word_trie *n, const char *term, int num) +{ + while (*term) { + int c = tolower(*term); + if (c < 'a' || c > 'z') + term++; + else + { + c -= 'a'; + if (!*(++term)) + n->list[c].termno = num; + else + { + if (!n->list[c].child) + { + struct word_trie *new = create_word_trie_node(nmem); + n->list[c].child = new; + } + word_trie_addterm(nmem, n->list[c].child, term, num); + } + break; + } + } +} + +#define raw_char(c) (((c) >= 'a' && (c) <= 'z') ? (c) - 'a' : -1) + +static int word_trie_match(struct word_trie *t, const char *word, int len, int *skipped) +{ + int c = raw_char(tolower(*word)); + + if (!len) + return 0; + + word++; len--; + (*skipped)++; + if (!len || raw_char(*word) < 0) + { + if (t->list[c].termno > 0) + return t->list[c].termno; + else + return 0; + } + else + { + if (t->list[c].child) + { + return word_trie_match(t->list[c].child, word, len, skipped); + } + else + return 0; + } + +} + + +static struct word_trie *build_word_trie(NMEM nmem, const char **terms) +{ + struct word_trie *res = create_word_trie_node(nmem); + const char **p; + int i; + + for (i = 1, p = terms; *p; p++, i++) + word_trie_addterm(nmem, res, *p, i); + return res; +} + +struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs) +{ + struct relevance *res = nmem_malloc(nmem, sizeof(struct relevance)); + const char **p; + int i; + + for (p = terms, i = 0; *p; p++, i++) + ; + res->vec_len = ++i; + res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int)); + bzero(res->doc_frequency_vec, res->vec_len * sizeof(int)); + res->nmem = nmem; + res->wt = build_word_trie(nmem, terms); + return res; +} + +void relevance_newrec(struct relevance *r, struct record *rec) +{ + if (!rec->term_frequency_vec) + { + rec->term_frequency_vec = nmem_malloc(r->nmem, r->vec_len * sizeof(int)); + bzero(rec->term_frequency_vec, r->vec_len * sizeof(int)); + } +} + + +// FIXME. The definition of a word is crude here.. should support +// some form of localization mechanism? +void relevance_countwords(struct relevance *r, struct record *head, + const char *words, int len, int multiplier) +{ + while (len) + { + char c; + int res; + int skipped; + while (len && (c = raw_char(tolower(*words))) < 0) + { + words++; + len--; + } + if (!len) + return; + skipped = 0; + if ((res = word_trie_match(r->wt, words, len, &skipped))) + { + words += skipped; + len -= skipped; + head->term_frequency_vec[res] += multiplier; + } + else + { + while (len && (c = raw_char(tolower(*words))) >= 0) + { + words++; + len--; + } + } + head->term_frequency_vec[0]++; + } +} + +void relevance_donerecord(struct relevance *r, struct record *head) +{ + int i; + + for (i = 1; i < r->vec_len; i++) + if (head->term_frequency_vec[i] > 0) + r->doc_frequency_vec[i]++; + + r->doc_frequency_vec[0]++; +} + +#ifdef FLOAT_REL +static int comp(const void *p1, const void *p2) +{ + float res; + struct record **r1 = (struct record **) p1; + struct record **r2 = (struct record **) p2; + res = (*r2)->relevance - (*r1)->relevance; + if (res > 0) + return 1; + else if (res < 0) + return -1; + else + return 0; +} +#else +static int comp(const void *p1, const void *p2) +{ + struct record **r1 = (struct record **) p1; + struct record **r2 = (struct record **) p2; + return (*r2)->relevance - (*r1)->relevance; +} +#endif + +// Prepare for a relevance-sorted read of up to num entries +void relevance_prepare_read(struct relevance *rel, struct reclist *reclist) +{ + int i; + float *idfvec = xmalloc(rel->vec_len * sizeof(float)); + + // Calculate document frequency vector for each term. + for (i = 1; i < rel->vec_len; i++) + { + if (!rel->doc_frequency_vec[i]) + idfvec[i] = 0; + else + idfvec[i] = log((float) rel->doc_frequency_vec[0] / rel->doc_frequency_vec[i]); + } + // Calculate relevance for each document + for (i = 0; i < reclist->num_records; i++) + { + int t; + struct record *rec = reclist->flatlist[i]; + float relevance; + relevance = 0; + for (t = 1; t < rel->vec_len; t++) + { + float termfreq; + if (!rec->term_frequency_vec[0]) + break; + termfreq = (float) rec->term_frequency_vec[t] / rec->term_frequency_vec[0]; + relevance += termfreq * idfvec[t]; + } + rec->relevance = (int) (relevance * 100000); + } + qsort(reclist->flatlist, reclist->num_records, sizeof(struct record*), comp); + reclist->pointer = 0; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/relevance.h b/src/relevance.h new file mode 100644 index 0000000..38c3d9c --- /dev/null +++ b/src/relevance.h @@ -0,0 +1,27 @@ +#ifndef RELEVANCE_H +#define RELEVANCE_H + +#include + +#include "pazpar2.h" +#include "reclists.h" + +struct relevance; + +struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs); +void relevance_newrec(struct relevance *r, struct record *rec); +void relevance_countwords(struct relevance *r, struct record *rec, + const char *words, int len, int multiplier); +void relevance_donerecord(struct relevance *r, struct record *rec); + +void relevance_prepare_read(struct relevance *rel, struct reclist *rec); + +#endif + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/termlists.c b/src/termlists.c new file mode 100644 index 0000000..7f32e3d --- /dev/null +++ b/src/termlists.c @@ -0,0 +1,163 @@ +/* + * $Id: termlists.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ + */ + +#include +#include +#include + +#include "termlists.h" + +// Discussion: +// As terms are found in incoming records, they are added to (or updated in) a +// Hash table. When term records are updated, a frequency value is updated. At +// the same time, a highscore is maintained for the most frequent terms. + +struct termlist_bucket +{ + struct termlist_score term; + struct termlist_bucket *next; +}; + +struct termlist +{ + struct termlist_bucket **hashtable; + int hashtable_size; + int hashmask; + + struct termlist_score **highscore; + int highscore_size; + int highscore_num; + int highscore_min; + + NMEM nmem; +}; + + +// Jenkins one-at-a-time hash (from wikipedia) +static unsigned int hash(const unsigned char *key) +{ + unsigned int hash = 0; + + while (*key) + { + hash += *(key++); + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +struct termlist *termlist_create(NMEM nmem, int numterms, int highscore_size) +{ + int hashsize = 1; + int halfnumterms; + struct termlist *res; + + // Calculate a hash size smallest power of 2 larger than 50% of expected numterms + halfnumterms = numterms >> 1; + if (halfnumterms < 0) + halfnumterms = 1; + while (hashsize < halfnumterms) + hashsize <<= 1; + res = nmem_malloc(nmem, sizeof(struct termlist)); + res->hashtable = nmem_malloc(nmem, hashsize * sizeof(struct termlist_bucket*)); + bzero(res->hashtable, hashsize * sizeof(struct termlist_bucket*)); + res->hashtable_size = hashsize; + res->nmem = nmem; + res->hashmask = hashsize - 1; // Creates a bitmask + + res->highscore = nmem_malloc(nmem, highscore_size * sizeof(struct termlist_score *)); + res->highscore_size = highscore_size; + res->highscore_num = 0; + res->highscore_min = 0; + + return res; +} + +static void update_highscore(struct termlist *tl, struct termlist_score *t) +{ + int i; + int smallest; + int me = -1; + + if (t->frequency < tl->highscore_min) + return; + + smallest = 0; + for (i = 0; i < tl->highscore_num; i++) + { + if (tl->highscore[i]->frequency < tl->highscore[smallest]->frequency) + smallest = i; + if (tl->highscore[i] == t) + me = i; + } + if (tl->highscore_num) + tl->highscore_min = tl->highscore[smallest]->frequency; + if (me >= 0) + return; + if (tl->highscore_num < tl->highscore_size) + { + tl->highscore[tl->highscore_num++] = t; + if (t->frequency < tl->highscore_min) + tl->highscore_min = t->frequency; + } + else + { + if (t->frequency > tl->highscore[smallest]->frequency) + { + tl->highscore[smallest] = t; + } + } +} + +void termlist_insert(struct termlist *tl, const char *term) +{ + unsigned int bucket; + struct termlist_bucket **p; + + bucket = hash((unsigned char *)term) & tl->hashmask; + for (p = &tl->hashtable[bucket]; *p; p = &(*p)->next) + { + if (!strcmp(term, (*p)->term.term)) + { + (*p)->term.frequency++; + update_highscore(tl, &((*p)->term)); + break; + } + } + if (!*p) // We made it to the end of the bucket without finding match + { + struct termlist_bucket *new = nmem_malloc(tl->nmem, + sizeof(struct termlist_bucket)); + new->term.term = nmem_strdup(tl->nmem, term); + new->term.frequency = 1; + new->next = 0; + *p = new; + update_highscore(tl, &new->term); + } +} + +static int compare(const void *s1, const void *s2) +{ + struct termlist_score **p1 = (struct termlist_score**) s1, **p2 = (struct termlist_score **) s2; + return (*p2)->frequency - (*p1)->frequency; +} + +struct termlist_score **termlist_highscore(struct termlist *tl, int *len) +{ + qsort(tl->highscore, tl->highscore_num, sizeof(struct termlist_score*), compare); + *len = tl->highscore_num; + return tl->highscore; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/termlists.h b/src/termlists.h new file mode 100644 index 0000000..2dbee2d --- /dev/null +++ b/src/termlists.h @@ -0,0 +1,26 @@ +#ifndef TERMLISTS_H +#define TERMLISTS_H + +#include + +struct termlist_score +{ + char *term; + int frequency; +}; + +struct termlist; + +struct termlist *termlist_create(NMEM nmem, int numterms, int highscore_size); +void termlist_insert(struct termlist *tl, const char *term); +struct termlist_score **termlist_highscore(struct termlist *tl, int *len); + +#endif + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/util.c b/src/util.c new file mode 100644 index 0000000..a8547fa --- /dev/null +++ b/src/util.c @@ -0,0 +1,11 @@ +/* $Id: util.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ */ + +#include +#include + +void die(char *string, char *add) +{ + yaz_log(YLOG_FATAL, "Fatal error: %s (%s)", string, add ? add : ""); + abort(); +} + diff --git a/src/util.h b/src/util.h new file mode 100644 index 0000000..0bae58e --- /dev/null +++ b/src/util.h @@ -0,0 +1,8 @@ +/* $Id: util.h,v 1.1 2006-12-20 20:47:16 quinn Exp $ */ + +#ifndef UTIL_H +#define UTIL_H + +void die(char *string, char *add); + +#endif diff --git a/termlists.c b/termlists.c deleted file mode 100644 index c9a9442..0000000 --- a/termlists.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * $Id: termlists.c,v 1.3 2006-12-08 21:40:58 quinn Exp $ - */ - -#include -#include -#include - -#include "termlists.h" - -// Discussion: -// As terms are found in incoming records, they are added to (or updated in) a -// Hash table. When term records are updated, a frequency value is updated. At -// the same time, a highscore is maintained for the most frequent terms. - -struct termlist_bucket -{ - struct termlist_score term; - struct termlist_bucket *next; -}; - -struct termlist -{ - struct termlist_bucket **hashtable; - int hashtable_size; - int hashmask; - - struct termlist_score **highscore; - int highscore_size; - int highscore_num; - int highscore_min; - - NMEM nmem; -}; - - -// Jenkins one-at-a-time hash (from wikipedia) -static unsigned int hash(const unsigned char *key) -{ - unsigned int hash = 0; - - while (*key) - { - hash += *(key++); - hash += (hash << 10); - hash ^= (hash >> 6); - } - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - return hash; -} - -struct termlist *termlist_create(NMEM nmem, int numterms, int highscore_size) -{ - int hashsize = 1; - int halfnumterms; - struct termlist *res; - - // Calculate a hash size smallest power of 2 larger than 50% of expected numterms - halfnumterms = numterms >> 1; - if (halfnumterms < 0) - halfnumterms = 1; - while (hashsize < halfnumterms) - hashsize <<= 1; - res = nmem_malloc(nmem, sizeof(struct termlist)); - res->hashtable = nmem_malloc(nmem, hashsize * sizeof(struct termlist_bucket*)); - bzero(res->hashtable, hashsize * sizeof(struct termlist_bucket*)); - res->hashtable_size = hashsize; - res->nmem = nmem; - res->hashmask = hashsize - 1; // Creates a bitmask - - res->highscore = nmem_malloc(nmem, highscore_size * sizeof(struct termlist_score *)); - res->highscore_size = highscore_size; - res->highscore_num = 0; - res->highscore_min = 0; - - return res; -} - -static void update_highscore(struct termlist *tl, struct termlist_score *t) -{ - int i; - int smallest; - int me = -1; - - if (t->frequency < tl->highscore_min) - return; - - smallest = 0; - for (i = 0; i < tl->highscore_num; i++) - { - if (tl->highscore[i]->frequency < tl->highscore[smallest]->frequency) - smallest = i; - if (tl->highscore[i] == t) - me = i; - } - if (tl->highscore_num) - tl->highscore_min = tl->highscore[smallest]->frequency; - if (me >= 0) - return; - if (tl->highscore_num < tl->highscore_size) - { - tl->highscore[tl->highscore_num++] = t; - if (t->frequency < tl->highscore_min) - tl->highscore_min = t->frequency; - } - else - { - if (t->frequency > tl->highscore[smallest]->frequency) - { - tl->highscore[smallest] = t; - } - } -} - -void termlist_insert(struct termlist *tl, const char *term) -{ - unsigned int bucket; - struct termlist_bucket **p; - - bucket = hash((unsigned char *)term) & tl->hashmask; - for (p = &tl->hashtable[bucket]; *p; p = &(*p)->next) - { - if (!strcmp(term, (*p)->term.term)) - { - (*p)->term.frequency++; - update_highscore(tl, &((*p)->term)); - break; - } - } - if (!*p) // We made it to the end of the bucket without finding match - { - struct termlist_bucket *new = nmem_malloc(tl->nmem, - sizeof(struct termlist_bucket)); - new->term.term = nmem_strdup(tl->nmem, term); - new->term.frequency = 1; - new->next = 0; - *p = new; - update_highscore(tl, &new->term); - } -} - -static int compare(const void *s1, const void *s2) -{ - struct termlist_score **p1 = (struct termlist_score**) s1, **p2 = (struct termlist_score **) s2; - return (*p2)->frequency - (*p1)->frequency; -} - -struct termlist_score **termlist_highscore(struct termlist *tl, int *len) -{ - qsort(tl->highscore, tl->highscore_num, sizeof(struct termlist_score*), compare); - *len = tl->highscore_num; - return tl->highscore; -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/termlists.h b/termlists.h deleted file mode 100644 index 2dbee2d..0000000 --- a/termlists.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef TERMLISTS_H -#define TERMLISTS_H - -#include - -struct termlist_score -{ - char *term; - int frequency; -}; - -struct termlist; - -struct termlist *termlist_create(NMEM nmem, int numterms, int highscore_size); -void termlist_insert(struct termlist *tl, const char *term); -struct termlist_score **termlist_highscore(struct termlist *tl, int *len); - -#endif - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/test.pz b/test.pz deleted file mode 100644 index 0fb10c8..0000000 --- a/test.pz +++ /dev/null @@ -1 +0,0 @@ -target localhost:9999/Default diff --git a/util.c b/util.c deleted file mode 100644 index fc67cfa..0000000 --- a/util.c +++ /dev/null @@ -1,11 +0,0 @@ -/* $Id: util.c,v 1.2 2006-11-18 05:00:38 quinn Exp $ */ - -#include -#include - -void die(char *string, char *add) -{ - yaz_log(YLOG_FATAL, "Fatal error: %s (%s)", string, add ? add : ""); - abort(); -} - diff --git a/util.h b/util.h deleted file mode 100644 index 10f7981..0000000 --- a/util.h +++ /dev/null @@ -1,8 +0,0 @@ -/* $Id: util.h,v 1.1 2006-11-14 20:44:38 quinn Exp $ */ - -#ifndef UTIL_H -#define UTIL_H - -void die(char *string, char *add); - -#endif