From d7967e62bb987396444404f5e6ed59bbda5f5131 Mon Sep 17 00:00:00 2001 From: Sebastian Hammer Date: Fri, 24 Nov 2006 20:29:07 +0000 Subject: [PATCH] Added subject facet browsing, beginning of relevance ranking --- Makefile | 5 +- http.c | 529 +++++++++++++++++++++++++++++++++++++++++++++++++++----- http.h | 43 +++-- http_command.c | 191 +++++++++++++++++++- pazpar2.c | 96 +++++++--- pazpar2.h | 8 +- reclists.c | 122 +++++++++++++ reclists.h | 11 ++ relevance.c | 136 +++++++++++++++ relevance.h | 27 +++ termlists.c | 165 ++++++++++++++++++ termlists.h | 26 +++ 12 files changed, 1266 insertions(+), 93 deletions(-) create mode 100644 reclists.c create mode 100644 reclists.h create mode 100644 relevance.c create mode 100644 relevance.h create mode 100644 termlists.c create mode 100644 termlists.h diff --git a/Makefile b/Makefile index 8e2b9a5..935ab70 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # ParaZ. Copyright (C) 2000-2004, Index Data ApS # All rights reserved. -# $Id: Makefile,v 1.2 2006-11-21 18:46:43 quinn Exp $ +# $Id: Makefile,v 1.3 2006-11-24 20:29:07 quinn Exp $ SHELL=/bin/sh @@ -11,7 +11,8 @@ YAZLIBS=`$(YAZCONF) --libs` YAZCFLAGS=`$(YAZCONF) --cflags` PROG=pazpar2 -PROGO=pazpar2.o eventl.o util.o command.o http.o http_command.o +PROGO=pazpar2.o eventl.o util.o command.o http.o http_command.o termlists.o \ + reclists.o relevance.o all: $(PROG) diff --git a/http.c b/http.c index 9de5c05..f2d6906 100644 --- a/http.c +++ b/http.c @@ -1,5 +1,5 @@ /* - * $Id: http.c,v 1.1 2006-11-21 18:46:43 quinn Exp $ + * $Id: http.c,v 1.2 2006-11-24 20:29:07 quinn Exp $ */ #include @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include @@ -23,8 +25,138 @@ #include "http.h" #include "http_command.h" +static void proxy_io(IOCHAN i, int event); + extern IOCHAN channel_list; +static struct sockaddr_in *proxy_addr = 0; // If this is set, we proxy normal HTTP requests +static char proxy_url[256] = ""; +static struct http_buf *http_buf_freelist = 
0; + +static struct http_buf *http_buf_create() +{ + struct http_buf *r; + + if (http_buf_freelist) + { + r = http_buf_freelist; + http_buf_freelist = http_buf_freelist->next; + } + else + r = xmalloc(sizeof(struct http_buf)); + r->offset = 0; + r->len = 0; + r->next = 0; + return r; +} + +static void http_buf_destroy(struct http_buf *b) +{ + b->next = http_buf_freelist; + http_buf_freelist = b; +} + +static void http_buf_destroy_queue(struct http_buf *b) +{ + struct http_buf *p; + while (b) + { + p = b->next; + http_buf_destroy(b); + b = p; + } +} + +#ifdef GAGA +// Calculate length of chain +static int http_buf_len(struct http_buf *b) +{ + int sum = 0; + for (; b; b = b->next) + sum += b->len; + return sum; +} +#endif + +static struct http_buf *http_buf_bybuf(char *b, int len) +{ + struct http_buf *res = 0; + struct http_buf **p = &res; + + while (len) + { + *p = http_buf_create(); + int tocopy = len; + if (tocopy > HTTP_BUF_SIZE) + tocopy = HTTP_BUF_SIZE; + memcpy((*p)->buf, b, tocopy); + (*p)->len = tocopy; + len -= tocopy; + b += tocopy; + p = &(*p)->next; + } + return res; +} + +// Add a (chain of) buffers to the end of an existing queue. +static void http_buf_enqueue(struct http_buf **queue, struct http_buf *b) +{ + while (*queue) + queue = &(*queue)->next; + *queue = b; +} + +static struct http_buf *http_buf_bywrbuf(WRBUF wrbuf) +{ + return http_buf_bybuf(wrbuf_buf(wrbuf), wrbuf_len(wrbuf)); +} + +// Non-destructively collapse chain of buffers into a string (max *len) +// Return +static int http_buf_peek(struct http_buf *b, char *buf, int len) +{ + int rd = 0; + while (b && rd < len) + { + int toread = len - rd; + if (toread > b->len) + toread = b->len; + memcpy(buf + rd, b->buf + b->offset, toread); + rd += toread; + b = b->next; + } + buf[rd] = '\0'; + return rd; +} + +// Ddestructively munch up to len from head of queue. 
+static int http_buf_read(struct http_buf **b, char *buf, int len) +{ + int rd = 0; + while ((*b) && rd < len) + { + int toread = len - rd; + if (toread > (*b)->len) + toread = (*b)->len; + memcpy(buf + rd, (*b)->buf + (*b)->offset, toread); + rd += toread; + if (toread < (*b)->len) + { + (*b)->len -= toread; + (*b)->offset += toread; + break; + } + else + { + struct http_buf *n = (*b)->next; + http_buf_destroy(*b); + *b = n; + } + } + buf[rd] = '\0'; + return rd; +} + void http_addheader(struct http_response *r, const char *name, const char *value) { struct http_channel *c = r->channel; @@ -35,16 +167,18 @@ void http_addheader(struct http_response *r, const char *name, const char *value r->headers = h; } -char *argbyname(struct http_request *r, char *name) +char *http_argbyname(struct http_request *r, char *name) { struct http_argument *p; + if (!name) + return 0; for (p = r->arguments; p; p = p->next) if (!strcmp(p->name, name)) return p->value; return 0; } -char *headerbyname(struct http_request *r, char *name) +char *http_headerbyname(struct http_request *r, char *name) { struct http_header *p; for (p = r->headers; p; p = p->next) @@ -67,10 +201,13 @@ struct http_response *http_create_response(struct http_channel *c) // Check if we have a complete request. 
Return 0 or length (including trailing newline) // FIXME: Does not deal gracefully with requests carrying payload // but this is kind of OK since we will reject anything other than an empty GET -static int request_check(const char *buf) +static int request_check(struct http_buf *queue) { + char tmp[4096]; int len = 0; + char *buf = tmp; + http_buf_peek(queue, tmp, 4096); while (*buf) // Check if we have a sequence of lines terminated by an empty line { char *b = strstr(buf, "\r\n"); @@ -86,20 +223,32 @@ static int request_check(const char *buf) return 0; } -struct http_request *http_parse_request(struct http_channel *c, char *buf) +struct http_request *http_parse_request(struct http_channel *c, struct http_buf **queue, + int len) { struct http_request *r = nmem_malloc(c->nmem, sizeof(*r)); char *p, *p2; + char tmp[4096]; + char *buf = tmp; + + if (len > 4096) + return 0; + if (http_buf_read(queue, buf, len) < len) + return 0; r->channel = c; + r->arguments = 0; + r->headers = 0; // Parse first line - if (!strncmp(buf, "GET ", 4)) - r->method = Method_GET; - else + for (p = buf, p2 = r->method; *p && *p != ' ' && p - buf < 19; p++) + *(p2++) = *p; + if (*p != ' ') { yaz_log(YLOG_WARN, "Unexpected HTTP method in request"); return 0; } + *p2 = '\0'; + if (!(buf = strchr(buf, ' '))) { yaz_log(YLOG_WARN, "Syntax error in request (1)"); @@ -151,18 +300,47 @@ struct http_request *http_parse_request(struct http_channel *c, char *buf) if (!(p = strstr(buf, "\r\n"))) return 0; *(p++) = '\0'; + p++; strcpy(r->http_version, buf); buf = p; } strcpy(c->version, r->http_version); - r->headers = 0; // We might want to parse these someday + r->headers = 0; + while (*buf) + { + if (!(p = strstr(buf, "\r\n"))) + return 0; + if (p == buf) + break; + else + { + struct http_header *h = nmem_malloc(c->nmem, sizeof(*h)); + if (!(p2 = strchr(buf, ':'))) + return 0; + *(p2++) = '\0'; + h->name = nmem_strdup(c->nmem, buf); + while (isspace(*p2)) + p2++; + if (p2 >= p) // Empty header? 
+ { + buf = p + 2; + continue; + } + *p = '\0'; + h->value = nmem_strdup(c->nmem, p2); + h->next = r->headers; + r->headers = h; + buf = p + 2; + } + } return r; } -static char *http_serialize_response(struct http_channel *c, struct http_response *r) +static struct http_buf *http_serialize_response(struct http_channel *c, + struct http_response *r) { wrbuf_rewind(c->wrbuf); struct http_header *h; @@ -177,16 +355,59 @@ static char *http_serialize_response(struct http_channel *c, struct http_respons if (r->payload) wrbuf_puts(c->wrbuf, r->payload); - wrbuf_putc(c->wrbuf, '\0'); - return wrbuf_buf(c->wrbuf); + return http_buf_bywrbuf(c->wrbuf); +} + +// Serialize a HTTP request +static struct http_buf *http_serialize_request(struct http_request *r) +{ + struct http_channel *c = r->channel; + wrbuf_rewind(c->wrbuf); + struct http_header *h; + struct http_argument *a; + + wrbuf_printf(c->wrbuf, "%s %s", r->method, r->path); + + if (r->arguments) + { + wrbuf_putc(c->wrbuf, '?'); + for (a = r->arguments; a; a = a->next) { + if (a != r->arguments) + wrbuf_putc(c->wrbuf, '&'); + wrbuf_printf(c->wrbuf, "%s=%s", a->name, a->value); + } + } + + wrbuf_printf(c->wrbuf, " HTTP/%s\r\n", r->http_version); + + for (h = r->headers; h; h = h->next) + wrbuf_printf(c->wrbuf, "%s: %s\r\n", h->name, h->value); + + wrbuf_puts(c->wrbuf, "\r\n"); + + return http_buf_bywrbuf(c->wrbuf); } + // Cleanup static void http_destroy(IOCHAN i) { struct http_channel *s = iochan_getdata(i); yaz_log(YLOG_DEBUG, "Destroying http channel"); + if (s->proxy) + { + yaz_log(YLOG_DEBUG, "Destroying Proxy channel"); + if (s->proxy->iochan) + { + close(iochan_getfd(s->proxy->iochan)); + iochan_destroy(s->proxy->iochan); + } + http_buf_destroy_queue(s->proxy->oqueue); + xfree(s->proxy); + } + http_buf_destroy_queue(s->iqueue); + http_buf_destroy_queue(s->oqueue); nmem_destroy(s->nmem); wrbuf_free(s->wrbuf, 1); xfree(s); @@ -194,6 +415,79 @@ static void http_destroy(IOCHAN i) iochan_destroy(i); } +static int 
http_weshouldproxy(struct http_request *rq) +{ + if (proxy_addr && !strstr(rq->path, "search.pz2")) + return 1; + return 0; +} + +static int http_proxy(struct http_request *rq) +{ + struct http_channel *c = rq->channel; + struct http_proxy *p = c->proxy; + struct http_header *hp; + struct http_buf *requestbuf; + + yaz_log(YLOG_DEBUG, "Proxy request"); + + if (!p) // This is a new connection. Create a proxy channel + { + int sock; + struct protoent *pe; + int one = 1; + int flags; + + yaz_log(YLOG_DEBUG, "Creating a new proxy channel"); + if (!(pe = getprotobyname("tcp"))) { + abort(); + } + if ((sock = socket(PF_INET, SOCK_STREAM, pe->p_proto)) < 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "socket"); + return -1; + } + if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char*) + &one, sizeof(one)) < 0) + abort(); + if ((flags = fcntl(sock, F_GETFL, 0)) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl"); + if (fcntl(sock, F_SETFL, flags | O_NONBLOCK) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2"); + if (connect(sock, (struct sockaddr *) proxy_addr, sizeof(*proxy_addr)) < 0) + if (errno != EINPROGRESS) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "Proxy connect"); + return -1; + } + + p = xmalloc(sizeof(struct http_proxy)); + p->oqueue = 0; + p->channel = c; + c->proxy = p; + // We will add EVENT_OUTPUT below + p->iochan = iochan_create(sock, proxy_io, EVENT_INPUT); + iochan_setdata(p->iochan, p); + p->iochan->next = channel_list; + channel_list = p->iochan; + } + + // Modify Host: header + for (hp = rq->headers; hp; hp = hp->next) + if (!strcmp(hp->name, "Host")) + break; + if (!hp) + { + yaz_log(YLOG_WARN, "Failed to find Host header in proxy"); + return -1; + } + hp->value = nmem_strdup(c->nmem, proxy_url); + requestbuf = http_serialize_request(rq); + http_buf_enqueue(&p->oqueue, requestbuf); + iochan_setflag(p->iochan, EVENT_OUTPUT); + return 0; +} + static void http_io(IOCHAN i, int event) { struct http_channel *hc = iochan_getdata(i); @@ -202,74 +496,111 @@ static void 
http_io(IOCHAN i, int event) switch (event) { - int res; + int res, reqlen; + struct http_buf *htbuf; case EVENT_INPUT: yaz_log(YLOG_DEBUG, "HTTP Input event"); - res = read(iochan_getfd(i), hc->ibuf + hc->read, IBUF_SIZE - (hc->read + 1)); - if (res <= 0) + htbuf = http_buf_create(); + res = read(iochan_getfd(i), htbuf->buf, HTTP_BUF_SIZE -1); + if (res <= 0 && errno != EAGAIN) { yaz_log(YLOG_WARN|YLOG_ERRNO, "HTTP read"); + http_buf_destroy(htbuf); http_destroy(i); return; } - yaz_log(YLOG_DEBUG, "HTTP read %d octets", res); - hc->read += res; - hc->ibuf[hc->read] = '\0'; + if (res > 0) + { + htbuf->buf[res] = '\0'; + htbuf->len = res; + http_buf_enqueue(&hc->iqueue, htbuf); + } - if ((res = request_check(hc->ibuf)) <= 2) + if ((reqlen = request_check(hc->iqueue)) <= 2) { yaz_log(YLOG_DEBUG, "We don't have a complete HTTP request yet"); return; } - yaz_log(YLOG_DEBUG, "We think we have a complete HTTP request (len %d): \n%s", res, hc->ibuf); + yaz_log(YLOG_DEBUG, "We think we have a complete HTTP request (len %d)", reqlen); + nmem_reset(hc->nmem); - if (!(request = http_parse_request(hc, hc->ibuf))) + if (!(request = http_parse_request(hc, &hc->iqueue, reqlen))) { yaz_log(YLOG_WARN, "Failed to parse request"); http_destroy(i); return; } - response = http_command(request); - if (!response) + yaz_log(YLOG_LOG, "Request: %s %s v %s", request->method, request->path, + request->http_version); + if (http_weshouldproxy(request)) + http_proxy(request); + else { - http_destroy(i); - return; + struct http_buf *hb; + // Execute our business logic! 
+ response = http_command(request); + if (!response) + { + http_destroy(i); + return; + } + if (!(hb = http_serialize_response(hc, response))) + { + http_destroy(i); + return; + } + http_buf_enqueue(&hc->oqueue, hb); + yaz_log(YLOG_DEBUG, "Response ready"); + iochan_setflags(i, EVENT_OUTPUT); // Turns off input selecting } - // FIXME -- do something to cause the response to be sent to the client - if (!(hc->obuf = http_serialize_response(hc, response))) + if (hc->iqueue) { - http_destroy(i); - return; + yaz_log(YLOG_DEBUG, "We think we have more input to read. Forcing event"); + iochan_setevent(i, EVENT_INPUT); } - yaz_log(YLOG_DEBUG, "Response ready:\n%s", hc->obuf); - hc->writ = 0; - hc->read = 0; - iochan_setflags(i, EVENT_OUTPUT); // Turns off input selecting + break; case EVENT_OUTPUT: yaz_log(YLOG_DEBUG, "HTTP output event"); - res = write(iochan_getfd(hc->iochan), hc->obuf + hc->writ, - strlen(hc->obuf + hc->writ)); - if (res <= 0) + if (hc->oqueue) { - yaz_log(YLOG_WARN|YLOG_ERRNO, "write"); - http_destroy(i); - return; - } - hc->writ += res; - if (!hc->obuf[hc->writ]) { - yaz_log(YLOG_DEBUG, "Writing finished"); - if (!strcmp(hc->version, "1.0")) + struct http_buf *wb = hc->oqueue; + res = write(iochan_getfd(hc->iochan), wb->buf + wb->offset, wb->len); + if (res <= 0) { - yaz_log(YLOG_DEBUG, "Closing 1.0 connection"); + yaz_log(YLOG_WARN|YLOG_ERRNO, "write"); http_destroy(i); + return; + } + yaz_log(YLOG_DEBUG, "HTTP Wrote %d octets", res); + if (res == wb->len) + { + hc->oqueue = hc->oqueue->next; + http_buf_destroy(wb); } else - iochan_setflags(i, EVENT_INPUT); // Turns off output flag + { + wb->len -= res; + wb->offset += res; + } + if (!hc->oqueue) { + yaz_log(YLOG_DEBUG, "Writing finished"); + if (!strcmp(hc->version, "1.0")) + { + yaz_log(YLOG_DEBUG, "Closing 1.0 connection"); + http_destroy(i); + return; + } + else + iochan_setflags(i, EVENT_INPUT); // Turns off output flag + } } + + if (!hc->oqueue && hc->proxy && !hc->proxy->iochan) + 
http_destroy(i); // Server closed; we're done break; default: yaz_log(YLOG_WARN, "Unexpected event on connection"); @@ -277,6 +608,83 @@ static void http_io(IOCHAN i, int event) } } +// Handles I/O on a client connection to a backend web server (proxy mode) +static void proxy_io(IOCHAN pi, int event) +{ + struct http_proxy *pc = iochan_getdata(pi); + struct http_channel *hc = pc->channel; + + switch (event) + { + int res; + struct http_buf *htbuf; + + case EVENT_INPUT: + yaz_log(YLOG_DEBUG, "Proxy input event"); + htbuf = http_buf_create(); + res = read(iochan_getfd(pi), htbuf->buf, HTTP_BUF_SIZE -1); + yaz_log(YLOG_DEBUG, "Proxy read %d bytes.", res); + if (res == 0 || (res < 0 && errno != EINPROGRESS)) + { + if (hc->oqueue) + { + yaz_log(YLOG_WARN, "Proxy read came up short"); + // Close channel and alert client HTTP channel that we're gone + http_buf_destroy(htbuf); + close(iochan_getfd(pi)); + iochan_destroy(pi); + pc->iochan = 0; + } + else + { + http_destroy(hc->iochan); + return; + } + } + else + { + htbuf->buf[res] = '\0'; + htbuf->len = res; + http_buf_enqueue(&hc->oqueue, htbuf); + } + iochan_setflag(hc->iochan, EVENT_OUTPUT); + break; + case EVENT_OUTPUT: + yaz_log(YLOG_DEBUG, "Proxy output event"); + if (!(htbuf = pc->oqueue)) + { + iochan_clearflag(pi, EVENT_OUTPUT); + return; + } + res = write(iochan_getfd(pi), htbuf->buf + htbuf->offset, htbuf->len); + if (res <= 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "write"); + http_destroy(hc->iochan); + return; + } + if (res == htbuf->len) + { + struct http_buf *np = htbuf->next; + http_buf_destroy(htbuf); + pc->oqueue = np; + } + else + { + htbuf->len -= res; + htbuf->offset += res; + } + + if (!pc->oqueue) { + iochan_setflags(pi, EVENT_INPUT); // Turns off output flag + } + break; + default: + yaz_log(YLOG_WARN, "Unexpected event on connection"); + http_destroy(hc->iochan); + } +} + /* Accept a new command connection */ static void http_accept(IOCHAN i, int event) { @@ -303,17 +711,17 @@ static void 
http_accept(IOCHAN i, int event) c = iochan_create(s, http_io, EVENT_INPUT | EVENT_EXCEPT); ch = xmalloc(sizeof(*ch)); - ch->read = 0; + ch->proxy = 0; ch->nmem = nmem_create(); ch->wrbuf = wrbuf_alloc(); ch->iochan = c; + ch->iqueue = ch->oqueue = 0; iochan_setdata(c, ch); c->next = channel_list; channel_list = c; } - /* Create a http-channel listener */ void http_init(int port) { @@ -347,6 +755,31 @@ void http_init(int port) channel_list = c; } +void http_set_proxyaddr(char *host) +{ + char *p; + int port; + struct hostent *he; + + strcpy(proxy_url, host); + p = strchr(host, ':'); + yaz_log(YLOG_DEBUG, "Proxying for %s", host); + if (p) { + port = atoi(p + 1); + *p = '\0'; + } + else + port = 80; + if (!(he = gethostbyname(host))) + { + fprintf(stderr, "Failed to lookup '%s'\n", host); + exit(1); + } + proxy_addr = xmalloc(sizeof(struct sockaddr_in)); + proxy_addr->sin_family = he->h_addrtype; + memcpy(&proxy_addr->sin_addr.s_addr, he->h_addr_list[0], he->h_length); + proxy_addr->sin_port = htons(port); +} /* * Local variables: diff --git a/http.h b/http.h index bb15ac7..907a19d 100644 --- a/http.h +++ b/http.h @@ -1,19 +1,34 @@ #ifndef HTTP_H #define HTTP_H +// Generic I/O buffer +struct http_buf +{ +#define HTTP_BUF_SIZE 4096 + char buf[4096]; + int offset; + int len; + struct http_buf *next; +}; + struct http_channel { IOCHAN iochan; -#define IBUF_SIZE 10240 - char ibuf[IBUF_SIZE]; + struct http_buf *iqueue; + struct http_buf *oqueue; char version[10]; - int read; - char *obuf; - int writ; + struct http_proxy *proxy; NMEM nmem; WRBUF wrbuf; }; +struct http_proxy // attached to iochan for proxy connection +{ + IOCHAN iochan; + struct http_channel *channel; + struct http_buf *oqueue; +}; + struct http_header { char *name; @@ -32,11 +47,7 @@ struct http_request { struct http_channel *channel; char http_version[20]; - enum - { - Method_GET, - Method_other - } method; + char method[20]; char *path; struct http_header *headers; struct http_argument *arguments; @@ 
-51,10 +62,18 @@ struct http_response char *payload; }; +void http_set_proxyaddr(char *url); void http_init(int port); void http_addheader(struct http_response *r, const char *name, const char *value); -char *argbyname(struct http_request *r, char *name); -char *headerbyname(struct http_request *r, char *name); +char *http_argbyname(struct http_request *r, char *name); +char *http_headerbyname(struct http_request *r, char *name); struct http_response *http_create_response(struct http_channel *c); +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ #endif diff --git a/http_command.c b/http_command.c index 07210ec..0affacd 100644 --- a/http_command.c +++ b/http_command.c @@ -1,5 +1,5 @@ /* - * $Id: http_command.c,v 1.1 2006-11-21 18:46:43 quinn Exp $ + * $Id: http_command.c,v 1.2 2006-11-24 20:29:07 quinn Exp $ */ #include @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -21,7 +22,7 @@ struct http_session { struct session *psession; - char session_id[128]; + int session_id; int timestamp; struct http_session *next; }; @@ -31,8 +32,8 @@ static struct http_session *session_list = 0; struct http_session *http_session_create() { struct http_session *r = xmalloc(sizeof(*r)); - r->psession = 0; - *r->session_id = '\0'; + r->psession = new_session(); + r->session_id = 0; r->timestamp = 0; r->next = session_list; session_list = r; @@ -66,16 +67,192 @@ static void error(struct http_response *rs, char *code, char *msg, char *txt) rs->payload = nmem_strdup(c->nmem, tmp); } +int make_sessionid() +{ + struct timeval t; + int res; + static int seq = 0; + + seq++; + if (gettimeofday(&t, 0) < 0) + abort(); + res = t.tv_sec; + res = (res << 8) | (seq & 0xff); + return res; +} + +static struct http_session *locate_session(struct http_request *rq, struct http_response *rs) +{ + struct http_session *p; + char *session = http_argbyname(rq, "session"); + int id; + + if (!session) + { + error(rs, 
"417", "Must supply session", 0); + return 0; + } + id = atoi(session); + for (p = session_list; p; p = p->next) + if (id == p->session_id) + return p; + error(rs, "417", "Session does not exist, or it has expired", 0); + return 0; +} + + static void cmd_init(struct http_request *rq, struct http_response *rs) { + int sesid; + char buf[1024]; + struct http_session *s = http_session_create(); + + // FIXME create a pazpar2 session + yaz_log(YLOG_DEBUG, "HTTP Session init"); + sesid = make_sessionid(); + s->session_id = sesid; + sprintf(buf, "OK%d", sesid); + rs->payload = nmem_strdup(rq->channel->nmem, buf); } +static void cmd_termlist(struct http_request *rq, struct http_response *rs) +{ + struct http_session *s = locate_session(rq, rs); + struct http_channel *c = rq->channel; + struct termlist_score **p; + int len; + int i; + + if (!s) + return; + wrbuf_rewind(c->wrbuf); + + wrbuf_puts(c->wrbuf, ""); + p = termlist(s->psession, &len); + if (p) + for (i = 0; i < len; i++) + { + wrbuf_puts(c->wrbuf, "\n"); + wrbuf_printf(c->wrbuf, "%s", p[i]->term); + wrbuf_printf(c->wrbuf, "%d", p[i]->frequency); + wrbuf_puts(c->wrbuf, ""); + } + wrbuf_puts(c->wrbuf, ""); + rs->payload = nmem_strdup(rq->channel->nmem, wrbuf_buf(c->wrbuf)); +} + + +static void cmd_bytarget(struct http_request *rq, struct http_response *rs) +{ + struct http_session *s = locate_session(rq, rs); + struct http_channel *c = rq->channel; + struct hitsbytarget *ht; + int count, i; + + if (!s) + return; + if (!(ht = hitsbytarget(s->psession, &count))) + { + error(rs, "500", "Failed to retrieve hitcounts", 0); + return; + } + wrbuf_rewind(c->wrbuf); + wrbuf_puts(c->wrbuf, "OK"); + + for (i = 0; i < count; i++) + { + wrbuf_puts(c->wrbuf, "\n"); + wrbuf_printf(c->wrbuf, "%s\n", ht[i].id); + wrbuf_printf(c->wrbuf, "%d\n", ht[i].hits); + wrbuf_printf(c->wrbuf, "%d\n", ht[i].diagnostic); + wrbuf_printf(c->wrbuf, "%d\n", ht[i].records); + wrbuf_printf(c->wrbuf, "%s\n", ht[i].state); + wrbuf_puts(c->wrbuf, ""); + } + 
+ wrbuf_puts(c->wrbuf, ""); + rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf)); +} + +static void cmd_show(struct http_request *rq, struct http_response *rs) +{ + struct http_session *s = locate_session(rq, rs); + struct http_channel *c = rq->channel; + struct record **rl; + char *start = http_argbyname(rq, "start"); + char *num = http_argbyname(rq, "num"); + int startn = 0; + int numn = 20; + int i; + + if (!s) + return; + + if (start) + startn = atoi(start); + if (num) + numn = atoi(num); + + rl = show(s->psession, startn, &numn); + + wrbuf_rewind(c->wrbuf); + wrbuf_puts(c->wrbuf, "\nOK\n"); + + for (i = 0; i < numn; i++) + { + int ccount; + struct record *p; + + wrbuf_puts(c->wrbuf, "\n"); + wrbuf_printf(c->wrbuf, "%s\n", rl[i]->merge_key); + for (ccount = 1, p = rl[i]->next_cluster; p; p = p->next_cluster, ccount++) + ; + if (ccount > 1) + wrbuf_printf(c->wrbuf, "%d\n", ccount); + wrbuf_puts(c->wrbuf, "\n"); + } + + wrbuf_puts(c->wrbuf, "\n"); + rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf)); +} + +static void cmd_search(struct http_request *rq, struct http_response *rs) +{ + struct http_session *s = locate_session(rq, rs); + char *query = http_argbyname(rq, "query"); + + if (!s) + return; + if (!query) + { + error(rs, "417", "Must supply query", 0); + return; + } + search(s->psession, query); + rs->payload = "OK"; +} + + static void cmd_stat(struct http_request *rq, struct http_response *rs) { } static void cmd_load(struct http_request *rq, struct http_response *rs) { + struct http_session *s = locate_session(rq, rs); + char *fn = http_argbyname(rq, "name"); + + if (!s) + return; + if (!fn) + { + error(rs, "417", "Must suppply name", 0); + return; + } + if (load_targets(s->psession, fn) < 0) + error(rs, "417", "Failed to find targets", "Possibly wrong filename"); + else + rs->payload = "OK"; } struct { @@ -85,12 +262,16 @@ struct { { "init", cmd_init }, { "stat", cmd_stat }, { "load", cmd_load }, + { "bytarget", cmd_bytarget }, + { "show", 
cmd_show }, + { "search", cmd_search }, + { "termlist", cmd_termlist }, {0,0} }; struct http_response *http_command(struct http_request *rq) { - char *command = argbyname(rq, "command"); + char *command = http_argbyname(rq, "command"); struct http_channel *c = rq->channel; struct http_response *rs = http_create_response(c); int i; diff --git a/pazpar2.c b/pazpar2.c index b4ca296..c2d4fbf 100644 --- a/pazpar2.c +++ b/pazpar2.c @@ -1,4 +1,4 @@ -/* $Id: pazpar2.c,v 1.3 2006-11-21 18:46:43 quinn Exp $ */ +/* $Id: pazpar2.c,v 1.4 2006-11-24 20:29:07 quinn Exp $ */ #include #include @@ -21,6 +21,9 @@ #include "eventl.h" #include "command.h" #include "http.h" +#include "termlists.h" +#include "reclists.h" +#include "relevance.h" #define PAZPAR2_VERSION "0.1" #define MAX_DATABASES 512 @@ -80,6 +83,7 @@ static struct parameters { struct timeval base_time; int toget; int chunk; + void *ccl_filter; } global_parameters = { 30, @@ -88,7 +92,8 @@ static struct parameters { PAZPAR2_VERSION, {0,0}, 100, - MAX_CHUNK + MAX_CHUNK, + 0 }; @@ -289,7 +294,11 @@ const char *find_subfield(const char *field, char subfield) p++; if (*p == '\t' && *(++p) == subfield) { if (*(++p) == ' ') - return ++p; + { + while (isspace(*p)) + p++; + return p; + } } } return 0; @@ -358,6 +367,7 @@ char *extract_mergekey(struct session *s, const char *rec) return out; } +#ifdef RECHEAP static void push_record(struct session *s, struct record *r) { int p; @@ -387,13 +397,15 @@ static struct record *top_record(struct session *s) static struct record *pop_record(struct session *s) { - struct record *res = s->recheap[0]; + struct record *res; int p = 0; int lastnonleaf = (s->recheap_max - 1) >> 1; if (s->recheap_max < 0) return 0; + res = s->recheap[0]; + s->recheap[p] = s->recheap[s->recheap_max--]; while (p <= lastnonleaf) @@ -472,6 +484,36 @@ static void rewind_recheap(struct session *s) } } +#endif + +// FIXME needs to be generalized. 
Should flexibly generate X lists per search +static void extract_subject(struct session *s, const char *rec) +{ + const char *field, *subfield; + + while ((field = find_field(rec, "650"))) + { + rec = field + 1; // Crude way to cause a loop through repeating fields + if ((subfield = find_subfield(field, 'a'))) + { + char *e, *ef; + char buf[1024]; + int len; + + ef = index(subfield, '\n'); + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + while (ef > subfield && !isalpha(*(ef - 1)) && *(ef - 1) != ')') + ef--; + len = ef - subfield; + assert(len < 1023); + memcpy(buf, subfield, len); + buf[len] = '\0'; + termlist_insert(s->termlist, buf); + } + } +} + struct record *ingest_record(struct target *t, char *buf, int len) { struct session *s = t->session; @@ -490,6 +532,8 @@ struct record *ingest_record(struct target *t, char *buf, int len) res = nmem_malloc(s->nmem, sizeof(struct record)); + extract_subject(s, recbuf); + res->merge_key = extract_mergekey(s, recbuf); if (!res->merge_key) return 0; @@ -500,7 +544,7 @@ struct record *ingest_record(struct target *t, char *buf, int len) yaz_log(YLOG_DEBUG, "Key: %s", res->merge_key); - push_record(s, res); + reclist_insert(s->reclist, res); return res; } @@ -812,20 +856,12 @@ void search(struct session *s, char *query) } if (live_channels) { + const char *t[] = { "aa", "ab", 0 }; int maxrecs = live_channels * global_parameters.toget; - if (!s->recheap_size) - { - s->recheap = xmalloc(maxrecs * sizeof(struct record *)); - s->recheap_size = maxrecs; - } - else if (s->recheap_size < maxrecs) - { - s->recheap = xrealloc(s->recheap, maxrecs * sizeof(struct record*)); - s->recheap_size = maxrecs; - } + s->termlist = termlist_create(s->nmem, maxrecs, 15); + s->reclist = reclist_create(s->nmem, maxrecs); + relevance_create(s->nmem, t, 1000); } - s->recheap_max = -1; - s->recheap_scratch = -1; } struct session *new_session() @@ -834,6 +870,8 @@ struct session *new_session() yaz_log(YLOG_DEBUG, "New pazpar2 session"); + 
session->termlist = 0; + session->reclist = 0; session->requestid = -1; session->targets = 0; session->pqf_parser = yaz_pqf_create(); @@ -842,8 +880,6 @@ struct session *new_session() session->yaz_marc = yaz_marc_create(); yaz_marc_subfield_str(session->yaz_marc, "\t"); session->wrbuf = wrbuf_alloc(); - session->recheap = 0; - session->recheap_size = 0; return session; } @@ -877,6 +913,11 @@ struct hitsbytarget *hitsbytarget(struct session *s, int *count) return res; } +struct termlist_score **termlist(struct session *s, int *num) +{ + return termlist_highscore(s->termlist, num); +} + struct record **show(struct session *s, int start, int *num) { struct record **recs = nmem_malloc(s->nmem, *num * sizeof(struct record *)); @@ -884,16 +925,16 @@ struct record **show(struct session *s, int start, int *num) // FIXME -- skip initial records + reclist_rewind(s->reclist); for (i = 0; i < *num; i++) { - recs[i] = read_recheap(s); + recs[i] = reclist_read_record(s->reclist); if (!recs[i]) { *num = i; break; } } - rewind_recheap(s); return recs; } @@ -926,6 +967,11 @@ void statistics(struct session *s, struct statistics *stat) stat->num_connections = i; } +static void *load_cclfile(const char *fn) +{ + return 0; +} + int main(int argc, char **argv) { int ret; @@ -936,7 +982,7 @@ int main(int argc, char **argv) yaz_log_init(YLOG_DEFAULT_LEVEL|YLOG_DEBUG, "pazpar2", 0); - while ((ret = options("c:h:", argv, argc, &arg)) != -2) + while ((ret = options("c:h:p:C:", argv, argc, &arg)) != -2) { switch (ret) { case 0: @@ -944,9 +990,15 @@ int main(int argc, char **argv) case 'c': command_init(atoi(arg)); break; + case 'C': + global_parameters.ccl_filter = load_cclfile(arg); + break; case 'h': http_init(atoi(arg)); break; + case 'p': + http_set_proxyaddr(arg); + break; default: fprintf(stderr, "Usage: pazpar2 -d comport"); exit(1); diff --git a/pazpar2.h b/pazpar2.h index 0a737ac..aa33a65 100644 --- a/pazpar2.h +++ b/pazpar2.h @@ -2,6 +2,7 @@ #define PAZPAR2_H #include +#include 
"termlists.h" struct record { struct target *target; @@ -18,10 +19,8 @@ struct session { char query[1024]; NMEM nmem; WRBUF wrbuf; - struct record **recheap; - int recheap_size; - int recheap_max; - int recheap_scratch; + struct termlist *termlist; + struct reclist *reclist; yaz_marc_t yaz_marc; }; @@ -52,6 +51,7 @@ int load_targets(struct session *s, const char *fn); void statistics(struct session *s, struct statistics *stat); void search(struct session *s, char *query); struct record **show(struct session *s, int start, int *num); +struct termlist_score **termlist(struct session *s, int *num); #endif diff --git a/reclists.c b/reclists.c new file mode 100644 index 0000000..004efcd --- /dev/null +++ b/reclists.c @@ -0,0 +1,122 @@ +/* + * $Id: reclists.c,v 1.1 2006-11-24 20:29:07 quinn Exp $ + */ + +#include + +#include + +#include "pazpar2.h" +#include "reclists.h" + +struct reclist_bucket +{ + struct record *record; + struct reclist_bucket *next; +}; + +struct reclist +{ + struct reclist_bucket **hashtable; + int hashtable_size; + int hashmask; + + struct record **flatlist; + int flatlist_size; + int num_records; + int pointer; + + NMEM nmem; +}; + +struct record *reclist_read_record(struct reclist *l) +{ + if (l->pointer < l->num_records) + return l->flatlist[l->pointer++]; + else + return 0; +} + +void reclist_rewind(struct reclist *l) +{ + l->pointer = 0; +} + +// Jenkins one-at-a-time hash (from wikipedia) +static unsigned int hash(const unsigned char *key) +{ + unsigned int hash = 0; + + while (*key) + { + hash += *(key++); + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +struct reclist *reclist_create(NMEM nmem, int numrecs) +{ + int hashsize = 1; + struct reclist *res; + + assert(numrecs); + while (hashsize < numrecs) + hashsize <<= 1; + res = nmem_malloc(nmem, sizeof(struct reclist)); + res->hashtable = nmem_malloc(nmem, hashsize * sizeof(struct reclist_bucket*)); 
+ bzero(res->hashtable, hashsize * sizeof(struct reclist_bucket*)); + res->hashtable_size = hashsize; + res->nmem = nmem; + res->hashmask = hashsize - 1; // Creates a bitmask + + res->num_records = 0; + res->flatlist = nmem_malloc(nmem, numrecs * sizeof(struct record*)); + res->flatlist_size = numrecs; + + return res; +} + +void reclist_insert(struct reclist *l, struct record *record) +{ + unsigned int bucket; + struct reclist_bucket **p; + + bucket = hash(record->merge_key) & l->hashmask; + for (p = &l->hashtable[bucket]; *p; p = &(*p)->next) + { + // We found a matching record. Merge them + if (!strcmp(record->merge_key, (*p)->record->merge_key)) + { + struct record *existing = (*p)->record; + yaz_log(YLOG_LOG, "Found a matching record: %s", record->merge_key); + record->next_cluster = existing->next_cluster; + existing->next_cluster = record; + break; + } + } + if (!*p) // We made it to the end of the bucket without finding match + { + yaz_log(YLOG_DEBUG, "Added a new record: %s", record->merge_key); + struct reclist_bucket *new = nmem_malloc(l->nmem, + sizeof(struct reclist_bucket)); + new->record = record; + record->next_cluster = 0; + new->next = 0; + *p = new; + l->flatlist[l->num_records++] = record; + } +} + + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/reclists.h b/reclists.h new file mode 100644 index 0000000..b80522e --- /dev/null +++ b/reclists.h @@ -0,0 +1,11 @@ +#ifndef RECLISTS_H +#define RECLISTS_H + +struct reclist; + +struct reclist *reclist_create(NMEM, int numrecs); +void reclist_insert(struct reclist *tl, struct record *record); +struct record *reclist_read_record(struct reclist *l); +void reclist_rewind(struct reclist *l); + +#endif diff --git a/relevance.c b/relevance.c new file mode 100644 index 0000000..fc85e38 --- /dev/null +++ b/relevance.c @@ -0,0 +1,136 @@ +/* + * $Id: relevance.c,v 1.1 2006-11-24 20:29:07 quinn Exp $ + */ + +#include + 
+#include "relevance.h" +#include "pazpar2.h" + +struct relevance +{ + struct relevance_record *records; + int num_records; + int *doc_frequency_vec; + int vec_len; + struct word_trie *wt; + NMEM nmem; +}; + +struct relevance_record +{ + struct record *record; + int *term_frequency_vec; +}; + +// We use this data structure to recognize terms in input records, +// and map them to record term vectors for counting. +struct word_trie +{ + struct + { + struct word_trie *child; + int termno; + } list[26]; +}; + +static struct word_trie *create_word_trie_node(NMEM nmem) +{ + struct word_trie *res = nmem_malloc(nmem, sizeof(struct word_trie)); + int i; + for (i = 0; i < 26; i++) + { + res->list[i].child = 0; + res->list[i].termno = -1; + } + return res; +} + +static void word_trie_addterm(NMEM nmem, struct word_trie *n, const char *term, int num) +{ + while (*term) { + int c = tolower(*term); + if (c < 'a' || c > 'z') + term++; + else + { + c -= 'a'; + if (!n->list[c].child) + { + struct word_trie *new = create_word_trie_node(nmem); + n->list[c].child = new; + } + if (!*(++term)) + n->list[c].termno = num; + else + word_trie_addterm(nmem, n->list[c].child, term, num); + break; + } + } + +} + +static struct word_trie *build_word_trie(NMEM nmem, const char **terms) +{ + struct word_trie *res = create_word_trie_node(nmem); + const char **p; + int i; + + for (i = 1, p = terms; *p; p++, i++) + word_trie_addterm(nmem, res, *p, i); + return res; +} + +struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs) +{ + struct relevance *res = nmem_malloc(nmem, sizeof(struct relevance)); + const char **p; + int i; + + for (p = terms, i = 0; *p; p++, i++) + ; + res->vec_len = ++i; + res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int)); + bzero(res->doc_frequency_vec, res->vec_len * sizeof(int)); + res->nmem = nmem; + res->num_records = 0; + res->records = nmem_malloc(nmem, numrecs * sizeof(struct relevance_record *)); + res->wt = 
build_word_trie(nmem, terms); + return res; +} + +struct relevance_record *relevance_newrec(struct relevance *r, struct record *rec) +{ + struct relevance_record *res = nmem_malloc(r->nmem, + sizeof(struct relevance_record)); + res->record = rec; + res->term_frequency_vec = nmem_malloc(r->nmem, r->vec_len * sizeof(int)); + bzero(res->term_frequency_vec, r->vec_len * sizeof(int)); + return res; +} + +void relevance_countwords(struct relevance_record *rec, const char *words, int len) +{ +} + +void relevance_donerecord(struct relevance_record *rec) +{ +} + +// Prepare for a relevance-sorted read of up to num entries +void relevance_prepare_read(struct relevance *r, int num) +{ +} + +struct record *relevance_read(struct relevance *r) +{ + return 0; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/relevance.h b/relevance.h new file mode 100644 index 0000000..3a2bc15 --- /dev/null +++ b/relevance.h @@ -0,0 +1,27 @@ +#ifndef RELEVANCE_H +#define RELEVANCE_H + +#include + +#include "pazpar2.h" + +struct relevance; +struct relevance_record; + +struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs); +struct relevance_record *relevance_newrec(struct relevance *r, struct record *rec); +void relevance_countwords(struct relevance_record *rec, const char *words, int len); +void relevance_donerecord(struct relevance_record *rec); + +void relevance_prepare_read(struct relevance *r, int num); +struct record *relevance_read(struct relevance *r); + +#endif + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/termlists.c b/termlists.c new file mode 100644 index 0000000..fbf346e --- /dev/null +++ b/termlists.c @@ -0,0 +1,165 @@ +/* + * $Id: termlists.c,v 1.1 2006-11-24 20:29:07 quinn Exp $ + */ + +#include +#include +#include + +#include "termlists.h" + +// Discussion: +// As 
terms are found in incoming records, they are added to (or updated in) a
+// Hash table. When term records are updated, a frequency value is updated. At
+// the same time, a highscore is maintained for the most frequent terms.
+
+struct termlist_bucket
+{
+    struct termlist_score term;
+    struct termlist_bucket *next;
+};
+
+struct termlist
+{
+    struct termlist_bucket **hashtable;
+    int hashtable_size;
+    int hashmask;
+
+    struct termlist_score **highscore;  // unordered pool of top terms
+    int highscore_size;                 // capacity of the highscore list
+    int highscore_num;                  // entries currently in the list
+    int highscore_min;                  // smallest frequency in the list
+
+    NMEM nmem;
+};
+
+
+// Jenkins one-at-a-time hash (from wikipedia)
+static unsigned int hash(const unsigned char *key)
+{
+    unsigned int hash = 0;
+
+    while (*key)
+    {
+        hash += *(key++);
+        hash += (hash << 10);
+        hash ^= (hash >> 6);
+    }
+    hash += (hash << 3);
+    hash ^= (hash >> 11);
+    hash += (hash << 15);
+    return hash;
+}
+
+struct termlist *termlist_create(NMEM nmem, int numterms, int highscore_size)
+{
+    int hashsize = 1;
+    int halfnumterms;
+    struct termlist *res;
+
+    // Calculate a hash size smallest power of 2 larger than 50% of expected numterms
+    halfnumterms = numterms >> 1;
+    if (halfnumterms < 1) // guard degenerate numterms of 0 or 1
+        halfnumterms = 1;
+    while (hashsize < halfnumterms)
+        hashsize <<= 1;
+    res = nmem_malloc(nmem, sizeof(struct termlist));
+    res->hashtable = nmem_malloc(nmem, hashsize * sizeof(struct termlist_bucket*));
+    memset(res->hashtable, 0, hashsize * sizeof(struct termlist_bucket*));
+    res->hashtable_size = hashsize;
+    res->nmem = nmem;
+    res->hashmask = hashsize - 1; // Creates a bitmask
+
+    res->highscore = nmem_malloc(nmem, highscore_size * sizeof(struct termlist_score *));
+    res->highscore_size = highscore_size;
+    res->highscore_num = 0;
+    res->highscore_min = 0;
+
+    return res;
+}
+
+// Maintains the highscore list after t's frequency has been bumped
+static void update_highscore(struct termlist *tl, struct termlist_score *t)
+{
+    int i;
+    int smallest;
+    int me = -1; // index of t if it is already in the list
+
+    if (t->frequency < tl->highscore_min)
+        return;  // cannot displace anything
+
+    smallest = 0;
+    for (i = 0; i < tl->highscore_num; i++)
+    {
+        if (tl->highscore[i]->frequency < tl->highscore[smallest]->frequency)
+            smallest = i;
+        if (tl->highscore[i] == t)
+            me = i;
+    }
+    if (tl->highscore_num)
+        tl->highscore_min = tl->highscore[smallest]->frequency;
+    if (me >= 0)
+        return;  // already tracked; min has been refreshed above
+    if (tl->highscore_num < tl->highscore_size)
+    {
+        tl->highscore[tl->highscore_num++] = t;
+        if (t->frequency < tl->highscore_min)
+            tl->highscore_min = t->frequency;
+    }
+    else
+    {
+        if (t->frequency > tl->highscore[smallest]->frequency)
+        {
+            tl->highscore[smallest] = t; // evict the least frequent entry
+        }
+    }
+}
+
+// Adds one occurrence of term, creating its bucket entry on first sight
+void termlist_insert(struct termlist *tl, const char *term)
+{
+    unsigned int bucket;
+    struct termlist_bucket **p;
+
+    bucket = hash((const unsigned char *) term) & tl->hashmask;
+    for (p = &tl->hashtable[bucket]; *p; p = &(*p)->next)
+    {
+        if (!strcmp(term, (*p)->term.term))
+        {
+            yaz_log(YLOG_LOG, "Found a matching term: %s", term);
+            (*p)->term.frequency++;
+            update_highscore(tl, &((*p)->term));
+            break;
+        }
+    }
+    if (!*p) // We made it to the end of the bucket without finding match
+    {
+        yaz_log(YLOG_DEBUG, "Added a new term: %s", term);
+        struct termlist_bucket *new = nmem_malloc(tl->nmem,
+                                                  sizeof(struct termlist_bucket));
+        new->term.term = nmem_strdup(tl->nmem, term);
+        new->term.frequency = 1;
+        new->next = 0;
+        *p = new;
+        update_highscore(tl, &new->term);
+    }
+}
+
+// qsort callback: orders termlist_score pointers by descending frequency
+static int compare(const void *s1, const void *s2)
+{
+    struct termlist_score **p1 = (struct termlist_score**) s1, **p2 = (struct termlist_score **) s2;
+    return (*p2)->frequency - (*p1)->frequency;
+}
+
+struct termlist_score **termlist_highscore(struct termlist *tl, int *len)
+{
+    qsort(tl->highscore, tl->highscore_num, sizeof(struct termlist_score*), compare);
+    *len = tl->highscore_num;
+    return tl->highscore;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
diff --git a/termlists.h b/termlists.h
new file mode 100644
index 0000000..2dbee2d
--- /dev/null
+++ b/termlists.h
@@ -0,0 +1,26 @@
+#ifndef TERMLISTS_H
+#define TERMLISTS_H
+
+#include <yaz/nmem.h>
+
+struct termlist_score +{ + char *term; + int frequency; +}; + +struct termlist; + +struct termlist *termlist_create(NMEM nmem, int numterms, int highscore_size); +void termlist_insert(struct termlist *tl, const char *term); +struct termlist_score **termlist_highscore(struct termlist *tl, int *len); + +#endif + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ -- 1.7.10.4