-/* $Id: recgrs.c,v 1.85 2003-10-07 09:18:21 adam Exp $
- Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
+/* $Id: recgrs.c,v 1.92 2004-10-12 18:21:35 quinn Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
This file is part of the Zebra server.
#include <sys/types.h>
#ifndef WIN32
#include <unistd.h>
+#include <dlfcn.h>
#endif
#include <yaz/log.h>
#include <yaz/oid.h>
-#include <recctrl.h>
-#include "grsread.h"
+#include <d1_absyn.h>
+#include <idzebra/recgrs.h>
#define GRS_MAX_WORD 512
-struct grs_handler {
- RecTypeGrs type;
- void *clientData;
- int initFlag;
- struct grs_handler *next;
+/* Lexer state shared by sp_lex, sp_expr and sp_parse while a source
+ expression string is being scanned. */
+struct source_parser {
+ int len; /* number of bytes in the current word token; 0 if none */
+ const char *tok; /* start of the current token inside the source string
+ (not NUL-terminated, use together with len) */
+ const char *src; /* read position: first byte not yet consumed */
+ int lookahead; /* 't' for a word token, otherwise the delimiter
+ character itself (0 at end of string) */
};
-struct grs_handlers {
- struct grs_handler *handlers;
-};
-
-static int read_grs_type (struct grs_handlers *h,
- struct grs_read_info *p, const char *type,
- data1_node **root)
+/* Scan the next token from sp->src and record it in *sp.
+
+ Leading spaces are skipped. A run of bytes that contains none of the
+ delimiters in "<>();,-: " forms a word: sp->tok/sp->len describe it and
+ the lookahead becomes 't'. Otherwise the lookahead is the single byte
+ at the read position (a delimiter, or 0 at end of string) and that
+ byte is consumed unless it is the terminating NUL.
+
+ Returns the new value of sp->lookahead. */
+static int sp_lex(struct source_parser *sp)
{
- struct grs_handler *gh = h->handlers;
- const char *cp = strchr (type, '.');
-
- if (cp == NULL || cp == type)
+ while (*sp->src == ' ')
+ (sp->src)++;
+ sp->tok = sp->src;
+ sp->len = 0;
+ /* extend the word until NUL or a delimiter is reached */
+ while (*sp->src && !strchr("<>();,-: ", *sp->src))
{
- cp = strlen(type) + type;
- *p->type = 0;
+ sp->src++;
+ sp->len++;
}
+ if (sp->len)
+ sp->lookahead = 't';
else
- strcpy (p->type, cp+1);
- for (gh = h->handlers; gh; gh = gh->next)
{
- if (!memcmp (type, gh->type->type, cp-type) &&
- gh->type->type[cp-type] == '\0')
+ /* no word: report the delimiter itself; never step past the NUL */
+ sp->lookahead = *sp->src;
+ if (*sp->src)
+ sp->src++;
+ }
+ return sp->lookahead;
+}
+
+
+/* Evaluate the source expression starting at the parser's current token
+ and store the selected text in wrd->string / wrd->length.
+
+ Recognised forms:
+ data text of node n (only if n is a data node)
+ tag tag name of node n (only if n is a tag node)
+ attr(name) value of attribute "name" (only if n is a tag node)
+ range(expr,start,len) at most len bytes of expr, beginning at start
+
+ If the node kind does not fit, or the attribute does not exist, *wrd is
+ left untouched. A word that is none of the keywords above is left
+ unconsumed and treated as success.
+
+ Returns 1 on success and 0 on a syntax error (including when the
+ current token is not a word). */
+static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
+{
+ if (sp->lookahead != 't')
+ return 0;
+ if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
+ {
+ if (n->which == DATA1N_data)
{
- if (!gh->initFlag)
+ wrd->string = n->u.data.data;
+ wrd->length = n->u.data.len;
+ }
+ sp_lex(sp);
+ }
+ else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
+ {
+ if (n->which == DATA1N_tag)
+ {
+ wrd->string = n->u.tag.tag;
+ wrd->length = strlen(n->u.tag.tag);
+ }
+ sp_lex(sp);
+ }
+ else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
+ {
+ sp_lex(sp);
+ if (sp->lookahead != '(')
+ return 0;
+ sp_lex(sp);
+ if (sp->lookahead != 't')
+ return 0;
+
+ if (n->which == DATA1N_tag)
+ {
+ data1_xattr *p = n->u.tag.attributes;
+ /* skip every attribute whose name is not exactly the token:
+ a name differs if its length differs OR its bytes differ.
+ Checking the length first also keeps memcmp from reading
+ past the end of a shorter name. */
+ while (p && (strlen(p->name) != (size_t) sp->len ||
+ memcmp (p->name, sp->tok, sp->len)))
+ p = p->next;
+ if (p)
{
- gh->initFlag = 1;
- gh->clientData = (*gh->type->init)();
+ wrd->string = p->value;
+ wrd->length = strlen(p->value);
}
- p->clientData = gh->clientData;
- *root = (gh->type->read)(p);
- gh->clientData = p->clientData;
+ }
+ sp_lex(sp);
+ if (sp->lookahead != ')')
+ return 0;
+ sp_lex(sp);
+ }
+ else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
+ {
+ int start, len;
+ sp_lex(sp);
+ if (sp->lookahead != '(')
+ return 0;
+
+ sp_lex(sp);
+ /* a malformed inner expression makes the whole range() fail */
+ if (!sp_expr(sp, n, wrd))
+ return 0;
+ if (sp->lookahead != ',')
+ return 0;
+
+ sp_lex(sp);
+ if (sp->lookahead != 't')
+ return 0;
+ start = atoi_n(sp->tok, sp->len);
+
+ sp_lex(sp);
+ if (sp->lookahead != ',')
+ return 0;
+
+ sp_lex(sp);
+ if (sp->lookahead != 't')
+ return 0;
+ len = atoi_n(sp->tok, sp->len);
+
+ sp_lex(sp);
+ if (sp->lookahead != ')')
return 0;
+
+ sp_lex(sp);
+ if (wrd->string && wrd->length)
+ {
+ /* never step past the end of the data: a start beyond the
+ text yields an empty result, not a negative length */
+ if (start > wrd->length)
+ start = wrd->length;
+ wrd->string += start;
+ wrd->length -= start;
+ if (wrd->length > len)
+ wrd->length = len;
}
}
return 1;
}
-static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t)
+/* Parse the source expression string src against node n and store the
+ selected text in *wrd.
+
+ Sets up a fresh source_parser on src, reads the first token with
+ sp_lex and hands over to sp_expr. Returns the result of sp_expr. */
+static int sp_parse(data1_node *n, RecWord *wrd, const char *src)
{
- struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh));
- gh->next = h->handlers;
- h->handlers = gh;
- gh->initFlag = 0;
- gh->clientData = 0;
- gh->type = t;
-}
-
-static void *grs_init(RecType recType)
-{
- struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h));
- h->handlers = 0;
-
- grs_add_handler (h, recTypeGrs_sgml);
- grs_add_handler (h, recTypeGrs_regx);
-#if HAVE_TCL_H
- grs_add_handler (h, recTypeGrs_tcl);
-#endif
- grs_add_handler (h, recTypeGrs_marc);
- grs_add_handler (h, recTypeGrs_marcxml);
-#if HAVE_EXPAT_H
- grs_add_handler (h, recTypeGrs_xml);
-#endif
-#if HAVE_PERL
- grs_add_handler (h, recTypeGrs_perl);
-#endif
- return h;
-}
-
-static void grs_destroy(void *clientData)
-{
- struct grs_handlers *h = (struct grs_handlers *) clientData;
- struct grs_handler *gh = h->handlers, *gh_next;
- while (gh)
- {
- gh_next = gh->next;
- if (gh->initFlag)
- (*gh->type->destroy)(gh->clientData);
- xfree (gh);
- gh = gh_next;
- }
- xfree (h);
+ struct source_parser sp;
+ sp.len = 0;
+ sp.tok = 0;
+ sp.src = src;
+ sp.lookahead = 0;
+ /* prime the lookahead so sp_expr starts on the first token */
+ sp_lex(&sp);
+
+ return sp_expr(&sp, n, wrd);
}
int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
size_t flen = 0;
data1_node *nn;
int termlist_only = 1;
+ data1_termlist *tl;
+ int xpdone = 0;
yaz_log(LOG_DEBUG, "index_xpath level=%d use=%d", level, use);
if ((!n->root->u.root.absyn) ||
case DATA1N_data:
wrd->string = n->u.data.data;
wrd->length = n->u.data.len;
- if (p->flagShowRecords)
- {
- printf("%*s XData:\"", (level + 1) * 4, "");
- for (i = 0; i<wrd->length && i < 8; i++)
- fputc (wrd->string[i], stdout);
- printf("\"\n");
- }
- else {
- data1_termlist *tl;
- int xpdone = 0;
- flen = 0;
-
- /* we have to fetch the whole path to the data tag */
- for (nn = n; nn; nn = nn->parent) {
- if (nn->which == DATA1N_tag) {
- size_t tlen = strlen(nn->u.tag.tag);
- if (tlen + flen > (sizeof(tag_path_full)-2)) return;
- memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
- flen += tlen;
- tag_path_full[flen++] = '/';
- }
- else if (nn->which == DATA1N_root) break;
- }
-
- tag_path_full[flen] = 0;
+ xpdone = 0;
+ flen = 0;
- /* If we have a matching termlist... */
- if (n->root->u.root.absyn && (tl = xpath_termlist_by_tagpath(tag_path_full, n))) {
- for (; tl; tl = tl->next) {
- wrd->reg_type = *tl->structure;
- /* this is the ! case, so structure is for the xpath index */
- if (!tl->att) {
- wrd->attrSet = VAL_IDXPATH;
- wrd->attrUse = use;
- (*p->tokenAdd)(wrd);
- xpdone = 1;
- } else {
- /* this is just the old fashioned attribute based index */
- wrd->attrSet = (int) (tl->att->parent->reference);
- wrd->attrUse = tl->att->locals->local;
- (*p->tokenAdd)(wrd);
- }
- }
- }
- /* xpath indexing is done, if there was no termlist given,
- or no ! in the termlist, and default indexing is enabled... */
- if ((!xpdone) && (!termlist_only)) {
- wrd->attrSet = VAL_IDXPATH;
- wrd->attrUse = use;
- wrd->reg_type = 'w';
- (*p->tokenAdd)(wrd);
- }
+ /* we have to fetch the whole path to the data tag */
+ for (nn = n; nn; nn = nn->parent) {
+ if (nn->which == DATA1N_tag) {
+ size_t tlen = strlen(nn->u.tag.tag);
+ if (tlen + flen > (sizeof(tag_path_full)-2)) return;
+ memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
+ flen += tlen;
+ tag_path_full[flen++] = '/';
+ }
+ else if (nn->which == DATA1N_root) break;
+ }
+
+ tag_path_full[flen] = 0;
+
+ /* If we have a matching termlist... */
+ if (n->root->u.root.absyn &&
+ (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
+ {
+ for (; tl; tl = tl->next)
+ {
+ /* need to copy recword because it may be changed */
+ RecWord wrd_tl;
+ wrd->reg_type = *tl->structure;
+ /* this is the ! case, so structure is for the xpath index */
+ memcpy (&wrd_tl, wrd, sizeof(*wrd));
+ if (tl->source)
+ sp_parse(n, &wrd_tl, tl->source);
+ if (!tl->att) {
+ wrd_tl.attrSet = VAL_IDXPATH;
+ wrd_tl.attrUse = use;
+ if (p->flagShowRecords)
+ {
+ int i;
+ printf("%*sXPath index", (level + 1) * 4, "");
+ printf (" XData:\"");
+ for (i = 0; i<wrd_tl.length && i < 40; i++)
+ fputc (wrd_tl.string[i], stdout);
+ fputc ('"', stdout);
+ if (wrd_tl.length > 40)
+ printf (" ...");
+ fputc ('\n', stdout);
+ }
+ else
+ (*p->tokenAdd)(&wrd_tl);
+ xpdone = 1;
+ } else {
+ /* this is just the old fashioned attribute based index */
+ wrd_tl.attrSet = (int) (tl->att->parent->reference);
+ wrd_tl.attrUse = tl->att->locals->local;
+ if (p->flagShowRecords)
+ {
+ int i;
+ printf("%*sIdx: [%s]", (level + 1) * 4, "",
+ tl->structure);
+ printf("%s:%s [%d] %s",
+ tl->att->parent->name,
+ tl->att->name, tl->att->value,
+ tl->source);
+ printf (" XData:\"");
+ for (i = 0; i<wrd_tl.length && i < 40; i++)
+ fputc (wrd_tl.string[i], stdout);
+ fputc ('"', stdout);
+ if (wrd_tl.length > 40)
+ printf (" ...");
+ fputc ('\n', stdout);
+ }
+ else
+ (*p->tokenAdd)(&wrd_tl);
+ }
+ }
+ }
+ /* xpath indexing is done, if there was no termlist given,
+ or no ! in the termlist, and default indexing is enabled... */
+ if (!p->flagShowRecords && !xpdone && !termlist_only)
+ {
+ wrd->attrSet = VAL_IDXPATH;
+ wrd->attrUse = use;
+ wrd->reg_type = 'w';
+ (*p->tokenAdd)(wrd);
}
break;
case DATA1N_tag:
for (; tlist; tlist = tlist->next)
{
-
- char xattr[512];
/* consider source */
wrd->string = 0;
+ assert(tlist->source);
+ sp_parse(n, wrd, tlist->source);
- if (!strcmp (tlist->source, "data") && n->which == DATA1N_data)
- {
- wrd->string = n->u.data.data;
- wrd->length = n->u.data.len;
- }
- else if (!strcmp (tlist->source, "tag") && n->which == DATA1N_tag)
- {
- wrd->string = n->u.tag.tag;
- wrd->length = strlen(n->u.tag.tag);
- }
- else if (sscanf (tlist->source, "attr(%511[^)])", xattr) == 1 &&
- n->which == DATA1N_tag)
- {
- data1_xattr *p = n->u.tag.attributes;
- while (p && strcmp (p->name, xattr))
- p = p->next;
- if (p)
- {
- wrd->string = p->value;
- wrd->length = strlen(p->value);
- }
- }
if (wrd->string)
{
if (p->flagShowRecords)
return dumpkeys(n, p, 0, &wrd);
}
-static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
- NMEM mem)
+static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
+ NMEM mem,
+ data1_node *(*grs_read)(struct grs_read_info *))
{
data1_node *n;
struct grs_read_info gri;
gri.offset = p->offset;
gri.mem = mem;
gri.dh = p->dh;
+ gri.clientData = clientData;
- if (read_grs_type (h, &gri, p->subType, &n))
- return RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER;
+ n = (*grs_read)(&gri);
if (!n)
return RECCTRL_EXTRACT_EOF;
oe.proto = PROTO_Z3950;
return RECCTRL_EXTRACT_OK;
}
-static int grs_extract(void *clientData, struct recExtractCtrl *p)
+/* Extract entry point: runs grs_extract_sub with a temporary NMEM.
+
+ clientData and the grs_read callback are passed through unchanged to
+ grs_extract_sub. The NMEM handle is created here and destroyed again
+ before returning.
+
+ Returns whatever grs_extract_sub returned. */
+int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
+ data1_node *(*grs_read)(struct grs_read_info *))
{
int ret;
NMEM mem = nmem_create ();
- struct grs_handlers *h = (struct grs_handlers *) clientData;
-
- ret = grs_extract_sub(h, p, mem);
+ ret = grs_extract_sub(clientData, p, mem, grs_read);
nmem_destroy(mem);
return ret;
}
data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
}
data1_mk_text (p->dh, mem, i4, n);
- data1_mk_tag_data_int (p->dh, n, "localnumber", p->localno, mem);
+ data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
if (p->fname)
{
data1_mk_text (p->dh, mem, i4, n);
data1_mk_text (p->dh, mem, i2, n);
}
-static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
+int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
+ data1_node *(*grs_read)(struct grs_read_info *))
{
data1_node *node = 0, *onode = 0, *top;
data1_node *dnew;
NMEM mem;
struct grs_read_info gri;
const char *tagname;
- struct grs_handlers *h = (struct grs_handlers *) clientData;
+
int requested_schema = VAL_NONE;
data1_marctab *marctab;
int dummy;
gri.offset = 0;
gri.mem = mem;
gri.dh = p->dh;
+ gri.clientData = clientData;
yaz_log(LOG_DEBUG, "grs_retrieve");
- if (read_grs_type (h, &gri, p->subType, &node))
- {
- p->diagnostic = 14;
- nmem_destroy (mem);
- return 0;
- }
+ node = (*grs_read)(&gri);
if (!node)
{
p->diagnostic = 14;
dnew->u.data.what = DATA1I_text;
dnew->u.data.data = dnew->lbuf;
- sprintf(dnew->u.data.data, "%d", p->localno);
+ sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
dnew->u.data.len = strlen(dnew->u.data.data);
}
+
+ if (p->input_format == VAL_TEXT_XML)
+ zebra_xml_metadata (p, top, mem);
+
#if 0
data1_pr_tree (p->dh, node, stdout);
#endif
p->input_format : VAL_SUTRS))
{
case VAL_TEXT_XML:
- zebra_xml_metadata (p, top, mem);
#if 0
data1_pr_tree (p->dh, node, stdout);
return 0;
}
-static struct recType grs_type =
-{
- "grs",
- grs_init,
- grs_destroy,
- grs_extract,
- grs_retrieve
-};
-
-RecType recTypeGrs = &grs_type;