-/*
- * Copyright (c) 1995-2002, Index Data.
- * See the file LICENSE for details.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Id: d1_absyn.c,v 1.1 2002-10-22 12:53:33 adam Exp $
- */
+/* $Id: d1_absyn.c,v 1.10 2004-08-04 08:35:22 adam Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+ Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
#include <stdio.h>
#include <assert.h>
#include <yaz/oid.h>
#include <yaz/log.h>
#include <data1.h>
+#include <zebra_xpath.h>
#define D1_MAX_NESTING 128
+struct data1_systag { /* one "systag" directive of an abstract syntax; entries form a singly linked list */
+ char *name; /* first argument of the directive; the key compared by data1_systag_lookup */
+ char *value; /* second argument of the directive; what data1_systag_lookup returns */
+ struct data1_systag *next; /* following entry, or 0 at the end of the list */
+};
+
struct data1_absyn_cache_info
{
char *name;
while (p)
{
- if (!strcmp (name, p->name))
+ if (!yaz_matchstr (name, p->name))
return p->absyn;
p = p->next;
}
return NULL;
}
+/* *ostrich*
+   Walk every abstract syntax held in the cache of dh and call dfa_delete
+   on the DFA of each of its xp_element (xelm) definitions.
+   pop, 2002-12-13
+*/
+
+void data1_absyn_destroy (data1_handle dh)
+{
+    data1_absyn_cache entry;
+
+    for (entry = *data1_absyn_cache_get (dh); entry; entry = entry->next)
+    {
+        data1_xpelement *xelm;
+
+        /* cache entries without an abstract syntax have nothing to free */
+        if (!entry->absyn)
+            continue;
+
+        for (xelm = entry->absyn->xp_elements; xelm; xelm = xelm->next)
+        {
+            logf (LOG_DEBUG,"Destroy xp element %s",xelm->xpath_expr);
+            if (xelm->dfa)
+                dfa_delete (&xelm->dfa);
+        }
+    }
+}
+
void data1_absyn_trav (data1_handle dh, void *handle,
void (*fh)(data1_handle dh, void *h, data1_absyn *a))
while (p)
{
- if (!strcmp (name, p->name))
+ if (!yaz_matchstr (name, p->name))
return p->attset;
p = p->next;
}
}
}
}
+/* *ostrich*
+
+   New function, a bit dummy now... I've seen it in zrpn.c... We should build
+   more clever regexps...
+
+   Turns the XPath-like expression of an xelm directive into a regular
+   expression.  The location steps are written in reverse order, each one
+   followed by '/':
+
+   //a    ->    ^a/.*$
+   //a/b  ->    ^b/a/.*$
+   /a     ->    ^a/$
+   /a/b   ->    ^b/a/$
+
+   /      ->    ^$   (no steps)
+
+   pop, 2002-12-13
+
+   Now [] predicates are supported: '[', ']' and everything between them
+   is left out of the regexp.
+
+   pop, 2003-01-17
+
+   A '*' inside a step is written as ".*".  An expression that does not
+   start with '/' yields "".  In all other cases the returned string is
+   allocated from the NMEM of dh.  Both working buffers are sized from the
+   length of expr, so neither the number of steps nor the length of the
+   result is limited, and no static storage is used.
+ */
+
+const char * mk_xpath_regexp (data1_handle dh, char *expr)
+{
+    const char *p = expr;
+    const char *seg;
+    const char *seg_end;
+    const char *q;
+    char *segs;             /* filtered steps in input order, each NUL-terminated */
+    char *s;
+    char *res;
+    char *r;
+    size_t len;
+    size_t nsteps = 0;
+    int absolute = 1;       /* 0 when the expression starts with "//" */
+    int is_predicate = 0;
+
+    if (*p != '/') { return (""); }
+    p++;
+    if (*p == '/') { absolute = 0; p++; }
+
+    /* Every input character produces at most two output characters
+       ('*' becomes ".*") and every step consumes at least one input
+       character, so 3 * len bytes hold all steps and their terminators. */
+    len = strlen (p);
+    segs = (char *) nmem_malloc (data1_nmem_get (dh), 3 * len + 1);
+    s = segs;
+
+    while (*p) {
+        /* the predicate state deliberately carries over from one step to
+           the next, exactly as the characters are met in expr */
+        for (; *p && *p != '/'; p++) {
+            if (*p == '[') {
+                is_predicate = 1;
+            }
+            else if (*p == ']') {
+                is_predicate = 0;
+            }
+            else if (!is_predicate) {
+                if (*p == '*')
+                    *s++ = '.';
+                *s++ = *p;
+            }
+        }
+        *s++ = '\0';
+        nsteps++;
+        if (*p) { p++; }
+    }
+
+    /* '^' + steps (at most 2 * len) + one '/' per step (at most len)
+       + ".*" + '$' + NUL */
+    res = (char *) nmem_malloc (data1_nmem_get (dh), 3 * len + 5);
+    r = res;
+    *r++ = '^';
+
+    /* emit the steps last-to-first by walking the NUL separators backwards */
+    seg_end = s;
+    while (nsteps > 0) {
+        seg_end--;                      /* the NUL closing this step */
+        seg = seg_end;
+        while (seg > segs && seg[-1] != '\0')
+            seg--;
+        for (q = seg; q < seg_end; q++)
+            *r++ = *q;
+        *r++ = '/';
+        seg_end = seg;
+        nsteps--;
+    }
+    if (!absolute) { *r++ = '.'; *r++ = '*'; }
+    *r++ = '$';
+    *r = '\0';
+
+    yaz_log(LOG_DEBUG,"Got regexp: %s",res);
+    return (res);
+}
+
+/* *ostrich*
+
+ Added the xpelement argument. It is 1 when called from xelm context,
+ meaning that '!' stands for the xpath expression rather than for the
+ element name used as attribute name.
+
+ pop, 2002-12-13
+ */
static int parse_termlists (data1_handle dh, data1_termlist ***tpp,
char *p, const char *file, int lineno,
- const char *element_name, data1_absyn *res)
+ const char *element_name, data1_absyn *res,
+ int xpelement)
{
data1_termlist **tp = *tpp;
do
file, lineno, p);
return -1;
}
- if (*attname == '!')
- strcpy(attname, element_name);
+
*tp = (data1_termlist *)
- nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
+ nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
(*tp)->next = 0;
+
+ if (!xpelement) {
+ if (*attname == '!')
+ strcpy(attname, element_name);
+ }
if (!((*tp)->att = data1_getattbyname(dh, res->attset,
- attname)))
- {
- yaz_log(LOG_WARN,
- "%s:%d: Couldn't find att '%s' in attset",
- file, lineno, attname);
- return -1;
+ attname))) {
+ if ((!xpelement) || (*attname != '!')) {
+ yaz_log(LOG_WARN,
+ "%s:%d: Couldn't find att '%s' in attset",
+ file, lineno, attname);
+ return -1;
+ } else {
+ (*tp)->att = 0;
+ }
}
+
if (r == 2 && (source = strchr(structure, ':')))
*source++ = '\0'; /* cut off structure .. */
else
return 0;
}
+/* Return the value of the first systag of absyn whose name is equal to tag
+   (compared with strcmp), or default_value when no systag matches. */
+const char *data1_systag_lookup(data1_absyn *absyn, const char *tag,
+                                const char *default_value)
+{
+    struct data1_systag *cur = absyn->systags;
+
+    while (cur)
+    {
+        if (strcmp(cur->name, tag) == 0)
+            return cur->value;
+        cur = cur->next;
+    }
+    return default_value;
+}
+
+#define l_isspace(c) ((c) == '\t' || (c) == ' ' || (c) == '\n' || (c) == '\r')
+
+/* Read the next significant line from f and split it into tokens.
+
+   Lines that are empty, hold only white space, or whose first non-blank
+   character is '#' are skipped; *lineno is incremented for every line
+   read, skipped or not.  The line is split in place inside line (at most
+   len bytes, as read by fgets): tokens are separated by white space,
+   except that white space between double quotes or between '[' and ']'
+   does not separate.  The quote and bracket characters stay part of the
+   token.  A '#' at the start of a token begins a trailing comment.
+
+   Pointers to the tokens are stored in argv.  At most num tokens are
+   stored; anything after the num'th token is left unsplit and is not
+   reported, so argv is never written past its end.
+
+   Returns the number of tokens stored, or 0 at end of file. */
+int read_absyn_line(FILE *f, int *lineno, char *line, int len,
+                    char *argv[], int num)
+{
+    char *p;
+    int argc;
+    int quoted = 0;
+
+    while ((p = fgets(line, len, f)))
+    {
+        (*lineno)++;
+        while (*p && l_isspace(*p))
+            p++;
+        if (*p && *p != '#')
+            break;
+    }
+    if (!p)
+        return 0;
+
+    for (argc = 0; *p && argc < num; argc++)
+    {
+        if (*p == '#')  /* trailing comment */
+            break;
+        argv[argc] = p;
+        while (*p && !(l_isspace(*p) && !quoted)) {
+            if (*p =='"') quoted = 1 - quoted;
+            if (*p =='[') quoted = 1;
+            if (*p ==']') quoted = 0;
+            p++;
+        }
+        if (*p)
+        {
+            *(p++) = '\0';
+            while (*p && l_isspace(*p))
+                p++;
+        }
+    }
+    return argc;
+}
+
+
data1_absyn *data1_read_absyn (data1_handle dh, const char *file,
int file_must_exist)
{
data1_sub_elements *cur_elements = NULL;
+ data1_xpelement *cur_xpelement = NULL;
+
data1_absyn *res = 0;
FILE *f;
data1_element **ppl[D1_MAX_NESTING];
data1_termlist *all = 0;
data1_attset_child **attset_childp;
data1_tagset **tagset_childp;
+ struct data1_systag **systagsp;
int level = 0;
int lineno = 0;
int argc;
res->tagset = 0;
res->encoding = 0;
res->enable_xpath_indexing = (f ? 0 : 1);
+ res->systags = 0;
+ systagsp = &res->systags;
tagset_childp = &res->tagset;
res->attset = data1_empty_attset (dh);
maptabp = &res->maptabs;
res->marc = 0;
marcp = &res->marc;
-
res->sub_elements = NULL;
res->main_elements = NULL;
+ res->xp_elements = NULL;
- while (f && (argc = readconf_line(f, &lineno, line, 512, argv, 50)))
+ while (f && (argc = read_absyn_line(f, &lineno, line, 512, argv, 50)))
{
char *cmd = *argv;
if (!strcmp(cmd, "elm") || !strcmp(cmd, "element"))
{
assert (res->attset);
- if (parse_termlists (dh, &tp, p, file, lineno, name, res))
+ if (parse_termlists (dh, &tp, p, file, lineno, name, res, 0))
{
fclose (f);
return 0;
}
new_element->name = nmem_strdup(data1_nmem_get (dh), name);
}
+ /* *ostrich*
+ New code to support xelm directive
+ for each xelm a dfa is built. xelms are stored in res->xp_elements
+
+ maybe we should use a simple sscanf instead of dfa?
+
+ pop, 2002-12-13
+
+ Now [] predicates are supported. regexps and xpath structure is
+ a bit redundant, however it's comfortable later...
+
+ pop, 2003-01-17
+ */
+
+ else if (!strcmp(cmd, "xelm")) {
+
+ int i;
+ char *p, *xpath_expr, *termlists;
+ const char *regexp;
+ struct DFA *dfa = dfa = dfa_init();
+ data1_termlist **tp;
+
+ if (argc < 3)
+ {
+ yaz_log(LOG_WARN, "%s:%d: Bad # of args to xelm", file, lineno);
+ continue;
+ }
+ xpath_expr = argv[1];
+ termlists = argv[2];
+ regexp = mk_xpath_regexp(dh, xpath_expr);
+ i = dfa_parse (dfa, &regexp);
+ if (i || *regexp) {
+ yaz_log(LOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno);
+ dfa_delete (&dfa);
+ continue;
+ }
+
+ if (!cur_xpelement)
+ {
+ cur_xpelement = (data1_xpelement *)
+ nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement));
+ res->xp_elements = cur_xpelement;
+ } else {
+ cur_xpelement->next = (data1_xpelement *)
+ nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement));
+ cur_xpelement = cur_xpelement->next;
+ }
+ cur_xpelement->next = NULL;
+ cur_xpelement->xpath_expr = nmem_strdup(data1_nmem_get (dh),
+ xpath_expr);
+
+ dfa_mkstate (dfa);
+ cur_xpelement->dfa = dfa;
+
+#ifdef ENHANCED_XELM
+ cur_xpelement->xpath_len =
+ zebra_parse_xpath_str(xpath_expr,
+ cur_xpelement->xpath, XPATH_STEP_COUNT,
+ data1_nmem_get(dh));
+
+ /*
+ dump_xp_steps(cur_xpelement->xpath,cur_xpelement->xpath_len);
+ */
+#endif
+ cur_xpelement->termlists = 0;
+ tp = &cur_xpelement->termlists;
+
+ /* parse termList definitions */
+ p = termlists;
+ if (*p != '-')
+ {
+ assert (res->attset);
+
+ if (parse_termlists (dh, &tp, p, file, lineno,
+ xpath_expr, res, 1))
+ {
+ fclose (f);
+ return 0;
+ }
+ *tp = all; /* append any ALL entries to the list */
+ }
+ }
else if (!strcmp(cmd, "section"))
{
char *name;
if (argc < 2)
{
yaz_log(LOG_WARN, "%s:%d: Bad # of args to section",
- file, lineno);
+ file, lineno);
continue;
}
name = argv[1];
file, lineno);
continue;
}
- if (parse_termlists (dh, &tp, argv[1], file, lineno, 0, res))
+ if (parse_termlists (dh, &tp, argv[1], file, lineno, 0, res, 0))
{
fclose (f);
return 0;
}
res->encoding = nmem_strdup (data1_nmem_get(dh), argv[1]);
}
+ else if (!strcmp(cmd, "systag"))
+ {
+ if (argc != 3)
+ {
+ yaz_log(LOG_WARN, "%s:%d: Bad # or args for systag",
+ file, lineno);
+ continue;
+ }
+ *systagsp = nmem_malloc (data1_nmem_get(dh), sizeof(**systagsp));
+
+ (*systagsp)->name = nmem_strdup(data1_nmem_get(dh), argv[1]);
+ (*systagsp)->value = nmem_strdup(data1_nmem_get(dh), argv[2]);
+ systagsp = &(*systagsp)->next;
+ }
else
{
yaz_log(LOG_WARN, "%s:%d: Unknown directive '%s'", file,
res->main_elements = cur_elements->elements;
fix_element_ref (dh, res, cur_elements->elements);
}
+ *systagsp = 0;
yaz_log (LOG_DEBUG, "%s: data1_read_absyn end", file);
return res;
}