-/*
- * Copyright (C) 1994-2002, Index Data
- * All rights reserved.
- *
- * $Id: regxread.c,v 1.40 2002-05-03 13:50:25 adam Exp $
- */
+/* $Id: regxread.c,v 1.50 2004-05-25 12:13:15 adam Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
+ Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+
#include <stdio.h>
#include <assert.h>
#include <string.h>
logf (LOG_WARN, "regular expression error '%.*s'", s-s0, s0);
return -1;
}
+ if (debug_dfa_tran)
+ printf ("pattern: %.*s\n", s-s0, s0);
dfa_mkstate ((*ap)->u.pattern.dfa);
s++;
break;
if (spec->tcl_interp)
logf (LOG_LOG, "Tcl enabled");
#endif
+
+#if 0
+ debug_dfa_trav = 0;
+ debug_dfa_tran = 1;
+ debug_dfa_followpos = 0;
+ dfa_verbose = 1;
+#endif
+
lineBuf = wrbuf_alloc();
spec->lineNo = 0;
c = getc (spec_inf);
fclose (spec_inf);
wrbuf_free(lineBuf, 1);
-#if 0
- debug_dfa_trav = 1;
- debug_dfa_tran = 1;
- debug_dfa_followpos = 1;
- dfa_verbose = 1;
-#endif
for (lc = spec->context; lc; lc = lc->next)
{
struct lexRule *rp;
if (elen == 0) /* shouldn't happen, but it does! */
return ;
#if REGX_DEBUG
- if (elen > 40)
- logf (LOG_LOG, "data (%d bytes) %.15s ... %.*s", elen,
- ebuf, 15, ebuf + elen-15);
+ if (elen > 80)
+ logf (LOG_LOG, "data(%d bytes) %.40s ... %.*s", elen,
+ ebuf, 40, ebuf + elen-40);
+ else if (elen == 1 && ebuf[0] == '\n')
+ {
+ logf (LOG_LOG, "data(new line)");
+ }
else if (elen > 0)
- logf (LOG_LOG, "data (%d bytes) %.*s", elen, elen, ebuf);
+ logf (LOG_LOG, "data(%d bytes) %.*s", elen, elen, ebuf);
else
- logf (LOG_LOG, "data (%d bytes)", elen);
+ logf (LOG_LOG, "data(%d bytes)", elen);
#endif
if (spec->d1_level <= 1)
{
org_len = 0;
- res = data1_mk_node (spec->dh, spec->m, DATA1N_data, parent);
+ res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent);
res->u.data.what = DATA1I_text;
res->u.data.len = 0;
res->u.data.formatted_text = formatted_text;
ttype[type_len] = '\0';
#if REGX_DEBUG
- logf (LOG_LOG, "variant begin %s %s (%d)", tclass, ttype,
+ logf (LOG_LOG, "variant begin(%s,%s,%d)", tclass, ttype,
spec->d1_level);
#endif
if (parent->which != DATA1N_variant)
{
- res = data1_mk_node (spec->dh, spec->m, DATA1N_variant, parent);
+ res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
if (spec->d1_stack[spec->d1_level])
tagDataRelease (spec);
spec->d1_stack[spec->d1_level] = res;
}
#if REGX_DEBUG
- logf (LOG_LOG, "variant node (%d)", spec->d1_level);
+ logf (LOG_LOG, "variant node(%d)", spec->d1_level);
#endif
parent = spec->d1_stack[spec->d1_level-1];
- res = data1_mk_node (spec->dh, spec->m, DATA1N_variant, parent);
+ res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
res->u.variant.type = tp;
if (value_len >= DATA1_LOCALDATA)
/*
 * tagBegin: open a new tag element at the current nesting level.
 *
 * spec - lexer state; this function reads and updates spec->d1_stack
 *        and spec->d1_level.
 * tag  - tag name text; within this function it is always used together
 *        with len (see the %.*s debug format below), never as a
 *        NUL-terminated string.
 * len  - number of bytes of tag to use.
 *
 * Returns nothing.  When spec->d1_level is 0 a warning is logged and
 * no node is created.
 *
 * Change carried by the added (+) lines: the hand-written node
 * construction and element lookup on the removed (-) lines is replaced
 * by a single call to data1_mk_tag_n, and a node already present in the
 * current stack slot is handed to tagDataRelease before the new node is
 * created rather than after.  The removed code also returned early,
 * without touching the stack, when the parent node was a DATA1N_variant;
 * the added code has no such early return.
 */
static void tagBegin (struct lexSpec *spec,
const char *tag, int len)
{
- struct data1_node *parent;
- data1_element *elem = NULL;
- data1_node *partag;
- data1_node *res;
- data1_element *e = NULL;
- int localtag = 0;
-
if (spec->d1_level == 0) /* level 0: warn and leave without creating a node */
{
logf (LOG_WARN, "in element begin. No record type defined");
return ;
}
tagStrip (&tag, &len); /* tag and len are passed by address, so both may be adjusted; presumably trims the name - verify against tagStrip */
+ if (spec->d1_stack[spec->d1_level]) /* current slot still holds a node */
+ tagDataRelease (spec);
- parent = spec->d1_stack[spec->d1_level -1];
- partag = get_parent_tag(spec->dh, parent);
-
- res = data1_mk_node (spec->dh, spec->m, DATA1N_tag, parent);
-
- if (len >= DATA1_LOCALDATA)
- res->u.tag.tag = (char *) nmem_malloc (spec->m, len+1);
- else
- res->u.tag.tag = res->lbuf;
-
- memcpy (res->u.tag.tag, tag, len);
- res->u.tag.tag[len] = '\0';
-
#if REGX_DEBUG
- logf (LOG_LOG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level);
+ logf (LOG_LOG, "begin tag(%.*s, %d)", len, tag, spec->d1_level);
#endif
- if (parent->which == DATA1N_variant)
- return ;
- if (partag)
- if (!(e = partag->u.tag.element))
- localtag = 1;
-
- elem = data1_getelementbytagname (spec->dh,
- spec->d1_stack[0]->u.root.absyn,
- e, res->u.tag.tag);
- res->u.tag.element = elem;
- if (spec->d1_stack[spec->d1_level])
- tagDataRelease (spec);
- spec->d1_stack[spec->d1_level] = res;
+ spec->d1_stack[spec->d1_level] = data1_mk_tag_n (
+ spec->dh, spec->m, tag, len, 0, spec->d1_stack[spec->d1_level -1]); /* last argument is the node one level up; presumably the parent, as the removed code used the same slot as parent - TODO confirm against data1_mk_tag_n */
spec->d1_stack[++(spec->d1_level)] = NULL; /* go one level deeper and clear the new top slot */
}
break;
}
#if REGX_DEBUG
- logf (LOG_LOG, "end tag (%d)", spec->d1_level);
+ logf (LOG_LOG, "end tag(%d)", spec->d1_level);
#endif
}
static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
- struct DFA *dfa)
+ struct DFA *dfa, int greedy)
{
struct DFA_state *state = dfa->states[0];
struct DFA_tran *t;
- unsigned char c;
+ unsigned char c = 0;
unsigned char c_prev = 0;
int ptr = *pptr; /* current pointer */
int start_ptr = *pptr; /* first char of match */
int last_ptr = 0; /* last char of match */
int last_rule = 0; /* rule number of current match */
+ int restore_ptr = 0;
int i;
+ if (ptr)
+ {
+ --ptr;
+ c = f_win_advance (spec, &ptr);
+ }
while (1)
{
+ if (dfa->states[0] == state)
+ {
+ c_prev = c;
+ restore_ptr = ptr;
+ }
c = f_win_advance (spec, &ptr);
+
if (ptr == F_WIN_EOF)
{
if (last_rule)
}
break;
}
+
t = state->trans;
i = state->tran_no;
while (1)
- if (--i < 0)
+ if (--i < 0) /* no transition for character c */
{
if (last_rule)
{
return 1;
}
state = dfa->states[0];
+
+ ptr = restore_ptr;
+ c = f_win_advance (spec, &ptr);
+
start_ptr = ptr;
- c_prev = c;
+
break;
}
else if (c >= t->ch[0] && c <= t->ch[1])
{
state = dfa->states[t->to];
- if (state->rule_no)
- {
- if (c_prev == '\n')
- {
- last_rule = state->rule_no;
- last_ptr = ptr;
- }
- else
- {
- last_rule = state->rule_nno;
- last_ptr = ptr;
- }
- }
- break;
+ if (state->rule_no && c_prev == '\n')
+ {
+ last_rule = state->rule_no;
+ last_ptr = ptr;
+ }
+ else if (state->rule_nno)
+ {
+ last_rule = state->rule_nno;
+ last_ptr = ptr;
+ }
+ break;
}
else
t++;
if (!strcmp(argv[1], "record") && argc == 3)
{
char *absynName = argv[2];
- data1_absyn *absyn;
data1_node *res;
#if REGX_DEBUG
logf (LOG_LOG, "begin record %s", absynName);
#endif
- absyn = data1_get_absyn (spec->dh, absynName);
-
- res = data1_mk_node (spec->dh, spec->m);
- res->which = DATA1N_root;
- res->u.root.type =
- data1_insert_string(spec->dh, res, spec->m, absynName);
- res->u.root.absyn = absyn;
- res->root = res;
+ res = data1_mk_root (spec->dh, spec->m, absynName);
- spec->d1_stack[spec->d1_level] = res;
- spec->d1_stack[++(spec->d1_level)] = NULL;
+ spec->d1_level = 0;
+
+ spec->d1_stack[spec->d1_level++] = res;
+
+ res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
+
+ spec->d1_stack[spec->d1_level++] = res;
+
+ spec->d1_stack[spec->d1_level] = NULL;
}
else if (!strcmp(argv[1], "element") && argc == 3)
{
}
else if (!strcmp (argv[1], "element"))
{
- int min_level = 1;
+ int min_level = 2;
char *element = 0;
if (argc >= 3 && !strcmp(argv[2], "-record"))
{
if (argc == 3)
element = argv[2];
tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
- if (spec->d1_level == 0)
+ if (spec->d1_level <= 1)
{
#if REGX_DEBUG
logf (LOG_LOG, "end element end records");
argi++;
}
if (element)
- tagEnd (spec, 1, NULL, 0);
+ tagEnd (spec, 2, NULL, 0);
return TCL_OK;
}
r = execTok (spec, &s, &cmd_str, &cmd_len);
if (r < 2)
continue;
- if (spec->d1_level == 0)
+ if (spec->d1_level <= 1)
{
static char absynName[64];
- data1_absyn *absyn;
data1_node *res;
if (cmd_len > 63)
cmd_len = 63;
memcpy (absynName, cmd_str, cmd_len);
absynName[cmd_len] = '\0';
-
#if REGX_DEBUG
logf (LOG_LOG, "begin record %s", absynName);
#endif
- absyn = data1_get_absyn (spec->dh, absynName);
-
- res = data1_mk_node (spec->dh, spec->m, DATA1N_root, 0);
- res->u.root.type = absynName;
- res->u.root.absyn = absyn;
+ res = data1_mk_root (spec->dh, spec->m, absynName);
- spec->d1_stack[spec->d1_level] = res;
- spec->d1_stack[++(spec->d1_level)] = NULL;
+ spec->d1_level = 0;
+
+ spec->d1_stack[spec->d1_level++] = res;
+
+ res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
+
+ spec->d1_stack[spec->d1_level++] = res;
+
+ spec->d1_stack[spec->d1_level] = NULL;
}
r = execTok (spec, &s, &cmd_str, &cmd_len);
}
}
else if (!strcmp (p, "element"))
{
- int min_level = 1;
+ int min_level = 2;
while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
{
if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))
}
else
tagEnd (spec, min_level, NULL, 0);
- if (spec->d1_level == 0)
+ if (spec->d1_level <= 1)
{
#if REGX_DEBUG
logf (LOG_LOG, "end element end records");
r = execTok (spec, &s, &cmd_str, &cmd_len);
} while (r > 1);
if (element_str)
- tagEnd (spec, 1, NULL, 0);
+ tagEnd (spec, 2, NULL, 0);
}
else if (!strcmp (p, "unread"))
{
if (ap->u.pattern.body)
{
arg_start[arg_no] = *pptr;
- if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
+ if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 0))
{
arg_end[arg_no] = F_WIN_EOF;
arg_no++;
arg_start[arg_no] = F_WIN_EOF;
arg_end[arg_no] = F_WIN_EOF;
-/* return 1*/
+ yaz_log(LOG_DEBUG, "Pattern match rest of record");
+ *pptr = F_WIN_EOF;
}
else
{
else
{
arg_start[arg_no] = *pptr;
- if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
+ if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 1))
return 1;
if (sptr != arg_start[arg_no])
return 1;