-/* $Id: regxread.c,v 1.45 2002-08-19 21:11:27 adam Exp $
- Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+/* $Id: regxread.c,v 1.51 2004-08-04 08:35:25 adam Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
This file is part of the Zebra server.
logf (LOG_WARN, "regular expression error '%.*s'", s-s0, s0);
return -1;
}
+ /* debug trace; "%.*s" requires an int precision, but s-s0 is a ptrdiff_t */
+ if (debug_dfa_tran)
+ printf ("pattern: %.*s\n", (int) (s-s0), s0);
dfa_mkstate ((*ap)->u.pattern.dfa);
s++;
break;
if (spec->tcl_interp)
logf (LOG_LOG, "Tcl enabled");
#endif
+
+#if 0
+ debug_dfa_trav = 0;
+ debug_dfa_tran = 1;
+ debug_dfa_followpos = 0;
+ dfa_verbose = 1;
+#endif
+
lineBuf = wrbuf_alloc();
spec->lineNo = 0;
c = getc (spec_inf);
fclose (spec_inf);
wrbuf_free(lineBuf, 1);
-#if 0
- debug_dfa_trav = 1;
- debug_dfa_tran = 1;
- debug_dfa_followpos = 1;
- dfa_verbose = 1;
-#endif
for (lc = spec->context; lc; lc = lc->next)
{
struct lexRule *rp;
if (elen == 0) /* shouldn't happen, but it does! */
return ;
#if REGX_DEBUG
- if (elen > 40)
- logf (LOG_LOG, "data (%d bytes) %.15s ... %.*s", elen,
- ebuf, 15, ebuf + elen-15);
+ if (elen > 80)
+ logf (LOG_LOG, "data(%d bytes) %.40s ... %.*s", elen,
+ ebuf, 40, ebuf + elen-40);
+ else if (elen == 1 && ebuf[0] == '\n')
+ {
+ logf (LOG_LOG, "data(new line)");
+ }
else if (elen > 0)
- logf (LOG_LOG, "data (%d bytes) %.*s", elen, elen, ebuf);
+ logf (LOG_LOG, "data(%d bytes) %.*s", elen, elen, ebuf);
else
- logf (LOG_LOG, "data (%d bytes)", elen);
+ logf (LOG_LOG, "data(%d bytes)", elen);
#endif
if (spec->d1_level <= 1)
ttype[type_len] = '\0';
#if REGX_DEBUG
- logf (LOG_LOG, "variant begin %s %s (%d)", tclass, ttype,
+ logf (LOG_LOG, "variant begin(%s,%s,%d)", tclass, ttype,
spec->d1_level);
#endif
}
#if REGX_DEBUG
- logf (LOG_LOG, "variant node (%d)", spec->d1_level);
+ logf (LOG_LOG, "variant node(%d)", spec->d1_level);
#endif
parent = spec->d1_stack[spec->d1_level-1];
res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
tagDataRelease (spec);
#if REGX_DEBUG
- logf (LOG_LOG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level);
+ logf (LOG_LOG, "begin tag(%.*s, %d)", len, tag, spec->d1_level);
#endif
spec->d1_stack[spec->d1_level] = data1_mk_tag_n (
break;
}
#if REGX_DEBUG
- logf (LOG_LOG, "end tag (%d)", spec->d1_level);
+ logf (LOG_LOG, "end tag(%d)", spec->d1_level);
#endif
}
static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
- struct DFA *dfa)
+ struct DFA *dfa, int greedy)
{
struct DFA_state *state = dfa->states[0];
struct DFA_tran *t;
- unsigned char c;
+ unsigned char c = 0;
unsigned char c_prev = 0;
int ptr = *pptr; /* current pointer */
int start_ptr = *pptr; /* first char of match */
int last_ptr = 0; /* last char of match */
int last_rule = 0; /* rule number of current match */
+ int restore_ptr = 0;
int i;
+ if (ptr)
+ {
+ --ptr;
+ c = f_win_advance (spec, &ptr);
+ }
while (1)
{
+ if (dfa->states[0] == state)
+ {
+ c_prev = c;
+ restore_ptr = ptr;
+ }
c = f_win_advance (spec, &ptr);
+
if (ptr == F_WIN_EOF)
{
if (last_rule)
}
break;
}
+
t = state->trans;
i = state->tran_no;
while (1)
- if (--i < 0)
+ if (--i < 0) /* no transition for character c */
{
if (last_rule)
{
return 1;
}
state = dfa->states[0];
+
+ ptr = restore_ptr;
+ c = f_win_advance (spec, &ptr);
+
start_ptr = ptr;
- c_prev = c;
+
break;
}
else if (c >= t->ch[0] && c <= t->ch[1])
{
state = dfa->states[t->to];
- if (state->rule_no)
- {
- if (c_prev == '\n')
- {
- last_rule = state->rule_no;
- last_ptr = ptr;
- }
- else
- {
- last_rule = state->rule_nno;
- last_ptr = ptr;
- }
- }
- break;
+ if (state->rule_no && c_prev == '\n')
+ {
+ last_rule = state->rule_no;
+ last_ptr = ptr;
+ }
+ else if (state->rule_nno)
+ {
+ last_rule = state->rule_nno;
+ last_ptr = ptr;
+ }
+ break;
}
else
t++;
#if HAVE_TCL_H
static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
- int argc, char **argv)
+ int argc, const char **argv)
{
struct lexSpec *spec = (struct lexSpec *) clientData;
if (argc < 2)
return TCL_ERROR;
if (!strcmp(argv[1], "record") && argc == 3)
{
- char *absynName = argv[2];
+ const char *absynName = argv[2];
data1_node *res;
#if REGX_DEBUG
#endif
res = data1_mk_root (spec->dh, spec->m, absynName);
+ spec->d1_level = 0;
+
spec->d1_stack[spec->d1_level++] = res;
res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
}
static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
- int argc, char **argv)
+ int argc, const char **argv)
{
struct lexSpec *spec = (struct lexSpec *) clientData;
if (argc < 2)
}
else if (!strcmp (argv[1], "element"))
{
- int min_level = 1;
- char *element = 0;
+ int min_level = 2;
+ const char *element = 0;
if (argc >= 3 && !strcmp(argv[2], "-record"))
{
min_level = 0;
if (argc == 3)
element = argv[2];
tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
- if (spec->d1_level == 0)
+ if (spec->d1_level <= 1)
{
#if REGX_DEBUG
logf (LOG_LOG, "end element end records");
}
static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
- int argc, char **argv)
+ int argc, const char **argv)
{
int argi = 1;
int textFlag = 0;
argi++;
}
if (element)
- tagEnd (spec, 1, NULL, 0);
+ tagEnd (spec, 2, NULL, 0);
return TCL_OK;
}
static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
- int argc, char **argv)
+ int argc, const char **argv)
{
struct lexSpec *spec = (struct lexSpec *) clientData;
int argi = 1;
r = execTok (spec, &s, &cmd_str, &cmd_len);
if (r < 2)
continue;
- if (spec->d1_level == 0)
+ if (spec->d1_level <= 1)
{
static char absynName[64];
data1_node *res;
#endif
res = data1_mk_root (spec->dh, spec->m, absynName);
+ spec->d1_level = 0;
+
spec->d1_stack[spec->d1_level++] = res;
res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
}
else if (!strcmp (p, "element"))
{
- int min_level = 1;
+ int min_level = 2;
while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
{
if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))
}
else
tagEnd (spec, min_level, NULL, 0);
- if (spec->d1_level == 0)
+ if (spec->d1_level <= 1)
{
#if REGX_DEBUG
logf (LOG_LOG, "end element end records");
r = execTok (spec, &s, &cmd_str, &cmd_len);
} while (r > 1);
if (element_str)
- tagEnd (spec, 1, NULL, 0);
+ tagEnd (spec, 2, NULL, 0);
}
else if (!strcmp (p, "unread"))
{
if (ap->u.pattern.body)
{
arg_start[arg_no] = *pptr;
- if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
+ if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 0))
{
arg_end[arg_no] = F_WIN_EOF;
arg_no++;
arg_start[arg_no] = F_WIN_EOF;
arg_end[arg_no] = F_WIN_EOF;
-/* return 1*/
+ yaz_log(LOG_DEBUG, "Pattern match rest of record");
+ *pptr = F_WIN_EOF;
}
else
{
else
{
arg_start[arg_no] = *pptr;
- if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
+ if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 1))
return 1;
if (sptr != arg_start[arg_no])
return 1;