/*
- * Copyright (C) 1994-1996, Index Data I/S
+ * Copyright (C) 1994-1998, Index Data
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: regxread.c,v $
- * Revision 1.9 1997-09-29 09:02:49 adam
+ * Revision 1.22 1998-11-03 16:07:13 adam
+ * Yet another fix.
+ *
+ * Revision 1.21 1998/11/03 15:43:39 adam
+ * Fixed bug introduced by previous commit.
+ *
+ * Revision 1.20 1998/11/03 14:51:28 adam
+ * Changed code so that it creates as few data1 nodes as possible.
+ *
+ * Revision 1.19 1998/11/03 10:22:39 adam
+ * Fixed memory leak that could occur for when large data1 node were
+ * concatenated. Data-type data1_nodes may have multiple nodes.
+ *
+ * Revision 1.18 1998/10/15 13:11:47 adam
+ * Added support for option -record for "end element". When specified
+ * end element will mark end-of-record when at outer-level.
+ *
+ * Revision 1.17 1998/07/01 10:13:51 adam
+ * Minor fix.
+ *
+ * Revision 1.16 1998/06/30 15:15:09 adam
+ * Tags are trimmed: white space removed before- and after the tag.
+ *
+ * Revision 1.15 1998/06/30 12:55:45 adam
+ * Bug fix.
+ *
+ * Revision 1.14 1998/03/05 08:41:00 adam
+ * Implemented rule contexts.
+ *
+ * Revision 1.13 1997/12/12 06:33:58 adam
+ * Fixed bug that showed up when multiple filter where used.
+ * Made one routine thread-safe.
+ *
+ * Revision 1.12 1997/11/18 10:03:24 adam
+ * Member num_children removed from data1_node.
+ *
+ * Revision 1.11 1997/11/06 11:41:01 adam
+ * Implemented "begin variant" for the sgml.regx filter.
+ *
+ * Revision 1.10 1997/10/31 12:36:12 adam
+ * Minor change that avoids compiler warning.
+ *
+ * Revision 1.9 1997/09/29 09:02:49 adam
* Fixed small bug (introduced by previous commit).
*
* Revision 1.8 1997/09/17 12:19:22 adam
#include <stdio.h>
#include <assert.h>
#include <string.h>
+#include <ctype.h>
#include <tpath.h>
#include <zebrautl.h>
#define REGX_BEGIN 3
#define REGX_END 4
#define REGX_CODE 5
+#define REGX_CONTEXT 6
struct regxCode {
char *str;
struct lexRule *next;
};
-struct lexTrans {
+/*
+ * lexContext: one named rule context. Each context has its own DFA,
+ * rule list and begin/end action lists; contexts are chained via 'next'.
+ */
+struct lexContext {
+ /* context name; lexContextCreate stores an xstrdup'ed copy */
+ char *name;
struct DFA *dfa;
struct lexRule *rules;
+ /* fastRule[no] points at the info of rule number 'no' (see readFileSpec) */
struct lexRuleInfo **fastRule;
+ /* next rule number to hand out; lexContextCreate starts it at 1 */
int ruleNo;
+
+ /* action lists filled in by the BEGIN and END spec commands */
+ struct lexRuleAction *beginActionList;
+ struct lexRuleAction *endActionList;
+ /* next context in the list headed by lexSpec.context */
+ struct lexContext *next;
+};
+
+/*
+ * lexConcatBuf: growable scratch buffer used by execData to collect the
+ * text of a data node; lexSpecCreate allocates one per nesting level.
+ */
+struct lexConcatBuf {
+ int len; /* set to 0 by lexSpecCreate */
+ int max; /* allocated size of buf in bytes; 0 while buf is unset */
+ char *buf; /* xmalloc'ed by execData on demand; 0 until then */
};
struct lexSpec {
+
char *name;
- struct lexTrans trans;
+ struct lexContext *context; /* head of the list of all contexts */
+
+ /* stack of active contexts; lexNode uses context_stack[context_stack_top] */
+ /* NOTE(review): "begin context" pushes without a visible bound check */
+ /* against context_stack_size - confirm against execCode */
+ struct lexContext **context_stack;
+ int context_stack_size; /* number of entries allocated */
+ int context_stack_top; /* index of the current context */
+
int lineNo;
NMEM m;
data1_handle dh;
void *f_win_fh;
void (*f_win_ef)(void *, off_t);
- int f_win_start;
- int f_win_end;
- int f_win_size;
- char *f_win_buf;
+ int f_win_start; /* first byte of buffer is this file offset */
+ int f_win_end; /* last byte of buffer is this offset - 1 */
+ int f_win_size; /* size of buffer */
+ char *f_win_buf; /* buffer itself */
int (*f_win_rf)(void *, char *, size_t);
off_t (*f_win_sf)(void *, off_t);
- struct lexRuleAction *beginActionList;
- struct lexRuleAction *endActionList;
+ struct lexConcatBuf **concatBuf; /* maxLevel buffers, indexed by d1 level */
+ int maxLevel; /* number of entries in concatBuf */
};
static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
int *size)
{
- int i, r, off;
+ int i, r, off = start_pos - spec->f_win_start;
- if (start_pos < spec->f_win_start || start_pos >= spec->f_win_end)
+ if (off >= 0 && end_pos <= spec->f_win_end)
+ {
+ *size = end_pos - start_pos;
+ return spec->f_win_buf + off;
+ }
+ if (off < 0 || start_pos >= spec->f_win_end)
{
(*spec->f_win_sf)(spec->f_win_fh, start_pos);
spec->f_win_start = start_pos;
*size = end_pos - start_pos;
return spec->f_win_buf;
}
- if (end_pos <= spec->f_win_end)
- {
- *size = end_pos - start_pos;
- return spec->f_win_buf + (start_pos - spec->f_win_start);
- }
- off = start_pos - spec->f_win_start;
for (i = 0; i<spec->f_win_end - start_pos; i++)
spec->f_win_buf[i] = spec->f_win_buf[i + off];
r = (*spec->f_win_rf)(spec->f_win_fh,
return dfa;
}
-static struct lexSpec *lexSpecMk (const char *name)
-{
- struct lexSpec *p;
-
- p = xmalloc (sizeof(*p));
- p->name = xmalloc (strlen(name)+1);
- strcpy (p->name, name);
- p->trans.dfa = lexSpecDFA ();
- p->trans.rules = NULL;
- p->trans.fastRule = NULL;
- p->beginActionList = NULL;
- p->endActionList = NULL;
- p->f_win_buf = NULL;
- return p;
-}
-
static void actionListDel (struct lexRuleAction **rap)
{
struct lexRuleAction *ra1, *ra;
*rap = NULL;
}
-static void lexSpecDel (struct lexSpec **pp)
+/*
+ * lexContextCreate: allocate a rule context called 'name'.
+ * The new context owns a copy of the name and a fresh DFA, has no rules
+ * and no begin/end actions, numbers its rules from 1 and is not linked
+ * into any list. Returns the new context.
+ */
+static struct lexContext *lexContextCreate (const char *name)
+{
+    struct lexContext *ctx = xmalloc (sizeof(*ctx));
+
+    /* identity and matcher */
+    ctx->name = xstrdup (name);
+    ctx->dfa = lexSpecDFA ();
+
+    /* rule bookkeeping: numbering starts at 1, nothing registered yet */
+    ctx->ruleNo = 1;
+    ctx->rules = NULL;
+    ctx->fastRule = NULL;
+
+    /* no actions and no successor until the caller sets them */
+    ctx->endActionList = NULL;
+    ctx->beginActionList = NULL;
+    ctx->next = NULL;
+
+    return ctx;
+}
+
+/*
+ * lexContextDestroy: release everything owned by context 'p' - its DFA,
+ * fast rule table, rules with their action lists, begin/end action
+ * lists and name - and finally the context itself.
+ */
+static void lexContextDestroy (struct lexContext *p)
{
- struct lexSpec *p;
struct lexRule *rp, *rp1;
- assert (pp);
- p = *pp;
- if (!p)
- return ;
- dfa_delete (&p->trans.dfa);
- xfree (p->name);
- xfree (p->trans.fastRule);
- for (rp = p->trans.rules; rp; rp = rp1)
+ /* each context owns the DFA made by lexContextCreate; free it too */
+ dfa_delete (&p->dfa);
+ xfree (p->fastRule);
+ for (rp = p->rules; rp; rp = rp1)
{
+ rp1 = rp->next;
actionListDel (&rp->info.actionList);
xfree (rp);
}
actionListDel (&p->beginActionList);
actionListDel (&p->endActionList);
+ xfree (p->name);
+ xfree (p);
+}
+
+/*
+ * lexSpecCreate: allocate a lexSpec called 'name'.
+ * Sets up an empty context list, a context stack with room for 100
+ * entries, no window buffer, and 128 empty concat buffers (one per
+ * nesting level). The remaining members are not touched here.
+ * Returns the new spec.
+ */
+static struct lexSpec *lexSpecCreate (const char *name)
+{
+    struct lexSpec *spec = xmalloc (sizeof(*spec));
+    int level = 0;
+
+    spec->name = xstrdup (name);
+
+    /* no contexts yet, but the stack that will hold them is ready */
+    spec->context = NULL;
+    spec->context_stack_size = 100;
+    spec->context_stack = xmalloc (sizeof(*spec->context_stack) *
+                                   spec->context_stack_size);
+    spec->f_win_buf = NULL;
+
+    /* one empty concat buffer for every nesting level */
+    spec->maxLevel = 128;
+    spec->concatBuf = xmalloc (sizeof(*spec->concatBuf) * spec->maxLevel);
+    while (level < spec->maxLevel)
+    {
+        struct lexConcatBuf *cb = xmalloc (sizeof(*cb));
+
+        cb->len = 0;
+        cb->max = 0;
+        cb->buf = 0;
+        spec->concatBuf[level++] = cb;
+    }
+    return spec;
+}
+
+/*
+ * lexSpecDestroy: free the lexSpec that *pp points to - concat buffers
+ * (holders and their data), all contexts, the name, the window buffer
+ * and the context stack - and set *pp to NULL.
+ * Does nothing when *pp is already NULL; pp itself must not be NULL.
+ */
+static void lexSpecDestroy (struct lexSpec **pp)
+{
+ struct lexSpec *p;
+ struct lexContext *lt;
+ int i;
+
+ assert (pp);
+ p = *pp;
+ if (!p)
+ return ;
+
+ for (i = 0; i < p->maxLevel; i++)
+ {
+ /* execData xmalloc's ->buf on demand; release it with its holder */
+ xfree (p->concatBuf[i]->buf);
+ xfree (p->concatBuf[i]);
+ }
+ xfree (p->concatBuf);
+
+ lt = p->context;
+ while (lt)
+ {
+ /* fetch the link first: lexContextDestroy frees lt */
+ struct lexContext *lt_next = lt->next;
+ lexContextDestroy (lt);
+ lt = lt_next;
+ }
+ xfree (p->name);
xfree (p->f_win_buf);
+ xfree (p->context_stack);
xfree (p);
*pp = NULL;
}
cmd[i] = *cp + 'a' - 'A';
else
break;
- if (i > sizeof(cmd)-2)
- break;
- i++;
+ if (i < sizeof(cmd)-2)
+ i++;
cp++;
}
cmd[i] = '\0';
return REGX_END;
else if (!strcmp (cmd, "body"))
return REGX_BODY;
+ else if (!strcmp (cmd, "context"))
+ return REGX_CONTEXT;
else
{
logf (LOG_WARN, "bad command %s", cmd);
{
int r, tok, len;
int bodyMark = 0;
+ const char *s0;
while ((tok = readParseToken (&s, &len)))
{
(*ap)->u.pattern.body = bodyMark;
bodyMark = 0;
(*ap)->u.pattern.dfa = lexSpecDFA ();
+ s0 = s;
r = dfa_parse ((*ap)->u.pattern.dfa, &s);
if (r || *s != '/')
{
xfree (*ap);
*ap = NULL;
- logf (LOG_WARN, "regular expression error. r=%d", r);
+ logf (LOG_WARN, "regular expression error '%.*s'", s-s0, s0);
return -1;
}
dfa_mkstate ((*ap)->u.pattern.dfa);
int readOneSpec (struct lexSpec *spec, const char *s)
{
- int tok, len;
+ int len, r, tok;
+ struct lexRule *rp;
+ struct lexContext *lc;
tok = readParseToken (&s, &len);
- if (tok == REGX_BEGIN)
- {
- actionListDel (&spec->beginActionList);
- actionListMk (spec, s, &spec->beginActionList);
- }
- else if (tok == REGX_END)
+ if (tok == REGX_CONTEXT)
{
- actionListDel (&spec->endActionList);
- actionListMk (spec, s, &spec->endActionList);
+ char context_name[32];
+ tok = readParseToken (&s, &len);
+ if (tok != REGX_CODE)
+ {
+ logf (LOG_WARN, "missing name after CONTEXT keyword");
+ return 0;
+ }
+ if (len > 31)
+ len = 31;
+ memcpy (context_name, s, len);
+ context_name[len] = '\0';
+ lc = lexContextCreate (context_name);
+ lc->next = spec->context;
+ spec->context = lc;
+ return 0;
}
- else if (tok == REGX_PATTERN)
+ if (!spec->context)
+ spec->context = lexContextCreate ("main");
+
+ switch (tok)
{
- int r;
- struct lexRule *rp;
- r = dfa_parse (spec->trans.dfa, &s);
+ case REGX_BEGIN:
+ actionListDel (&spec->context->beginActionList);
+ actionListMk (spec, s, &spec->context->beginActionList);
+ break;
+ case REGX_END:
+ actionListDel (&spec->context->endActionList);
+ actionListMk (spec, s, &spec->context->endActionList);
+ break;
+ case REGX_PATTERN:
+#if REGX_DEBUG
+ logf (LOG_DEBUG, "rule %d %s", spec->context->ruleNo, s);
+#endif
+ r = dfa_parse (spec->context->dfa, &s);
if (r)
{
logf (LOG_WARN, "regular expression error. r=%d", r);
}
s++;
rp = xmalloc (sizeof(*rp));
- rp->info.no = spec->trans.ruleNo++;
- rp->next = spec->trans.rules;
- spec->trans.rules = rp;
+ rp->info.no = spec->context->ruleNo++;
+ rp->next = spec->context->rules;
+ spec->context->rules = rp;
actionListMk (spec, s, &rp->info.actionList);
}
return 0;
int readFileSpec (struct lexSpec *spec)
{
+ struct lexContext *lc;
char *lineBuf;
int lineSize = 512;
- struct lexRule *rp;
int c, i, errors = 0;
FILE *spec_inf;
return -1;
}
spec->lineNo = 0;
- spec->trans.ruleNo = 1;
c = getc (spec_inf);
while (c != EOF)
{
}
fclose (spec_inf);
xfree (lineBuf);
- spec->trans.fastRule = xmalloc (sizeof(*spec->trans.fastRule) *
- spec->trans.ruleNo);
- for (i = 0; i<spec->trans.ruleNo; i++)
- spec->trans.fastRule[i] = NULL;
- for (rp = spec->trans.rules; rp; rp = rp->next)
- spec->trans.fastRule[rp->info.no] = &rp->info;
- if (errors)
- return -1;
+
#if 0
debug_dfa_trav = 1;
debug_dfa_tran = 1;
debug_dfa_followpos = 1;
dfa_verbose = 1;
#endif
- dfa_mkstate (spec->trans.dfa);
+ for (lc = spec->context; lc; lc = lc->next)
+ {
+ struct lexRule *rp;
+ lc->fastRule = xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
+ for (i = 0; i < lc->ruleNo; i++)
+ lc->fastRule[i] = NULL;
+ for (rp = lc->rules; rp; rp = rp->next)
+ lc->fastRule[rp->info.no] = &rp->info;
+ dfa_mkstate (lc->dfa);
+ }
+ if (errors)
+ return -1;
return 0;
}
static struct lexSpec *curLexSpec = NULL;
-static void destroy_data (struct data1_node *n)
-{
- assert (n->which == DATA1N_data);
- xfree (n->u.data.data);
-}
-
static void execData (struct lexSpec *spec,
data1_node **d1_stack, int *d1_level,
const char *ebuf, int elen, int formatted_text)
{
struct data1_node *res, *parent;
+ int org_len;
if (elen == 0) /* shouldn't happen, but it does! */
return ;
#if REGX_DEBUG
if (elen > 40)
- logf (LOG_DEBUG, "execData %.15s ... %.*s", ebuf, 15, ebuf + elen-15);
+ logf (LOG_DEBUG, "data (%d bytes) %.15s ... %.*s", elen,
+ ebuf, 15, ebuf + elen-15);
else if (elen > 0)
- logf (LOG_DEBUG, "execData %.*s", elen, ebuf);
+ logf (LOG_DEBUG, "data (%d bytes) %.*s", elen, elen, ebuf);
else
- logf (LOG_DEBUG, "execData len=%d", elen);
+ logf (LOG_DEBUG, "data (%d bytes)", elen);
#endif
if (*d1_level <= 1)
parent = d1_stack[*d1_level -1];
assert (parent);
- if ((res=d1_stack[*d1_level]) && res->which == DATA1N_data)
+
+ if ((res = d1_stack[*d1_level]) && res->which == DATA1N_data)
+ org_len = res->u.data.len;
+ else
{
- if (elen + res->u.data.len <= DATA1_LOCALDATA)
- memcpy (res->u.data.data + res->u.data.len, ebuf, elen);
- else
- {
- char *nb = xmalloc (elen + res->u.data.len);
- memcpy (nb, res->u.data.data, res->u.data.len);
- memcpy (nb + res->u.data.len, ebuf, elen);
- if (res->u.data.len > DATA1_LOCALDATA)
- xfree (res->u.data.data);
- res->u.data.data = nb;
- res->destroy = destroy_data;
- }
- res->u.data.len += elen;
+ org_len = 0;
+
+ res = data1_mk_node (spec->dh, spec->m);
+ res->parent = parent;
+ res->which = DATA1N_data;
+ res->u.data.what = DATA1I_text;
+ res->u.data.len = 0;
+ res->u.data.formatted_text = formatted_text;
+#if 0
+ if (elen > DATA1_LOCALDATA)
+ res->u.data.data = nmem_malloc (spec->m, elen);
+ else
+ res->u.data.data = res->lbuf;
+ memcpy (res->u.data.data, ebuf, elen);
+#else
+ res->u.data.data = 0;
+#endif
+ res->root = parent->root;
+
+ parent->last_child = res;
+ if (d1_stack[*d1_level])
+ d1_stack[*d1_level]->next = res;
+ else
+ parent->child = res;
+ d1_stack[*d1_level] = res;
}
- else
+ if (org_len + elen >= spec->concatBuf[*d1_level]->max)
{
- res = data1_mk_node (spec->dh, spec->m);
- res->parent = parent;
- res->which = DATA1N_data;
- res->u.data.what = DATA1I_text;
- res->u.data.len = elen;
- res->u.data.formatted_text = formatted_text;
- if (elen > DATA1_LOCALDATA)
- {
- res->u.data.data = xmalloc (elen);
- res->destroy = destroy_data;
- }
- else
- res->u.data.data = res->lbuf;
- memcpy (res->u.data.data, ebuf, elen);
- res->root = parent->root;
-
- parent->num_children++;
- parent->last_child = res;
- if (d1_stack[*d1_level])
- d1_stack[*d1_level]->next = res;
- else
- parent->child = res;
- d1_stack[*d1_level] = res;
+ char *old_buf, *new_buf;
+
+ spec->concatBuf[*d1_level]->max = org_len + elen + 256;
+ new_buf = xmalloc (spec->concatBuf[*d1_level]->max);
+ if ((old_buf = spec->concatBuf[*d1_level]->buf))
+ {
+ memcpy (new_buf, old_buf, org_len);
+ xfree (old_buf);
+ }
+ spec->concatBuf[*d1_level]->buf = new_buf;
}
+ assert (spec->concatBuf[*d1_level]);
+ memcpy (spec->concatBuf[*d1_level]->buf + org_len, ebuf, elen);
+ res->u.data.len += elen;
}
static void execDataP (struct lexSpec *spec,
execData (spec, d1_stack, d1_level, ebuf, elen, formatted_text);
}
+/*
+ * tagDataRelease: finish the text data node at d1_stack[d1_level].
+ * When that slot holds a DATA1N_data node of kind DATA1I_text, its
+ * collected bytes are copied out of the level's concat buffer into
+ * storage owned by the node: the node's lbuf when the text fits in
+ * DATA1_LOCALDATA bytes, otherwise memory from spec->m.
+ * Any other node (or an empty slot) is left alone.
+ */
+static void tagDataRelease (struct lexSpec *spec,
+                            data1_node **d1_stack, int d1_level)
+{
+    data1_node *node = d1_stack[d1_level];
+
+    if (!node)
+        return;
+    if (node->which != DATA1N_data)
+        return;
+    if (node->u.data.what != DATA1I_text)
+        return;
+
+    /* the node must still be pending: no storage yet, some text gathered */
+    assert (!node->u.data.data);
+    assert (node->u.data.len > 0);
+
+    if (node->u.data.len <= DATA1_LOCALDATA)
+        node->u.data.data = node->lbuf;
+    else
+        node->u.data.data = nmem_malloc (spec->m, node->u.data.len);
+
+    memcpy (node->u.data.data, spec->concatBuf[d1_level]->buf,
+            node->u.data.len);
+}
+
+/*
+ * variantBegin: open a variant node below the current node.
+ *
+ * class_str/class_len and type_str/type_len name the variant type; they
+ * are truncated to DATA1_MAX_SYMBOL-1 characters and looked up with
+ * data1_getvartypebyct in the varset of the record's abstract syntax.
+ * value_str/value_len is the variant value, truncated to
+ * DATA1_LOCALDATA-1 characters and stored in the new node's lbuf.
+ *
+ * Nothing happens (apart from a warning) when no record is open
+ * (*d1_level == 0), and nothing at all when the type is unknown.
+ * On success the new node is linked into the tree, stored at
+ * d1_stack[*d1_level], and *d1_level is advanced past it.
+ */
+static void variantBegin (struct lexSpec *spec,
+                          data1_node **d1_stack, int *d1_level,
+                          const char *class_str, int class_len,
+                          const char *type_str, int type_len,
+                          const char *value_str, int value_len)
+{
+    struct data1_node *parent;
+    char tclass[DATA1_MAX_SYMBOL], ttype[DATA1_MAX_SYMBOL];
+    data1_vartype *tp;
+    int i;
+    data1_node *res;
+
+    if (*d1_level == 0)
+    {
+        logf (LOG_WARN, "in variant begin. No record type defined");
+        return ;
+    }
+    /* index the stack only now: at level 0 this would read d1_stack[-1] */
+    parent = d1_stack[*d1_level -1];
+
+    if (class_len >= DATA1_MAX_SYMBOL)
+        class_len = DATA1_MAX_SYMBOL-1;
+    memcpy (tclass, class_str, class_len);
+    tclass[class_len] = '\0';
+
+    if (type_len >= DATA1_MAX_SYMBOL)
+        type_len = DATA1_MAX_SYMBOL-1;
+    memcpy (ttype, type_str, type_len);
+    ttype[type_len] = '\0';
+
+#if REGX_DEBUG
+    logf (LOG_DEBUG, "variant begin %s %s (%d)", tclass, ttype, *d1_level);
+#endif
+
+    if (!(tp =
+          data1_getvartypebyct(spec->dh, parent->root->u.root.absyn->varset,
+                               tclass, ttype)))
+        return;
+
+    if (parent->which != DATA1N_variant)
+    {
+        /* parent is not a variant: insert a variant node without type */
+        /* or value first and descend into it */
+        res = data1_mk_node (spec->dh, spec->m);
+        res->parent = parent;
+        res->which = DATA1N_variant;
+        res->u.variant.type = 0;
+        res->u.variant.value = 0;
+        res->root = parent->root;
+
+        parent->last_child = res;
+        if (d1_stack[*d1_level])
+        {
+            tagDataRelease (spec, d1_stack, *d1_level);
+            d1_stack[*d1_level]->next = res;
+        }
+        else
+            parent->child = res;
+        d1_stack[*d1_level] = res;
+        d1_stack[++(*d1_level)] = NULL;
+    }
+    /* walk up through the enclosing variant nodes; if one already has */
+    /* this type, go back to its level so the new node is placed there */
+    for (i = *d1_level-1; d1_stack[i]->which == DATA1N_variant; i--)
+        if (d1_stack[i]->u.variant.type == tp)
+        {
+            *d1_level = i;
+            break;
+        }
+
+#if REGX_DEBUG
+    logf (LOG_DEBUG, "variant node (%d)", *d1_level);
+#endif
+    parent = d1_stack[*d1_level-1];
+    res = data1_mk_node (spec->dh, spec->m);
+    res->parent = parent;
+    res->which = DATA1N_variant;
+    res->root = parent->root;
+    res->u.variant.type = tp;
+
+    if (value_len >= DATA1_LOCALDATA)
+        value_len =DATA1_LOCALDATA-1;
+    memcpy (res->lbuf, value_str, value_len);
+    res->lbuf[value_len] = '\0';
+
+    res->u.variant.value = res->lbuf;
+
+    parent->last_child = res;
+    if (d1_stack[*d1_level])
+    {
+        tagDataRelease (spec, d1_stack, *d1_level);
+        d1_stack[*d1_level]->next = res;
+    }
+    else
+        parent->child = res;
+    d1_stack[*d1_level] = res;
+    d1_stack[++(*d1_level)] = NULL;
+}
+
+/*
+ * tagStrip: trim white space from both ends of the span (*tag, *len).
+ * Nothing is copied: *tag is advanced past leading white space and
+ * *len is reduced to the length of what remains (0 if the span was all
+ * white space). A NULL *tag or a non-positive *len is left untouched;
+ * tagEnd passes NULL/0 to mean "any tag".
+ */
+static void tagStrip (const char **tag, int *len)
+{
+    int i;
+
+    if (!*tag || *len <= 0)
+        return;
+
+    /* <ctype.h> functions take unsigned char values; plain char may be */
+    /* negative for 8-bit characters, hence the casts */
+    for (i = *len; i > 0 && isspace((unsigned char) (*tag)[i-1]); --i)
+        ;
+    *len = i;
+    for (i = 0; i < *len && isspace((unsigned char) (*tag)[i]); i++)
+        ;
+    *tag += i;
+    *len -= i;
+}
static void tagBegin (struct lexSpec *spec,
data1_node **d1_stack, int *d1_level,
logf (LOG_WARN, "in element begin. No record type defined");
return ;
}
-
+ tagStrip (&tag, &len);
+
res = data1_mk_node (spec->dh, spec->m);
res->parent = parent;
res->which = DATA1N_tag;
- res->u.tag.tag = res->lbuf;
res->u.tag.get_bytes = -1;
if (len >= DATA1_LOCALDATA)
- len = DATA1_LOCALDATA-1;
+ res->u.tag.tag = nmem_malloc (spec->m, len+1);
+ else
+ res->u.tag.tag = res->lbuf;
memcpy (res->u.tag.tag, tag, len);
res->u.tag.tag[len] = '\0';
#if REGX_DEBUG
- logf (LOG_DEBUG, "tag begin %s (%d)", res->u.tag.tag, *d1_level);
+ logf (LOG_DEBUG, "begin tag %s (%d)", res->u.tag.tag, *d1_level);
#endif
if (parent->which == DATA1N_variant)
return ;
elem = data1_getelementbytagname (spec->dh, d1_stack[0]->u.root.absyn,
e, res->u.tag.tag);
-
res->u.tag.element = elem;
res->u.tag.node_selected = 0;
res->u.tag.make_variantlist = 0;
res->u.tag.no_data_requested = 0;
res->root = parent->root;
- parent->num_children++;
+
parent->last_child = res;
if (d1_stack[*d1_level])
+ {
+ tagDataRelease (spec, d1_stack, *d1_level);
d1_stack[*d1_level]->next = res;
+ }
else
parent->child = res;
d1_stack[*d1_level] = res;
}
+/*
+ * tagEnd: leave nested levels of d1_stack until an element matching
+ * 'tag' (of length 'len', white space trimmed) has been reached.
+ * With tag == NULL the first tag node met ends the search. *d1_level
+ * is never taken below min_level. Text data pending at each level
+ * that is left gets released through tagDataRelease.
+ */
static void tagEnd (struct lexSpec *spec,
- data1_node **d1_stack, int *d1_level,
+ data1_node **d1_stack, int *d1_level, int min_level,
const char *tag, int len)
{
- while (*d1_level > 1)
+ tagStrip (&tag, &len);
+ while (*d1_level > min_level)
{
+ /* finish pending text at this level before stepping out of it */
+ tagDataRelease (spec, d1_stack, *d1_level);
(*d1_level)--;
- if (!tag ||
- (strlen(d1_stack[*d1_level]->u.tag.tag) == len &&
- !memcmp (d1_stack[*d1_level]->u.tag.tag, tag, len)))
+ /* level 0 is not inspected as a tag; stop here */
+ if (*d1_level == 0)
+ break;
+ /* only tag nodes are compared; other node kinds are stepped over */
+ if ((d1_stack[*d1_level]->which == DATA1N_tag) &&
+ (!tag ||
+ (strlen(d1_stack[*d1_level]->u.tag.tag) == (size_t) len &&
+ !memcmp (d1_stack[*d1_level]->u.tag.tag, tag, len))))
break;
}
#if REGX_DEBUG
- logf (LOG_DEBUG, "tag end (%d)", *d1_level);
+ logf (LOG_DEBUG, "end tag (%d)", *d1_level);
#endif
}
struct DFA_tran *t;
unsigned char c;
unsigned char c_prev = 0;
- int ptr = *pptr;
- int start_ptr = *pptr;
- int last_rule = 0;
- int last_ptr = 0;
+ int ptr = *pptr; /* current pointer */
+ int start_ptr = *pptr; /* first char of match */
+ int last_ptr = 0; /* last char of match */
+ int last_rule = 0; /* rule number of current match */
int i;
while (1)
return 2;
}
-static char *regxStrz (const char *src, int len)
+static char *regxStrz (const char *src, int len, char *str)
{
- static char str[64];
-
if (len > 63)
len = 63;
memcpy (str, src, len);
r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str, &cmd_len);
while (r)
{
- char *p;
+ char *p, ptmp[64];
if (r == 1)
{
&cmd_str, &cmd_len);
continue;
}
- p = regxStrz (cmd_str, cmd_len);
+ p = regxStrz (cmd_str, cmd_len, ptmp);
if (!strcmp (p, "begin"))
{
r = execTok (spec, &s, arg_no, arg_start, arg_end,
&cmd_str, &cmd_len);
if (r < 2)
+ {
+ logf (LOG_WARN, "missing keyword after 'begin'");
continue;
- p = regxStrz (cmd_str, cmd_len);
+ }
+ p = regxStrz (cmd_str, cmd_len, ptmp);
if (!strcmp (p, "record"))
{
r = execTok (spec, &s, arg_no, arg_start, arg_end,
tagBegin (spec, d1_stack, d1_level, cmd_str, cmd_len);
r = execTok (spec, &s, arg_no, arg_start, arg_end,
&cmd_str, &cmd_len);
- }
+ }
+ else if (!strcmp (p, "variant"))
+ {
+ int class_len;
+ const char *class_str = NULL;
+ int type_len;
+ const char *type_str = NULL;
+ int value_len;
+ const char *value_str = NULL;
+ r = execTok (spec, &s, arg_no, arg_start, arg_end,
+ &cmd_str, &cmd_len);
+ if (r < 2)
+ continue;
+ class_str = cmd_str;
+ class_len = cmd_len;
+ r = execTok (spec, &s, arg_no, arg_start, arg_end,
+ &cmd_str, &cmd_len);
+ if (r < 2)
+ continue;
+ type_str = cmd_str;
+ type_len = cmd_len;
+
+ r = execTok (spec, &s, arg_no, arg_start, arg_end,
+ &cmd_str, &cmd_len);
+ if (r < 2)
+ continue;
+ value_str = cmd_str;
+ value_len = cmd_len;
+
+ variantBegin (spec, d1_stack, d1_level, class_str, class_len,
+ type_str, type_len, value_str, value_len);
+
+
+ r = execTok (spec, &s, arg_no, arg_start, arg_end,
+ &cmd_str, &cmd_len);
+ }
+ else if (!strcmp (p, "context"))
+ {
+ if (r > 1)
+ {
+ struct lexContext *lc = spec->context;
+ r = execTok (spec, &s, arg_no, arg_start, arg_end,
+ &cmd_str, &cmd_len);
+ p = regxStrz (cmd_str, cmd_len, ptmp);
+#if REGX_DEBUG
+ logf (LOG_DEBUG, "begin context %s", p);
+#endif
+ while (lc && strcmp (p, lc->name))
+ lc = lc->next;
+ if (lc)
+ spec->context_stack[++(spec->context_stack_top)] = lc;
+ else
+ logf (LOG_WARN, "unknown context %s", p);
+
+ }
+ r = execTok (spec, &s, arg_no, arg_start, arg_end,
+ &cmd_str, &cmd_len);
+ }
+ else
+ {
+ logf (LOG_WARN, "bad keyword '%s' after begin", p);
+ }
}
else if (!strcmp (p, "end"))
{
r = execTok (spec, &s, arg_no, arg_start, arg_end,
&cmd_str, &cmd_len);
- if (r > 1)
- {
- p = regxStrz (cmd_str, cmd_len);
- if (!strcmp (p, "record"))
- {
- *d1_level = 0;
- r = execTok (spec, &s, arg_no, arg_start, arg_end,
- &cmd_str, &cmd_len);
+ if (r < 2)
+ {
+ logf (LOG_WARN, "missing keyword after 'end'");
+ continue;
+ }
+ p = regxStrz (cmd_str, cmd_len, ptmp);
+ if (!strcmp (p, "record"))
+ {
+ int i;
+ for (i = *d1_level; i; --i)
+ tagDataRelease (spec, d1_stack, i);
+ *d1_level = 0;
+ r = execTok (spec, &s, arg_no, arg_start, arg_end,
+ &cmd_str, &cmd_len);
#if REGX_DEBUG
- logf (LOG_DEBUG, "end record");
+ logf (LOG_DEBUG, "end record");
#endif
- returnCode = 0;
+ returnCode = 0;
+ }
+ else if (!strcmp (p, "element"))
+ {
+ int min_level = 1;
+ while ((r = execTok (spec, &s, arg_no, arg_start, arg_end,
+ &cmd_str, &cmd_len)) == 3)
+ {
+ if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))
+ min_level = 0;
}
- else if (!strcmp (p, "element"))
+ if (r > 2)
+ {
+ tagEnd (spec, d1_stack, d1_level, min_level,
+ cmd_str, cmd_len);
+ r = execTok (spec, &s, arg_no, arg_start, arg_end,
+ &cmd_str, &cmd_len);
+ }
+ else
+ tagEnd (spec, d1_stack, d1_level, min_level, NULL, 0);
+ if (*d1_level == 0)
{
- r = execTok (spec, &s, arg_no, arg_start, arg_end,
- &cmd_str, &cmd_len);
-#if 0
- if (*d1_level == 1)
- {
- *d1_level = 0;
- returnCode = 0;
- }
+#if REGX_DEBUG
+ logf (LOG_DEBUG, "end element end records");
#endif
- if (r > 2)
- {
- tagEnd (spec, d1_stack, d1_level, cmd_str, cmd_len);
- r = execTok (spec, &s, arg_no, arg_start, arg_end,
- &cmd_str, &cmd_len);
- }
- else
- tagEnd (spec, d1_stack, d1_level, NULL, 0);
+ returnCode = 0;
}
- else
- logf (LOG_WARN, "missing record/element/variant");
- }
- else
- logf (LOG_WARN, "missing record/element/variant");
- }
+
+ }
+ else if (!strcmp (p, "context"))
+ {
+#if REGX_DEBUG
+ logf (LOG_DEBUG, "end context");
+#endif
+ if (spec->context_stack_top)
+ (spec->context_stack_top)--;
+ r = execTok (spec, &s, arg_no, arg_start, arg_end,
+ &cmd_str, &cmd_len);
+ }
+ else
+ logf (LOG_WARN, "bad keyword '%s' after end", p);
+ }
else if (!strcmp (p, "data"))
{
int textFlag = 0;
&cmd_str, &cmd_len);
} while (r > 1);
if (element_str)
- tagEnd (spec, d1_stack, d1_level, NULL, 0);
+ tagEnd (spec, d1_stack, d1_level, 1, NULL, 0);
}
else if (!strcmp (p, "unread"))
{
logf (LOG_WARN, "missing number after -offset");
continue;
}
- p = regxStrz (cmd_str, cmd_len);
+ p = regxStrz (cmd_str, cmd_len, ptmp);
offset = atoi (p);
r = execTok (spec, &s, arg_no, arg_start, arg_end,
&cmd_str, &cmd_len);
r = execTok (spec, &s, arg_no, arg_start, arg_end,
&cmd_str, &cmd_len);
}
+ else if (!strcmp (p, "context"))
+ {
+ if (r > 1)
+ {
+ struct lexContext *lc = spec->context;
+ r = execTok (spec, &s, arg_no, arg_start, arg_end,
+ &cmd_str, &cmd_len);
+ p = regxStrz (cmd_str, cmd_len, ptmp);
+
+ while (lc && strcmp (p, lc->name))
+ lc = lc->next;
+ if (lc)
+ spec->context_stack[spec->context_stack_top] = lc;
+ else
+ logf (LOG_WARN, "unknown context %s", p);
+
+ }
+ r = execTok (spec, &s, arg_no, arg_start, arg_end,
+ &cmd_str, &cmd_len);
+ }
else
{
- logf (LOG_WARN, "unknown code command: %.*s", cmd_len, cmd_str);
+ logf (LOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str);
r = execTok (spec, &s, arg_no, arg_start, arg_end,
&cmd_str, &cmd_len);
continue;
}
+/*
+ * execAction: Execute the action specified by 'ap'. Returns 0 if
+ * the pattern(s) associated with the rule and its code could be
+ * executed ok; returns 1 if the code couldn't be executed.
+ */
static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
data1_node **d1_stack, int *d1_level,
int start_ptr, int *pptr)
return 1;
}
+/*
+ * execRule: run the action list of rule number 'ruleNo' in 'context'.
+ * The rule is found through context->fastRule; the remaining arguments
+ * are handed to execAction unchanged and its result is returned.
+ */
-static int execRule (struct lexSpec *spec, struct lexTrans *trans,
+static int execRule (struct lexSpec *spec, struct lexContext *context,
data1_node **d1_stack, int *d1_level,
int ruleNo, int start_ptr, int *pptr)
{
#if REGX_DEBUG
- logf (LOG_DEBUG, "execRule %d", ruleNo);
+ logf (LOG_DEBUG, "exec rule %d", ruleNo);
#endif
- return execAction (spec, trans->fastRule[ruleNo]->actionList,
+ return execAction (spec, context->fastRule[ruleNo]->actionList,
d1_stack, d1_level, start_ptr, pptr);
}
-data1_node *lexNode (struct lexSpec *spec, struct lexTrans *trans,
- data1_node **d1_stack, int *d1_level,
- int *ptr)
+data1_node *lexNode (struct lexSpec *spec,
+ data1_node **d1_stack, int *d1_level, int *ptr)
{
- struct DFA_state *state = trans->dfa->states[0];
+ struct lexContext *context = spec->context_stack[spec->context_stack_top];
+ struct DFA_state *state = context->dfa->states[0];
struct DFA_tran *t;
unsigned char c;
unsigned char c_prev = '\n';
int i;
- int last_rule = 0;
- int last_ptr = *ptr;
- int start_ptr = *ptr;
- int skip_ptr = *ptr;
+ int last_rule = 0; /* rule number of current match */
+ int last_ptr = *ptr; /* last char of match */
+ int start_ptr = *ptr; /* first char of match */
+ int skip_ptr = *ptr; /* first char of run */
while (1)
{
c = f_win_advance (spec, ptr);
if (*ptr == F_WIN_EOF)
{
+ /* end of file met */
if (last_rule)
{
+ /* there was a match */
if (skip_ptr < start_ptr)
{
+ /* deal with chars that didn't match */
int size;
char *buf;
buf = f_win_get (spec, skip_ptr, start_ptr, &size);
execDataP (spec, d1_stack, d1_level, buf, size, 0);
}
+ /* restore pointer */
*ptr = last_ptr;
- if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
- start_ptr, ptr))
+ /* execute rule */
+ if (!execRule (spec, context, d1_stack, d1_level,
+ last_rule, start_ptr, ptr))
break;
+ /* restore skip pointer */
skip_ptr = *ptr;
last_rule = 0;
}
else if (skip_ptr < *ptr)
{
+ /* deal with chars that didn't match */
int size;
char *buf;
buf = f_win_get (spec, skip_ptr, *ptr, &size);
{
if (skip_ptr < start_ptr)
{
+ /* deal with chars that didn't match */
int size;
char *buf;
buf = f_win_get (spec, skip_ptr, start_ptr, &size);
execDataP (spec, d1_stack, d1_level, buf, size, 0);
}
+ /* restore pointer */
*ptr = last_ptr;
- if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
- start_ptr, ptr))
+ if (!execRule (spec, context, d1_stack, d1_level,
+ last_rule, start_ptr, ptr))
{
if (spec->f_win_ef && *ptr != F_WIN_EOF)
{
}
return NULL;
}
+ context = spec->context_stack[spec->context_stack_top];
skip_ptr = *ptr;
last_rule = 0;
- start_ptr = *ptr;
+ last_ptr = start_ptr = *ptr;
if (start_ptr > 0)
{
--start_ptr;
c_prev = f_win_advance (spec, &start_ptr);
*ptr = start_ptr;
}
- state = trans->dfa->states[0];
+ state = context->dfa->states[0];
break;
}
else if (c >= t->ch[0] && c <= t->ch[1])
{ /* transition ... */
- state = trans->dfa->states[t->to];
+ state = context->dfa->states[t->to];
if (state->rule_no)
{
if (c_prev == '\n')
return NULL;
}
+/*
+ * lexRoot: read one record starting at file offset 'offset', using the
+ * context named 'context_name' as the initial context.
+ * Runs the context's begin actions, then lexNode, releases pending text
+ * data on all open levels, and finally runs the context's end actions.
+ * Returns d1_stack[0], or NULL (after a warning) when no context has
+ * that name.
+ */
-static data1_node *lexRoot (struct lexSpec *spec, off_t offset)
+static data1_node *lexRoot (struct lexSpec *spec, off_t offset,
+ const char *context_name)
{
- data1_node *d1_stack[512];
+ struct lexContext *lt = spec->context;
+ data1_node *d1_stack[128];
int d1_level = 0;
- int ptr = offset;
+ int i, ptr = offset;
+ /* start with an empty context stack */
+ spec->context_stack_top = 0;
+ /* look the start context up by name */
+ while (lt)
+ {
+ if (!strcmp (lt->name, context_name))
+ break;
+ lt = lt->next;
+ }
+ if (!lt)
+ {
+ logf (LOG_WARN, "cannot find context %s", context_name);
+ return NULL;
+ }
+ spec->context_stack[spec->context_stack_top] = lt;
d1_stack[d1_level] = NULL;
- if (spec->beginActionList)
- execAction (spec, spec->beginActionList,
- d1_stack, &d1_level, 0, &ptr);
- lexNode (spec, &spec->trans, d1_stack, &d1_level, &ptr);
- if (spec->endActionList)
- execAction (spec, spec->endActionList,
- d1_stack, &d1_level, ptr, &ptr);
+ if (lt->beginActionList)
+ execAction (spec, lt->beginActionList, d1_stack, &d1_level, 0, &ptr);
+ lexNode (spec, d1_stack, &d1_level, &ptr);
+ /* give text nodes still pending on open levels their own storage */
+ for (i = d1_level; i; --i)
+ tagDataRelease (spec, d1_stack, i);
+ if (lt->endActionList)
+ execAction (spec, lt->endActionList, d1_stack, &d1_level, ptr, &ptr);
return *d1_stack;
}
data1_node *grs_read_regx (struct grs_read_info *p)
{
int res;
- data1_node *n;
#if REGX_DEBUG
logf (LOG_DEBUG, "grs_read_regx");
if (!curLexSpec || strcmp (curLexSpec->name, p->type))
{
if (curLexSpec)
- lexSpecDel (&curLexSpec);
- curLexSpec = lexSpecMk (p->type);
+ lexSpecDestroy (&curLexSpec);
+ curLexSpec = lexSpecCreate (p->type);
curLexSpec->dh = p->dh;
res = readFileSpec (curLexSpec);
if (res)
{
- lexSpecDel (&curLexSpec);
+ lexSpecDestroy (&curLexSpec);
return NULL;
}
}
+ curLexSpec->dh = p->dh;
if (!p->offset)
{
curLexSpec->f_win_start = 0;
curLexSpec->f_win_size = 500000;
}
curLexSpec->m = p->mem;
- n = lexRoot (curLexSpec, p->offset);
- return n;
+ return lexRoot (curLexSpec, p->offset, "main");
}