* Sebastian Hammer, Adam Dickmeiss
*
* $Log: regxread.c,v $
- * Revision 1.24 1999-05-21 11:08:46 adam
+ * Revision 1.32 1999-09-07 07:19:21 adam
+ * Work on character mapping. Implemented replace rules.
+ *
+ * Revision 1.31 1999/07/14 13:05:29 adam
+ * Tcl filter works with objects when TCL is version 8 or later; filter
+ * works with strings otherwise (slow).
+ *
+ * Revision 1.30 1999/07/14 10:55:28 adam
+ * Fixed memory leak.
+ *
+ * Revision 1.29 1999/07/12 07:27:54 adam
+ * Improved speed of Tcl processing. Fixed one memory leak.
+ *
+ * Revision 1.28 1999/07/06 12:26:04 adam
+ * Fixed filters so that MS-DOS CR is ignored.
+ *
+ * Revision 1.27 1999/06/28 13:25:40 quinn
+ * Improved diagnostics for Tcl
+ *
+ * Revision 1.26 1999/05/26 07:49:14 adam
+ * C++ compilation.
+ *
+ * Revision 1.25 1999/05/25 12:33:32 adam
+ * Fixed bug in Tcl filter.
+ *
+ * Revision 1.24 1999/05/21 11:08:46 adam
* Tcl filter attempts to read <filt>.tflt. Improvements to configure
* script so that it reads uninstalled Tcl source.
*
#if HAVE_TCL_H
#include <tcl.h>
+
+/* The Tcl_Obj API is used when building against Tcl 8 or later.
+ * tcl.h publishes its version as TCL_MAJOR_VERSION; an unknown macro
+ * name in #if silently evaluates to 0 and would disable this path.
+ * The macro is defined to 1 so that "#if HAVE_TCL_OBJECTS" is a valid
+ * preprocessor expression wherever it is tested below. */
+#if TCL_MAJOR_VERSION >= 8
+#define HAVE_TCL_OBJECTS 1
+#endif
#endif
#define REGX_DEBUG 0
struct regxCode {
char *str;
+#if HAVE_TCL_OBJECTS
+ Tcl_Obj *tcl_obj;
+#endif
};
struct lexRuleAction {
};
struct lexConcatBuf {
- int len;
int max;
char *buf;
};
int (*f_win_rf)(void *, char *, size_t);
off_t (*f_win_sf)(void *, off_t);
- struct lexConcatBuf **concatBuf;
+ struct lexConcatBuf *concatBuf;
int maxLevel;
data1_node **d1_stack;
int d1_level;
spec->f_win_start = start_pos;
if (!spec->f_win_buf)
- spec->f_win_buf = xmalloc (spec->f_win_size);
+ spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
*size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
spec->f_win_size);
spec->f_win_end = spec->f_win_start + *size;
struct regxCode *p = *pp;
if (p)
{
+#if HAVE_TCL_OBJECTS
+ if (p->tcl_obj)
+ Tcl_DecrRefCount (p->tcl_obj);
+#endif
xfree (p->str);
xfree (p);
*pp = NULL;
{
struct regxCode *p;
- p = xmalloc (sizeof(*p));
- p->str = xmalloc (len+1);
+ p = (struct regxCode *) xmalloc (sizeof(*p));
+ p->str = (char *) xmalloc (len+1);
memcpy (p->str, buf, len);
p->str[len] = '\0';
+#if HAVE_TCL_OBJECTS
+ p->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
+ if (p->tcl_obj)
+ Tcl_IncrRefCount (p->tcl_obj);
+#endif
*pp = p;
}
static struct lexContext *lexContextCreate (const char *name)
{
- struct lexContext *p = xmalloc (sizeof(*p));
+ struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p));
p->name = xstrdup (name);
p->ruleNo = 1;
{
struct lexRule *rp, *rp1;
+ dfa_delete (&p->dfa);
xfree (p->fastRule);
for (rp = p->rules; rp; rp = rp1)
{
}
actionListDel (&p->beginActionList);
actionListDel (&p->endActionList);
+ actionListDel (&p->initActionList);
xfree (p->name);
xfree (p);
}
struct lexSpec *p;
int i;
- p = xmalloc (sizeof(*p));
- p->name = xmalloc (strlen(name)+1);
+ p = (struct lexSpec *) xmalloc (sizeof(*p));
+ p->name = (char *) xmalloc (strlen(name)+1);
strcpy (p->name, name);
#if HAVE_TCL_H
p->dh = dh;
p->context = NULL;
p->context_stack_size = 100;
- p->context_stack = xmalloc (sizeof(*p->context_stack) *
- p->context_stack_size);
+ p->context_stack = (struct lexContext **)
+ xmalloc (sizeof(*p->context_stack) * p->context_stack_size);
p->f_win_buf = NULL;
p->maxLevel = 128;
- p->concatBuf = xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
+ p->concatBuf = (struct lexConcatBuf *)
+ xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
for (i = 0; i < p->maxLevel; i++)
{
- p->concatBuf[i] = xmalloc (sizeof(**p->concatBuf));
- p->concatBuf[i]->len = p->concatBuf[i]->max = 0;
- p->concatBuf[i]->buf = 0;
+ p->concatBuf[i].max = 0;
+ p->concatBuf[i].buf = 0;
}
- p->d1_stack = xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
+ p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
p->d1_level = 0;
return p;
}
return ;
for (i = 0; i < p->maxLevel; i++)
- xfree (p->concatBuf[i]);
+ xfree (p->concatBuf[i].buf);
xfree (p->concatBuf);
lt = p->context;
lexContextDestroy (lt);
lt = lt_next;
}
+/* The interpreter is used whenever Tcl is compiled in (HAVE_TCL_H),
+ * with or without Tcl_Obj support, so it is released under that guard. */
#if HAVE_TCL_H
if (p->tcl_interp)
Tcl_DeleteInterp (p->tcl_interp);
#endif
char cmd[32];
int i, level;
- while (*cp == ' ' || *cp == '\t' || *cp == '\n')
+ while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
cp++;
switch (*cp)
{
cmd[i] = *cp + 'a' - 'A';
else
break;
- if (i < sizeof(cmd)-2)
+ if (i < (int) sizeof(cmd)-2)
i++;
cp++;
}
{
logf (LOG_WARN, "bad character %d %c", *cp, *cp);
cp++;
- while (*cp && *cp != ' ' && *cp != '\t' && *cp != '\n')
+ while (*cp && *cp != ' ' && *cp != '\t' &&
+ *cp != '\n' && *cp != '\r')
cp++;
*cpp = cp;
return 0;
bodyMark = 1;
continue;
case REGX_CODE:
- *ap = xmalloc (sizeof(**ap));
+ *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
(*ap)->which = tok;
regxCodeMk (&(*ap)->u.code, s, len);
s += len+1;
break;
case REGX_PATTERN:
- *ap = xmalloc (sizeof(**ap));
+ *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
(*ap)->which = tok;
(*ap)->u.pattern.body = bodyMark;
bodyMark = 0;
logf (LOG_WARN, "cannot use INIT here");
continue;
case REGX_END:
- *ap = xmalloc (sizeof(**ap));
+ *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
(*ap)->which = tok;
break;
}
return -1;
}
s++;
- rp = xmalloc (sizeof(*rp));
+ rp = (struct lexRule *) xmalloc (sizeof(*rp));
rp->info.no = spec->context->ruleNo++;
rp->next = spec->context->rules;
spec->context->rules = rp;
int readFileSpec (struct lexSpec *spec)
{
struct lexContext *lc;
- char *lineBuf;
- int lineSize = 512;
int c, i, errors = 0;
FILE *spec_inf = 0;
+ WRBUF lineBuf;
+ char fname[256];
- lineBuf = xmalloc (1+lineSize);
#if HAVE_TCL_H
if (spec->tcl_interp)
{
- sprintf (lineBuf, "%s.tflt", spec->name);
- spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), lineBuf, "r");
+ sprintf (fname, "%s.tflt", spec->name);
+ spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), fname, "r");
}
#endif
if (!spec_inf)
{
- sprintf (lineBuf, "%s.flt", spec->name);
- spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), lineBuf, "r");
+ sprintf (fname, "%s.flt", spec->name);
+ spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), fname, "r");
}
if (!spec_inf)
{
logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name);
- xfree (lineBuf);
- return -1;
- }
- logf (LOG_LOG, "reading regx filter %s.flt", lineBuf);
- sprintf (lineBuf, "%s.flt", spec->name);
- if (!(spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh),
- lineBuf, "r")))
- {
- logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name);
- xfree (lineBuf);
return -1;
}
+ logf (LOG_LOG, "reading regx filter %s", fname);
#if HAVE_TCL_H
if (spec->tcl_interp)
logf (LOG_LOG, "Tcl enabled");
#endif
+ lineBuf = wrbuf_alloc();
spec->lineNo = 0;
c = getc (spec_inf);
while (c != EOF)
{
- int off = 0;
- if (c == '#' || c == '\n' || c == ' ' || c == '\t')
+ wrbuf_rewind (lineBuf);
+ if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r')
{
while (c != '\n' && c != EOF)
c = getc (spec_inf);
else
{
int addLine = 0;
-
- lineBuf[off++] = c;
+
while (1)
{
int c1 = c;
+ wrbuf_putc(lineBuf, c);
c = getc (spec_inf);
+ while (c == '\r')
+ c = getc (spec_inf);
if (c == EOF)
break;
if (c1 == '\n')
break;
addLine++;
}
- lineBuf[off] = c;
- if (off < lineSize)
- off++;
}
- lineBuf[off] = '\0';
- readOneSpec (spec, lineBuf);
+ wrbuf_putc(lineBuf, '\0');
+ readOneSpec (spec, wrbuf_buf(lineBuf));
spec->lineNo += addLine;
}
}
fclose (spec_inf);
- xfree (lineBuf);
+ wrbuf_free(lineBuf, 1);
#if 0
debug_dfa_trav = 1;
for (lc = spec->context; lc; lc = lc->next)
{
struct lexRule *rp;
- lc->fastRule = xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
+ lc->fastRule = (struct lexRuleInfo **)
+ xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
for (i = 0; i < lc->ruleNo; i++)
lc->fastRule[i] = NULL;
for (rp = lc->rules; rp; rp = rp->next)
parent->child = res;
spec->d1_stack[spec->d1_level] = res;
}
- if (org_len + elen >= spec->concatBuf[spec->d1_level]->max)
+ if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
{
char *old_buf, *new_buf;
- spec->concatBuf[spec->d1_level]->max = org_len + elen + 256;
- new_buf = xmalloc (spec->concatBuf[spec->d1_level]->max);
- if ((old_buf = spec->concatBuf[spec->d1_level]->buf))
+ spec->concatBuf[spec->d1_level].max = org_len + elen + 256;
+ new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max);
+ if ((old_buf = spec->concatBuf[spec->d1_level].buf))
{
memcpy (new_buf, old_buf, org_len);
xfree (old_buf);
}
- spec->concatBuf[spec->d1_level]->buf = new_buf;
+ spec->concatBuf[spec->d1_level].buf = new_buf;
}
- assert (spec->concatBuf[spec->d1_level]);
- memcpy (spec->concatBuf[spec->d1_level]->buf + org_len, ebuf, elen);
+ memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen);
res->u.data.len += elen;
}
assert (!res->u.data.data);
assert (res->u.data.len > 0);
if (res->u.data.len > DATA1_LOCALDATA)
- res->u.data.data = nmem_malloc (spec->m, res->u.data.len);
+ res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len);
else
res->u.data.data = res->lbuf;
- memcpy (res->u.data.data, spec->concatBuf[spec->d1_level]->buf,
+ memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf,
res->u.data.len);
}
}
static void tagBegin (struct lexSpec *spec,
const char *tag, int len)
{
- struct data1_node *parent = spec->d1_stack[spec->d1_level -1];
+ struct data1_node *parent;
data1_element *elem = NULL;
- data1_node *partag = get_parent_tag(spec->dh, parent);
+ data1_node *partag;
data1_node *res;
data1_element *e = NULL;
int localtag = 0;
return ;
}
tagStrip (&tag, &len);
+
+ parent = spec->d1_stack[spec->d1_level -1];
+ partag = get_parent_tag(spec->dh, parent);
res = data1_mk_node (spec->dh, spec->m);
res->parent = parent;
res->u.tag.get_bytes = -1;
if (len >= DATA1_LOCALDATA)
- res->u.tag.tag = nmem_malloc (spec->m, len+1);
+ res->u.tag.tag = (char *) nmem_malloc (spec->m, len+1);
else
res->u.tag.tag = res->lbuf;
else if (*s == '-')
{
*tokBuf = s++;
- while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
+ while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
+ *s != ';')
s++;
*tokLen = s - *tokBuf;
*src = s;
else
{
*tokBuf = s++;
- while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
+ while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
+ *s != ';')
s++;
*tokLen = s - *tokBuf;
}
static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
int argc, char **argv)
{
- struct lexSpec *spec = clientData;
+ struct lexSpec *spec = (struct lexSpec *) clientData;
if (argc < 2)
return TCL_ERROR;
if (!strcmp(argv[1], "record") && argc == 3)
static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
int argc, char **argv)
{
- struct lexSpec *spec = clientData;
+ struct lexSpec *spec = (struct lexSpec *) clientData;
if (argc < 2)
return TCL_ERROR;
{
int min_level = 1;
char *element = 0;
- if (!strcmp(argv[2], "-record"))
+ if (argc >= 3 && !strcmp(argv[2], "-record"))
{
min_level = 0;
if (argc == 4)
element = argv[3];
}
else
- {
if (argc == 3)
element = argv[2];
- }
tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
if (spec->d1_level == 0)
{
int argi = 1;
int textFlag = 0;
const char *element = 0;
- struct lexSpec *spec = clientData;
+ struct lexSpec *spec = (struct lexSpec *) clientData;
while (argi < argc)
{
static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
int argc, char **argv)
{
- struct lexSpec *spec = clientData;
+ struct lexSpec *spec = (struct lexSpec *) clientData;
int argi = 1;
int offset = 0;
int no;
static void execTcl (struct lexSpec *spec, struct regxCode *code)
{
int i;
+ int ret;
for (i = 0; i < spec->arg_no; i++)
{
char var_name[10], *var_buf;
var_buf[var_len] = ch;
}
}
- Tcl_Eval (spec->tcl_interp, code->str);
+#if HAVE_TCL_OBJECTS
+ ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
+#else
+ ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
+#endif
+ if (ret != TCL_OK)
+ {
+ const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);
+ logf(LOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s",
+ spec->tcl_interp->errorLine,
+ spec->tcl_interp->result,
+ err ? err : "[NO ERRORINFO]");
+ }
}
/* HAVE_TCL_H */
#endif
void grs_destroy(void *clientData)
{
- struct lexSpecs *specs = clientData;
+ struct lexSpecs *specs = (struct lexSpecs *) clientData;
if (specs->spec)
{
lexSpecDestroy(&specs->spec);
void *grs_init(void)
{
- struct lexSpecs *specs = xmalloc (sizeof(*specs));
+ struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs));
specs->spec = 0;
return specs;
}
data1_node *grs_read_regx (struct grs_read_info *p)
{
int res;
- struct lexSpecs *specs = p->clientData;
+ struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
struct lexSpec **curLexSpec = &specs->spec;
#if REGX_DEBUG
data1_node *grs_read_tcl (struct grs_read_info *p)
{
int res;
- struct lexSpecs *specs = p->clientData;
+ struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
struct lexSpec **curLexSpec = &specs->spec;
#if REGX_DEBUG