* Sebastian Hammer, Adam Dickmeiss
*
* $Log: regxread.c,v $
- * Revision 1.3 1996-11-08 14:05:33 adam
+ * Revision 1.9 1997-09-29 09:02:49 adam
+ * Fixed small bug (introduced by previous commit).
+ *
+ * Revision 1.8 1997/09/17 12:19:22 adam
+ * Zebra version corresponds to YAZ version 1.4.
+ * Changed Zebra server so that it doesn't depend on global common_resource.
+ *
+ * Revision 1.7 1997/07/15 16:33:07 adam
+ * Check for zero length in execData.
+ *
+ * Revision 1.6 1997/02/24 10:41:51 adam
+ * Cleanup of code and commented out the "end element-end-record" code.
+ *
+ * Revision 1.5 1997/02/19 16:22:33 adam
+ * Fixed "end element" to terminate record in outer-most level.
+ *
+ * Revision 1.4 1997/02/12 20:42:58 adam
+ * Changed some log messages.
+ *
+ * Revision 1.3 1996/11/08 14:05:33 adam
* Bug fix: data1 node member u.tag.get_bytes wasn't initialized.
*
* Revision 1.2 1996/10/29 14:02:09 adam
int which;
union {
struct {
- struct DFA *dfa; /* REGX_PATTERN */
+ struct DFA *dfa; /* REGX_PATTERN */
int body;
} pattern;
struct regxCode *code; /* REGX_CODE */
struct lexTrans trans;
int lineNo;
NMEM m;
+ data1_handle dh;
void *f_win_fh;
void (*f_win_ef)(void *, off_t);
-#if F_WIN_READ
+
int f_win_start;
int f_win_end;
int f_win_size;
char *f_win_buf;
int (*f_win_rf)(void *, char *, size_t);
off_t (*f_win_sf)(void *, off_t);
-#else
- char *scan_buf;
- int scan_size;
-#endif
+
struct lexRuleAction *beginActionList;
struct lexRuleAction *endActionList;
};
-#if F_WIN_READ
+
static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
int *size)
{
*pos = F_WIN_EOF;
return 0;
}
-#endif
static void regxCodeDel (struct regxCode **pp)
{
p->trans.fastRule = NULL;
p->beginActionList = NULL;
p->endActionList = NULL;
-#if F_WIN_READ
p->f_win_buf = NULL;
-#endif
return p;
}
}
actionListDel (&p->beginActionList);
actionListDel (&p->endActionList);
-#if F_WIN_READ
xfree (p->f_win_buf);
-#endif
xfree (p);
*pp = NULL;
}
cmd[i] = '\0';
if (i == 0)
{
- logf (LOG_WARN, "Bad character %d %c", *cp, *cp);
+ logf (LOG_WARN, "bad character %d %c", *cp, *cp);
cp++;
while (*cp && *cp != ' ' && *cp != '\t' && *cp != '\n')
cp++;
return REGX_BODY;
else
{
- logf (LOG_WARN, "Bad command %s", cmd);
+ logf (LOG_WARN, "bad command %s", cmd);
return 0;
}
}
{
xfree (*ap);
*ap = NULL;
- logf (LOG_WARN, "Regular expression error. r=%d", r);
+ logf (LOG_WARN, "regular expression error. r=%d", r);
return -1;
}
dfa_mkstate ((*ap)->u.pattern.dfa);
s++;
break;
case REGX_BEGIN:
- logf (LOG_WARN, "Cannot use begin here");
+ logf (LOG_WARN, "cannot use begin here");
continue;
case REGX_END:
*ap = xmalloc (sizeof(**ap));
r = dfa_parse (spec->trans.dfa, &s);
if (r)
{
- logf (LOG_WARN, "Regular expression error. r=%d", r);
+ logf (LOG_WARN, "regular expression error. r=%d", r);
return -1;
}
if (*s != '/')
{
- logf (LOG_WARN, "Expects / at end of pattern. got %c", *s);
+ logf (LOG_WARN, "expects / at end of pattern. got %c", *s);
return -1;
}
s++;
FILE *spec_inf;
lineBuf = xmalloc (1+lineSize);
- logf (LOG_LOG, "Reading spec %s", spec->name);
+ logf (LOG_LOG, "reading regx filter %s.flt", spec->name);
sprintf (lineBuf, "%s.flt", spec->name);
- if (!(spec_inf = yaz_path_fopen (data1_get_tabpath(), lineBuf, "r")))
+ if (!(spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh),
+ lineBuf, "r")))
{
- logf (LOG_ERRNO|LOG_WARN, "Cannot read spec file %s", spec->name);
+ logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name);
xfree (lineBuf);
return -1;
}
{
struct data1_node *res, *parent;
+ if (elen == 0) /* shouldn't happen, but it does! */
+ return ;
#if REGX_DEBUG
if (elen > 40)
logf (LOG_DEBUG, "execData %.15s ... %.*s", ebuf, 15, ebuf + elen-15);
logf (LOG_DEBUG, "execData len=%d", elen);
#endif
- if (*d1_level <= 1)
+ if (*d1_level <= 1)
return;
parent = d1_stack[*d1_level -1];
}
else
{
- res = data1_mk_node (spec->m);
+ res = data1_mk_node (spec->dh, spec->m);
res->parent = parent;
res->which = DATA1N_data;
res->u.data.what = DATA1I_text;
{
struct data1_node *parent = d1_stack[*d1_level -1];
data1_element *elem = NULL;
- data1_node *partag = get_parent_tag(parent);
+ data1_node *partag = get_parent_tag(spec->dh, parent);
data1_node *res;
data1_element *e = NULL;
int localtag = 0;
if (*d1_level == 0)
{
- logf (LOG_WARN, "In element begin. No record type defined");
+ logf (LOG_WARN, "in element begin. No record type defined");
return ;
}
- res = data1_mk_node (spec->m);
+ res = data1_mk_node (spec->dh, spec->m);
res->parent = parent;
res->which = DATA1N_tag;
res->u.tag.tag = res->lbuf;
res->u.tag.tag[len] = '\0';
#if REGX_DEBUG
- logf (LOG_DEBUG, "Tag begin %s (%d)", res->u.tag.tag, *d1_level);
+ logf (LOG_DEBUG, "tag begin %s (%d)", res->u.tag.tag, *d1_level);
#endif
if (parent->which == DATA1N_variant)
return ;
if (!(e = partag->u.tag.element))
localtag = 1;
- elem = data1_getelementbytagname (d1_stack[0]->u.root.absyn, e,
- res->u.tag.tag);
+ elem = data1_getelementbytagname (spec->dh, d1_stack[0]->u.root.absyn,
+ e, res->u.tag.tag);
res->u.tag.element = elem;
res->u.tag.node_selected = 0;
break;
}
#if REGX_DEBUG
- logf (LOG_DEBUG, "Tag end (%d)", *d1_level);
+ logf (LOG_DEBUG, "tag end (%d)", *d1_level);
#endif
}
struct DFA_state *state = dfa->states[0];
struct DFA_tran *t;
unsigned char c;
-#if F_WIN_READ
unsigned char c_prev = 0;
-#endif
int ptr = *pptr;
int start_ptr = *pptr;
int last_rule = 0;
while (1)
{
-#if F_WIN_READ
c = f_win_advance (spec, &ptr);
if (ptr == F_WIN_EOF)
{
}
break;
}
-#else
- if (ptr == spec->scan_size)
- {
- if (last_rule)
- {
- *mptr = start_ptr;
- *pptr = last_ptr;
- return 1;
- }
- break;
- }
- c = spec->scan_buf[ptr++];
-#endif
t = state->trans;
i = state->tran_no;
while (1)
}
state = dfa->states[0];
start_ptr = ptr;
-#if F_WIN_READ
c_prev = c;
-#endif
break;
}
else if (c >= t->ch[0] && c <= t->ch[1])
state = dfa->states[t->to];
if (state->rule_no)
{
-#if F_WIN_READ
if (c_prev == '\n')
{
last_rule = state->rule_no;
last_rule = state->rule_nno;
last_ptr = ptr;
}
-#else
- last_rule = state->rule_no;
- last_ptr = ptr;
-#endif
}
break;
}
{
if (n >= arg_no)
n = arg_no-1;
-#if F_WIN_READ
*tokBuf = f_win_get (spec, arg_start[n], arg_end[n], tokLen);
-#else
- *tokBuf = spec->scan_buf + arg_start[n];
- *tokLen = arg_end[n] - arg_start[n];
-#endif
}
}
else if (*s == '\"')
#if REGX_DEBUG
logf (LOG_DEBUG, "begin record %s", absynName);
#endif
- if (!(absyn = data1_get_absyn (absynName)))
+ if (!(absyn = data1_get_absyn (spec->dh, absynName)))
logf (LOG_WARN, "Unknown tagset: %s", absynName);
else
{
data1_node *res;
- res = data1_mk_node (spec->m);
+ res = data1_mk_node (spec->dh, spec->m);
res->which = DATA1N_root;
res->u.root.type = absynName;
res->u.root.absyn = absyn;
{
r = execTok (spec, &s, arg_no, arg_start, arg_end,
&cmd_str, &cmd_len);
+#if 0
+ if (*d1_level == 1)
+ {
+ *d1_level = 0;
+ returnCode = 0;
+ }
+#endif
if (r > 2)
{
tagEnd (spec, d1_stack, d1_level, cmd_str, cmd_len);
tagEnd (spec, d1_stack, d1_level, NULL, 0);
}
else
- logf (LOG_WARN, "Missing record/element/variant");
+ logf (LOG_WARN, "missing record/element/variant");
}
else
- logf (LOG_WARN, "Missing record/element/variant");
+ logf (LOG_WARN, "missing record/element/variant");
}
else if (!strcmp (p, "data"))
{
break;
}
else
- logf (LOG_WARN, "Bad data option: %.*s",
+ logf (LOG_WARN, "bad data option: %.*s",
cmd_len, cmd_str);
}
if (r != 2)
{
- logf (LOG_WARN, "Missing data item after data");
+ logf (LOG_WARN, "missing data item after data");
continue;
}
if (element_str)
&cmd_str, &cmd_len);
if (r < 2)
{
- logf (LOG_WARN, "Missing number after -offset");
+ logf (LOG_WARN, "missing number after -offset");
continue;
}
p = regxStrz (cmd_str, cmd_len);
offset = 0;
if (r < 2)
{
- logf (LOG_WARN, "Missing index after unread command");
+ logf (LOG_WARN, "missing index after unread command");
continue;
}
if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
{
- logf (LOG_WARN, "Bad index after unread command");
+ logf (LOG_WARN, "bad index after unread command");
continue;
}
else
}
else
{
- logf (LOG_WARN, "Unknown code command: %.*s", cmd_len, cmd_str);
+ logf (LOG_WARN, "unknown code command: %.*s", cmd_len, cmd_str);
r = execTok (spec, &s, arg_no, arg_start, arg_end,
&cmd_str, &cmd_len);
continue;
}
if (r > 1)
{
- logf (LOG_WARN, "Ignoring token %.*s", cmd_len, cmd_str);
+ logf (LOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
do {
r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str,
&cmd_len);
break;
case REGX_END:
arg_start[arg_no] = *pptr;
-#if F_WIN_READ
arg_end[arg_no] = F_WIN_EOF;
-#else
- arg_end[arg_no] = spec->scan_size;
-#endif
arg_no++;
-#if F_WIN_READ
*pptr = F_WIN_EOF;
-#else
- *pptr = spec->scan_size;
-#endif
}
ap = ap->next;
}
struct DFA_state *state = trans->dfa->states[0];
struct DFA_tran *t;
unsigned char c;
-#if F_WIN_READ
unsigned char c_prev = '\n';
-#endif
int i;
int last_rule = 0;
int last_ptr = *ptr;
while (1)
{
-#if F_WIN_READ
c = f_win_advance (spec, ptr);
if (*ptr == F_WIN_EOF)
{
if (*ptr == F_WIN_EOF)
break;
}
-#else
- if (*ptr == spec->scan_size)
- {
- if (last_rule)
- {
- if (skip_ptr < start_ptr)
- {
- execDataP (spec, d1_stack, d1_level,
- spec->scan_buf + skip_ptr, start_ptr - skip_ptr,
- 0);
- }
- *ptr = last_ptr;
- execRule (spec, trans, d1_stack, d1_level, last_rule,
- start_ptr, ptr);
- skip_ptr = *ptr;
- last_rule = 0;
- }
- else if (skip_ptr < *ptr)
- {
- execDataP (spec, d1_stack, d1_level,
- spec->scan_buf + skip_ptr, *ptr - skip_ptr, 0);
- }
- if (*ptr == spec->scan_size)
- break;
- }
- c = spec->scan_buf[(*ptr)++];
-#endif
t = state->trans;
i = state->tran_no;
while (1)
{
if (skip_ptr < start_ptr)
{
-#if F_WIN_READ
int size;
char *buf;
buf = f_win_get (spec, skip_ptr, start_ptr, &size);
execDataP (spec, d1_stack, d1_level, buf, size, 0);
-#else
- execDataP (spec, d1_stack, d1_level,
- spec->scan_buf + skip_ptr,
- start_ptr - skip_ptr, 0);
-#endif
}
*ptr = last_ptr;
if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
start_ptr, ptr))
{
if (spec->f_win_ef && *ptr != F_WIN_EOF)
+ {
+#if REGX_DEBUG
+ logf (LOG_DEBUG, "regx: endf ptr=%d", *ptr);
+#endif
(*spec->f_win_ef)(spec->f_win_fh, *ptr);
+ }
return NULL;
}
skip_ptr = *ptr;
last_rule = 0;
start_ptr = *ptr;
-#if F_WIN_READ
if (start_ptr > 0)
{
--start_ptr;
c_prev = f_win_advance (spec, &start_ptr);
}
-#endif
}
else
{
-#if F_WIN_READ
c_prev = f_win_advance (spec, &start_ptr);
*ptr = start_ptr;
-#else
- *ptr = ++start_ptr;
-#endif
}
state = trans->dfa->states[0];
break;
state = trans->dfa->states[t->to];
if (state->rule_no)
{
-#if F_WIN_READ
if (c_prev == '\n')
{
last_rule = state->rule_no;
last_rule = state->rule_nno;
last_ptr = *ptr;
}
-#else
- if (!start_ptr || spec->scan_buf[start_ptr-1] == '\n')
- {
- last_rule = state->rule_no;
- last_ptr = *ptr;
- }
- else if (state->rule_nno)
- {
- last_rule = state->rule_nno;
- last_ptr = *ptr;
- }
-#endif
}
break;
}
}
data1_node *grs_read_regx (struct grs_read_info *p)
-/*
- int (*rf)(void *, char *, size_t),
- off_t (*sf)(void *, off_t),
- void (*ef)(void *, off_t),
- void *fh,
- off_t offset,
- const char *name, NMEM m
-*/
{
int res;
-#if !F_WIN_READ
- static int size;
- int rd = 0;
-#endif
data1_node *n;
#if REGX_DEBUG
- logf (LOG_DEBUG, "data1_read_regx, offset=%ld type=%s",(long) offset,
- name);
+ logf (LOG_DEBUG, "grs_read_regx");
#endif
if (!curLexSpec || strcmp (curLexSpec->name, p->type))
{
if (curLexSpec)
lexSpecDel (&curLexSpec);
curLexSpec = lexSpecMk (p->type);
+ curLexSpec->dh = p->dh;
res = readFileSpec (curLexSpec);
if (res)
{
return NULL;
}
}
-#if F_WIN_READ
if (!p->offset)
{
curLexSpec->f_win_start = 0;
curLexSpec->f_win_ef = p->endf;
curLexSpec->f_win_size = 500000;
}
-#else
- if (!(curLexSpec->scan_buf = xmalloc (size = 4096)))
- abort();
- do
- {
- if (rd+4096 > size && !(curLexSpec->scan_buf
- = xrealloc (curLexSpec->scan_buf, size *= 2)))
- abort();
- if ((res = (*rf)(fh, curLexSpec->scan_buf + rd, 4096)) < 0)
- return NULL;
- rd += res;
- } while (res);
- curLexSpec->scan_size = rd;
-#endif
curLexSpec->m = p->mem;
n = lexRoot (curLexSpec, p->offset);
-#if !F_WIN_READ
- xfree (curLexSpec->scan_buf);
-#endif
return n;
}