2 * Copyright (C) 1994-1996, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.4 1997-02-12 20:42:58 adam
8 * Changed some log messages.
10 * Revision 1.3 1996/11/08 14:05:33 adam
11 * Bug fix: data1 node member u.tag.get_bytes wasn't initialized.
13 * Revision 1.2 1996/10/29 14:02:09 adam
14 * Doesn't use the global data1_tabpath (from YAZ). Instead the function
15 * data1_get_tabpath is used.
17 * Revision 1.1 1996/10/11 10:57:30 adam
18 * New module recctrl. Used to manage records (extract/retrieval).
20 * Revision 1.24 1996/06/17 14:25:31 adam
21 * Removed LOG_DEBUG logs; can still be enabled by setting REGX_DEBUG.
23 * Revision 1.23 1996/06/04 10:19:00 adam
24 * Minor changes - removed include of ctype.h.
26 * Revision 1.22 1996/06/03 15:23:13 adam
27 * Bug fix: /../ BODY /../ - pattern didn't match EOF.
29 * Revision 1.21 1996/05/14 16:58:38 adam
32 * Revision 1.20 1996/05/01 13:46:36 adam
33 * First work on multiple records in one file.
34 * New option, -offset, to the "unread" command in the filter module.
36 * Revision 1.19 1996/02/12 16:18:20 adam
37 * Yet another bug fix in implementation of unread command.
39 * Revision 1.18 1996/02/12 16:07:54 adam
40 * Bug fix in new unread command.
42 * Revision 1.17 1996/02/12 15:56:11 adam
43 * New code command: unread.
45 * Revision 1.16 1996/01/17 14:57:51 adam
46 * Prototype changed for reader functions in extract/retrieve. File
47 * is identified by 'void *' instead of 'int'.
49 * Revision 1.15 1996/01/08 19:15:47 adam
50 * New input filter that works!
52 * Revision 1.14 1996/01/08 09:10:38 adam
53 * Yet another complete rework on this module.
55 * Revision 1.13 1995/12/15 17:21:50 adam
56 * This version is able to set data.formatted_text in data1-nodes.
58 * Revision 1.12 1995/12/15 16:20:10 adam
59 * The filter files (*.flt) are read from the path given by data1_tabpath.
61 * Revision 1.11 1995/12/15 12:35:16 adam
64 * Revision 1.10 1995/12/15 10:35:36 adam
67 * Revision 1.9 1995/12/14 16:38:48 adam
68 * Completely new attempt to make regular expression parsing.
70 * Revision 1.8 1995/12/13 17:16:59 adam
73 * Revision 1.7 1995/12/13 16:51:58 adam
74 * Modified to set last_child in data1_nodes.
75 * Uses destroy handler to free up data text nodes.
77 * Revision 1.6 1995/12/13 13:45:37 quinn
78 * Changed data1 to use nmem.
80 * Revision 1.5 1995/12/11 09:12:52 adam
81 * The rec_get function returns NULL if record doesn't exist - will
82 * happen in the server if the result set records have been deleted since
83 * the creation of the set (i.e. the search).
84 * The server saves a result temporarily if it is 'volatile', i.e. the
85 * set is register dependent.
87 * Revision 1.4 1995/12/05 16:57:40 adam
88 * More work on regular patterns.
90 * Revision 1.3 1995/12/05 09:37:09 adam
91 * One malloc was renamed to xmalloc.
93 * Revision 1.2 1995/12/04 17:59:24 adam
94 * More work on regular expression conversion.
96 * Revision 1.1 1995/12/04 14:25:30 adam
97 * Started work on regular expression parsed input to structured records.
105 #include <zebrautl.h>
111 #define F_WIN_EOF 2000000000
115 #define REGX_PATTERN 1
/*
 * One step in a rule's action list; steps are chained through `next`.
 * The trailing comments on the members name the action kind each one
 * serves.  Elsewhere in the file they are reached as u.pattern.dfa and
 * u.code, so they live inside nested members whose declarations are not
 * visible in this listing.
 */
125 struct lexRuleAction {
129 struct DFA *dfa; /* REGX_PATTERN */
132 struct regxCode *code; /* REGX_CODE */
134 struct lexRuleAction *next;
/*
 * Member lines of the rule, transition-table and filter-spec structures.
 * NOTE(review): the enclosing struct headers are not visible in this
 * listing; the groupings below are inferred from how the members are
 * accessed in the functions of this file.
 */
/* Head of the action list run for a rule (reached as info.actionList). */
139 struct lexRuleAction *actionList;
/* A rule: its info record plus the link to the next rule in the list. */
143 struct lexRuleInfo info;
144 struct lexRule *next;
/* Rule list head, and a lookup array filled by readFileSpec so that
 * fastRule[n] points at the info of rule number n. */
149 struct lexRule *rules;
150 struct lexRuleInfo **fastRule;
/* The spec's rule set (accessed as spec->trans). */
156 struct lexTrans trans;
/* Callback invoked by lexNode as (handle, position) after a rule has run. */
160 void (*f_win_ef)(void *, off_t);
/* Read callback (handle, buffer, size); its result is used as a byte count. */
166 int (*f_win_rf)(void *, char *, size_t);
/* Seek callback (handle, position). */
167 off_t (*f_win_sf)(void *, off_t);
/* Action lists that lexRoot runs before and after the main scan. */
172 struct lexRuleAction *beginActionList;
173 struct lexRuleAction *endActionList;
/*
 * f_win_get: return a pointer to the input bytes starting at start_pos,
 * served from the spec's window buffer (f_win_buf).  *size receives the
 * number of bytes available at that pointer, never more than
 * end_pos - start_pos.
 *
 * Three cases are visible below:
 *   1. start_pos lies outside the current window: seek, refill the buffer
 *      from start_pos and return the buffer start.
 *   2. the whole range is already buffered: return a pointer into the
 *      buffer.
 *   3. the range starts inside the window but runs past its end: slide the
 *      buffered tail to the front and read more data behind it.
 *
 * The returned pointer aliases f_win_buf, so a later call that refills or
 * shifts the buffer overwrites the bytes it refers to.
 *
 * NOTE(review): the final parameter, the local declarations and the
 * continuation lines of both read calls are not visible in this listing.
 */
177 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
/* Case 1: requested start is not covered by the window. */
182 if (start_pos < spec->f_win_start || start_pos >= spec->f_win_end)
184 (*spec->f_win_sf)(spec->f_win_fh, start_pos);
185 spec->f_win_start = start_pos;
/* The window buffer is allocated on first use. */
187 if (!spec->f_win_buf)
188 spec->f_win_buf = xmalloc (spec->f_win_size);
189 *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
191 spec->f_win_end = spec->f_win_start + *size;
/* Never report more than the caller asked for. */
193 if (*size > end_pos - start_pos)
194 *size = end_pos - start_pos;
195 return spec->f_win_buf;
/* Case 2: the complete range is already in the window. */
197 if (end_pos <= spec->f_win_end)
199 *size = end_pos - start_pos;
200 return spec->f_win_buf + (start_pos - spec->f_win_start);
/* Case 3: keep the bytes from start_pos onwards by moving them to the
 * front of the buffer, then read behind them. */
202 off = start_pos - spec->f_win_start;
203 for (i = 0; i<spec->f_win_end - start_pos; i++)
204 spec->f_win_buf[i] = spec->f_win_buf[i + off];
205 r = (*spec->f_win_rf)(spec->f_win_fh,
207 spec->f_win_size - i);
208 spec->f_win_start = start_pos;
209 spec->f_win_end += r;
211 if (*size > end_pos - start_pos)
212 *size = end_pos - start_pos;
213 return spec->f_win_buf;
/*
 * f_win_advance: fetch the input byte at *pos and step *pos forward.
 * When *pos is inside the buffered window, the byte is returned straight
 * from f_win_buf and *pos is post-incremented.  F_WIN_EOF is tested as a
 * special position value; otherwise a one-byte range is requested from
 * f_win_get.
 * NOTE(review): the statements following the F_WIN_EOF test and the
 * f_win_get call are not visible in this listing.
 */
216 static int f_win_advance (struct lexSpec *spec, int *pos)
221 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
222 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
223 if (*pos == F_WIN_EOF)
225 buf = f_win_get (spec, *pos, *pos+1, &size);
/*
 * regxCodeDel: takes the address of a regxCode pointer and starts from the
 * object it refers to.
 * NOTE(review): the release logic is not visible in this listing; going by
 * the name and by its use in actionListDel it presumably frees the
 * object -- confirm.
 */
236 static void regxCodeDel (struct regxCode **pp)
238 struct regxCode *p = *pp;
/*
 * regxCodeMk: allocate a regxCode object together with a string buffer of
 * len+1 bytes and copy len bytes from buf into it.
 * NOTE(review): the extra byte presumably holds a terminating NUL and the
 * new object is presumably stored through pp; neither statement is visible
 * in this listing.
 */
247 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
251 p = xmalloc (sizeof(*p));
252 p->str = xmalloc (len+1);
253 memcpy (p->str, buf, len);
/*
 * lexSpecDFA: hand out a DFA whose parser character map has been adjusted
 * for filter patterns: the entries for blank and tab are removed and '/'
 * is registered with value 0.
 * NOTE(review): the creation of the DFA and the return statement are not
 * visible here.  Presumably the map changes make blanks ordinary pattern
 * characters and let '/' end a pattern (readOneSpec reports
 * "expects / at end of pattern") -- confirm against the dfa module.
 */
258 static struct DFA *lexSpecDFA (void)
263 dfa_parse_cmap_del (dfa, ' ');
264 dfa_parse_cmap_del (dfa, '\t');
265 dfa_parse_cmap_add (dfa, '/', 0);
/*
 * lexSpecMk: allocate a filter specification called `name`.
 * The name is copied into freshly allocated storage, the rule DFA is
 * obtained from lexSpecDFA, and the rule list, the fast-rule table and
 * the begin/end action lists start out empty.
 * NOTE(review): other member initialisations and the return statement are
 * not visible in this listing.
 */
269 static struct lexSpec *lexSpecMk (const char *name)
273 p = xmalloc (sizeof(*p));
274 p->name = xmalloc (strlen(name)+1);
275 strcpy (p->name, name);
276 p->trans.dfa = lexSpecDFA ();
277 p->trans.rules = NULL;
278 p->trans.fastRule = NULL;
279 p->beginActionList = NULL;
280 p->endActionList = NULL;
/*
 * actionListDel: walk the action list that starts at *rap and dispose of
 * each element's payload: a pattern's DFA through dfa_delete, a code
 * fragment through regxCodeDel.
 * ra1 carries the successor from one iteration to the next.
 * NOTE(review): the discriminator that selects between the two payload
 * kinds and the freeing of the list nodes are not visible in this listing.
 */
287 static void actionListDel (struct lexRuleAction **rap)
289 struct lexRuleAction *ra1, *ra;
291 for (ra = *rap; ra; ra = ra1)
297 dfa_delete (&ra->u.pattern.dfa);
300 regxCodeDel (&ra->u.code);
/*
 * lexSpecDel: tear down a filter specification.
 * Visible steps: delete the rule DFA, free the fast-rule table, delete the
 * action list of every rule, delete the begin and end action lists, and
 * free the window buffer.
 * NOTE(review): how p is obtained from pp, the freeing of the rule nodes
 * and of the spec itself are not visible in this listing.
 */
308 static void lexSpecDel (struct lexSpec **pp)
311 struct lexRule *rp, *rp1;
317 dfa_delete (&p->trans.dfa);
319 xfree (p->trans.fastRule);
/* rp1 carries the successor from one iteration to the next. */
320 for (rp = p->trans.rules; rp; rp = rp1)
322 actionListDel (&rp->info.actionList);
325 actionListDel (&p->beginActionList);
326 actionListDel (&p->endActionList);
328 xfree (p->f_win_buf);
/*
 * readParseToken: read the next token of a filter specification, starting
 * at *cpp.
 * Visible behaviour: leading blanks, tabs and newlines are skipped; a
 * command word is collected into cmd with upper-case letters folded to
 * lower case; the words "begin", "end" and "body" are recognised and any
 * other word is reported as a bad command.  Unexpected characters are
 * logged and the scan then moves on to the next whitespace.
 * NOTE(review): the return values, the handling of the other token kinds
 * and the updates of *cpp and *len are not visible in this listing.
 */
334 static int readParseToken (const char **cpp, int *len)
336 const char *cp = *cpp;
340 while (*cp == ' ' || *cp == '\t' || *cp == '\n')
/* Command letters are stored in lower case. */
369 if (*cp >= 'a' && *cp <= 'z')
371 else if (*cp >= 'A' && *cp <= 'Z')
372 cmd[i] = *cp + 'a' - 'A';
/* Guard against overrunning the cmd buffer. */
375 if (i > sizeof(cmd)-2)
383 logf (LOG_WARN, "bad character %d %c", *cp, *cp);
385 while (*cp && *cp != ' ' && *cp != '\t' && *cp != '\n')
391 if (!strcmp (cmd, "begin"))
393 else if (!strcmp (cmd, "end"))
395 else if (!strcmp (cmd, "body"))
399 logf (LOG_WARN, "bad command %s", cmd);
/*
 * actionListMk: build an action list from the specification text s,
 * attaching each newly allocated element through *ap.
 * Tokens are read with readParseToken until it returns 0.  Visible cases:
 *   - a code fragment is stored with regxCodeMk;
 *   - a pattern gets its own DFA from lexSpecDFA, is parsed with
 *     dfa_parse (a warning is logged on a parse error) and compiled with
 *     dfa_mkstate; the element also records the current bodyMark value;
 *   - "begin" is rejected at this point with a warning.
 * NOTE(review): the switch over the token kinds, the list linking and the
 * return value are not visible in this listing.
 */
405 static int actionListMk (struct lexSpec *spec, const char *s,
406 struct lexRuleAction **ap)
411 while ((tok = readParseToken (&s, &len)))
419 *ap = xmalloc (sizeof(**ap));
421 regxCodeMk (&(*ap)->u.code, s, len);
425 *ap = xmalloc (sizeof(**ap));
427 (*ap)->u.pattern.body = bodyMark;
429 (*ap)->u.pattern.dfa = lexSpecDFA ();
430 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
435 logf (LOG_WARN, "regular expression error. r=%d", r);
438 dfa_mkstate ((*ap)->u.pattern.dfa);
442 logf (LOG_WARN, "cannot use begin here");
445 *ap = xmalloc (sizeof(**ap));
/*
 * readOneSpec: interpret one specification entry held in s.
 * The first token decides what the entry is:
 *   - REGX_BEGIN:   replace the spec's begin action list;
 *   - REGX_END:     replace the spec's end action list;
 *   - REGX_PATTERN: parse the pattern into the shared rule DFA, then
 *                   create a new rule, give it the next rule number, push
 *                   it onto the front of the rule list and build its
 *                   action list from the rest of s.
 * Pattern errors are logged as warnings.
 * NOTE(review): the return values and the exact error checks are not
 * visible in this listing.
 */
455 int readOneSpec (struct lexSpec *spec, const char *s)
459 tok = readParseToken (&s, &len);
460 if (tok == REGX_BEGIN)
/* Any previous begin list is discarded before the new one is built. */
462 actionListDel (&spec->beginActionList);
463 actionListMk (spec, s, &spec->beginActionList);
465 else if (tok == REGX_END)
467 actionListDel (&spec->endActionList);
468 actionListMk (spec, s, &spec->endActionList);
470 else if (tok == REGX_PATTERN)
474 r = dfa_parse (spec->trans.dfa, &s);
477 logf (LOG_WARN, "regular expression error. r=%d", r);
482 logf (LOG_WARN, "expects / at end of pattern. got %c", *s);
486 rp = xmalloc (sizeof(*rp));
487 rp->info.no = spec->trans.ruleNo++;
/* New rules are linked in at the head of the list. */
488 rp->next = spec->trans.rules;
489 spec->trans.rules = rp;
490 actionListMk (spec, s, &rp->info.actionList);
/*
 * readFileSpec: load the filter file "<spec->name>.flt", looked up along
 * the path returned by data1_get_tabpath(), and feed its entries to
 * readOneSpec.  Afterwards the fast-rule table is built so that
 * fastRule[n] points at the info of rule number n, and the rule DFA is
 * compiled with dfa_mkstate.
 * NOTE(review): the line-reading loop is only partly visible.  Going by
 * the visible tests, lines starting with '#', newline, blank or tab are
 * skipped to their end, and a following line that starts with blank or
 * tab is presumably treated as a continuation -- confirm.  The return
 * values and the size passed to the fastRule allocation are not visible
 * either.
 */
495 int readFileSpec (struct lexSpec *spec)
500 int c, i, errors = 0;
/* lineBuf first holds the file name and later the text of each entry. */
503 lineBuf = xmalloc (1+lineSize);
504 logf (LOG_LOG, "reading regx filter %s.flt", spec->name);
505 sprintf (lineBuf, "%s.flt", spec->name);
506 if (!(spec_inf = yaz_path_fopen (data1_get_tabpath(), lineBuf, "r")))
508 logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name);
/* Rule numbering starts at 1. */
513 spec->trans.ruleNo = 1;
518 if (c == '#' || c == '\n' || c == ' ' || c == '\t')
520 while (c != '\n' && c != EOF)
539 if (c != ' ' && c != '\t')
548 readOneSpec (spec, lineBuf);
549 spec->lineNo += addLine;
/* Build the rule-number to rule-info lookup table. */
554 spec->trans.fastRule = xmalloc (sizeof(*spec->trans.fastRule) *
556 for (i = 0; i<spec->trans.ruleNo; i++)
557 spec->trans.fastRule[i] = NULL;
558 for (rp = spec->trans.rules; rp; rp = rp->next)
559 spec->trans.fastRule[rp->info.no] = &rp->info;
565 debug_dfa_followpos = 1;
568 dfa_mkstate (spec->trans.dfa);
/* The filter specification currently loaded.  grs_read_regx keeps using it
 * while the requested type equals its name and replaces it otherwise. */
572 static struct lexSpec *curLexSpec = NULL;
/*
 * destroy_data: destroy handler installed on data nodes whose text was
 * allocated with xmalloc (see execData).  It checks that the node is a
 * data node and releases the text buffer.
 */
574 static void destroy_data (struct data1_node *n)
576 assert (n->which == DATA1N_data);
577 xfree (n->u.data.data);
/*
 * execData: add elen bytes of text from ebuf to the record tree.
 *
 * If the node at the current stack level is already a data node, the text
 * is appended to it; otherwise a new data node is created under the
 * parent found one level up and linked in as the newest child.
 *
 * Storage: text that fits within DATA1_LOCALDATA bytes stays in the
 * node's existing storage (the node-local lbuf for a new node); longer
 * text goes into an xmalloc'ed buffer, and destroy_data is installed so
 * that buffer is freed with the node.
 *
 * NOTE(review): several lines (the conditions around the debug logging,
 * the else keywords and parts of the sibling linking) are not visible in
 * this listing.
 */
580 static void execData (struct lexSpec *spec,
581 data1_node **d1_stack, int *d1_level,
582 const char *ebuf, int elen, int formatted_text)
584 struct data1_node *res, *parent;
588 logf (LOG_DEBUG, "execData %.15s ... %.*s", ebuf, 15, ebuf + elen-15);
590 logf (LOG_DEBUG, "execData %.*s", elen, ebuf);
592 logf (LOG_DEBUG, "execData len=%d", elen);
598 parent = d1_stack[*d1_level -1];
/* Append to an existing data node at this level. */
600 if ((res=d1_stack[*d1_level]) && res->which == DATA1N_data)
/* The combined text still fits: copy in place. */
602 if (elen + res->u.data.len <= DATA1_LOCALDATA)
603 memcpy (res->u.data.data + res->u.data.len, ebuf, elen);
/* Otherwise move old and new text into a fresh heap buffer. */
606 char *nb = xmalloc (elen + res->u.data.len);
607 memcpy (nb, res->u.data.data, res->u.data.len);
608 memcpy (nb + res->u.data.len, ebuf, elen);
/* An old length above DATA1_LOCALDATA means the old text was on the heap. */
609 if (res->u.data.len > DATA1_LOCALDATA)
610 xfree (res->u.data.data);
611 res->u.data.data = nb;
612 res->destroy = destroy_data;
614 res->u.data.len += elen;
/* No data node to extend: create one. */
618 res = data1_mk_node (spec->m);
619 res->parent = parent;
620 res->which = DATA1N_data;
621 res->u.data.what = DATA1I_text;
622 res->u.data.len = elen;
623 res->u.data.formatted_text = formatted_text;
624 if (elen > DATA1_LOCALDATA)
626 res->u.data.data = xmalloc (elen);
627 res->destroy = destroy_data;
630 res->u.data.data = res->lbuf;
631 memcpy (res->u.data.data, ebuf, elen);
632 res->root = parent->root;
/* Register the node with its parent and with the previous sibling. */
634 parent->num_children++;
635 parent->last_child = res;
636 if (d1_stack[*d1_level])
637 d1_stack[*d1_level]->next = res;
640 d1_stack[*d1_level] = res;
/*
 * execDataP: entry point used by lexNode for text found between rule
 * matches.  The visible body forwards all arguments unchanged to execData.
 */
644 static void execDataP (struct lexSpec *spec,
645 data1_node **d1_stack, int *d1_level,
646 const char *ebuf, int elen, int formatted_text)
648 execData (spec, d1_stack, d1_level, ebuf, elen, formatted_text);
/*
 * tagBegin: open a new tag node named by the len bytes at `tag`.
 *
 * The node is created under the parent found one level up the stack, its
 * name is copied into the node-local buffer (truncated to
 * DATA1_LOCALDATA-1 bytes and NUL-terminated), and its element is looked
 * up by tag name in the root's abstract syntax.  The node is then linked
 * in as the parent's newest child and the stack is pushed: the new level
 * starts out NULL.
 *
 * A warning is logged when no record type has been defined.
 * NOTE(review): the condition guarding that warning, the variant branch
 * and the remaining arguments of the element lookup are not visible in
 * this listing.
 */
652 static void tagBegin (struct lexSpec *spec,
653 data1_node **d1_stack, int *d1_level,
654 const char *tag, int len)
656 struct data1_node *parent = d1_stack[*d1_level -1];
657 data1_element *elem = NULL;
658 data1_node *partag = get_parent_tag(parent);
660 data1_element *e = NULL;
665 logf (LOG_WARN, "in element begin. No record type defined");
669 res = data1_mk_node (spec->m);
670 res->parent = parent;
671 res->which = DATA1N_tag;
672 res->u.tag.tag = res->lbuf;
673 res->u.tag.get_bytes = -1;
/* Clamp the name so that it and its terminator fit the local buffer. */
675 if (len >= DATA1_LOCALDATA)
676 len = DATA1_LOCALDATA-1;
678 memcpy (res->u.tag.tag, tag, len);
679 res->u.tag.tag[len] = '\0';
682 logf (LOG_DEBUG, "tag begin %s (%d)", res->u.tag.tag, *d1_level);
684 if (parent->which == DATA1N_variant)
687 if (!(e = partag->u.tag.element))
690 elem = data1_getelementbytagname (d1_stack[0]->u.root.absyn, e,
693 res->u.tag.element = elem;
694 res->u.tag.node_selected = 0;
695 res->u.tag.make_variantlist = 0;
696 res->u.tag.no_data_requested = 0;
697 res->root = parent->root;
698 parent->num_children++;
699 parent->last_child = res;
700 if (d1_stack[*d1_level])
701 d1_stack[*d1_level]->next = res;
/* The new tag becomes current at this level; descend one level. */
704 d1_stack[*d1_level] = res;
705 d1_stack[++(*d1_level)] = NULL;
/*
 * tagEnd: close open tags, looping while the stack level is above 1.
 * A stack entry matches when its tag name has length len and the same
 * bytes as `tag`; the comparison uses the explicit length.  Callers in
 * execCode pass either a name or NULL with length 0.
 * NOTE(review): the level decrement and the first part of the match
 * condition are not visible in this listing.
 */
708 static void tagEnd (struct lexSpec *spec,
709 data1_node **d1_stack, int *d1_level,
710 const char *tag, int len)
712 while (*d1_level > 1)
716 (strlen(d1_stack[*d1_level]->u.tag.tag) == len &&
717 !memcmp (d1_stack[*d1_level]->u.tag.tag, tag, len)))
721 logf (LOG_DEBUG, "tag end (%d)", *d1_level);
/*
 * tryMatch: run a pattern DFA over the input, starting at position *pptr
 * and in the DFA's state 0.
 * On a match *mptr receives the position where the match starts and *pptr
 * the position just past its end (see the comments on those lines).
 * Input is read either through f_win_advance (file window) or from
 * spec->scan_buf; both variants appear below.
 * NOTE(review): the preprocessor guards selecting the input variant, the
 * loop structure and the return values are not visible in this listing.
 */
726 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
729 struct DFA_state *state = dfa->states[0];
733 unsigned char c_prev = 0;
736 int start_ptr = *pptr;
744 c = f_win_advance (spec, &ptr);
745 if (ptr == F_WIN_EOF)
756 if (ptr == spec->scan_size)
766 c = spec->scan_buf[ptr++];
775 *mptr = start_ptr; /* match starts here */
776 *pptr = last_ptr; /* match end here (+1) */
/* Restart the automaton from its initial state. */
779 state = dfa->states[0];
/* Character c falls inside this transition's range: follow it. */
786 else if (c >= t->ch[0] && c <= t->ch[1])
788 state = dfa->states[t->to];
794 last_rule = state->rule_no;
799 last_rule = state->rule_nno;
803 last_rule = state->rule_no;
/*
 * execTok: extract the next token from the code string at *src and report
 * it through *tokBuf and *tokLen.
 * Visible token forms:
 *   - "$" followed by digits: the decimal number n selects a matched
 *     argument; the token is its text, taken from the file window via
 *     f_win_get or from spec->scan_buf using arg_start[n] and arg_end[n];
 *   - a double-quoted string (scanned up to the closing quote);
 *   - newline or ';';
 *   - a word, ending at blank, tab, newline or ';'.
 * Leading blanks and tabs are skipped first.
 * NOTE(review): the return codes are not visible here; execCode compares
 * the result against 3 when it looks for "-option" words.  The range
 * check on n and the update of *src are not visible either.
 */
815 static int execTok (struct lexSpec *spec, const char **src,
816 int arg_no, int *arg_start, int *arg_end,
817 const char **tokBuf, int *tokLen)
819 const char *s = *src;
821 while (*s == ' ' || *s == '\t')
825 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
829 while (*s >= '0' && *s <= '9')
830 n = n*10 + (*s++ -'0');
841 *tokBuf = f_win_get (spec, arg_start[n], arg_end[n], tokLen);
843 *tokBuf = spec->scan_buf + arg_start[n];
844 *tokLen = arg_end[n] - arg_start[n];
851 while (*s && *s != '\"')
853 *tokLen = s - *tokBuf;
858 else if (*s == '\n' || *s == ';')
866 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
868 *tokLen = s - *tokBuf;
875 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
877 *tokLen = s - *tokBuf;
/*
 * regxStrz: copy the len bytes at src into the buffer `str`.
 * NOTE(review): the declaration of str, any length limit, the terminating
 * NUL and the return statement are not visible in this listing.  execCode
 * passes the result to strcmp, so it is presumably a NUL-terminated copy
 * -- confirm.
 */
883 static char *regxStrz (const char *src, int len)
889 memcpy (str, src, len);
/*
 * execCode: interpret the code fragment attached to a rule action.
 *
 * The fragment is tokenised with execTok; arg_start/arg_end describe the
 * text matched by the preceding patterns so that "$n" tokens can refer to
 * it.  Commands visible below:
 *   begin record <name>    create the root node for abstract syntax <name>
 *   begin element <tag>    open a tag (tagBegin)
 *   end record             logged as "end record"
 *   end element [<tag>]    close tags (tagEnd), with or without a name
 *   data [-text] [-element <tag>] <items>
 *                          add text (execData); when -element is given
 *                          the text is wrapped in a tag
 *   unread [-offset <n>] <index>
 *                          set *pptr to arg_start[index] + offset
 * Unknown commands and stray tokens are logged as warnings.
 *
 * NOTE(review): the loop structure, the checks on execTok's result and the
 * return values are not visible in this listing.
 */
894 static int execCode (struct lexSpec *spec,
895 int arg_no, int *arg_start, int *arg_end, int *pptr,
896 struct regxCode *code,
897 data1_node **d1_stack, int *d1_level)
899 const char *s = code->str;
904 r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str, &cmd_len);
911 r = execTok (spec, &s, arg_no, arg_start, arg_end,
915 p = regxStrz (cmd_str, cmd_len);
/* --- begin ... --- */
916 if (!strcmp (p, "begin"))
918 r = execTok (spec, &s, arg_no, arg_start, arg_end,
922 p = regxStrz (cmd_str, cmd_len);
923 if (!strcmp (p, "record"))
925 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* static: the root node keeps a pointer to this name (u.root.type). */
931 static char absynName[64];
936 memcpy (absynName, cmd_str, cmd_len);
937 absynName[cmd_len] = '\0';
940 logf (LOG_DEBUG, "begin record %s", absynName);
942 if (!(absyn = data1_get_absyn (absynName)))
943 logf (LOG_WARN, "Unknown tagset: %s", absynName);
948 res = data1_mk_node (spec->m);
949 res->which = DATA1N_root;
950 res->u.root.type = absynName;
951 res->u.root.absyn = absyn;
/* The root occupies the current level; descend one level. */
954 d1_stack[*d1_level] = res;
955 d1_stack[++(*d1_level)] = NULL;
958 r = execTok (spec, &s, arg_no, arg_start, arg_end,
961 else if (!strcmp (p, "element"))
963 r = execTok (spec, &s, arg_no, arg_start, arg_end,
967 tagBegin (spec, d1_stack, d1_level, cmd_str, cmd_len);
968 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* --- end ... --- */
972 else if (!strcmp (p, "end"))
974 r = execTok (spec, &s, arg_no, arg_start, arg_end,
978 p = regxStrz (cmd_str, cmd_len);
979 if (!strcmp (p, "record"))
982 r = execTok (spec, &s, arg_no, arg_start, arg_end,
985 logf (LOG_DEBUG, "end record");
989 else if (!strcmp (p, "element"))
991 r = execTok (spec, &s, arg_no, arg_start, arg_end,
995 tagEnd (spec, d1_stack, d1_level, cmd_str, cmd_len);
996 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* No tag name given. */
1000 tagEnd (spec, d1_stack, d1_level, NULL, 0);
1003 logf (LOG_WARN, "missing record/element/variant");
1006 logf (LOG_WARN, "missing record/element/variant");
/* --- data ... --- */
1008 else if (!strcmp (p, "data"))
1012 const char *element_str = NULL;
/* Consume leading "-option" words. */
1014 while ((r = execTok (spec, &s, arg_no, arg_start, arg_end,
1015 &cmd_str, &cmd_len)) == 3)
1017 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1019 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1021 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1022 &element_str, &element_len);
1027 logf (LOG_WARN, "bad data option: %.*s",
1032 logf (LOG_WARN, "missing data item after data");
/* With -element the data items are wrapped in the named tag. */
1036 tagBegin (spec, d1_stack, d1_level, element_str, element_len);
1039 execData (spec, d1_stack, d1_level, cmd_str, cmd_len,
1041 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1042 &cmd_str, &cmd_len);
1045 tagEnd (spec, d1_stack, d1_level, NULL, 0);
/* --- unread ... --- */
1047 else if (!strcmp (p, "unread"))
1050 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1051 &cmd_str, &cmd_len);
1052 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1054 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1055 &cmd_str, &cmd_len);
1058 logf (LOG_WARN, "missing number after -offset");
1061 p = regxStrz (cmd_str, cmd_len);
1063 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1064 &cmd_str, &cmd_len);
1070 logf (LOG_WARN, "missing index after unread command");
/* The index must be a single decimal digit. */
1073 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1075 logf (LOG_WARN, "bad index after unread command");
1080 no = *cmd_str - '0';
/* Move the read position to the start of the selected match plus offset. */
1083 *pptr = arg_start[no] + offset;
1085 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1086 &cmd_str, &cmd_len);
1090 logf (LOG_WARN, "unknown code command: %.*s", cmd_len, cmd_str);
1091 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1092 &cmd_str, &cmd_len);
1097 logf (LOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
1099 r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str,
/*
 * execAction: run an action list against the input.
 *
 * start_ptr is where the text matched by the rule begins (recorded as
 * arg_start[0]); *pptr is the current read position and is advanced as
 * the patterns in the list consume input.  arg_start[]/arg_end[] collect
 * the extents of the matched pieces, which execCode hands to execTok for
 * "$n" references.
 *
 * Visible cases:
 *   - pattern marked as body: tryMatch searches forward; the arg_end =
 *     sptr line followed by arg_start = sptr / arg_end = *pptr records
 *     the text before the match and then the match itself, and
 *     F_WIN_EOF is used for the extents when tryMatch fails;
 *   - plain pattern: tryMatch is called and its match start (sptr) is
 *     compared with the current position;
 *   - code: executed through execCode;
 *   - the remaining case records the extent from *pptr to end of input
 *     (F_WIN_EOF or scan_size) and moves *pptr to scan_size.
 *
 * NOTE(review): the loop, the switch over the action kinds, the arg_no
 * bookkeeping and the return values are not visible in this listing.
 */
1108 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1109 data1_node **d1_stack, int *d1_level,
1110 int start_ptr, int *pptr)
1117 arg_start[0] = start_ptr;
1125 if (ap->u.pattern.body)
1127 arg_start[arg_no] = *pptr;
1128 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
1130 arg_end[arg_no] = F_WIN_EOF;
1132 arg_start[arg_no] = F_WIN_EOF;
1133 arg_end[arg_no] = F_WIN_EOF;
1138 arg_end[arg_no] = sptr;
1140 arg_start[arg_no] = sptr;
1141 arg_end[arg_no] = *pptr;
1146 arg_start[arg_no] = *pptr;
1147 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
1149 if (sptr != arg_start[arg_no])
1151 arg_end[arg_no] = *pptr;
1156 if (!execCode (spec, arg_no, arg_start, arg_end, pptr,
1157 ap->u.code, d1_stack, d1_level))
1161 arg_start[arg_no] = *pptr;
1163 arg_end[arg_no] = F_WIN_EOF;
1165 arg_end[arg_no] = spec->scan_size;
1171 *pptr = spec->scan_size;
/*
 * execRule: run the action list of rule number ruleNo.
 * The rule is found through the fast-rule table, and the result of
 * execAction is returned unchanged.  start_ptr and pptr are passed
 * straight through to execAction.
 */
1179 static int execRule (struct lexSpec *spec, struct lexTrans *trans,
1180 data1_node **d1_stack, int *d1_level,
1181 int ruleNo, int start_ptr, int *pptr)
1184 logf (LOG_DEBUG, "execRule %d", ruleNo);
1186 return execAction (spec, trans->fastRule[ruleNo]->actionList,
1187 d1_stack, d1_level, start_ptr, pptr);
/*
 * lexNode: main scanning loop of the filter.
 *
 * Starting in state 0 of the rule DFA, input is read character by
 * character from *ptr.  Three positions are tracked: skip_ptr (start of
 * text not yet emitted), start_ptr (start of the current match attempt)
 * and last_ptr.  When a rule is to be run, any text between skip_ptr and
 * start_ptr is first emitted as data (execDataP) and the rule is then
 * executed with execRule.  At end of input, pending text up to *ptr is
 * emitted as well.
 *
 * Two input variants appear side by side: the file window (f_win_advance,
 * f_win_get, F_WIN_EOF) and an in-memory buffer (scan_buf, scan_size).
 *
 * NOTE(review): the preprocessor guards selecting the variant, the loop
 * header, the last parameter of the signature and the return values are
 * not visible in this listing.
 */
1190 data1_node *lexNode (struct lexSpec *spec, struct lexTrans *trans,
1191 data1_node **d1_stack, int *d1_level,
1194 struct DFA_state *state = trans->dfa->states[0];
1198 unsigned char c_prev = '\n';
1202 int last_ptr = *ptr;
1203 int start_ptr = *ptr;
1204 int skip_ptr = *ptr;
/* File-window variant: read one character and test for end of input. */
1209 c = f_win_advance (spec, ptr);
1210 if (*ptr == F_WIN_EOF)
/* Emit the unmatched text that precedes the current match attempt. */
1214 if (skip_ptr < start_ptr)
1218 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1219 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1222 if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
1228 else if (skip_ptr < *ptr)
1232 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1233 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1235 if (*ptr == F_WIN_EOF)
/* In-memory variant of the same end-of-input handling. */
1239 if (*ptr == spec->scan_size)
1243 if (skip_ptr < start_ptr)
1245 execDataP (spec, d1_stack, d1_level,
1246 spec->scan_buf + skip_ptr, start_ptr - skip_ptr,
1250 execRule (spec, trans, d1_stack, d1_level, last_rule,
1255 else if (skip_ptr < *ptr)
1257 execDataP (spec, d1_stack, d1_level,
1258 spec->scan_buf + skip_ptr, *ptr - skip_ptr, 0);
1260 if (*ptr == spec->scan_size)
1263 c = spec->scan_buf[(*ptr)++];
1269 { /* no transition for character c ... */
1272 if (skip_ptr < start_ptr)
1277 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1278 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1280 execDataP (spec, d1_stack, d1_level,
1281 spec->scan_buf + skip_ptr,
1282 start_ptr - skip_ptr, 0);
1286 if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
/* Call the f_win_ef callback, if one is set, with the current position. */
1289 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1290 (*spec->f_win_ef)(spec->f_win_fh, *ptr);
1300 c_prev = f_win_advance (spec, &start_ptr);
1307 c_prev = f_win_advance (spec, &start_ptr);
/* Restart the automaton from its initial state. */
1313 state = trans->dfa->states[0];
1316 else if (c >= t->ch[0] && c <= t->ch[1])
1317 { /* transition ... */
1318 state = trans->dfa->states[t->to];
1324 last_rule = state->rule_no;
1327 else if (state->rule_nno)
1329 last_rule = state->rule_nno;
/* True at the start of the buffer or right after a newline. */
1333 if (!start_ptr || spec->scan_buf[start_ptr-1] == '\n')
1335 last_rule = state->rule_no;
1338 else if (state->rule_nno)
1340 last_rule = state->rule_nno;
/*
 * lexRoot: build one record tree.
 * The node stack is a local array of 512 entries.  The spec's begin
 * actions (if any) run first, then the main scan (lexNode), then the
 * spec's end actions (if any); the end actions are started at the
 * position where the scan stopped.
 * NOTE(review): the initial values of d1_level and ptr, the use of
 * `offset` and the return statement are not visible in this listing.
 */
1353 static data1_node *lexRoot (struct lexSpec *spec, off_t offset)
1355 data1_node *d1_stack[512];
1359 d1_stack[d1_level] = NULL;
1360 if (spec->beginActionList)
1361 execAction (spec, spec->beginActionList,
1362 d1_stack, &d1_level, 0, &ptr);
1363 lexNode (spec, &spec->trans, d1_stack, &d1_level, &ptr);
1364 if (spec->endActionList)
1365 execAction (spec, spec->endActionList,
1366 d1_stack, &d1_level, ptr, &ptr);
/*
 * grs_read_regx: reader entry point.  It parses the input described by p
 * with the filter named p->type.
 *
 * The filter specification is cached in curLexSpec: it is reloaded from
 * its file only when none is loaded or the loaded one has a different
 * name.  When a freshly created spec is deleted again after readFileSpec,
 * the guarding condition is not visible (presumably a load failure --
 * confirm).  The file window is then reset and wired to the callbacks and
 * handle supplied in p, the window size is set to 500000 bytes, and the
 * record is built by lexRoot using p->mem and p->offset.
 *
 * NOTE(review): the lines that mention rf, sf, ef, fh, name, m, size and
 * rd look like an alternate signature and an in-memory read loop; the
 * preprocessor guards that presumably separate them from the code above
 * are not visible in this listing, nor is the return statement.
 */
1370 data1_node *grs_read_regx (struct grs_read_info *p)
1372 int (*rf)(void *, char *, size_t),
1373 off_t (*sf)(void *, off_t),
1374 void (*ef)(void *, off_t),
1377 const char *name, NMEM m
1388 logf (LOG_DEBUG, "data1_read_regx, offset=%ld type=%s",(long) offset,
/* (Re)load the filter only when the requested type differs. */
1391 if (!curLexSpec || strcmp (curLexSpec->name, p->type))
1394 lexSpecDel (&curLexSpec);
1395 curLexSpec = lexSpecMk (p->type);
1396 res = readFileSpec (curLexSpec);
1399 lexSpecDel (&curLexSpec);
/* Start with an empty window bound to the caller's I/O callbacks. */
1406 curLexSpec->f_win_start = 0;
1407 curLexSpec->f_win_end = 0;
1408 curLexSpec->f_win_rf = p->readf;
1409 curLexSpec->f_win_sf = p->seekf;
1410 curLexSpec->f_win_fh = p->fh;
1411 curLexSpec->f_win_ef = p->endf;
1412 curLexSpec->f_win_size = 500000;
/* In-memory variant: the buffer starts at 4096 bytes and doubles. */
1415 if (!(curLexSpec->scan_buf = xmalloc (size = 4096)))
1419 if (rd+4096 > size && !(curLexSpec->scan_buf
1420 = xrealloc (curLexSpec->scan_buf, size *= 2)))
1422 if ((res = (*rf)(fh, curLexSpec->scan_buf + rd, 4096)) < 0)
1426 curLexSpec->scan_size = rd;
1428 curLexSpec->m = p->mem;
1429 n = lexRoot (curLexSpec, p->offset);
1431 xfree (curLexSpec->scan_buf);