2 * Copyright (C) 1994-1996, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.2 1996-10-29 14:02:09 adam
8 * Doesn't use the global data1_tabpath (from YAZ). Instead the function
9 * data1_get_tabpath is used.
11 * Revision 1.1 1996/10/11 10:57:30 adam
12 * New module recctrl. Used to manage records (extract/retrieval).
14 * Revision 1.24 1996/06/17 14:25:31 adam
15 * Removed LOG_DEBUG logs; can still be enabled by setting REGX_DEBUG.
17 * Revision 1.23 1996/06/04 10:19:00 adam
18 * Minor changes - removed include of ctype.h.
20 * Revision 1.22 1996/06/03 15:23:13 adam
21 * Bug fix: /../ BODY /../ - pattern didn't match EOF.
23 * Revision 1.21 1996/05/14 16:58:38 adam
26 * Revision 1.20 1996/05/01 13:46:36 adam
27 * First work on multiple records in one file.
28 * New option, -offset, to the "unread" command in the filter module.
30 * Revision 1.19 1996/02/12 16:18:20 adam
31 * Yet another bug fix in implementation of unread command.
33 * Revision 1.18 1996/02/12 16:07:54 adam
34 * Bug fix in new unread command.
36 * Revision 1.17 1996/02/12 15:56:11 adam
37 * New code command: unread.
39 * Revision 1.16 1996/01/17 14:57:51 adam
40 * Prototype changed for reader functions in extract/retrieve. File
41 * is identified by 'void *' instead of 'int'.
43 * Revision 1.15 1996/01/08 19:15:47 adam
44 * New input filter that works!
46 * Revision 1.14 1996/01/08 09:10:38 adam
47 * Yet another complete rework on this module.
49 * Revision 1.13 1995/12/15 17:21:50 adam
50 * This version is able to set data.formatted_text in data1-nodes.
52 * Revision 1.12 1995/12/15 16:20:10 adam
53 * The filter files (*.flt) are read from the path given by data1_tabpath.
55 * Revision 1.11 1995/12/15 12:35:16 adam
58 * Revision 1.10 1995/12/15 10:35:36 adam
61 * Revision 1.9 1995/12/14 16:38:48 adam
62 * Completely new attempt to make regular expression parsing.
64 * Revision 1.8 1995/12/13 17:16:59 adam
67 * Revision 1.7 1995/12/13 16:51:58 adam
68 * Modified to set last_child in data1_nodes.
69 * Uses destroy handler to free up data text nodes.
71 * Revision 1.6 1995/12/13 13:45:37 quinn
72 * Changed data1 to use nmem.
74 * Revision 1.5 1995/12/11 09:12:52 adam
75 * The rec_get function returns NULL if record doesn't exist - will
76 * happen in the server if the result set records have been deleted since
77 * the creation of the set (i.e. the search).
78 * The server saves a result temporarily if it is 'volatile', i.e. the
79 * set is register dependent.
81 * Revision 1.4 1995/12/05 16:57:40 adam
82 * More work on regular patterns.
84 * Revision 1.3 1995/12/05 09:37:09 adam
85 * One malloc was renamed to xmalloc.
87 * Revision 1.2 1995/12/04 17:59:24 adam
88 * More work on regular expression conversion.
90 * Revision 1.1 1995/12/04 14:25:30 adam
91 * Started work on regular expression parsed input to structured records.
/* Sentinel position value: read positions are tested against it (== F_WIN_EOF)
 * to recognise the end-of-file condition. */
105 #define F_WIN_EOF 2000000000
/* Token code: readOneSpec compares the value returned by readParseToken
 * against this constant to detect a pattern entry. */
109 #define REGX_PATTERN 1
/* One entry in an action list; entries are chained through `next`.
 * Only part of the definition is visible in this view. */
119 struct lexRuleAction {
/* DFA of a pattern action (built with dfa_parse / dfa_mkstate in actionListMk). */
123 struct DFA *dfa; /* REGX_PATTERN */
/* Code fragment of a code action (built with regxCodeMk in actionListMk). */
126 struct regxCode *code; /* REGX_CODE */
/* Next action in the list. */
128 struct lexRuleAction *next;
/* NOTE(review): the struct headers enclosing these members are not visible in
 * this view; member ownership below is inferred from how the fields are
 * accessed elsewhere in the file. */
/* Action list of a rule; lexSpecDel releases it with actionListDel. */
133 struct lexRuleAction *actionList;
137 struct lexRuleInfo info;
/* Link to the next rule; readOneSpec pushes new rules onto the front of the list. */
138 struct lexRule *next;
143 struct lexRule *rules;
/* Lookup table indexed by rule number; allocated and filled by readFileSpec. */
144 struct lexRuleInfo **fastRule;
150 struct lexTrans trans;
/* File-window callbacks; grs_read_regx assigns them from the reader info
 * (endf, readf and seekf respectively). */
154 void (*f_win_ef)(void *, off_t);
160 int (*f_win_rf)(void *, char *, size_t);
161 off_t (*f_win_sf)(void *, off_t);
/* Action lists built by readOneSpec for the begin and end entries; lexRoot
 * executes them before and after lexNode. */
166 struct lexRuleAction *beginActionList;
167 struct lexRuleAction *endActionList;
/* Returns a pointer into spec->f_win_buf for the data that starts at
 * start_pos and stores the number of usable bytes in *size, never more than
 * end_pos - start_pos. Three cases are handled: the start lies outside the
 * buffered window (seek and refill), the whole range is already buffered
 * (return a pointer into the buffer), or the range starts inside the window
 * but extends past it (shift the tail to the front and read more).
 * NOTE(review): the rest of the parameter list and the local declarations
 * are on lines not visible in this view. */
171 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
/* Case 1: start_pos is not inside the current window - reposition the
 * source with the seek callback and restart the window at start_pos. */
176 if (start_pos < spec->f_win_start || start_pos >= spec->f_win_end)
178 (*spec->f_win_sf)(spec->f_win_fh, start_pos);
179 spec->f_win_start = start_pos;
/* The window buffer is allocated lazily, on first use. */
181 if (!spec->f_win_buf)
182 spec->f_win_buf = xmalloc (spec->f_win_size);
183 *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
185 spec->f_win_end = spec->f_win_start + *size;
/* Never report more than the caller asked for. */
187 if (*size > end_pos - start_pos)
188 *size = end_pos - start_pos;
189 return spec->f_win_buf;
/* Case 2: the complete range is already buffered. */
191 if (end_pos <= spec->f_win_end)
193 *size = end_pos - start_pos;
194 return spec->f_win_buf + (start_pos - spec->f_win_start);
/* Case 3: keep the bytes from start_pos to the window end by moving them to
 * the front of the buffer, then read further data with the read callback. */
196 off = start_pos - spec->f_win_start;
197 for (i = 0; i<spec->f_win_end - start_pos; i++)
198 spec->f_win_buf[i] = spec->f_win_buf[i + off];
199 r = (*spec->f_win_rf)(spec->f_win_fh,
201 spec->f_win_size - i);
/* The window now begins at start_pos and has grown by the r bytes just read. */
202 spec->f_win_start = start_pos;
203 spec->f_win_end += r;
205 if (*size > end_pos - start_pos)
206 *size = end_pos - start_pos;
207 return spec->f_win_buf;
/* Returns the byte at position *pos and advances *pos by one. A position
 * inside the buffered window is served straight from the buffer; otherwise
 * f_win_get is asked for the single byte at *pos.
 * NOTE(review): what happens when *pos == F_WIN_EOF, or when the fetch
 * yields no data, is on lines not visible here. Also note that *pos is an
 * int while the window positions are off_t - confirm the intended range. */
210 static int f_win_advance (struct lexSpec *spec, int *pos)
/* Fast path: the byte is already buffered. */
215 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
216 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
217 if (*pos == F_WIN_EOF)
/* Slow path: make the window cover [*pos, *pos+1). */
219 buf = f_win_get (spec, *pos, *pos+1, &size);
/* Takes the address of a regxCode pointer and works on the object it refers
 * to. It is called from actionListDel for code actions; presumably it frees
 * the object - TODO confirm, the rest of the body is not visible here. */
230 static void regxCodeDel (struct regxCode **pp)
232 struct regxCode *p = *pp;
/* Allocates a regxCode object and gives it a freshly allocated string of
 * len+1 bytes holding a copy of the first len bytes of buf.
 * NOTE(review): the terminator store and the assignment to *pp are on lines
 * not visible in this view. */
241 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
245 p = xmalloc (sizeof(*p));
/* One extra byte beyond len is reserved. */
246 p->str = xmalloc (len+1);
247 memcpy (p->str, buf, len);
/* Returns a DFA object prepared for parsing the patterns of a spec: the
 * space and tab entries are removed from its parser character map and an
 * entry for '/' is added with value 0.
 * NOTE(review): the creation and the return statement are not visible here;
 * the meaning of the cmap entries is defined by the dfa module. */
252 static struct DFA *lexSpecDFA (void)
257 dfa_parse_cmap_del (dfa, ' ');
258 dfa_parse_cmap_del (dfa, '\t');
259 dfa_parse_cmap_add (dfa, '/', 0);
/* Allocates a new lexSpec for the given name. The name is copied into
 * memory owned by the spec, a DFA is obtained from lexSpecDFA, and the rule
 * list, fast-rule table and begin/end action lists start out as NULL.
 * NOTE(review): initialisation of other fields and the return statement are
 * not visible in this view. */
263 static struct lexSpec *lexSpecMk (const char *name)
267 p = xmalloc (sizeof(*p));
/* Private copy of the name (length plus terminator). */
268 p->name = xmalloc (strlen(name)+1);
269 strcpy (p->name, name);
270 p->trans.dfa = lexSpecDFA ();
271 p->trans.rules = NULL;
272 p->trans.fastRule = NULL;
273 p->beginActionList = NULL;
274 p->endActionList = NULL;
/* Walks the action list that *rap points to and releases what each entry
 * owns: the DFA of a pattern action, the regxCode of a code action.
 * NOTE(review): the dispatch on the action type, the freeing of the entry
 * itself and any reset of *rap are on lines not visible here. */
281 static void actionListDel (struct lexRuleAction **rap)
283 struct lexRuleAction *ra1, *ra;
/* ra1 carries the successor, so the loop does not depend on the current
 * entry once it has been processed. */
285 for (ra = *rap; ra; ra = ra1)
291 dfa_delete (&ra->u.pattern.dfa);
294 regxCodeDel (&ra->u.code);
/* Releases the resources held by the lexSpec at *pp: the transition DFA,
 * the fast-rule table, the action list of every rule, the begin and end
 * action lists, and the file-window buffer.
 * NOTE(review): the NULL check, the freeing of the rule nodes and of the
 * spec itself are on lines not visible in this view. */
302 static void lexSpecDel (struct lexSpec **pp)
305 struct lexRule *rp, *rp1;
311 dfa_delete (&p->trans.dfa);
313 xfree (p->trans.fastRule);
/* rp1 holds the successor while the current rule is being torn down. */
314 for (rp = p->trans.rules; rp; rp = rp1)
316 actionListDel (&rp->info.actionList);
319 actionListDel (&p->beginActionList);
320 actionListDel (&p->endActionList);
322 xfree (p->f_win_buf);
/* Reads the next token of a spec entry starting at *cpp. Leading blanks,
 * tabs and newlines are skipped. Command words are collected into cmd in
 * lower case and compared with "begin", "end" and "body"; anything else is
 * logged as a bad command, and unexpected characters are logged as well.
 * NOTE(review): the token codes returned, the use of *len and the update of
 * *cpp are on lines not visible in this view. */
328 static int readParseToken (const char **cpp, int *len)
330 const char *cp = *cpp;
/* Skip leading white space. */
334 while (*cp == ' ' || *cp == '\t' || *cp == '\n')
/* Letters of a command word; upper case is folded to lower case. */
363 if (*cp >= 'a' && *cp <= 'z')
365 else if (*cp >= 'A' && *cp <= 'Z')
366 cmd[i] = *cp + 'a' - 'A';
/* Bound on the number of characters stored in cmd. */
369 if (i > sizeof(cmd)-2)
377 logf (LOG_WARN, "Bad character %d %c", *cp, *cp);
/* Advance to the next blank, tab, newline or end of string. */
379 while (*cp && *cp != ' ' && *cp != '\t' && *cp != '\n')
/* Recognised command words. */
385 if (!strcmp (cmd, "begin"))
387 else if (!strcmp (cmd, "end"))
389 else if (!strcmp (cmd, "body"))
393 logf (LOG_WARN, "Bad command %s", cmd);
/* Parses the action text s token by token (readParseToken) and stores newly
 * allocated lexRuleAction entries through ap. A code token is stored with
 * regxCodeMk; a pattern token gets its own DFA from lexSpecDFA, which is
 * parsed with dfa_parse and completed with dfa_mkstate; a "begin" token is
 * rejected with a warning.
 * NOTE(review): the switch on the token, the advancing of ap and s, and the
 * return values are on lines not visible in this view. */
399 static int actionListMk (struct lexSpec *spec, const char *s,
400 struct lexRuleAction **ap)
405 while ((tok = readParseToken (&s, &len)))
/* Code action: keep a copy of the len bytes at s. */
413 *ap = xmalloc (sizeof(**ap));
415 regxCodeMk (&(*ap)->u.code, s, len);
/* Pattern action: remember the body flag and compile the expression. */
419 *ap = xmalloc (sizeof(**ap));
421 (*ap)->u.pattern.body = bodyMark;
423 (*ap)->u.pattern.dfa = lexSpecDFA ();
424 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
429 logf (LOG_WARN, "Regular expression error. r=%d", r);
432 dfa_mkstate ((*ap)->u.pattern.dfa);
436 logf (LOG_WARN, "Cannot use begin here");
439 *ap = xmalloc (sizeof(**ap));
/* Processes one entry of a spec, given as the string s. A begin or end
 * entry replaces the corresponding action list of the spec. A pattern entry
 * is parsed into the shared transition DFA, after which a new rule with the
 * next rule number is pushed onto the front of the rule list and its action
 * list is built from the remaining text.
 * NOTE(review): return values and the handling after the logged errors are
 * on lines not visible in this view. */
449 int readOneSpec (struct lexSpec *spec, const char *s)
453 tok = readParseToken (&s, &len);
454 if (tok == REGX_BEGIN)
/* Drop any previous begin actions before building the new list. */
456 actionListDel (&spec->beginActionList);
457 actionListMk (spec, s, &spec->beginActionList);
459 else if (tok == REGX_END)
/* Same replacement policy for the end actions. */
461 actionListDel (&spec->endActionList);
462 actionListMk (spec, s, &spec->endActionList);
464 else if (tok == REGX_PATTERN)
/* All rule patterns are parsed into the single DFA in spec->trans. */
468 r = dfa_parse (spec->trans.dfa, &s);
471 logf (LOG_WARN, "Regular expression error. r=%d", r);
476 logf (LOG_WARN, "Expects / at end of pattern. got %c", *s);
/* New rule: takes the current rule number and becomes the list head. */
480 rp = xmalloc (sizeof(*rp));
481 rp->info.no = spec->trans.ruleNo++;
482 rp->next = spec->trans.rules;
483 spec->trans.rules = rp;
484 actionListMk (spec, s, &rp->info.actionList);
/* Loads the filter definition for spec: opens "<name>.flt" through
 * yaz_path_fopen using the path from data1_get_tabpath, passes the entries
 * read from the file to readOneSpec, then builds the fastRule table indexed
 * by rule number and finally calls dfa_mkstate on the transition DFA.
 * NOTE(review): the line-assembly loop, the return values and the closing
 * of the file are on lines not visible in this view. */
489 int readFileSpec (struct lexSpec *spec)
494 int c, i, errors = 0;
497 lineBuf = xmalloc (1+lineSize);
498 logf (LOG_LOG, "Reading spec %s", spec->name);
/* lineBuf doubles as scratch space for the file name.
 * NOTE(review): sprintf is unbounded - confirm that lineSize always exceeds
 * strlen(spec->name) + 4. */
499 sprintf (lineBuf, "%s.flt", spec->name);
500 if (!(spec_inf = yaz_path_fopen (data1_get_tabpath(), lineBuf, "r")))
502 logf (LOG_ERRNO|LOG_WARN, "Cannot read spec file %s", spec->name);
/* Rule numbering starts at 1. */
507 spec->trans.ruleNo = 1;
/* Lines beginning with '#', newline, blank or tab get separate treatment;
 * the loop below runs to the end of the line or end of file. */
512 if (c == '#' || c == '\n' || c == ' ' || c == '\t')
514 while (c != '\n' && c != EOF)
533 if (c != ' ' && c != '\t')
542 readOneSpec (spec, lineBuf);
543 spec->lineNo += addLine;
/* Build the rule-number lookup table: clear every slot, then point each
 * slot at the info of the rule carrying that number. */
548 spec->trans.fastRule = xmalloc (sizeof(*spec->trans.fastRule) *
550 for (i = 0; i<spec->trans.ruleNo; i++)
551 spec->trans.fastRule[i] = NULL;
552 for (rp = spec->trans.rules; rp; rp = rp->next)
553 spec->trans.fastRule[rp->info.no] = &rp->info;
559 debug_dfa_followpos = 1;
562 dfa_mkstate (spec->trans.dfa);
/* The spec currently loaded. grs_read_regx keeps it between calls and
 * rebuilds it only when the requested type name differs from its name. */
566 static struct lexSpec *curLexSpec = NULL;
/* Destroy handler for data nodes: frees the text buffer of the node.
 * execData installs it on nodes whose text lives in memory obtained from
 * xmalloc rather than in the node's local buffer. */
568 static void destroy_data (struct data1_node *n)
/* Only data nodes are expected here. */
570 assert (n->which == DATA1N_data);
571 xfree (n->u.data.data);
/* Adds elen bytes of text at ebuf to the record tree. If the node currently
 * at this stack level is a data node, the text is appended to it: in place
 * while the total fits within DATA1_LOCALDATA, otherwise in a newly
 * allocated buffer. If not, a new text data node is created under the
 * parent, linked after the current sibling (if any) and made the current
 * node of the level.
 * NOTE(review): the early-exit conditions and the else branches are on
 * lines not visible in this view. */
574 static void execData (struct lexSpec *spec,
575 data1_node **d1_stack, int *d1_level,
576 const char *ebuf, int elen, int formatted_text)
578 struct data1_node *res, *parent;
/* Debug logging; long data is abbreviated to its first and last 15 bytes. */
582 logf (LOG_DEBUG, "execData %.15s ... %.*s", ebuf, 15, ebuf + elen-15);
584 logf (LOG_DEBUG, "execData %.*s", elen, ebuf);
586 logf (LOG_DEBUG, "execData len=%d", elen);
592 parent = d1_stack[*d1_level -1];
/* Append to an existing data node at this level. */
594 if ((res=d1_stack[*d1_level]) && res->which == DATA1N_data)
/* Combined text still fits within DATA1_LOCALDATA: copy behind the old data. */
596 if (elen + res->u.data.len <= DATA1_LOCALDATA)
597 memcpy (res->u.data.data + res->u.data.len, ebuf, elen);
/* Otherwise build a new buffer holding the old text followed by the new. */
600 char *nb = xmalloc (elen + res->u.data.len);
601 memcpy (nb, res->u.data.data, res->u.data.len);
602 memcpy (nb + res->u.data.len, ebuf, elen);
/* The old buffer is released only when the old length exceeded
 * DATA1_LOCALDATA, i.e. when it was itself obtained from xmalloc. */
603 if (res->u.data.len > DATA1_LOCALDATA)
604 xfree (res->u.data.data);
605 res->u.data.data = nb;
606 res->destroy = destroy_data;
608 res->u.data.len += elen;
/* No data node to extend: create a new text node. */
612 res = data1_mk_node (spec->m);
613 res->parent = parent;
614 res->which = DATA1N_data;
615 res->u.data.what = DATA1I_text;
616 res->u.data.len = elen;
617 res->u.data.formatted_text = formatted_text;
/* Text longer than DATA1_LOCALDATA goes to separately allocated memory
 * that destroy_data frees; shorter text uses the node's own lbuf. */
618 if (elen > DATA1_LOCALDATA)
620 res->u.data.data = xmalloc (elen);
621 res->destroy = destroy_data;
624 res->u.data.data = res->lbuf;
625 memcpy (res->u.data.data, ebuf, elen);
626 res->root = parent->root;
/* Link the node into the tree and make it current for this level. */
628 parent->num_children++;
629 parent->last_child = res;
630 if (d1_stack[*d1_level])
631 d1_stack[*d1_level]->next = res;
634 d1_stack[*d1_level] = res;
/* Passes the given text on to execData with unchanged arguments.
 * NOTE(review): any processing before or after that call is on lines not
 * visible in this view. */
638 static void execDataP (struct lexSpec *spec,
639 data1_node **d1_stack, int *d1_level,
640 const char *ebuf, int elen, int formatted_text)
642 execData (spec, d1_stack, d1_level, ebuf, elen, formatted_text);
/* Opens a new tag node below the node one level up the stack. The tag name
 * (len bytes at tag) is copied into the node's local buffer, truncated to
 * DATA1_LOCALDATA-1 bytes and terminated; the matching element is looked up
 * with data1_getelementbytagname; the node is linked into the tree and a
 * new, empty stack level is pushed.
 * NOTE(review): the check that produces the "No record type defined"
 * warning and the else branches are on lines not visible in this view. */
646 static void tagBegin (struct lexSpec *spec,
647 data1_node **d1_stack, int *d1_level,
648 const char *tag, int len)
650 struct data1_node *parent = d1_stack[*d1_level -1];
651 data1_element *elem = NULL;
652 data1_node *partag = get_parent_tag(parent);
654 data1_element *e = NULL;
659 logf (LOG_WARN, "In element begin. No record type defined");
663 res = data1_mk_node (spec->m);
664 res->parent = parent;
665 res->which = DATA1N_tag;
/* The tag name is stored in the node's own local buffer. */
666 res->u.tag.tag = res->lbuf;
/* Clamp the name so that it fits the buffer together with its terminator. */
668 if (len >= DATA1_LOCALDATA)
669 len = DATA1_LOCALDATA-1;
671 memcpy (res->u.tag.tag, tag, len);
672 res->u.tag.tag[len] = '\0';
675 logf (LOG_DEBUG, "Tag begin %s (%d)", res->u.tag.tag, *d1_level);
677 if (parent->which == DATA1N_variant)
/* e is taken from the element of the parent tag node. */
680 if (!(e = partag->u.tag.element))
683 elem = data1_getelementbytagname (d1_stack[0]->u.root.absyn, e,
686 res->u.tag.element = elem;
687 res->u.tag.node_selected = 0;
688 res->u.tag.make_variantlist = 0;
689 res->u.tag.no_data_requested = 0;
690 res->root = parent->root;
/* Link the node into the tree. */
691 parent->num_children++;
692 parent->last_child = res;
693 if (d1_stack[*d1_level])
694 d1_stack[*d1_level]->next = res;
/* The new tag becomes current at this level; the next level starts empty. */
697 d1_stack[*d1_level] = res;
698 d1_stack[++(*d1_level)] = NULL;
/* Closes tag levels. The loop runs while *d1_level is greater than 1, and
 * the visible comparison tests whether the tag name of the node at the
 * current level equals the len bytes at tag. execCode also calls this with
 * tag == NULL and len == 0.
 * NOTE(review): the level decrement, the rest of the condition (including
 * how a NULL tag is treated) and the loop exit are not visible here. */
701 static void tagEnd (struct lexSpec *spec,
702 data1_node **d1_stack, int *d1_level,
703 const char *tag, int len)
705 while (*d1_level > 1)
709 (strlen(d1_stack[*d1_level]->u.tag.tag) == len &&
710 !memcmp (d1_stack[*d1_level]->u.tag.tag, tag, len)))
714 logf (LOG_DEBUG, "Tag end (%d)", *d1_level);
/* Runs the given DFA over the input starting at *pptr. Characters come
 * either from the file window (f_win_advance) or from spec->scan_buf. When
 * a match is reported, *mptr receives the position where the match starts
 * and *pptr the position just past its end.
 * NOTE(review): the rest of the parameter list, the choice between the two
 * input sources, the return values and the exact rule_no / rule_nno
 * selection are on lines not visible in this view. */
719 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
/* Matching begins in state 0 of the DFA. */
722 struct DFA_state *state = dfa->states[0];
726 unsigned char c_prev = 0;
729 int start_ptr = *pptr;
/* Input taken from the file window. */
737 c = f_win_advance (spec, &ptr);
738 if (ptr == F_WIN_EOF)
/* Input taken from the in-memory scan buffer. */
749 if (ptr == spec->scan_size)
759 c = spec->scan_buf[ptr++];
768 *mptr = start_ptr; /* match starts here */
769 *pptr = last_ptr; /* match end here (+1) */
/* Back to the initial state. */
772 state = dfa->states[0];
/* A transition whose character range contains c: follow it. */
779 else if (c >= t->ch[0] && c <= t->ch[1])
781 state = dfa->states[t->to];
787 last_rule = state->rule_no;
792 last_rule = state->rule_nno;
796 last_rule = state->rule_no;
/* Extracts the next token from the code string at *src and reports it
 * through *tokBuf and *tokLen. Leading blanks and tabs are skipped. A '$'
 * followed by digits selects match argument n, whose text comes from the
 * file window (f_win_get) or from spec->scan_buf. The remaining visible
 * branches scan a double-quoted string, test for newline or ';', and scan
 * words that end at blank, tab, newline or ';'.
 * NOTE(review): the return codes (callers compare the result with 3), the
 * range check of n against arg_no and the update of *src are on lines not
 * visible in this view. */
808 static int execTok (struct lexSpec *spec, const char **src,
809 int arg_no, int *arg_start, int *arg_end,
810 const char **tokBuf, int *tokLen)
812 const char *s = *src;
/* Skip leading blanks and tabs. */
814 while (*s == ' ' || *s == '\t')
/* Argument reference: '$' followed by a decimal number. */
818 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
822 while (*s >= '0' && *s <= '9')
823 n = n*10 + (*s++ -'0');
834 *tokBuf = f_win_get (spec, arg_start[n], arg_end[n], tokLen);
836 *tokBuf = spec->scan_buf + arg_start[n];
837 *tokLen = arg_end[n] - arg_start[n];
/* Scan up to the next double quote or the end of the string. */
844 while (*s && *s != '\"')
846 *tokLen = s - *tokBuf;
851 else if (*s == '\n' || *s == ';')
/* Word token: runs until blank, tab, newline, ';' or end of string. */
859 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
861 *tokLen = s - *tokBuf;
868 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
870 *tokLen = s - *tokBuf;
/* Copies len bytes from src into str and returns a char pointer. Callers
 * pass the result to strcmp, so it is presumably terminated - TODO confirm.
 * NOTE(review): the declaration of str, any length limit and the
 * terminator store are on lines not visible in this view. */
876 static char *regxStrz (const char *src, int len)
882 memcpy (str, src, len);
/* Interprets the code fragment in code->str, reading tokens with execTok.
 * The commands visible here are:
 *   begin record <name> - look up the abstract syntax and push a root node
 *   begin element <tag> - tagBegin
 *   end record          - logged
 *   end element [<tag>] - tagEnd
 *   data [-text] [-element <tag>] <item> - execData, optionally wrapped in
 *                         tagBegin / tagEnd
 *   unread [-offset <n>] <index> - set *pptr to arg_start[index] + offset
 * Unknown commands and stray tokens are logged.
 * NOTE(review): the loop structure, the return values and several
 * conditions between the visible lines are not shown in this view. */
887 static int execCode (struct lexSpec *spec,
888 int arg_no, int *arg_start, int *arg_end, int *pptr,
889 struct regxCode *code,
890 data1_node **d1_stack, int *d1_level)
892 const char *s = code->str;
897 r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str, &cmd_len);
904 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* Terminated copy of the command word, for strcmp. */
908 p = regxStrz (cmd_str, cmd_len);
909 if (!strcmp (p, "begin"))
911 r = execTok (spec, &s, arg_no, arg_start, arg_end,
915 p = regxStrz (cmd_str, cmd_len);
916 if (!strcmp (p, "record"))
918 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* Static storage: the root node keeps a pointer to this name (see the
 * assignment to res->u.root.type below). */
924 static char absynName[64];
/* NOTE(review): confirm that cmd_len is limited to 63 before this copy;
 * any such guard is on lines not visible here. */
929 memcpy (absynName, cmd_str, cmd_len);
930 absynName[cmd_len] = '\0';
933 logf (LOG_DEBUG, "begin record %s", absynName);
935 if (!(absyn = data1_get_absyn (absynName)))
936 logf (LOG_WARN, "Unknown tagset: %s", absynName);
/* Create the root node and push a new, empty level above it. */
941 res = data1_mk_node (spec->m);
942 res->which = DATA1N_root;
943 res->u.root.type = absynName;
944 res->u.root.absyn = absyn;
947 d1_stack[*d1_level] = res;
948 d1_stack[++(*d1_level)] = NULL;
951 r = execTok (spec, &s, arg_no, arg_start, arg_end,
954 else if (!strcmp (p, "element"))
956 r = execTok (spec, &s, arg_no, arg_start, arg_end,
960 tagBegin (spec, d1_stack, d1_level, cmd_str, cmd_len);
961 r = execTok (spec, &s, arg_no, arg_start, arg_end,
965 else if (!strcmp (p, "end"))
967 r = execTok (spec, &s, arg_no, arg_start, arg_end,
971 p = regxStrz (cmd_str, cmd_len);
972 if (!strcmp (p, "record"))
975 r = execTok (spec, &s, arg_no, arg_start, arg_end,
978 logf (LOG_DEBUG, "end record");
982 else if (!strcmp (p, "element"))
984 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* Two forms: end the named element, or call tagEnd without a name. */
988 tagEnd (spec, d1_stack, d1_level, cmd_str, cmd_len);
989 r = execTok (spec, &s, arg_no, arg_start, arg_end,
993 tagEnd (spec, d1_stack, d1_level, NULL, 0);
996 logf (LOG_WARN, "Missing record/element/variant");
999 logf (LOG_WARN, "Missing record/element/variant");
1001 else if (!strcmp (p, "data"))
1005 const char *element_str = NULL;
/* Consume option tokens for as long as execTok returns 3. */
1007 while ((r = execTok (spec, &s, arg_no, arg_start, arg_end,
1008 &cmd_str, &cmd_len)) == 3)
1010 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1012 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
/* The token after -element is the element name. */
1014 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1015 &element_str, &element_len);
1020 logf (LOG_WARN, "Bad data option: %.*s",
1025 logf (LOG_WARN, "Missing data item after data");
/* Wrap the data in the named element: tagBegin before, tagEnd after. */
1029 tagBegin (spec, d1_stack, d1_level, element_str, element_len);
1032 execData (spec, d1_stack, d1_level, cmd_str, cmd_len,
1034 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1035 &cmd_str, &cmd_len);
1038 tagEnd (spec, d1_stack, d1_level, NULL, 0);
1040 else if (!strcmp (p, "unread"))
1043 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1044 &cmd_str, &cmd_len);
1045 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1047 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1048 &cmd_str, &cmd_len);
1051 logf (LOG_WARN, "Missing number after -offset");
1054 p = regxStrz (cmd_str, cmd_len);
1056 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1057 &cmd_str, &cmd_len);
1063 logf (LOG_WARN, "Missing index after unread command");
/* The index has to be a single decimal digit. */
1066 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1068 logf (LOG_WARN, "Bad index after unread command");
1073 no = *cmd_str - '0';
/* Move the read position to the start of argument `no`, plus the offset. */
1076 *pptr = arg_start[no] + offset;
1078 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1079 &cmd_str, &cmd_len);
1083 logf (LOG_WARN, "Unknown code command: %.*s", cmd_len, cmd_str);
1084 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1085 &cmd_str, &cmd_len);
1090 logf (LOG_WARN, "Ignoring token %.*s", cmd_len, cmd_str);
1092 r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str,
/* Executes the action list ap against the input. Argument 0 starts at
 * start_ptr. Each pattern action is matched with tryMatch and its bounds
 * are recorded in arg_start / arg_end; each code action is run through
 * execCode with the arguments collected so far. Positions are set to
 * F_WIN_EOF or spec->scan_size where the visible lines handle the end of
 * input.
 * NOTE(review): the loop over the list, the dispatch on the action type,
 * the increments of arg_no and the return values are on lines not visible
 * in this view. */
1101 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1102 data1_node **d1_stack, int *d1_level,
1103 int start_ptr, int *pptr)
1110 arg_start[0] = start_ptr;
/* Pattern flagged as body. */
1118 if (ap->u.pattern.body)
1120 arg_start[arg_no] = *pptr;
1121 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
1123 arg_end[arg_no] = F_WIN_EOF;
1125 arg_start[arg_no] = F_WIN_EOF;
1126 arg_end[arg_no] = F_WIN_EOF;
/* On a match, the text before it (ending at sptr) and the match itself
 * (sptr up to *pptr) are recorded as argument ranges. */
1131 arg_end[arg_no] = sptr;
1133 arg_start[arg_no] = sptr;
1134 arg_end[arg_no] = *pptr;
/* Pattern without the body flag. */
1139 arg_start[arg_no] = *pptr;
1140 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
/* The match start is compared with the current position. */
1142 if (sptr != arg_start[arg_no])
1144 arg_end[arg_no] = *pptr;
/* Code action. */
1149 if (!execCode (spec, arg_no, arg_start, arg_end, pptr,
1150 ap->u.code, d1_stack, d1_level))
1154 arg_start[arg_no] = *pptr;
1156 arg_end[arg_no] = F_WIN_EOF;
1158 arg_end[arg_no] = spec->scan_size;
1164 *pptr = spec->scan_size;
/* Runs the action list of rule number ruleNo, found through the fastRule
 * table of trans, by delegating to execAction; the result of execAction is
 * returned unchanged. */
1172 static int execRule (struct lexSpec *spec, struct lexTrans *trans,
1173 data1_node **d1_stack, int *d1_level,
1174 int ruleNo, int start_ptr, int *pptr)
1177 logf (LOG_DEBUG, "execRule %d", ruleNo);
1179 return execAction (spec, trans->fastRule[ruleNo]->actionList,
1180 d1_stack, d1_level, start_ptr, pptr);
/* Main scanning loop. Characters are read from position *ptr, either via
 * the file window or from spec->scan_buf, and drive the DFA in trans. When
 * there is no transition for the current character, or the input ends,
 * the text between skip_ptr and the start of the match is emitted as data
 * (execDataP) and the last matching rule is executed (execRule). Input not
 * covered by any rule is emitted as data as well.
 * NOTE(review): the rest of the parameter list, the loop structure, the
 * return value and the preprocessor conditionals that choose between the
 * window and scan_buf variants are on lines not visible in this view. */
1183 data1_node *lexNode (struct lexSpec *spec, struct lexTrans *trans,
1184 data1_node **d1_stack, int *d1_level,
1187 struct DFA_state *state = trans->dfa->states[0];
/* The previous character is initialised to newline. */
1191 unsigned char c_prev = '\n';
1195 int last_ptr = *ptr;
1196 int start_ptr = *ptr;
1197 int skip_ptr = *ptr;
/* File-window variant: fetch the next character and test for end of input. */
1202 c = f_win_advance (spec, ptr);
1203 if (*ptr == F_WIN_EOF)
/* Emit the text that precedes the match start as data. */
1207 if (skip_ptr < start_ptr)
1211 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1212 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1215 if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
1221 else if (skip_ptr < *ptr)
1225 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1226 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1228 if (*ptr == F_WIN_EOF)
/* scan_buf variant of the same end-of-input handling. */
1232 if (*ptr == spec->scan_size)
1236 if (skip_ptr < start_ptr)
1238 execDataP (spec, d1_stack, d1_level,
1239 spec->scan_buf + skip_ptr, start_ptr - skip_ptr,
1243 execRule (spec, trans, d1_stack, d1_level, last_rule,
1248 else if (skip_ptr < *ptr)
1250 execDataP (spec, d1_stack, d1_level,
1251 spec->scan_buf + skip_ptr, *ptr - skip_ptr, 0);
1253 if (*ptr == spec->scan_size)
1256 c = spec->scan_buf[(*ptr)++];
1262 { /* no transition for character c ... */
1265 if (skip_ptr < start_ptr)
1270 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1271 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1273 execDataP (spec, d1_stack, d1_level,
1274 spec->scan_buf + skip_ptr,
1275 start_ptr - skip_ptr, 0);
1279 if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
/* Call the end callback, if one is set, with the current position. */
1282 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1283 (*spec->f_win_ef)(spec->f_win_fh, *ptr);
/* Step start_ptr forward by one character, remembering that character. */
1293 c_prev = f_win_advance (spec, &start_ptr);
1300 c_prev = f_win_advance (spec, &start_ptr);
/* Restart the DFA from its initial state. */
1306 state = trans->dfa->states[0];
1309 else if (c >= t->ch[0] && c <= t->ch[1])
1310 { /* transition ... */
1311 state = trans->dfa->states[t->to];
/* NOTE(review): rule_no appears to be used when the match begins at the
 * start of a line (see the '\n' test below) and rule_nno otherwise -
 * confirm against the dfa module. */
1317 last_rule = state->rule_no;
1320 else if (state->rule_nno)
1322 last_rule = state->rule_nno;
1326 if (!start_ptr || spec->scan_buf[start_ptr-1] == '\n')
1328 last_rule = state->rule_no;
1331 else if (state->rule_nno)
1333 last_rule = state->rule_nno;
/* Parses one record: sets up a local node stack, runs the begin actions of
 * the spec (if any), scans the input with lexNode, then runs the end
 * actions (if any).
 * NOTE(review): the initial values of d1_level and ptr (presumably derived
 * from offset) and the return statement are on lines not visible here.
 * Nothing visible checks *d1_level against the 512-entry stack. */
1346 static data1_node *lexRoot (struct lexSpec *spec, off_t offset)
1348 data1_node *d1_stack[512];
1352 d1_stack[d1_level] = NULL;
/* Begin actions start with argument 0 at position 0. */
1353 if (spec->beginActionList)
1354 execAction (spec, spec->beginActionList,
1355 d1_stack, &d1_level, 0, &ptr);
1356 lexNode (spec, &spec->trans, d1_stack, &d1_level, &ptr);
/* End actions start at the position where lexNode stopped. */
1357 if (spec->endActionList)
1358 execAction (spec, spec->endActionList,
1359 d1_stack, &d1_level, ptr, &ptr);
/* Entry point of the filter. The spec named p->type is loaded when no spec
 * is cached or the cached one has a different name. The file window is
 * then reset and wired to the callbacks in p, and lexRoot is called at
 * p->offset to build the record tree.
 * NOTE(review): the parameter lines that follow the signature and the
 * scan_buf reading code further down appear to belong to an alternative
 * build configuration; the preprocessor conditionals, the error returns
 * and the final return are on lines not visible in this view. */
1363 data1_node *grs_read_regx (struct grs_read_info *p)
1365 int (*rf)(void *, char *, size_t),
1366 off_t (*sf)(void *, off_t),
1367 void (*ef)(void *, off_t),
1370 const char *name, NMEM m
1381 logf (LOG_DEBUG, "data1_read_regx, offset=%ld type=%s",(long) offset,
/* Reload the spec only when the requested type differs from the cached one. */
1384 if (!curLexSpec || strcmp (curLexSpec->name, p->type))
1387 lexSpecDel (&curLexSpec);
1388 curLexSpec = lexSpecMk (p->type);
1389 res = readFileSpec (curLexSpec);
/* The freshly made spec is deleted again on this path, presumably when
 * readFileSpec reports failure. */
1392 lexSpecDel (&curLexSpec);
/* Start with an empty window and take the I/O callbacks from p. */
1399 curLexSpec->f_win_start = 0;
1400 curLexSpec->f_win_end = 0;
1401 curLexSpec->f_win_rf = p->readf;
1402 curLexSpec->f_win_sf = p->seekf;
1403 curLexSpec->f_win_fh = p->fh;
1404 curLexSpec->f_win_ef = p->endf;
/* Size in bytes of the window buffer that f_win_get allocates. */
1405 curLexSpec->f_win_size = 500000;
/* scan_buf variant: read the input into a growing buffer, 4096 bytes per
 * read call, doubling the buffer when fewer than 4096 bytes are left. */
1408 if (!(curLexSpec->scan_buf = xmalloc (size = 4096)))
1412 if (rd+4096 > size && !(curLexSpec->scan_buf
1413 = xrealloc (curLexSpec->scan_buf, size *= 2)))
1415 if ((res = (*rf)(fh, curLexSpec->scan_buf + rd, 4096)) < 0)
1419 curLexSpec->scan_size = rd;
/* Nodes are created in the memory handle supplied by the caller. */
1421 curLexSpec->m = p->mem;
1422 n = lexRoot (curLexSpec, p->offset);
1424 xfree (curLexSpec->scan_buf);