2 * Copyright (C) 1994-1998, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.21 1998-06-22 11:33:39 adam
8 * Added two type casts.
10 * Revision 1.20 1998/06/08 14:40:44 adam
11 * Fixed problem with signed character(s) in regular expressions.
13 * Revision 1.19 1998/01/12 14:39:39 adam
14 * Fixed bug in term_Tnode.
16 * Revision 1.18 1997/09/29 09:05:17 adam
17 * Thread safe DFA module. We simply had to put a few static vars to
18 * the DFA_parse structure.
20 * Revision 1.17 1997/09/18 08:59:17 adam
21 * Extra generic handle for the character mapping routines.
23 * Revision 1.16 1997/09/05 15:29:57 adam
24 * Changed prototype for chr_map_input - added const.
25 * Added support for C++, headers uses extern "C" for public definitions.
27 * Revision 1.15 1997/02/10 10:19:20 adam
28 * Added facility for open character sets, eg [a-].
30 * Revision 1.14 1996/10/29 13:57:22 adam
31 * Include of zebrautl.h instead of alexutil.h.
33 * Revision 1.13 1996/06/17 14:24:08 adam
34 * Bug fix: read_charset didn't handle character mapping.
36 * Revision 1.12 1996/06/04 10:20:02 adam
37 * Added support for character mapping.
39 * Revision 1.11 1996/01/08 19:15:24 adam
40 * Allow single $ in expressions.
42 * Revision 1.10 1996/01/08 09:09:17 adam
43 * Function dfa_parse got 'const' string argument.
44 * New functions to define char mappings made public.
46 * Revision 1.9 1995/12/06 12:24:58 adam
47 * Removed verbatim mode code.
49 * Revision 1.8 1995/12/06 09:09:58 adam
50 * Work on left and right anchors.
52 * Revision 1.7 1995/11/27 09:23:02 adam
53 * New berbatim hook in regular expressions. "[]n ..".
55 * Revision 1.6 1995/10/16 09:31:25 adam
58 * Revision 1.5 1995/10/02 15:17:58 adam
59 * Bug fix in dfa_delete.
61 * Revision 1.4 1995/09/28 09:18:52 adam
62 * Removed various preprocessor defines.
64 * Revision 1.3 1995/09/04 12:33:26 adam
65 * Various cleanup. YAZ util used instead.
67 * Revision 1.2 1995/01/25 11:30:50 adam
68 * Simple error reporting when parsing regular expressions.
69 * Memory usage reduced.
71 * Revision 1.1 1995/01/24 16:02:52 adam
72 * New private header file in dfa module (dfap.h).
73 * Module no longer uses yacc to parse regular expressions.
87 #define DFA_OPEN_RANGE 1
97 struct Tnode *p[2]; /* CAT,OR,STAR,PLUS (left,right) */
98 short ch[2]; /* if ch[0] >= 0 then this Tnode represents */
99 /* a character in range [ch[0]..ch[1]] */
100 /* otherwise ch[0] represents */
101 /* accepting no (-acceptno) */
103 unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */
104 unsigned nullable : 1;
105 Set firstpos; /* first positions */
106 Set lastpos; /* last positions */
109 struct Tblock { /* Tnode bucket (block) */
110 struct Tblock *next; /* pointer to next bucket */
111 struct Tnode *tarray; /* array of nodes in bucket */
115 #define STATE_HASH 199
116 #define POSET_CHUNK 100
118 int debug_dfa_trav = 0;
119 int debug_dfa_tran = 0;
120 int debug_dfa_followpos = 0;
123 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info);
124 static struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info,
126 static void term_Tnode (struct DFA_parse *parse_info);
129 del_followpos (struct DFA_parse *parse_info),
130 init_pos (struct DFA_parse *parse_info),
131 del_pos (struct DFA_parse *parse_info),
132 mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
133 add_follow (struct DFA_parse *parse_info, Set lastpos, Set firstpos),
134 dfa_trav (struct DFA_parse *parse_info, struct Tnode *n),
135 init_followpos (struct DFA_parse *parse_info),
136 pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
137 pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas),
138 pr_followpos (struct DFA_parse *parse_info),
140 lex (struct DFA_parse *parse_info);
143 nextchar (struct DFA_parse *parse_info, int *esc),
144 read_charset (struct DFA_parse *parse_info);
147 *str_char (unsigned c);
164 static struct Tnode *expr_1 (struct DFA_parse *parse_info),
165 *expr_2 (struct DFA_parse *parse_info),
166 *expr_3 (struct DFA_parse *parse_info),
167 *expr_4 (struct DFA_parse *parse_info);
169 static struct Tnode *expr_1 (struct DFA_parse *parse_info)
171 struct Tnode *t1, *t2, *tn;
173 if (!(t1 = expr_2 (parse_info)))
175 while (parse_info->lookahead == L_ALT)
178 if (!(t2 = expr_2 (parse_info)))
181 tn = mk_Tnode (parse_info);
190 static struct Tnode *expr_2 (struct DFA_parse *parse_info)
192 struct Tnode *t1, *t2, *tn;
194 if (!(t1 = expr_3 (parse_info)))
196 while (parse_info->lookahead == L_WILD ||
197 parse_info->lookahead == L_ANYZ ||
198 parse_info->lookahead == L_ANY ||
199 parse_info->lookahead == L_LP ||
200 parse_info->lookahead == L_CHAR ||
201 parse_info->lookahead == L_CHARS)
203 if (!(t2 = expr_3 (parse_info)))
206 tn = mk_Tnode (parse_info);
215 static struct Tnode *expr_3 (struct DFA_parse *parse_info)
217 struct Tnode *t1, *tn;
219 if (!(t1 = expr_4 (parse_info)))
221 if (parse_info->lookahead == L_CLOS0)
224 tn = mk_Tnode (parse_info);
229 else if (parse_info->lookahead == L_CLOS1)
232 tn = mk_Tnode (parse_info);
237 else if (parse_info->lookahead == L_QUEST)
240 tn = mk_Tnode(parse_info);
243 tn->u.p[1] = mk_Tnode(parse_info);
244 tn->u.p[1]->pos = EPSILON;
250 static struct Tnode *expr_4 (struct DFA_parse *parse_info)
254 switch (parse_info->lookahead)
258 if (!(t1 = expr_1 (parse_info)))
260 if (parse_info->lookahead == L_RP)
266 t1 = mk_Tnode(parse_info);
267 t1->pos = ++parse_info->position;
268 t1->u.ch[1] = t1->u.ch[0] = parse_info->look_ch;
272 t1 = mk_Tnode_cset (parse_info, parse_info->look_chars);
276 t1 = mk_Tnode_cset (parse_info, parse_info->anyset);
280 t1 = mk_Tnode(parse_info);
282 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
283 t1->u.p[1] = mk_Tnode(parse_info);
284 t1->u.p[1]->pos = EPSILON;
288 t1 = mk_Tnode(parse_info);
290 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
298 static void do_parse (struct DFA_parse *parse_info, const char **s,
301 int start_anchor_flag = 0;
302 struct Tnode *t1, *t2, *tn;
304 parse_info->err_code = 0;
305 parse_info->expr_ptr = * (const unsigned char **) s;
307 parse_info->inside_string = 0;
309 if (parse_info->lookahead == L_START)
311 start_anchor_flag = 1;
314 if (parse_info->lookahead == L_END)
316 t1 = mk_Tnode (parse_info);
317 t1->pos = ++parse_info->position;
318 t1->u.ch[1] = t1->u.ch[0] = '\n';
323 t1 = expr_1 (parse_info);
324 if (t1 && parse_info->lookahead == L_END)
326 t2 = mk_Tnode (parse_info);
327 t2->pos = ++parse_info->position;
328 t2->u.ch[1] = t2->u.ch[0] = '\n';
330 tn = mk_Tnode (parse_info);
339 if (t1 && parse_info->lookahead == 0)
341 t2 = mk_Tnode(parse_info);
342 t2->pos = ++parse_info->position;
343 t2->u.ch[0] = -(++parse_info->rule);
344 t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule);
346 *tnp = mk_Tnode(parse_info);
353 if (!parse_info->err_code)
355 if (parse_info->lookahead == L_RP)
356 parse_info->err_code = DFA_ERR_RP;
357 else if (parse_info->lookahead == L_LP)
358 parse_info->err_code = DFA_ERR_LP;
360 parse_info->err_code = DFA_ERR_SYNTAX;
363 *s = (const char *) parse_info->expr_ptr;
366 static int nextchar (struct DFA_parse *parse_info, int *esc)
369 if (*parse_info->expr_ptr == '\0')
371 else if (*parse_info->expr_ptr != '\\')
372 return *parse_info->expr_ptr++;
374 switch (*++parse_info->expr_ptr)
381 ++parse_info->expr_ptr;
384 ++parse_info->expr_ptr;
387 ++parse_info->expr_ptr;
390 ++parse_info->expr_ptr;
393 ++parse_info->expr_ptr;
396 return *parse_info->expr_ptr++;
400 static int nextchar_set (struct DFA_parse *parse_info, int *esc)
402 if (*parse_info->expr_ptr == ' ')
405 return *parse_info->expr_ptr++;
407 return nextchar (parse_info, esc);
410 static int read_charset (struct DFA_parse *parse_info)
412 int i, ch0, ch1, esc0, esc1, cc = 0;
413 parse_info->look_chars = mk_BSet (&parse_info->charset);
414 res_BSet (parse_info->charset, parse_info->look_chars);
416 ch0 = nextchar_set (parse_info, &esc0);
417 if (!esc0 && ch0 == '^')
420 ch0 = nextchar_set (parse_info, &esc0);
424 if (!esc0 && ch0 == ']')
426 if (parse_info->cmap)
430 const char *mcp = mapfrom;
432 mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1);
436 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
437 ch1 = nextchar_set (parse_info, &esc1);
438 if (!esc1 && ch1 == '-')
441 if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)
444 if (!esc1 && ch1 == ']')
450 if (!esc1 && ch1 == ']')
452 add_BSet (parse_info->charset, parse_info->look_chars, '-');
456 if (!open_range && parse_info->cmap)
460 const char *mcp = mapfrom;
462 mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);
466 for (i=ch0; ++i<=ch1;)
467 add_BSet (parse_info->charset, parse_info->look_chars, i);
469 ch0 = nextchar_set (parse_info, &esc0);
480 com_BSet (parse_info->charset, parse_info->look_chars);
484 static int map_l_char (struct DFA_parse *parse_info)
487 const char *cp0 = (const char *) (parse_info->expr_ptr-1);
488 int i = 0, len = strlen(cp0);
490 if (cp0[0] == 1 && cp0[1])
492 parse_info->expr_ptr++;
493 parse_info->look_ch = ((unsigned char *) cp0)[1];
496 if (!parse_info->cmap)
499 mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);
502 parse_info->expr_ptr = (const unsigned char *) cp0;
503 parse_info->look_ch = ((unsigned char **) mapto)[i][0];
504 logf (LOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);
508 static int lex_sub(struct DFA_parse *parse_info)
511 while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)
512 if (parse_info->look_ch == '\"')
515 return map_l_char (parse_info);
516 parse_info->inside_string = !parse_info->inside_string;
518 else if (esc || parse_info->inside_string)
519 return map_l_char (parse_info);
520 else if (parse_info->look_ch == '[')
521 return read_charset(parse_info);
525 for (cc = parse_info->charMap; *cc; cc += 2)
526 if (*cc == parse_info->look_ch)
529 --parse_info->expr_ptr;
532 return map_l_char (parse_info);
537 static void lex (struct DFA_parse *parse_info)
539 parse_info->lookahead = lex_sub (parse_info);
542 static const char *str_char (unsigned c)
546 if (c < 32 || c >= 127)
559 sprintf (s+1, "x%02x", c);
581 static void out_char (int c)
583 printf ("%s", str_char (c));
586 static void term_Tnode (struct DFA_parse *parse_info)
588 struct Tblock *t, *t1;
589 for (t = parse_info->start; (t1 = t);)
597 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info)
601 if (parse_info->use_Tnode == parse_info->max_Tnode)
603 tnew = (struct Tblock *) imalloc (sizeof(struct Tblock));
604 tnew->tarray = (struct Tnode *) imalloc (TADD*sizeof(struct Tnode));
607 if (parse_info->use_Tnode == 0)
608 parse_info->start = tnew;
610 parse_info->end->next = tnew;
611 parse_info->end = tnew;
613 parse_info->max_Tnode += TADD;
615 return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);
618 struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset)
620 struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);
621 int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);
627 tn0->pos = ++parse_info->position;
628 ch1 = travi_BSet (parse_info->charset, charset, ch0+1) ;
630 tn0->u.ch[1] = parse_info->charset->size;
633 tn0->u.ch[1] = ch1-1;
634 while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)
636 tn1 = mk_Tnode(parse_info);
640 tn1 = tn0->u.p[1] = mk_Tnode(parse_info);
642 tn1->pos = ++(parse_info->position);
643 if ((ch1 = travi_BSet (parse_info->charset, charset,
646 tn1->u.ch[1] = parse_info->charset->size;
649 tn1->u.ch[1] = ch1-1;
656 static void del_followpos (struct DFA_parse *parse_info)
658 ifree (parse_info->followpos);
661 static void init_pos (struct DFA_parse *parse_info)
663 parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
664 * (1+parse_info->position));
667 static void del_pos (struct DFA_parse *parse_info)
669 ifree (parse_info->posar);
672 static void add_follow (struct DFA_parse *parse_info,
673 Set lastpos, Set firstpos)
677 parse_info->followpos[lastpos->value] =
678 union_Set (parse_info->poset,
679 parse_info->followpos[lastpos->value], firstpos);
680 lastpos = lastpos->next;
684 static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n)
686 struct Tnode **posar = parse_info->posar;
687 SetType poset = parse_info->poset;
692 dfa_trav (parse_info, n->u.p[0]);
693 dfa_trav (parse_info, n->u.p[1]);
694 n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;
695 n->firstpos = mk_Set (poset);
696 n->firstpos = union_Set (poset, n->firstpos, n->u.p[0]->firstpos);
697 if (n->u.p[0]->nullable)
698 n->firstpos = union_Set (poset, n->firstpos, n->u.p[1]->firstpos);
699 n->lastpos = mk_Set (poset);
700 n->lastpos = union_Set (poset, n->lastpos, n->u.p[1]->lastpos);
701 if (n->u.p[1]->nullable)
702 n->lastpos = union_Set (poset, n->lastpos, n->u.p[0]->lastpos);
704 add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);
706 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
707 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
708 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
709 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
714 dfa_trav (parse_info, n->u.p[0]);
715 dfa_trav (parse_info, n->u.p[1]);
716 n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;
718 n->firstpos = merge_Set (poset, n->u.p[0]->firstpos,
719 n->u.p[1]->firstpos);
720 n->lastpos = merge_Set (poset, n->u.p[0]->lastpos,
722 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
723 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
724 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
725 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
730 dfa_trav (parse_info, n->u.p[0]);
731 n->nullable = n->u.p[0]->nullable;
732 n->firstpos = n->u.p[0]->firstpos;
733 n->lastpos = n->u.p[0]->lastpos;
734 add_follow (parse_info, n->lastpos, n->firstpos);
739 dfa_trav (parse_info, n->u.p[0]);
741 n->firstpos = n->u.p[0]->firstpos;
742 n->lastpos = n->u.p[0]->lastpos;
743 add_follow (parse_info, n->lastpos, n->firstpos);
749 n->lastpos = mk_Set (poset);
750 n->firstpos = mk_Set (poset);
757 n->firstpos = mk_Set (poset);
758 n->firstpos = add_Set (poset, n->firstpos, n->pos);
759 n->lastpos = mk_Set (poset);
760 n->lastpos = add_Set (poset, n->lastpos, n->pos);
763 printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);
764 else if (n->u.ch[1] > n->u.ch[0])
767 out_char (n->u.ch[0]);
768 if (n->u.ch[1] > n->u.ch[0]+1)
770 out_char (n->u.ch[1]);
774 out_char (n->u.ch[0]);
778 printf ("\n nullable : %c\n", n->nullable ? '1' : '0');
779 printf (" firstpos :");
780 pr_Set (poset, n->firstpos);
781 printf (" lastpos :");
782 pr_Set (poset, n->lastpos);
786 static void init_followpos (struct DFA_parse *parse_info)
790 parse_info->followpos = fa =
791 (Set *) imalloc (sizeof(Set) * (1+parse_info->position));
792 for (i = parse_info->position+1; --i >= 0; fa++)
793 *fa = mk_Set (parse_info->poset);
796 static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
803 struct DFA_state *dfa_from, *dfa_to;
804 struct Tnode **posar;
810 posar = parse_info->posar;
811 max_char = parse_info->charset->size;
812 pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));
814 tran_set = cp_Set (parse_info->poset, parse_info->root->firstpos);
815 i = add_DFA_state (dfas, &tran_set, &dfa_from);
817 dfa_from->rule_no = 0;
818 poset = parse_info->poset;
819 followpos = parse_info->followpos;
820 while ((dfa_from = get_DFA_state (dfas)))
824 for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next)
825 if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char)
826 *pos_i++ = tran_set->value;
831 c = posar[tran_set->value]->u.ch[1];
836 dfa_from->rule_no = -i;
837 dfa_from->rule_nno = -j;
839 for (char_1 = 0; char_1 <= max_char; char_1++)
842 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
843 if (posar[i]->u.ch[1] >= char_1
844 && (c=posar[i]->u.ch[0]) < char_0)
850 if (char_0 > max_char)
855 tran_set = mk_Set (poset);
856 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
858 if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)
859 char_1 = c - 1; /* forward chunk */
860 else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)
861 char_1 = c; /* backward chunk */
862 if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)
863 tran_set = union_Set (poset, tran_set, followpos[i]);
867 add_DFA_state (dfas, &tran_set, &dfa_to);
868 add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no);
873 sort_DFA_states (dfas);
876 static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
879 struct DFA_tran *tran;
880 int prev_no, i, c, no;
882 for (no=0; no < dfas->no; ++no)
884 s = dfas->sortarray[no];
885 assert (s->no == no);
886 printf ("DFA state %d", no);
888 printf ("#%d", s->rule_no);
890 printf (" #%d", s->rule_nno);
893 pr_Set (parse_info->poset, s->set);
895 for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)
897 if (prev_no != tran->to)
901 printf (" goto %d on [", tran->to);
904 for (c = tran->ch[0]; c <= tran->ch[1]; c++)
905 printf ("%s", str_char(c));
913 static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas)
917 printf ("%d/%d tree nodes used, %d bytes each\n",
918 parse_info->use_Tnode, parse_info->max_Tnode, sizeof (struct Tnode));
919 k = inf_BSetHandle (parse_info->charset, &i, &j);
920 printf ("%ld/%ld character sets, %d bytes each\n",
921 i/k, j/k, k*sizeof(BSetWord));
922 k = inf_SetType (parse_info->poset, &i, &j);
923 printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);
924 printf ("%d DFA states\n", dfas->no);
927 static void pr_followpos (struct DFA_parse *parse_info)
929 struct Tnode **posar = parse_info->posar;
931 printf ("\nfollowsets:\n");
932 for (i=1; i <= parse_info->position; i++)
935 pr_Set (parse_info->poset, parse_info->followpos[i]);
938 if (posar[i]->u.ch[0] < 0)
939 printf ("#%d", -posar[i]->u.ch[0]);
940 else if (posar[i]->u.ch[1] > posar[i]->u.ch[0])
943 out_char (posar[i]->u.ch[0]);
944 if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1)
946 out_char (posar[i]->u.ch[1]);
950 out_char (posar[i]->u.ch[0]);
956 void dfa_parse_cmap_clean (struct DFA *d)
958 struct DFA_parse *dfa = d->parse_info;
963 dfa->charMapSize = 7;
964 dfa->charMap = imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
969 void dfa_parse_cmap_new (struct DFA *d, const int *cmap)
971 struct DFA_parse *dfa = d->parse_info;
976 for (cp = cmap; *cp; cp += 2)
978 size = cp - cmap + 1;
979 if (size > dfa->charMapSize)
982 ifree (dfa->charMap);
983 dfa->charMapSize = size;
984 dfa->charMap = imalloc (size * sizeof(*dfa->charMap));
986 memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));
989 void dfa_parse_cmap_del (struct DFA *d, int from)
991 struct DFA_parse *dfa = d->parse_info;
995 for (cc = dfa->charMap; *cc; cc += 2)
998 while ((cc[0] = cc[2]))
1007 void dfa_parse_cmap_add (struct DFA *d, int from, int to)
1009 struct DFA_parse *dfa = d->parse_info;
1014 for (cc = dfa->charMap; *cc; cc += 2)
1020 indx = cc - dfa->charMap;
1021 size = dfa->charMapSize;
1024 int *cn = imalloc ((size+16) * sizeof(*dfa->charMap));
1025 memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));
1026 ifree (dfa->charMap);
1028 dfa->charMapSize = size+16;
1030 dfa->charMap[indx] = from;
1031 dfa->charMap[indx+1] = to;
1032 dfa->charMap[indx+2] = 0;
1035 void dfa_parse_cmap_thompson (struct DFA *d)
1037 static int thompson_chars[] =
1053 dfa_parse_cmap_new (d, thompson_chars);
1056 static struct DFA_parse *dfa_parse_init (void)
1058 struct DFA_parse *parse_info =
1059 (struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
1061 parse_info->charset = mk_BSetHandle (255, 20);
1062 parse_info->position = 0;
1063 parse_info->rule = 0;
1064 parse_info->root = NULL;
1066 parse_info->anyset = mk_BSet (&parse_info->charset);
1067 res_BSet (parse_info->charset, parse_info->anyset);
1068 add_BSet (parse_info->charset, parse_info->anyset, '\n');
1069 com_BSet (parse_info->charset, parse_info->anyset);
1070 parse_info->use_Tnode = parse_info->max_Tnode = 0;
1071 parse_info->start = parse_info->end = NULL;
1072 parse_info->charMap = NULL;
1073 parse_info->charMapSize = 0;
1074 parse_info->cmap = NULL;
1078 static void rm_dfa_parse (struct DFA_parse **dfap)
1080 struct DFA_parse *parse_info = *dfap;
1081 assert (parse_info);
1082 term_Tnode(parse_info);
1083 rm_BSetHandle (&parse_info->charset);
1084 ifree (parse_info->charMap);
1089 static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk)
1091 struct DFA_states *dfas;
1092 struct DFA_parse *parse_info = dfap;
1094 assert (poset_chunk > 10);
1097 parse_info->poset = mk_SetType (poset_chunk);
1098 init_pos(parse_info);
1099 init_followpos(parse_info);
1100 assert (parse_info->root);
1101 dfa_trav (parse_info, parse_info->root);
1103 if (debug_dfa_followpos)
1104 pr_followpos(parse_info);
1105 init_DFA_states (&dfas, parse_info->poset, STATE_HASH);
1106 mk_dfa_tran (parse_info, dfas);
1108 pr_tran (parse_info, dfas);
1110 pr_verbose (parse_info, dfas);
1111 del_pos(parse_info);
1112 del_followpos(parse_info);
1113 rm_SetType (parse_info->poset);
1117 struct DFA *dfa_init (void)
1121 dfa = imalloc (sizeof(*dfa));
1122 dfa->parse_info = dfa_parse_init ();
1123 dfa->state_info = NULL;
1125 dfa_parse_cmap_thompson (dfa);
1129 void dfa_set_cmap (struct DFA *dfa, void *vp,
1130 const char **(*cmap)(void *vp, const char **from, int len))
1132 dfa->parse_info->cmap = cmap;
1133 dfa->parse_info->cmap_data = vp;
1136 int dfa_parse (struct DFA *dfa, const char **pattern)
1139 struct DFA_parse *parse_info;
1142 assert (dfa->parse_info);
1143 parse_info = dfa->parse_info;
1144 do_parse (parse_info, pattern, &top);
1145 if (parse_info->err_code)
1146 return parse_info->err_code;
1147 if ( !dfa->parse_info->root )
1148 dfa->parse_info->root = top;
1153 n = mk_Tnode (parse_info);
1155 n->u.p[0] = dfa->parse_info->root;
1157 dfa->parse_info->root = n;
1162 void dfa_mkstate (struct DFA *dfa)
1166 dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK);
1167 dfa->no_states = dfa->state_info->no;
1168 dfa->states = dfa->state_info->sortarray;
1169 rm_dfa_parse (&dfa->parse_info);
1172 void dfa_delete (struct DFA **dfap)
1176 if ((*dfap)->parse_info)
1177 rm_dfa_parse (&(*dfap)->parse_info);
1178 if ((*dfap)->state_info)
1179 rm_DFA_states (&(*dfap)->state_info);