1 /* $Id: dfa.c,v 1.34 2005-03-30 09:25:23 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
31 #include <idzebra/util.h>
35 #define DFA_OPEN_RANGE 1
45 struct Tnode *p[2]; /* CAT,OR,STAR,PLUS (left,right) */
46 short ch[2]; /* if ch[0] >= 0 then this Tnode represents */
47 /* a character in range [ch[0]..ch[1]] */
48 /* otherwise ch[0] represents */
49 /* accepting no (-acceptno) */
51 unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */
52 unsigned nullable : 1;
53 DFASet firstpos; /* first positions */
54 DFASet lastpos; /* last positions */
57 struct Tblock { /* Tnode bucket (block) */
58 struct Tblock *next; /* pointer to next bucket */
59 struct Tnode *tarray; /* array of nodes in bucket */
63 #define STATE_HASH 199
64 #define POSET_CHUNK 100
66 int debug_dfa_trav = 0;
67 int debug_dfa_tran = 0;
68 int debug_dfa_followpos = 0;
71 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info);
72 static struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info,
74 static void term_Tnode (struct DFA_parse *parse_info);
77 del_followpos (struct DFA_parse *parse_info),
78 init_pos (struct DFA_parse *parse_info),
79 del_pos (struct DFA_parse *parse_info),
80 mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
81 add_follow (struct DFA_parse *parse_info, DFASet lastpos, DFASet firstpos),
82 dfa_trav (struct DFA_parse *parse_info, struct Tnode *n),
83 init_followpos (struct DFA_parse *parse_info),
84 pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
85 pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas),
86 pr_followpos (struct DFA_parse *parse_info),
88 lex (struct DFA_parse *parse_info);
91 nextchar (struct DFA_parse *parse_info, int *esc),
92 read_charset (struct DFA_parse *parse_info);
95 *str_char (unsigned c);
112 static struct Tnode *expr_1 (struct DFA_parse *parse_info),
113 *expr_2 (struct DFA_parse *parse_info),
114 *expr_3 (struct DFA_parse *parse_info),
115 *expr_4 (struct DFA_parse *parse_info);
117 static struct Tnode *expr_1 (struct DFA_parse *parse_info)
119 struct Tnode *t1, *t2, *tn;
121 if (!(t1 = expr_2 (parse_info)))
123 while (parse_info->lookahead == L_ALT)
126 if (!(t2 = expr_2 (parse_info)))
129 tn = mk_Tnode (parse_info);
138 static struct Tnode *expr_2 (struct DFA_parse *parse_info)
140 struct Tnode *t1, *t2, *tn;
142 if (!(t1 = expr_3 (parse_info)))
144 while (parse_info->lookahead == L_WILD ||
145 parse_info->lookahead == L_ANYZ ||
146 parse_info->lookahead == L_ANY ||
147 parse_info->lookahead == L_LP ||
148 parse_info->lookahead == L_CHAR ||
149 parse_info->lookahead == L_CHARS)
151 if (!(t2 = expr_3 (parse_info)))
154 tn = mk_Tnode (parse_info);
163 static struct Tnode *expr_3 (struct DFA_parse *parse_info)
165 struct Tnode *t1, *tn;
167 if (!(t1 = expr_4 (parse_info)))
169 if (parse_info->lookahead == L_CLOS0)
172 tn = mk_Tnode (parse_info);
177 else if (parse_info->lookahead == L_CLOS1)
180 tn = mk_Tnode (parse_info);
185 else if (parse_info->lookahead == L_QUEST)
188 tn = mk_Tnode(parse_info);
191 tn->u.p[1] = mk_Tnode(parse_info);
192 tn->u.p[1]->pos = EPSILON;
198 static struct Tnode *expr_4 (struct DFA_parse *parse_info)
202 switch (parse_info->lookahead)
206 if (!(t1 = expr_1 (parse_info)))
208 if (parse_info->lookahead == L_RP)
214 t1 = mk_Tnode(parse_info);
215 t1->pos = ++parse_info->position;
216 t1->u.ch[1] = t1->u.ch[0] = parse_info->look_ch;
220 t1 = mk_Tnode_cset (parse_info, parse_info->look_chars);
224 t1 = mk_Tnode_cset (parse_info, parse_info->anyset);
228 t1 = mk_Tnode(parse_info);
230 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
231 t1->u.p[1] = mk_Tnode(parse_info);
232 t1->u.p[1]->pos = EPSILON;
236 t1 = mk_Tnode(parse_info);
238 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
246 static void do_parse (struct DFA_parse *parse_info, const char **s,
249 int start_anchor_flag = 0;
250 struct Tnode *t1, *t2, *tn;
252 parse_info->err_code = 0;
253 parse_info->expr_ptr = * (const unsigned char **) s;
255 parse_info->inside_string = 0;
257 if (parse_info->lookahead == L_START)
259 start_anchor_flag = 1;
262 if (parse_info->lookahead == L_END)
264 t1 = mk_Tnode (parse_info);
265 t1->pos = ++parse_info->position;
266 t1->u.ch[1] = t1->u.ch[0] = '\n';
271 t1 = expr_1 (parse_info);
272 if (t1 && parse_info->lookahead == L_END)
274 t2 = mk_Tnode (parse_info);
275 t2->pos = ++parse_info->position;
276 t2->u.ch[1] = t2->u.ch[0] = '\n';
278 tn = mk_Tnode (parse_info);
287 if (t1 && parse_info->lookahead == 0)
289 t2 = mk_Tnode(parse_info);
290 t2->pos = ++parse_info->position;
291 t2->u.ch[0] = -(++parse_info->rule);
292 t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule);
294 *tnp = mk_Tnode(parse_info);
301 if (!parse_info->err_code)
303 if (parse_info->lookahead == L_RP)
304 parse_info->err_code = DFA_ERR_RP;
305 else if (parse_info->lookahead == L_LP)
306 parse_info->err_code = DFA_ERR_LP;
308 parse_info->err_code = DFA_ERR_SYNTAX;
311 *s = (const char *) parse_info->expr_ptr;
314 static int nextchar (struct DFA_parse *parse_info, int *esc)
317 if (*parse_info->expr_ptr == '\0')
319 else if (*parse_info->expr_ptr != '\\')
320 return *parse_info->expr_ptr++;
322 switch (*++parse_info->expr_ptr)
329 ++parse_info->expr_ptr;
332 ++parse_info->expr_ptr;
335 ++parse_info->expr_ptr;
338 ++parse_info->expr_ptr;
341 ++parse_info->expr_ptr;
344 return *parse_info->expr_ptr++;
348 static int nextchar_set (struct DFA_parse *parse_info, int *esc)
350 if (*parse_info->expr_ptr == ' ')
353 return *parse_info->expr_ptr++;
355 return nextchar (parse_info, esc);
358 static int read_charset (struct DFA_parse *parse_info)
360 int i, ch0, ch1, esc0, esc1, cc = 0;
361 parse_info->look_chars = mk_BSet (&parse_info->charset);
362 res_BSet (parse_info->charset, parse_info->look_chars);
364 ch0 = nextchar_set (parse_info, &esc0);
365 if (!esc0 && ch0 == '^')
368 ch0 = nextchar_set (parse_info, &esc0);
372 if (!esc0 && ch0 == ']')
374 if (!esc0 && ch0 == '-')
379 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
383 if (parse_info->cmap)
387 const char *mcp = mapfrom;
389 mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1);
393 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
394 ch1 = nextchar_set (parse_info, &esc1);
396 if (!esc1 && ch1 == '-')
399 if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)
402 if (!esc1 && ch1 == ']')
408 if (!esc1 && ch1 == ']')
410 add_BSet (parse_info->charset, parse_info->look_chars, '-');
414 if (!open_range && parse_info->cmap)
418 const char *mcp = mapfrom;
420 mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);
424 for (i=ch0; ++i<=ch1;)
425 add_BSet (parse_info->charset, parse_info->look_chars, i);
427 ch0 = nextchar_set (parse_info, &esc0);
438 com_BSet (parse_info->charset, parse_info->look_chars);
442 static int map_l_char (struct DFA_parse *parse_info)
445 const char *cp0 = (const char *) (parse_info->expr_ptr-1);
446 int i = 0, len = strlen(cp0);
448 if (cp0[0] == 1 && cp0[1])
450 parse_info->expr_ptr++;
451 parse_info->look_ch = ((unsigned char *) cp0)[1];
454 if (!parse_info->cmap)
457 mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);
460 parse_info->expr_ptr = (const unsigned char *) cp0;
461 parse_info->look_ch = ((unsigned char **) mapto)[i][0];
462 yaz_log (YLOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);
466 static int lex_sub(struct DFA_parse *parse_info)
469 while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)
470 if (parse_info->look_ch == '\"')
473 return map_l_char (parse_info);
474 parse_info->inside_string = !parse_info->inside_string;
476 else if (esc || parse_info->inside_string)
477 return map_l_char (parse_info);
478 else if (parse_info->look_ch == '[')
479 return read_charset(parse_info);
483 for (cc = parse_info->charMap; *cc; cc += 2)
484 if (*cc == (int) (parse_info->look_ch))
487 --parse_info->expr_ptr;
490 return map_l_char (parse_info);
495 static void lex (struct DFA_parse *parse_info)
497 parse_info->lookahead = lex_sub (parse_info);
500 static const char *str_char (unsigned c)
504 if (c < 32 || c >= 127)
517 sprintf (s+1, "x%02x", c);
539 static void out_char (int c)
541 printf ("%s", str_char (c));
544 static void term_Tnode (struct DFA_parse *parse_info)
546 struct Tblock *t, *t1;
547 for (t = parse_info->start; (t1 = t);)
555 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info)
559 if (parse_info->use_Tnode == parse_info->max_Tnode)
561 tnew = (struct Tblock *) imalloc (sizeof(struct Tblock));
562 tnew->tarray = (struct Tnode *) imalloc (TADD*sizeof(struct Tnode));
565 if (parse_info->use_Tnode == 0)
566 parse_info->start = tnew;
568 parse_info->end->next = tnew;
569 parse_info->end = tnew;
571 parse_info->max_Tnode += TADD;
573 return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);
576 struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset)
578 struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);
579 int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);
585 tn0->pos = ++parse_info->position;
586 ch1 = travi_BSet (parse_info->charset, charset, ch0+1) ;
588 tn0->u.ch[1] = parse_info->charset->size;
591 tn0->u.ch[1] = ch1-1;
592 while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)
594 tn1 = mk_Tnode(parse_info);
598 tn1 = tn0->u.p[1] = mk_Tnode(parse_info);
600 tn1->pos = ++(parse_info->position);
601 if ((ch1 = travi_BSet (parse_info->charset, charset,
604 tn1->u.ch[1] = parse_info->charset->size;
607 tn1->u.ch[1] = ch1-1;
614 static void del_followpos (struct DFA_parse *parse_info)
616 ifree (parse_info->followpos);
619 static void init_pos (struct DFA_parse *parse_info)
621 parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
622 * (1+parse_info->position));
625 static void del_pos (struct DFA_parse *parse_info)
627 ifree (parse_info->posar);
630 static void add_follow (struct DFA_parse *parse_info,
631 DFASet lastpos, DFASet firstpos)
635 parse_info->followpos[lastpos->value] =
636 union_DFASet (parse_info->poset,
637 parse_info->followpos[lastpos->value], firstpos);
638 lastpos = lastpos->next;
642 static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n)
644 struct Tnode **posar = parse_info->posar;
645 DFASetType poset = parse_info->poset;
650 dfa_trav (parse_info, n->u.p[0]);
651 dfa_trav (parse_info, n->u.p[1]);
652 n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;
653 n->firstpos = mk_DFASet (poset);
654 n->firstpos = union_DFASet (poset, n->firstpos, n->u.p[0]->firstpos);
655 if (n->u.p[0]->nullable)
656 n->firstpos = union_DFASet (poset, n->firstpos, n->u.p[1]->firstpos);
657 n->lastpos = mk_DFASet (poset);
658 n->lastpos = union_DFASet (poset, n->lastpos, n->u.p[1]->lastpos);
659 if (n->u.p[1]->nullable)
660 n->lastpos = union_DFASet (poset, n->lastpos, n->u.p[0]->lastpos);
662 add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);
664 n->u.p[0]->firstpos = rm_DFASet (poset, n->u.p[0]->firstpos);
665 n->u.p[0]->lastpos = rm_DFASet (poset, n->u.p[0]->lastpos);
666 n->u.p[1]->firstpos = rm_DFASet (poset, n->u.p[1]->firstpos);
667 n->u.p[1]->lastpos = rm_DFASet (poset, n->u.p[1]->lastpos);
672 dfa_trav (parse_info, n->u.p[0]);
673 dfa_trav (parse_info, n->u.p[1]);
674 n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;
676 n->firstpos = merge_DFASet (poset, n->u.p[0]->firstpos,
677 n->u.p[1]->firstpos);
678 n->lastpos = merge_DFASet (poset, n->u.p[0]->lastpos,
680 n->u.p[0]->firstpos = rm_DFASet (poset, n->u.p[0]->firstpos);
681 n->u.p[0]->lastpos = rm_DFASet (poset, n->u.p[0]->lastpos);
682 n->u.p[1]->firstpos = rm_DFASet (poset, n->u.p[1]->firstpos);
683 n->u.p[1]->lastpos = rm_DFASet (poset, n->u.p[1]->lastpos);
688 dfa_trav (parse_info, n->u.p[0]);
689 n->nullable = n->u.p[0]->nullable;
690 n->firstpos = n->u.p[0]->firstpos;
691 n->lastpos = n->u.p[0]->lastpos;
692 add_follow (parse_info, n->lastpos, n->firstpos);
697 dfa_trav (parse_info, n->u.p[0]);
699 n->firstpos = n->u.p[0]->firstpos;
700 n->lastpos = n->u.p[0]->lastpos;
701 add_follow (parse_info, n->lastpos, n->firstpos);
707 n->lastpos = mk_DFASet (poset);
708 n->firstpos = mk_DFASet (poset);
715 n->firstpos = mk_DFASet (poset);
716 n->firstpos = add_DFASet (poset, n->firstpos, n->pos);
717 n->lastpos = mk_DFASet (poset);
718 n->lastpos = add_DFASet (poset, n->lastpos, n->pos);
722 printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);
723 else if (n->u.ch[1] > n->u.ch[0])
726 out_char (n->u.ch[0]);
727 if (n->u.ch[1] > n->u.ch[0]+1)
729 out_char (n->u.ch[1]);
733 out_char (n->u.ch[0]);
738 printf ("\n nullable : %c\n", n->nullable ? '1' : '0');
739 printf (" firstpos :");
740 pr_DFASet (poset, n->firstpos);
741 printf (" lastpos :");
742 pr_DFASet (poset, n->lastpos);
746 static void init_followpos (struct DFA_parse *parse_info)
750 parse_info->followpos = fa =
751 (DFASet *) imalloc (sizeof(DFASet) * (1+parse_info->position));
752 for (i = parse_info->position+1; --i >= 0; fa++)
753 *fa = mk_DFASet (parse_info->poset);
756 static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
763 struct DFA_state *dfa_from, *dfa_to;
764 struct Tnode **posar;
770 posar = parse_info->posar;
771 max_char = parse_info->charset->size;
772 pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));
774 tran_set = cp_DFASet (parse_info->poset, parse_info->root->firstpos);
775 i = add_DFA_state (dfas, &tran_set, &dfa_from);
777 dfa_from->rule_no = 0;
778 poset = parse_info->poset;
779 followpos = parse_info->followpos;
780 while ((dfa_from = get_DFA_state (dfas)))
784 for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next)
785 if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char)
786 *pos_i++ = tran_set->value;
791 c = posar[tran_set->value]->u.ch[1];
796 dfa_from->rule_no = -i;
797 dfa_from->rule_nno = -j;
799 for (char_1 = 0; char_1 <= max_char; char_1++)
802 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
803 if (posar[i]->u.ch[1] >= char_1
804 && (c=posar[i]->u.ch[0]) < char_0)
812 if (char_0 > max_char)
817 tran_set = mk_DFASet (poset);
818 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
820 if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)
821 char_1 = c - 1; /* forward chunk */
822 else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)
823 char_1 = c; /* backward chunk */
824 if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)
825 tran_set = union_DFASet (poset, tran_set, followpos[i]);
829 add_DFA_state (dfas, &tran_set, &dfa_to);
830 add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no);
835 sort_DFA_states (dfas);
838 static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
841 struct DFA_tran *tran;
842 int prev_no, i, c, no;
844 for (no=0; no < dfas->no; ++no)
846 s = dfas->sortarray[no];
847 assert (s->no == no);
848 printf ("DFA state %d", no);
850 printf ("#%d", s->rule_no);
852 printf (" #%d", s->rule_nno);
855 pr_DFASet (parse_info->poset, s->set);
857 for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)
859 if (prev_no != tran->to)
863 printf (" goto %d on [", tran->to);
866 for (c = tran->ch[0]; c <= tran->ch[1]; c++)
867 printf ("%s", str_char(c));
875 static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas)
879 printf ("%d/%d tree nodes used, %d bytes each\n",
880 parse_info->use_Tnode, parse_info->max_Tnode, sizeof (struct Tnode));
881 k = inf_BSetHandle (parse_info->charset, &i, &j);
882 printf ("%ld/%ld character sets, %d bytes each\n",
883 i/k, j/k, k*sizeof(BSetWord));
884 k = inf_DFASetType (parse_info->poset, &i, &j);
885 printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);
886 printf ("%d DFA states\n", dfas->no);
889 static void pr_followpos (struct DFA_parse *parse_info)
891 struct Tnode **posar = parse_info->posar;
893 printf ("\nfollowsets:\n");
894 for (i=1; i <= parse_info->position; i++)
897 pr_DFASet (parse_info->poset, parse_info->followpos[i]);
900 if (posar[i]->u.ch[0] < 0)
901 printf ("#%d", -posar[i]->u.ch[0]);
902 else if (posar[i]->u.ch[1] > posar[i]->u.ch[0])
905 out_char (posar[i]->u.ch[0]);
906 if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1)
908 out_char (posar[i]->u.ch[1]);
912 out_char (posar[i]->u.ch[0]);
918 void dfa_parse_cmap_clean (struct DFA *d)
920 struct DFA_parse *dfa = d->parse_info;
925 dfa->charMapSize = 7;
926 dfa->charMap = (int *)
927 imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
932 void dfa_parse_cmap_new (struct DFA *d, const int *cmap)
934 struct DFA_parse *dfa = d->parse_info;
939 for (cp = cmap; *cp; cp += 2)
941 size = cp - cmap + 1;
942 if (size > dfa->charMapSize)
945 ifree (dfa->charMap);
946 dfa->charMapSize = size;
947 dfa->charMap = (int *) imalloc (size * sizeof(*dfa->charMap));
949 memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));
952 void dfa_parse_cmap_del (struct DFA *d, int from)
954 struct DFA_parse *dfa = d->parse_info;
958 for (cc = dfa->charMap; *cc; cc += 2)
961 while ((cc[0] = cc[2]))
970 void dfa_parse_cmap_add (struct DFA *d, int from, int to)
972 struct DFA_parse *dfa = d->parse_info;
977 for (cc = dfa->charMap; *cc; cc += 2)
983 indx = cc - dfa->charMap;
984 size = dfa->charMapSize;
987 int *cn = (int *) imalloc ((size+16) * sizeof(*dfa->charMap));
988 memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));
989 ifree (dfa->charMap);
991 dfa->charMapSize = size+16;
993 dfa->charMap[indx] = from;
994 dfa->charMap[indx+1] = to;
995 dfa->charMap[indx+2] = 0;
998 void dfa_parse_cmap_thompson (struct DFA *d)
1000 static int thompson_chars[] =
1016 dfa_parse_cmap_new (d, thompson_chars);
1019 static struct DFA_parse *dfa_parse_init (void)
1021 struct DFA_parse *parse_info =
1022 (struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
1024 parse_info->charset = mk_BSetHandle (255, 20);
1025 parse_info->position = 0;
1026 parse_info->rule = 0;
1027 parse_info->root = NULL;
1029 parse_info->anyset = mk_BSet (&parse_info->charset);
1030 res_BSet (parse_info->charset, parse_info->anyset);
1031 com_BSet (parse_info->charset, parse_info->anyset);
1032 parse_info->use_Tnode = parse_info->max_Tnode = 0;
1033 parse_info->start = parse_info->end = NULL;
1034 parse_info->charMap = NULL;
1035 parse_info->charMapSize = 0;
1036 parse_info->cmap = NULL;
1040 static void rm_dfa_parse (struct DFA_parse **dfap)
1042 struct DFA_parse *parse_info = *dfap;
1043 assert (parse_info);
1044 term_Tnode(parse_info);
1045 rm_BSetHandle (&parse_info->charset);
1046 ifree (parse_info->charMap);
1051 static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk)
1053 struct DFA_states *dfas;
1054 struct DFA_parse *parse_info = dfap;
1056 assert (poset_chunk > 10);
1059 parse_info->poset = mk_DFASetType (poset_chunk);
1060 init_pos(parse_info);
1061 init_followpos(parse_info);
1062 assert (parse_info->root);
1063 dfa_trav (parse_info, parse_info->root);
1065 if (debug_dfa_followpos)
1066 pr_followpos(parse_info);
1067 init_DFA_states (&dfas, parse_info->poset, (int) (STATE_HASH));
1068 mk_dfa_tran (parse_info, dfas);
1071 pr_tran (parse_info, dfas);
1074 pr_verbose (parse_info, dfas);
1075 del_pos(parse_info);
1076 del_followpos(parse_info);
1077 rm_DFASetType (parse_info->poset);
1081 struct DFA *dfa_init (void)
1085 dfa = (struct DFA *) imalloc (sizeof(*dfa));
1086 dfa->parse_info = dfa_parse_init ();
1087 dfa->state_info = NULL;
1089 dfa_parse_cmap_thompson (dfa);
1093 void dfa_set_cmap (struct DFA *dfa, void *vp,
1094 const char **(*cmap)(void *vp, const char **from, int len))
1096 dfa->parse_info->cmap = cmap;
1097 dfa->parse_info->cmap_data = vp;
1100 int dfa_parse (struct DFA *dfa, const char **pattern)
1103 struct DFA_parse *parse_info;
1106 assert (dfa->parse_info);
1107 parse_info = dfa->parse_info;
1109 if (!parse_info->cmap)
1111 res_BSet (parse_info->charset, parse_info->anyset);
1112 add_BSet (parse_info->charset, parse_info->anyset, '\n');
1113 com_BSet (parse_info->charset, parse_info->anyset);
1115 do_parse (parse_info, pattern, &top);
1116 if (parse_info->err_code)
1117 return parse_info->err_code;
1118 if ( !dfa->parse_info->root )
1119 dfa->parse_info->root = top;
1124 n = mk_Tnode (parse_info);
1126 n->u.p[0] = dfa->parse_info->root;
1128 dfa->parse_info->root = n;
1133 void dfa_mkstate (struct DFA *dfa)
1137 dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK);
1138 dfa->no_states = dfa->state_info->no;
1139 dfa->states = dfa->state_info->sortarray;
1140 rm_dfa_parse (&dfa->parse_info);
1143 void dfa_delete (struct DFA **dfap)
1147 if ((*dfap)->parse_info)
1148 rm_dfa_parse (&(*dfap)->parse_info);
1149 if ((*dfap)->state_info)
1150 rm_DFA_states (&(*dfap)->state_info);