1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2009 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
28 #include <idzebra/util.h>
40 struct Tnode *p[2]; /* CAT,OR,STAR,PLUS (left,right) */
41 short ch[2]; /* if ch[0] >= 0 then this Tnode represents */
42 /* a character in range [ch[0]..ch[1]] */
43 /* otherwise ch[0] represents */
44 /* accepting no (-acceptno) */
46 unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */
47 unsigned nullable : 1;
48 DFASet firstpos; /* first positions */
49 DFASet lastpos; /* last positions */
52 struct Tblock { /* Tnode bucket (block) */
53 struct Tblock *next; /* pointer to next bucket */
54 struct Tnode *tarray; /* array of nodes in bucket */
58 #define STATE_HASH 199
59 #define POSET_CHUNK 100
61 int debug_dfa_trav = 0;
62 int debug_dfa_tran = 0;
63 int debug_dfa_followpos = 0;
66 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info);
67 static struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info,
69 static void term_Tnode (struct DFA_parse *parse_info);
72 del_followpos (struct DFA_parse *parse_info),
73 init_pos (struct DFA_parse *parse_info),
74 del_pos (struct DFA_parse *parse_info),
75 mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
76 add_follow (struct DFA_parse *parse_info, DFASet lastpos, DFASet firstpos),
77 dfa_trav (struct DFA_parse *parse_info, struct Tnode *n),
78 init_followpos (struct DFA_parse *parse_info),
79 pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
80 pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas),
81 pr_followpos (struct DFA_parse *parse_info),
83 lex (struct DFA_parse *parse_info);
86 nextchar (struct DFA_parse *parse_info, int *esc),
87 read_charset (struct DFA_parse *parse_info);
90 *str_char (unsigned c);
107 static struct Tnode *expr_1 (struct DFA_parse *parse_info),
108 *expr_2 (struct DFA_parse *parse_info),
109 *expr_3 (struct DFA_parse *parse_info),
110 *expr_4 (struct DFA_parse *parse_info);
112 static struct Tnode *expr_1 (struct DFA_parse *parse_info)
114 struct Tnode *t1, *t2, *tn;
116 if (!(t1 = expr_2 (parse_info)))
118 while (parse_info->lookahead == L_ALT)
121 if (!(t2 = expr_2 (parse_info)))
124 tn = mk_Tnode (parse_info);
133 static struct Tnode *expr_2 (struct DFA_parse *parse_info)
135 struct Tnode *t1, *t2, *tn;
137 if (!(t1 = expr_3 (parse_info)))
139 while (parse_info->lookahead == L_WILD ||
140 parse_info->lookahead == L_ANYZ ||
141 parse_info->lookahead == L_ANY ||
142 parse_info->lookahead == L_LP ||
143 parse_info->lookahead == L_CHAR ||
144 parse_info->lookahead == L_CHARS)
146 if (!(t2 = expr_3 (parse_info)))
149 tn = mk_Tnode (parse_info);
158 static struct Tnode *expr_3 (struct DFA_parse *parse_info)
160 struct Tnode *t1, *tn;
162 if (!(t1 = expr_4 (parse_info)))
164 if (parse_info->lookahead == L_CLOS0)
167 tn = mk_Tnode (parse_info);
172 else if (parse_info->lookahead == L_CLOS1)
175 tn = mk_Tnode (parse_info);
180 else if (parse_info->lookahead == L_QUEST)
183 tn = mk_Tnode(parse_info);
186 tn->u.p[1] = mk_Tnode(parse_info);
187 tn->u.p[1]->pos = EPSILON;
193 static struct Tnode *expr_4 (struct DFA_parse *parse_info)
197 switch (parse_info->lookahead)
201 if (!(t1 = expr_1 (parse_info)))
203 if (parse_info->lookahead == L_RP)
209 t1 = mk_Tnode(parse_info);
210 t1->pos = ++parse_info->position;
211 t1->u.ch[1] = t1->u.ch[0] = parse_info->look_ch;
215 t1 = mk_Tnode_cset (parse_info, parse_info->look_chars);
219 t1 = mk_Tnode_cset (parse_info, parse_info->anyset);
223 t1 = mk_Tnode(parse_info);
225 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
226 t1->u.p[1] = mk_Tnode(parse_info);
227 t1->u.p[1]->pos = EPSILON;
231 t1 = mk_Tnode(parse_info);
233 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
241 static void do_parse (struct DFA_parse *parse_info, const char **s,
244 int start_anchor_flag = 0;
245 struct Tnode *t1, *t2, *tn;
247 parse_info->err_code = 0;
248 parse_info->expr_ptr = * (const unsigned char **) s;
250 parse_info->inside_string = 0;
252 if (parse_info->lookahead == L_START)
254 start_anchor_flag = 1;
257 if (parse_info->lookahead == L_END)
259 t1 = mk_Tnode (parse_info);
260 t1->pos = ++parse_info->position;
261 t1->u.ch[1] = t1->u.ch[0] = '\n';
266 t1 = expr_1 (parse_info);
267 if (t1 && parse_info->lookahead == L_END)
269 t2 = mk_Tnode (parse_info);
270 t2->pos = ++parse_info->position;
271 t2->u.ch[1] = t2->u.ch[0] = '\n';
273 tn = mk_Tnode (parse_info);
282 if (t1 && parse_info->lookahead == 0)
284 t2 = mk_Tnode(parse_info);
285 t2->pos = ++parse_info->position;
286 t2->u.ch[0] = -(++parse_info->rule);
287 t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule);
289 *tnp = mk_Tnode(parse_info);
296 if (!parse_info->err_code)
298 if (parse_info->lookahead == L_RP)
299 parse_info->err_code = DFA_ERR_RP;
300 else if (parse_info->lookahead == L_LP)
301 parse_info->err_code = DFA_ERR_LP;
303 parse_info->err_code = DFA_ERR_SYNTAX;
306 *s = (const char *) parse_info->expr_ptr;
309 static int nextchar (struct DFA_parse *parse_info, int *esc)
312 if (*parse_info->expr_ptr == '\0')
314 else if (*parse_info->expr_ptr != '\\')
315 return *parse_info->expr_ptr++;
317 switch (*++parse_info->expr_ptr)
324 ++parse_info->expr_ptr;
327 ++parse_info->expr_ptr;
330 ++parse_info->expr_ptr;
333 ++parse_info->expr_ptr;
336 ++parse_info->expr_ptr;
339 return *parse_info->expr_ptr++;
343 static int nextchar_set (struct DFA_parse *parse_info, int *esc)
345 if (*parse_info->expr_ptr == ' ')
348 return *parse_info->expr_ptr++;
350 return nextchar (parse_info, esc);
353 static int read_charset (struct DFA_parse *parse_info)
355 int i, ch0, esc0, cc = 0;
356 parse_info->look_chars = mk_BSet (&parse_info->charset);
357 res_BSet (parse_info->charset, parse_info->look_chars);
359 ch0 = nextchar_set (parse_info, &esc0);
360 if (!esc0 && ch0 == '^')
363 ch0 = nextchar_set (parse_info, &esc0);
366 ch0 is last met character
372 if (!esc0 && ch0 == ']')
374 if (!esc0 && ch0 == '-')
379 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
385 ch0 = nextchar(parse_info, &esc0);
389 if (parse_info->cmap)
393 const char *mcp = mapfrom;
395 mapto = parse_info->cmap(parse_info->cmap_data, &mcp, 1);
400 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
401 ch1 = nextchar_set (parse_info, &esc1);
403 if (!esc1 && ch1 == '-')
406 if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)
408 if (!esc1 && ch1 == ']')
415 ch1 = nextchar(parse_info, &esc1);
417 else if (parse_info->cmap)
421 const char *mcp = mapfrom;
423 mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);
427 for (i = ch0; ++i <= ch1;)
428 add_BSet (parse_info->charset, parse_info->look_chars, i);
432 ch0 = nextchar_set (parse_info, &esc0);
441 com_BSet (parse_info->charset, parse_info->look_chars);
445 static int map_l_char (struct DFA_parse *parse_info)
448 const char *cp0 = (const char *) (parse_info->expr_ptr-1);
449 int i = 0, len = strlen(cp0);
451 if (cp0[0] == 1 && cp0[1])
453 parse_info->expr_ptr++;
454 parse_info->look_ch = ((unsigned char *) cp0)[1];
457 if (!parse_info->cmap)
460 mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);
463 parse_info->expr_ptr = (const unsigned char *) cp0;
464 parse_info->look_ch = ((unsigned char **) mapto)[i][0];
465 yaz_log (YLOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);
469 static int lex_sub(struct DFA_parse *parse_info)
472 while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)
473 if (parse_info->look_ch == '\"')
476 return map_l_char (parse_info);
477 parse_info->inside_string = !parse_info->inside_string;
479 else if (esc || parse_info->inside_string)
480 return map_l_char (parse_info);
481 else if (parse_info->look_ch == '[')
482 return read_charset(parse_info);
486 for (cc = parse_info->charMap; *cc; cc += 2)
487 if (*cc == (int) (parse_info->look_ch))
490 --parse_info->expr_ptr;
493 return map_l_char (parse_info);
498 static void lex (struct DFA_parse *parse_info)
500 parse_info->lookahead = lex_sub (parse_info);
503 static const char *str_char (unsigned c)
507 if (c < 32 || c >= 127)
520 sprintf (s+1, "x%02x", c);
542 static void out_char (int c)
544 printf ("%s", str_char (c));
547 static void term_Tnode (struct DFA_parse *parse_info)
549 struct Tblock *t, *t1;
550 for (t = parse_info->start; (t1 = t);)
558 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info)
562 if (parse_info->use_Tnode == parse_info->max_Tnode)
564 tnew = (struct Tblock *) imalloc (sizeof(struct Tblock));
565 tnew->tarray = (struct Tnode *) imalloc (TADD*sizeof(struct Tnode));
568 if (parse_info->use_Tnode == 0)
569 parse_info->start = tnew;
571 parse_info->end->next = tnew;
572 parse_info->end = tnew;
574 parse_info->max_Tnode += TADD;
576 return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);
579 struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset)
581 struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);
582 int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);
588 tn0->pos = ++parse_info->position;
589 ch1 = travi_BSet (parse_info->charset, charset, ch0+1) ;
591 tn0->u.ch[1] = parse_info->charset->size;
594 tn0->u.ch[1] = ch1-1;
595 while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)
597 tn1 = mk_Tnode(parse_info);
601 tn1 = tn0->u.p[1] = mk_Tnode(parse_info);
603 tn1->pos = ++(parse_info->position);
604 if ((ch1 = travi_BSet (parse_info->charset, charset,
607 tn1->u.ch[1] = parse_info->charset->size;
610 tn1->u.ch[1] = ch1-1;
617 static void del_followpos (struct DFA_parse *parse_info)
619 ifree (parse_info->followpos);
622 static void init_pos (struct DFA_parse *parse_info)
624 parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
625 * (1+parse_info->position));
628 static void del_pos (struct DFA_parse *parse_info)
630 ifree (parse_info->posar);
633 static void add_follow (struct DFA_parse *parse_info,
634 DFASet lastpos, DFASet firstpos)
638 parse_info->followpos[lastpos->value] =
639 union_DFASet (parse_info->poset,
640 parse_info->followpos[lastpos->value], firstpos);
641 lastpos = lastpos->next;
645 static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n)
647 struct Tnode **posar = parse_info->posar;
648 DFASetType poset = parse_info->poset;
653 dfa_trav (parse_info, n->u.p[0]);
654 dfa_trav (parse_info, n->u.p[1]);
655 n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;
656 n->firstpos = mk_DFASet (poset);
657 n->firstpos = union_DFASet (poset, n->firstpos, n->u.p[0]->firstpos);
658 if (n->u.p[0]->nullable)
659 n->firstpos = union_DFASet (poset, n->firstpos, n->u.p[1]->firstpos);
660 n->lastpos = mk_DFASet (poset);
661 n->lastpos = union_DFASet (poset, n->lastpos, n->u.p[1]->lastpos);
662 if (n->u.p[1]->nullable)
663 n->lastpos = union_DFASet (poset, n->lastpos, n->u.p[0]->lastpos);
665 add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);
667 n->u.p[0]->firstpos = rm_DFASet (poset, n->u.p[0]->firstpos);
668 n->u.p[0]->lastpos = rm_DFASet (poset, n->u.p[0]->lastpos);
669 n->u.p[1]->firstpos = rm_DFASet (poset, n->u.p[1]->firstpos);
670 n->u.p[1]->lastpos = rm_DFASet (poset, n->u.p[1]->lastpos);
675 dfa_trav (parse_info, n->u.p[0]);
676 dfa_trav (parse_info, n->u.p[1]);
677 n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;
679 n->firstpos = merge_DFASet (poset, n->u.p[0]->firstpos,
680 n->u.p[1]->firstpos);
681 n->lastpos = merge_DFASet (poset, n->u.p[0]->lastpos,
683 n->u.p[0]->firstpos = rm_DFASet (poset, n->u.p[0]->firstpos);
684 n->u.p[0]->lastpos = rm_DFASet (poset, n->u.p[0]->lastpos);
685 n->u.p[1]->firstpos = rm_DFASet (poset, n->u.p[1]->firstpos);
686 n->u.p[1]->lastpos = rm_DFASet (poset, n->u.p[1]->lastpos);
691 dfa_trav (parse_info, n->u.p[0]);
692 n->nullable = n->u.p[0]->nullable;
693 n->firstpos = n->u.p[0]->firstpos;
694 n->lastpos = n->u.p[0]->lastpos;
695 add_follow (parse_info, n->lastpos, n->firstpos);
700 dfa_trav (parse_info, n->u.p[0]);
702 n->firstpos = n->u.p[0]->firstpos;
703 n->lastpos = n->u.p[0]->lastpos;
704 add_follow (parse_info, n->lastpos, n->firstpos);
710 n->lastpos = mk_DFASet (poset);
711 n->firstpos = mk_DFASet (poset);
718 n->firstpos = mk_DFASet (poset);
719 n->firstpos = add_DFASet (poset, n->firstpos, n->pos);
720 n->lastpos = mk_DFASet (poset);
721 n->lastpos = add_DFASet (poset, n->lastpos, n->pos);
725 printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);
726 else if (n->u.ch[1] > n->u.ch[0])
729 out_char (n->u.ch[0]);
730 if (n->u.ch[1] > n->u.ch[0]+1)
732 out_char (n->u.ch[1]);
736 out_char (n->u.ch[0]);
741 printf ("\n nullable : %c\n", n->nullable ? '1' : '0');
742 printf (" firstpos :");
743 pr_DFASet (poset, n->firstpos);
744 printf (" lastpos :");
745 pr_DFASet (poset, n->lastpos);
749 static void init_followpos (struct DFA_parse *parse_info)
753 parse_info->followpos = fa =
754 (DFASet *) imalloc (sizeof(DFASet) * (1+parse_info->position));
755 for (i = parse_info->position+1; --i >= 0; fa++)
756 *fa = mk_DFASet (parse_info->poset);
759 static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
766 struct DFA_state *dfa_from, *dfa_to;
767 struct Tnode **posar;
773 posar = parse_info->posar;
774 max_char = parse_info->charset->size;
775 pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));
777 tran_set = cp_DFASet (parse_info->poset, parse_info->root->firstpos);
778 i = add_DFA_state (dfas, &tran_set, &dfa_from);
780 dfa_from->rule_no = 0;
781 poset = parse_info->poset;
782 followpos = parse_info->followpos;
783 while ((dfa_from = get_DFA_state (dfas)))
787 for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next)
788 if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char)
789 *pos_i++ = tran_set->value;
794 c = posar[tran_set->value]->u.ch[1];
799 dfa_from->rule_no = -i;
800 dfa_from->rule_nno = -j;
802 for (char_1 = 0; char_1 <= max_char; char_1++)
805 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
806 if (posar[i]->u.ch[1] >= char_1
807 && (c=posar[i]->u.ch[0]) < char_0)
815 if (char_0 > max_char)
820 tran_set = mk_DFASet (poset);
821 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
823 if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)
824 char_1 = c - 1; /* forward chunk */
825 else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)
826 char_1 = c; /* backward chunk */
827 if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)
828 tran_set = union_DFASet (poset, tran_set, followpos[i]);
832 add_DFA_state (dfas, &tran_set, &dfa_to);
833 add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no);
838 sort_DFA_states (dfas);
841 static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
844 struct DFA_tran *tran;
845 int prev_no, i, c, no;
847 for (no=0; no < dfas->no; ++no)
849 s = dfas->sortarray[no];
850 assert (s->no == no);
851 printf ("DFA state %d", no);
853 printf ("#%d", s->rule_no);
855 printf (" #%d", s->rule_nno);
858 pr_DFASet (parse_info->poset, s->set);
860 for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)
862 if (prev_no != tran->to)
866 printf (" goto %d on [", tran->to);
869 for (c = tran->ch[0]; c <= tran->ch[1]; c++)
870 printf ("%s", str_char(c));
878 static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas)
882 printf ("%d/%d tree nodes used, %ld bytes each\n",
883 parse_info->use_Tnode, parse_info->max_Tnode, (long) sizeof (struct Tnode));
884 k = inf_BSetHandle (parse_info->charset, &i, &j);
885 printf ("%ld/%ld character sets, %ld bytes each\n",
886 i/k, j/k, (long) k*sizeof(BSetWord));
887 k = inf_DFASetType (parse_info->poset, &i, &j);
888 printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);
889 printf ("%d DFA states\n", dfas->no);
892 static void pr_followpos (struct DFA_parse *parse_info)
894 struct Tnode **posar = parse_info->posar;
896 printf ("\nfollowsets:\n");
897 for (i=1; i <= parse_info->position; i++)
900 pr_DFASet (parse_info->poset, parse_info->followpos[i]);
903 if (posar[i]->u.ch[0] < 0)
904 printf ("#%d", -posar[i]->u.ch[0]);
905 else if (posar[i]->u.ch[1] > posar[i]->u.ch[0])
908 out_char (posar[i]->u.ch[0]);
909 if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1)
911 out_char (posar[i]->u.ch[1]);
915 out_char (posar[i]->u.ch[0]);
921 void dfa_parse_cmap_clean (struct DFA *d)
923 struct DFA_parse *dfa = d->parse_info;
928 dfa->charMapSize = 7;
929 dfa->charMap = (int *)
930 imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
935 void dfa_parse_cmap_new (struct DFA *d, const int *cmap)
937 struct DFA_parse *dfa = d->parse_info;
942 for (cp = cmap; *cp; cp += 2)
944 size = cp - cmap + 1;
945 if (size > dfa->charMapSize)
948 ifree (dfa->charMap);
949 dfa->charMapSize = size;
950 dfa->charMap = (int *) imalloc (size * sizeof(*dfa->charMap));
952 memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));
955 void dfa_parse_cmap_del (struct DFA *d, int from)
957 struct DFA_parse *dfa = d->parse_info;
961 for (cc = dfa->charMap; *cc; cc += 2)
964 while ((cc[0] = cc[2]))
973 void dfa_parse_cmap_add (struct DFA *d, int from, int to)
975 struct DFA_parse *dfa = d->parse_info;
980 for (cc = dfa->charMap; *cc; cc += 2)
986 indx = cc - dfa->charMap;
987 size = dfa->charMapSize;
990 int *cn = (int *) imalloc ((size+16) * sizeof(*dfa->charMap));
991 memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));
992 ifree (dfa->charMap);
994 dfa->charMapSize = size+16;
996 dfa->charMap[indx] = from;
997 dfa->charMap[indx+1] = to;
998 dfa->charMap[indx+2] = 0;
1001 void dfa_parse_cmap_thompson (struct DFA *d)
1003 static int thompson_chars[] =
1019 dfa_parse_cmap_new (d, thompson_chars);
1022 static struct DFA_parse *dfa_parse_init (void)
1024 struct DFA_parse *parse_info =
1025 (struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
1027 parse_info->charset = mk_BSetHandle (255, 20);
1028 parse_info->position = 0;
1029 parse_info->rule = 0;
1030 parse_info->root = NULL;
1032 /* initialize the anyset which by default does not include \n */
1033 parse_info->anyset = mk_BSet (&parse_info->charset);
1034 res_BSet (parse_info->charset, parse_info->anyset);
1035 add_BSet (parse_info->charset, parse_info->anyset, '\n');
1036 com_BSet (parse_info->charset, parse_info->anyset);
1038 parse_info->use_Tnode = parse_info->max_Tnode = 0;
1039 parse_info->start = parse_info->end = NULL;
1040 parse_info->charMap = NULL;
1041 parse_info->charMapSize = 0;
1042 parse_info->cmap = NULL;
1046 static void rm_dfa_parse (struct DFA_parse **dfap)
1048 struct DFA_parse *parse_info = *dfap;
1049 assert (parse_info);
1050 term_Tnode(parse_info);
1051 rm_BSetHandle (&parse_info->charset);
1052 ifree (parse_info->charMap);
1057 static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk)
1059 struct DFA_states *dfas;
1060 struct DFA_parse *parse_info = dfap;
1062 assert (poset_chunk > 10);
1065 parse_info->poset = mk_DFASetType (poset_chunk);
1066 init_pos(parse_info);
1067 init_followpos(parse_info);
1068 assert (parse_info->root);
1069 dfa_trav (parse_info, parse_info->root);
1071 if (debug_dfa_followpos)
1072 pr_followpos(parse_info);
1073 init_DFA_states (&dfas, parse_info->poset, (int) (STATE_HASH));
1074 mk_dfa_tran (parse_info, dfas);
1077 pr_tran (parse_info, dfas);
1080 pr_verbose (parse_info, dfas);
1081 del_pos(parse_info);
1082 del_followpos(parse_info);
1083 rm_DFASetType (parse_info->poset);
1087 struct DFA *dfa_init (void)
1091 dfa = (struct DFA *) imalloc (sizeof(*dfa));
1092 dfa->parse_info = dfa_parse_init ();
1093 dfa->state_info = NULL;
1095 dfa_parse_cmap_thompson (dfa);
1099 void dfa_anyset_includes_nl(struct DFA *dfa)
1101 add_BSet (dfa->parse_info->charset, dfa->parse_info->anyset, '\n');
1104 void dfa_set_cmap (struct DFA *dfa, void *vp,
1105 const char **(*cmap)(void *vp, const char **from, int len))
1107 dfa->parse_info->cmap = cmap;
1108 dfa->parse_info->cmap_data = vp;
1111 int dfa_get_last_rule (struct DFA *dfa)
1113 return dfa->parse_info->rule;
1116 int dfa_parse (struct DFA *dfa, const char **pattern)
1119 struct DFA_parse *parse_info;
1122 assert (dfa->parse_info);
1123 parse_info = dfa->parse_info;
1125 do_parse (parse_info, pattern, &top);
1126 if (parse_info->err_code)
1127 return parse_info->err_code;
1128 if ( !dfa->parse_info->root )
1129 dfa->parse_info->root = top;
1134 n = mk_Tnode (parse_info);
1136 n->u.p[0] = dfa->parse_info->root;
1138 dfa->parse_info->root = n;
1143 void dfa_mkstate (struct DFA *dfa)
1147 dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK);
1148 dfa->no_states = dfa->state_info->no;
1149 dfa->states = dfa->state_info->sortarray;
1150 rm_dfa_parse (&dfa->parse_info);
1153 void dfa_delete (struct DFA **dfap)
1157 if ((*dfap)->parse_info)
1158 rm_dfa_parse (&(*dfap)->parse_info);
1159 if ((*dfap)->state_info)
1160 rm_DFA_states (&(*dfap)->state_info);
1167 * c-file-style: "Stroustrup"
1168 * indent-tabs-mode: nil
1170 * vim: shiftwidth=4 tabstop=8 expandtab