2 * Copyright (C) 1994-1999, Index Data
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.25 1999-02-02 14:50:05 adam
8 * Updated WIN32 code specific sections. Changed header.
10 * Revision 1.24 1998/10/28 10:48:55 adam
11 * Added type cast to prevent warning.
13 * Revision 1.23 1998/09/02 14:15:28 adam
14 * Zebra uses GNU Configure.
16 * Revision 1.22 1998/06/24 12:16:10 adam
17 * Support for relations on text operands. Open range support in
18 * DFA module (i.e. [-j], [g-]).
20 * Revision 1.21 1998/06/22 11:33:39 adam
21 * Added two type casts.
23 * Revision 1.20 1998/06/08 14:40:44 adam
24 * Fixed problem with signed character(s) in regular expressions.
26 * Revision 1.19 1998/01/12 14:39:39 adam
27 * Fixed bug in term_Tnode.
29 * Revision 1.18 1997/09/29 09:05:17 adam
30 * Thread safe DFA module. We simply had to put a few static vars to
31 * the DFA_parse structure.
33 * Revision 1.17 1997/09/18 08:59:17 adam
34 * Extra generic handle for the character mapping routines.
36 * Revision 1.16 1997/09/05 15:29:57 adam
37 * Changed prototype for chr_map_input - added const.
38 * Added support for C++, headers uses extern "C" for public definitions.
40 * Revision 1.15 1997/02/10 10:19:20 adam
41 * Added facility for open character sets, eg [a-].
43 * Revision 1.14 1996/10/29 13:57:22 adam
44 * Include of zebrautl.h instead of alexutil.h.
46 * Revision 1.13 1996/06/17 14:24:08 adam
47 * Bug fix: read_charset didn't handle character mapping.
49 * Revision 1.12 1996/06/04 10:20:02 adam
50 * Added support for character mapping.
52 * Revision 1.11 1996/01/08 19:15:24 adam
53 * Allow single $ in expressions.
55 * Revision 1.10 1996/01/08 09:09:17 adam
56 * Function dfa_parse got 'const' string argument.
57 * New functions to define char mappings made public.
59 * Revision 1.9 1995/12/06 12:24:58 adam
60 * Removed verbatim mode code.
62 * Revision 1.8 1995/12/06 09:09:58 adam
63 * Work on left and right anchors.
65 * Revision 1.7 1995/11/27 09:23:02 adam
66 * New berbatim hook in regular expressions. "[]n ..".
68 * Revision 1.6 1995/10/16 09:31:25 adam
71 * Revision 1.5 1995/10/02 15:17:58 adam
72 * Bug fix in dfa_delete.
74 * Revision 1.4 1995/09/28 09:18:52 adam
75 * Removed various preprocessor defines.
77 * Revision 1.3 1995/09/04 12:33:26 adam
78 * Various cleanup. YAZ util used instead.
80 * Revision 1.2 1995/01/25 11:30:50 adam
81 * Simple error reporting when parsing regular expressions.
82 * Memory usage reduced.
84 * Revision 1.1 1995/01/24 16:02:52 adam
85 * New private header file in dfa module (dfap.h).
86 * Module no longer uses yacc to parse regular expressions.
100 #define DFA_OPEN_RANGE 1
106 #define EPSILON 16004
110 struct Tnode *p[2]; /* CAT,OR,STAR,PLUS (left,right) */
111 short ch[2]; /* if ch[0] >= 0 then this Tnode represents */
112 /* a character in range [ch[0]..ch[1]] */
113 /* otherwise ch[0] represents */
114 /* accepting no (-acceptno) */
116 unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */
117 unsigned nullable : 1;
118 Set firstpos; /* first positions */
119 Set lastpos; /* last positions */
122 struct Tblock { /* Tnode bucket (block) */
123 struct Tblock *next; /* pointer to next bucket */
124 struct Tnode *tarray; /* array of nodes in bucket */
128 #define STATE_HASH 199
129 #define POSET_CHUNK 100
131 int debug_dfa_trav = 0;
132 int debug_dfa_tran = 0;
133 int debug_dfa_followpos = 0;
136 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info);
137 static struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info,
139 static void term_Tnode (struct DFA_parse *parse_info);
142 del_followpos (struct DFA_parse *parse_info),
143 init_pos (struct DFA_parse *parse_info),
144 del_pos (struct DFA_parse *parse_info),
145 mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
146 add_follow (struct DFA_parse *parse_info, Set lastpos, Set firstpos),
147 dfa_trav (struct DFA_parse *parse_info, struct Tnode *n),
148 init_followpos (struct DFA_parse *parse_info),
149 pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
150 pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas),
151 pr_followpos (struct DFA_parse *parse_info),
153 lex (struct DFA_parse *parse_info);
156 nextchar (struct DFA_parse *parse_info, int *esc),
157 read_charset (struct DFA_parse *parse_info);
160 *str_char (unsigned c);
177 static struct Tnode *expr_1 (struct DFA_parse *parse_info),
178 *expr_2 (struct DFA_parse *parse_info),
179 *expr_3 (struct DFA_parse *parse_info),
180 *expr_4 (struct DFA_parse *parse_info);
182 static struct Tnode *expr_1 (struct DFA_parse *parse_info)
184 struct Tnode *t1, *t2, *tn;
186 if (!(t1 = expr_2 (parse_info)))
188 while (parse_info->lookahead == L_ALT)
191 if (!(t2 = expr_2 (parse_info)))
194 tn = mk_Tnode (parse_info);
203 static struct Tnode *expr_2 (struct DFA_parse *parse_info)
205 struct Tnode *t1, *t2, *tn;
207 if (!(t1 = expr_3 (parse_info)))
209 while (parse_info->lookahead == L_WILD ||
210 parse_info->lookahead == L_ANYZ ||
211 parse_info->lookahead == L_ANY ||
212 parse_info->lookahead == L_LP ||
213 parse_info->lookahead == L_CHAR ||
214 parse_info->lookahead == L_CHARS)
216 if (!(t2 = expr_3 (parse_info)))
219 tn = mk_Tnode (parse_info);
228 static struct Tnode *expr_3 (struct DFA_parse *parse_info)
230 struct Tnode *t1, *tn;
232 if (!(t1 = expr_4 (parse_info)))
234 if (parse_info->lookahead == L_CLOS0)
237 tn = mk_Tnode (parse_info);
242 else if (parse_info->lookahead == L_CLOS1)
245 tn = mk_Tnode (parse_info);
250 else if (parse_info->lookahead == L_QUEST)
253 tn = mk_Tnode(parse_info);
256 tn->u.p[1] = mk_Tnode(parse_info);
257 tn->u.p[1]->pos = EPSILON;
263 static struct Tnode *expr_4 (struct DFA_parse *parse_info)
267 switch (parse_info->lookahead)
271 if (!(t1 = expr_1 (parse_info)))
273 if (parse_info->lookahead == L_RP)
279 t1 = mk_Tnode(parse_info);
280 t1->pos = ++parse_info->position;
281 t1->u.ch[1] = t1->u.ch[0] = parse_info->look_ch;
285 t1 = mk_Tnode_cset (parse_info, parse_info->look_chars);
289 t1 = mk_Tnode_cset (parse_info, parse_info->anyset);
293 t1 = mk_Tnode(parse_info);
295 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
296 t1->u.p[1] = mk_Tnode(parse_info);
297 t1->u.p[1]->pos = EPSILON;
301 t1 = mk_Tnode(parse_info);
303 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
311 static void do_parse (struct DFA_parse *parse_info, const char **s,
314 int start_anchor_flag = 0;
315 struct Tnode *t1, *t2, *tn;
317 parse_info->err_code = 0;
318 parse_info->expr_ptr = * (const unsigned char **) s;
320 parse_info->inside_string = 0;
322 if (parse_info->lookahead == L_START)
324 start_anchor_flag = 1;
327 if (parse_info->lookahead == L_END)
329 t1 = mk_Tnode (parse_info);
330 t1->pos = ++parse_info->position;
331 t1->u.ch[1] = t1->u.ch[0] = '\n';
336 t1 = expr_1 (parse_info);
337 if (t1 && parse_info->lookahead == L_END)
339 t2 = mk_Tnode (parse_info);
340 t2->pos = ++parse_info->position;
341 t2->u.ch[1] = t2->u.ch[0] = '\n';
343 tn = mk_Tnode (parse_info);
352 if (t1 && parse_info->lookahead == 0)
354 t2 = mk_Tnode(parse_info);
355 t2->pos = ++parse_info->position;
356 t2->u.ch[0] = -(++parse_info->rule);
357 t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule);
359 *tnp = mk_Tnode(parse_info);
366 if (!parse_info->err_code)
368 if (parse_info->lookahead == L_RP)
369 parse_info->err_code = DFA_ERR_RP;
370 else if (parse_info->lookahead == L_LP)
371 parse_info->err_code = DFA_ERR_LP;
373 parse_info->err_code = DFA_ERR_SYNTAX;
376 *s = (const char *) parse_info->expr_ptr;
379 static int nextchar (struct DFA_parse *parse_info, int *esc)
382 if (*parse_info->expr_ptr == '\0')
384 else if (*parse_info->expr_ptr != '\\')
385 return *parse_info->expr_ptr++;
387 switch (*++parse_info->expr_ptr)
394 ++parse_info->expr_ptr;
397 ++parse_info->expr_ptr;
400 ++parse_info->expr_ptr;
403 ++parse_info->expr_ptr;
406 ++parse_info->expr_ptr;
409 return *parse_info->expr_ptr++;
413 static int nextchar_set (struct DFA_parse *parse_info, int *esc)
415 if (*parse_info->expr_ptr == ' ')
418 return *parse_info->expr_ptr++;
420 return nextchar (parse_info, esc);
423 static int read_charset (struct DFA_parse *parse_info)
425 int i, ch0, ch1, esc0, esc1, cc = 0;
426 parse_info->look_chars = mk_BSet (&parse_info->charset);
427 res_BSet (parse_info->charset, parse_info->look_chars);
429 ch0 = nextchar_set (parse_info, &esc0);
430 if (!esc0 && ch0 == '^')
433 ch0 = nextchar_set (parse_info, &esc0);
437 if (!esc0 && ch0 == ']')
439 if (!esc0 && ch0 == '-')
444 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
448 if (parse_info->cmap)
452 const char *mcp = mapfrom;
454 mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1);
458 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
459 ch1 = nextchar_set (parse_info, &esc1);
461 if (!esc1 && ch1 == '-')
464 if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)
467 if (!esc1 && ch1 == ']')
473 if (!esc1 && ch1 == ']')
475 add_BSet (parse_info->charset, parse_info->look_chars, '-');
479 if (!open_range && parse_info->cmap)
483 const char *mcp = mapfrom;
485 mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);
489 for (i=ch0; ++i<=ch1;)
490 add_BSet (parse_info->charset, parse_info->look_chars, i);
492 ch0 = nextchar_set (parse_info, &esc0);
503 com_BSet (parse_info->charset, parse_info->look_chars);
507 static int map_l_char (struct DFA_parse *parse_info)
510 const char *cp0 = (const char *) (parse_info->expr_ptr-1);
511 int i = 0, len = strlen(cp0);
513 if (cp0[0] == 1 && cp0[1])
515 parse_info->expr_ptr++;
516 parse_info->look_ch = ((unsigned char *) cp0)[1];
519 if (!parse_info->cmap)
522 mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);
525 parse_info->expr_ptr = (const unsigned char *) cp0;
526 parse_info->look_ch = ((unsigned char **) mapto)[i][0];
527 logf (LOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);
531 static int lex_sub(struct DFA_parse *parse_info)
534 while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)
535 if (parse_info->look_ch == '\"')
538 return map_l_char (parse_info);
539 parse_info->inside_string = !parse_info->inside_string;
541 else if (esc || parse_info->inside_string)
542 return map_l_char (parse_info);
543 else if (parse_info->look_ch == '[')
544 return read_charset(parse_info);
548 for (cc = parse_info->charMap; *cc; cc += 2)
549 if (*cc == (int) (parse_info->look_ch))
552 --parse_info->expr_ptr;
555 return map_l_char (parse_info);
560 static void lex (struct DFA_parse *parse_info)
562 parse_info->lookahead = lex_sub (parse_info);
565 static const char *str_char (unsigned c)
569 if (c < 32 || c >= 127)
582 sprintf (s+1, "x%02x", c);
604 static void out_char (int c)
606 printf ("%s", str_char (c));
609 static void term_Tnode (struct DFA_parse *parse_info)
611 struct Tblock *t, *t1;
612 for (t = parse_info->start; (t1 = t);)
620 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info)
624 if (parse_info->use_Tnode == parse_info->max_Tnode)
626 tnew = (struct Tblock *) imalloc (sizeof(struct Tblock));
627 tnew->tarray = (struct Tnode *) imalloc (TADD*sizeof(struct Tnode));
630 if (parse_info->use_Tnode == 0)
631 parse_info->start = tnew;
633 parse_info->end->next = tnew;
634 parse_info->end = tnew;
636 parse_info->max_Tnode += TADD;
638 return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);
641 struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset)
643 struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);
644 int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);
650 tn0->pos = ++parse_info->position;
651 ch1 = travi_BSet (parse_info->charset, charset, ch0+1) ;
653 tn0->u.ch[1] = parse_info->charset->size;
656 tn0->u.ch[1] = ch1-1;
657 while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)
659 tn1 = mk_Tnode(parse_info);
663 tn1 = tn0->u.p[1] = mk_Tnode(parse_info);
665 tn1->pos = ++(parse_info->position);
666 if ((ch1 = travi_BSet (parse_info->charset, charset,
669 tn1->u.ch[1] = parse_info->charset->size;
672 tn1->u.ch[1] = ch1-1;
679 static void del_followpos (struct DFA_parse *parse_info)
681 ifree (parse_info->followpos);
684 static void init_pos (struct DFA_parse *parse_info)
686 parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
687 * (1+parse_info->position));
690 static void del_pos (struct DFA_parse *parse_info)
692 ifree (parse_info->posar);
695 static void add_follow (struct DFA_parse *parse_info,
696 Set lastpos, Set firstpos)
700 parse_info->followpos[lastpos->value] =
701 union_Set (parse_info->poset,
702 parse_info->followpos[lastpos->value], firstpos);
703 lastpos = lastpos->next;
707 static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n)
709 struct Tnode **posar = parse_info->posar;
710 SetType poset = parse_info->poset;
715 dfa_trav (parse_info, n->u.p[0]);
716 dfa_trav (parse_info, n->u.p[1]);
717 n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;
718 n->firstpos = mk_Set (poset);
719 n->firstpos = union_Set (poset, n->firstpos, n->u.p[0]->firstpos);
720 if (n->u.p[0]->nullable)
721 n->firstpos = union_Set (poset, n->firstpos, n->u.p[1]->firstpos);
722 n->lastpos = mk_Set (poset);
723 n->lastpos = union_Set (poset, n->lastpos, n->u.p[1]->lastpos);
724 if (n->u.p[1]->nullable)
725 n->lastpos = union_Set (poset, n->lastpos, n->u.p[0]->lastpos);
727 add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);
729 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
730 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
731 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
732 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
737 dfa_trav (parse_info, n->u.p[0]);
738 dfa_trav (parse_info, n->u.p[1]);
739 n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;
741 n->firstpos = merge_Set (poset, n->u.p[0]->firstpos,
742 n->u.p[1]->firstpos);
743 n->lastpos = merge_Set (poset, n->u.p[0]->lastpos,
745 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
746 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
747 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
748 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
753 dfa_trav (parse_info, n->u.p[0]);
754 n->nullable = n->u.p[0]->nullable;
755 n->firstpos = n->u.p[0]->firstpos;
756 n->lastpos = n->u.p[0]->lastpos;
757 add_follow (parse_info, n->lastpos, n->firstpos);
762 dfa_trav (parse_info, n->u.p[0]);
764 n->firstpos = n->u.p[0]->firstpos;
765 n->lastpos = n->u.p[0]->lastpos;
766 add_follow (parse_info, n->lastpos, n->firstpos);
772 n->lastpos = mk_Set (poset);
773 n->firstpos = mk_Set (poset);
780 n->firstpos = mk_Set (poset);
781 n->firstpos = add_Set (poset, n->firstpos, n->pos);
782 n->lastpos = mk_Set (poset);
783 n->lastpos = add_Set (poset, n->lastpos, n->pos);
787 printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);
788 else if (n->u.ch[1] > n->u.ch[0])
791 out_char (n->u.ch[0]);
792 if (n->u.ch[1] > n->u.ch[0]+1)
794 out_char (n->u.ch[1]);
798 out_char (n->u.ch[0]);
803 printf ("\n nullable : %c\n", n->nullable ? '1' : '0');
804 printf (" firstpos :");
805 pr_Set (poset, n->firstpos);
806 printf (" lastpos :");
807 pr_Set (poset, n->lastpos);
811 static void init_followpos (struct DFA_parse *parse_info)
815 parse_info->followpos = fa =
816 (Set *) imalloc (sizeof(Set) * (1+parse_info->position));
817 for (i = parse_info->position+1; --i >= 0; fa++)
818 *fa = mk_Set (parse_info->poset);
821 static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
828 struct DFA_state *dfa_from, *dfa_to;
829 struct Tnode **posar;
835 posar = parse_info->posar;
836 max_char = parse_info->charset->size;
837 pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));
839 tran_set = cp_Set (parse_info->poset, parse_info->root->firstpos);
840 i = add_DFA_state (dfas, &tran_set, &dfa_from);
842 dfa_from->rule_no = 0;
843 poset = parse_info->poset;
844 followpos = parse_info->followpos;
845 while ((dfa_from = get_DFA_state (dfas)))
849 for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next)
850 if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char)
851 *pos_i++ = tran_set->value;
856 c = posar[tran_set->value]->u.ch[1];
861 dfa_from->rule_no = -i;
862 dfa_from->rule_nno = -j;
864 for (char_1 = 0; char_1 <= max_char; char_1++)
867 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
868 if (posar[i]->u.ch[1] >= char_1
869 && (c=posar[i]->u.ch[0]) < char_0)
877 if (char_0 > max_char)
882 tran_set = mk_Set (poset);
883 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
885 if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)
886 char_1 = c - 1; /* forward chunk */
887 else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)
888 char_1 = c; /* backward chunk */
889 if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)
890 tran_set = union_Set (poset, tran_set, followpos[i]);
894 add_DFA_state (dfas, &tran_set, &dfa_to);
895 add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no);
900 sort_DFA_states (dfas);
903 static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
906 struct DFA_tran *tran;
907 int prev_no, i, c, no;
909 for (no=0; no < dfas->no; ++no)
911 s = dfas->sortarray[no];
912 assert (s->no == no);
913 printf ("DFA state %d", no);
915 printf ("#%d", s->rule_no);
917 printf (" #%d", s->rule_nno);
920 pr_Set (parse_info->poset, s->set);
922 for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)
924 if (prev_no != tran->to)
928 printf (" goto %d on [", tran->to);
931 for (c = tran->ch[0]; c <= tran->ch[1]; c++)
932 printf ("%s", str_char(c));
940 static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas)
944 printf ("%d/%d tree nodes used, %d bytes each\n",
945 parse_info->use_Tnode, parse_info->max_Tnode, sizeof (struct Tnode));
946 k = inf_BSetHandle (parse_info->charset, &i, &j);
947 printf ("%ld/%ld character sets, %d bytes each\n",
948 i/k, j/k, k*sizeof(BSetWord));
949 k = inf_SetType (parse_info->poset, &i, &j);
950 printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);
951 printf ("%d DFA states\n", dfas->no);
954 static void pr_followpos (struct DFA_parse *parse_info)
956 struct Tnode **posar = parse_info->posar;
958 printf ("\nfollowsets:\n");
959 for (i=1; i <= parse_info->position; i++)
962 pr_Set (parse_info->poset, parse_info->followpos[i]);
965 if (posar[i]->u.ch[0] < 0)
966 printf ("#%d", -posar[i]->u.ch[0]);
967 else if (posar[i]->u.ch[1] > posar[i]->u.ch[0])
970 out_char (posar[i]->u.ch[0]);
971 if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1)
973 out_char (posar[i]->u.ch[1]);
977 out_char (posar[i]->u.ch[0]);
983 void dfa_parse_cmap_clean (struct DFA *d)
985 struct DFA_parse *dfa = d->parse_info;
990 dfa->charMapSize = 7;
991 dfa->charMap = imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
996 void dfa_parse_cmap_new (struct DFA *d, const int *cmap)
998 struct DFA_parse *dfa = d->parse_info;
1003 for (cp = cmap; *cp; cp += 2)
1005 size = cp - cmap + 1;
1006 if (size > dfa->charMapSize)
1009 ifree (dfa->charMap);
1010 dfa->charMapSize = size;
1011 dfa->charMap = imalloc (size * sizeof(*dfa->charMap));
1013 memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));
1016 void dfa_parse_cmap_del (struct DFA *d, int from)
1018 struct DFA_parse *dfa = d->parse_info;
1022 for (cc = dfa->charMap; *cc; cc += 2)
1025 while ((cc[0] = cc[2]))
1034 void dfa_parse_cmap_add (struct DFA *d, int from, int to)
1036 struct DFA_parse *dfa = d->parse_info;
1041 for (cc = dfa->charMap; *cc; cc += 2)
1047 indx = cc - dfa->charMap;
1048 size = dfa->charMapSize;
1051 int *cn = imalloc ((size+16) * sizeof(*dfa->charMap));
1052 memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));
1053 ifree (dfa->charMap);
1055 dfa->charMapSize = size+16;
1057 dfa->charMap[indx] = from;
1058 dfa->charMap[indx+1] = to;
1059 dfa->charMap[indx+2] = 0;
1062 void dfa_parse_cmap_thompson (struct DFA *d)
1064 static int thompson_chars[] =
1080 dfa_parse_cmap_new (d, thompson_chars);
1083 static struct DFA_parse *dfa_parse_init (void)
1085 struct DFA_parse *parse_info =
1086 (struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
1088 parse_info->charset = mk_BSetHandle (255, 20);
1089 parse_info->position = 0;
1090 parse_info->rule = 0;
1091 parse_info->root = NULL;
1093 parse_info->anyset = mk_BSet (&parse_info->charset);
1094 res_BSet (parse_info->charset, parse_info->anyset);
1095 add_BSet (parse_info->charset, parse_info->anyset, '\n');
1096 com_BSet (parse_info->charset, parse_info->anyset);
1097 parse_info->use_Tnode = parse_info->max_Tnode = 0;
1098 parse_info->start = parse_info->end = NULL;
1099 parse_info->charMap = NULL;
1100 parse_info->charMapSize = 0;
1101 parse_info->cmap = NULL;
1105 static void rm_dfa_parse (struct DFA_parse **dfap)
1107 struct DFA_parse *parse_info = *dfap;
1108 assert (parse_info);
1109 term_Tnode(parse_info);
1110 rm_BSetHandle (&parse_info->charset);
1111 ifree (parse_info->charMap);
1116 static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk)
1118 struct DFA_states *dfas;
1119 struct DFA_parse *parse_info = dfap;
1121 assert (poset_chunk > 10);
1124 parse_info->poset = mk_SetType (poset_chunk);
1125 init_pos(parse_info);
1126 init_followpos(parse_info);
1127 assert (parse_info->root);
1128 dfa_trav (parse_info, parse_info->root);
1130 if (debug_dfa_followpos)
1131 pr_followpos(parse_info);
1132 init_DFA_states (&dfas, parse_info->poset, STATE_HASH);
1133 mk_dfa_tran (parse_info, dfas);
1135 pr_tran (parse_info, dfas);
1137 pr_verbose (parse_info, dfas);
1138 del_pos(parse_info);
1139 del_followpos(parse_info);
1140 rm_SetType (parse_info->poset);
1144 struct DFA *dfa_init (void)
1148 dfa = imalloc (sizeof(*dfa));
1149 dfa->parse_info = dfa_parse_init ();
1150 dfa->state_info = NULL;
1152 dfa_parse_cmap_thompson (dfa);
1156 void dfa_set_cmap (struct DFA *dfa, void *vp,
1157 const char **(*cmap)(void *vp, const char **from, int len))
1159 dfa->parse_info->cmap = cmap;
1160 dfa->parse_info->cmap_data = vp;
1163 int dfa_parse (struct DFA *dfa, const char **pattern)
1166 struct DFA_parse *parse_info;
1169 assert (dfa->parse_info);
1170 parse_info = dfa->parse_info;
1171 do_parse (parse_info, pattern, &top);
1172 if (parse_info->err_code)
1173 return parse_info->err_code;
1174 if ( !dfa->parse_info->root )
1175 dfa->parse_info->root = top;
1180 n = mk_Tnode (parse_info);
1182 n->u.p[0] = dfa->parse_info->root;
1184 dfa->parse_info->root = n;
1189 void dfa_mkstate (struct DFA *dfa)
1193 dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK);
1194 dfa->no_states = dfa->state_info->no;
1195 dfa->states = dfa->state_info->sortarray;
1196 rm_dfa_parse (&dfa->parse_info);
1199 void dfa_delete (struct DFA **dfap)
1203 if ((*dfap)->parse_info)
1204 rm_dfa_parse (&(*dfap)->parse_info);
1205 if ((*dfap)->state_info)
1206 rm_DFA_states (&(*dfap)->state_info);