2 * Copyright (C) 1994-1999, Index Data
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.8 1999-02-02 14:50:07 adam
8 * Updated WIN32 code specific sections. Changed header.
10 * Revision 1.7 1996/10/29 13:57:24 adam
11 * Include of zebrautl.h instead of alexutil.h.
13 * Revision 1.6 1996/01/08 09:09:20 adam
14 * Function dfa_parse got 'const' string argument.
15 * New functions to define char mappings made public.
17 * Revision 1.5 1995/09/04 12:33:26 adam
18 * Various cleanup. YAZ util used instead.
20 * Revision 1.4 1995/01/24 16:00:21 adam
21 * Added -ansi to CFLAGS.
22 * Some changes to the dfa module.
24 * Revision 1.3 1994/10/04 17:46:43 adam
25 * Function options now returns arg with error option.
27 * Revision 1.2 1994/10/03 17:22:18 adam
28 * Optimization of grepper.
30 * Revision 1.1 1994/09/27 16:31:18 adam
31 * First version of grepper: grep with error correction.
45 static int show_line = 0;
47 typedef unsigned MatchWord;
51 int n; /* no of MatchWord needed */
52 int range; /* max no. of errors */
53 MatchWord *Sc; /* Mask Sc */
56 #define INFBUF_SIZE 16384
60 static INLINE void set_bit (MatchContext *mc, MatchWord *m, int ch, int state)
62 int off = state & (WORD_BITS-1);
63 int wno = state / WORD_BITS;
65 m[mc->n * ch + wno] |= 1<<off;
68 static INLINE void reset_bit (MatchContext *mc, MatchWord *m, int ch,
71 int off = state & (WORD_BITS-1);
72 int wno = state / WORD_BITS;
74 m[mc->n * ch + wno] &= ~(1<<off);
77 static INLINE MatchWord get_bit (MatchContext *mc, MatchWord *m, int ch,
80 int off = state & (WORD_BITS-1);
81 int wno = state / WORD_BITS;
83 return m[mc->n * ch + wno] & (1<<off);
86 static MatchContext *mk_MatchContext (struct DFA *dfa, int range)
88 MatchContext *mc = imalloc (sizeof(*mc));
91 mc->n = (dfa->no_states+WORD_BITS) / WORD_BITS;
93 mc->Sc = icalloc (sizeof(*mc->Sc) * 256 * mc->n);
95 for (i=0; i<dfa->no_states; i++)
98 struct DFA_state *state = dfa->states[i];
100 for (j=0; j<state->tran_no; j++)
103 int ch0 = state->trans[j].ch[0];
104 int ch1 = state->trans[j].ch[1];
105 assert (ch0 >= 0 && ch1 >= 0);
107 for (ch = ch0; ch <= ch1; ch++)
108 set_bit (mc, mc->Sc, ch, i);
115 static void mask_shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
116 struct DFA *dfa, int ch)
119 MatchWord *Rsrc_p = Rsrc, mask;
122 for (j = 1; j<mc->n; j++)
127 for (j = 0; j<WORD_BITS/4; j++)
133 struct DFA_state *state = dfa->states[s];
134 int i = state->tran_no;
136 if (ch >= state->trans[i].ch[0] &&
137 ch <= state->trans[i].ch[1])
138 set_bit (mc, Rdst, 0, state->trans[i].to);
142 struct DFA_state *state = dfa->states[s+1];
143 int i = state->tran_no;
145 if (ch >= state->trans[i].ch[0] &&
146 ch <= state->trans[i].ch[1])
147 set_bit (mc, Rdst, 0, state->trans[i].to);
151 struct DFA_state *state = dfa->states[s+2];
152 int i = state->tran_no;
154 if (ch >= state->trans[i].ch[0] &&
155 ch <= state->trans[i].ch[1])
156 set_bit (mc, Rdst, 0, state->trans[i].to);
160 struct DFA_state *state = dfa->states[s+3];
161 int i = state->tran_no;
163 if (ch >= state->trans[i].ch[0] &&
164 ch <= state->trans[i].ch[1])
165 set_bit (mc, Rdst, 0, state->trans[i].to);
169 if (s >= dfa->no_states)
176 static void shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
180 MatchWord *Rsrc_p = Rsrc, mask;
181 for (j = 0; j<mc->n; j++)
186 for (j = 0; j<WORD_BITS/4; j++)
192 struct DFA_state *state = dfa->states[s];
193 int i = state->tran_no;
195 set_bit (mc, Rdst, 0, state->trans[i].to);
199 struct DFA_state *state = dfa->states[s+1];
200 int i = state->tran_no;
202 set_bit (mc, Rdst, 0, state->trans[i].to);
206 struct DFA_state *state = dfa->states[s+2];
207 int i = state->tran_no;
209 set_bit (mc, Rdst, 0, state->trans[i].to);
213 struct DFA_state *state = dfa->states[s+3];
214 int i = state->tran_no;
216 set_bit (mc, Rdst, 0, state->trans[i].to);
220 if (s >= dfa->no_states)
227 static void or (MatchContext *mc, MatchWord *Rdst,
228 MatchWord *Rsrc1, MatchWord *Rsrc2)
231 for (i = 0; i<mc->n; i++)
232 Rdst[i] = Rsrc1[i] | Rsrc2[i];
236 static int go (MatchContext *mc, struct DFA *dfa, FILE *inf)
238 MatchWord *Rj, *Rj1, *Rj_a, *Rj_b, *Rj_c;
245 infbuf = imalloc (INFBUF_SIZE);
247 Rj = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
248 Rj1 = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
249 Rj_a = icalloc (mc->n * sizeof(*Rj));
250 Rj_b = icalloc (mc->n * sizeof(*Rj));
251 Rj_c = icalloc (mc->n * sizeof(*Rj));
253 set_bit (mc, Rj, 0, 0);
254 for (d = 1; d<=mc->range; d++)
257 memcpy (Rj + mc->n * d, Rj + mc->n * (d-1), mc->n * sizeof(*Rj));
258 for (s = 0; s<dfa->no_states; s++)
260 if (get_bit (mc, Rj, d-1, s))
262 struct DFA_state *state = dfa->states[s];
263 int i = state->tran_no;
265 set_bit (mc, Rj, d, state->trans[i].to);
269 while ((ch = getc (inf)) != EOF)
273 infbuf[inf_ptr] = ch;
280 printf ("%5d:", lineno);
285 } while (infbuf[i] != '\n');
288 if (++i == INFBUF_SIZE)
291 } while (infbuf[i] != '\n');
296 if (++inf_ptr == INFBUF_SIZE)
298 mask_shift (mc, Rj1, Rj, dfa, ch);
299 for (d = 1; d <= mc->range; d++)
301 mask_shift (mc, Rj_b, Rj+d*mc->n, dfa, ch); /* 1 */
303 or (mc, Rj_a, Rj+(d-1)*mc->n, Rj1+(d-1)*mc->n); /* 2,3 */
305 shift (mc, Rj_c, Rj_a, dfa);
307 or (mc, Rj_a, Rj_b, Rj_c); /* 1,2,3*/
309 or (mc, Rj1+d*mc->n, Rj_a, Rj+(d-1)*mc->n); /* 1,2,3,4 */
311 for (s = 0; s<dfa->no_states; s++)
313 if (dfa->states[s]->rule_no)
314 if (get_bit (mc, Rj1+mc->range*mc->n, 0, s))
317 for (d = 0; d <= mc->range; d++)
318 reset_bit (mc, Rj1+d*mc->n, 0, dfa->no_states);
332 static int grep_file (struct DFA *dfa, const char *fname, int range)
339 inf = fopen (fname, "r");
342 logf (LOG_FATAL|LOG_ERRNO, "cannot open `%s'", fname);
349 mc = mk_MatchContext (dfa, range);
358 int main (int argc, char **argv)
363 const char *pattern = NULL;
365 struct DFA *dfa = dfa_init();
368 while ((ret = options ("nr:dsv:", argv, argc, &arg)) != -2)
376 i = dfa_parse (dfa, &pattern);
379 fprintf (stderr, "%s: illegal pattern\n", prog);
387 grep_file (dfa, arg, range);
392 log_init (log_mask_str(arg), prog, NULL);
401 debug_dfa_followpos = 1;
414 logf (LOG_FATAL, "Unknown option '-%s'", arg);
420 fprintf (stderr, "usage:\n "
421 " %s [-d] [-n] [-r n] [-s] [-v n] pattern file ..\n", prog);
424 else if (no_files == 0)
426 grep_file (dfa, NULL, range);