1 /* $Id: grepper.c,v 1.15 2006-05-10 08:13:18 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
30 #include <idzebra/util.h>
31 #include <yaz/yaz-util.h>
/* File-scope flag, initially 0.
   NOTE(review): presumably set by the -n option and tested before the
   line-number printf in go(); neither line is visible here -- confirm. */
36 static int show_line = 0;
/* Word type of the bit vectors manipulated by set_bit/reset_bit/get_bit. */
38 typedef unsigned MatchWord;
42 int n; /* number of MatchWords per bit vector; computed from the DFA state count in mk_MatchContext */
43 int range; /* max no. of errors; go() keeps range+1 bit vectors per buffer */
44 MatchWord *Sc; /* mask table: 256 rows of n words, filled in by mk_MatchContext */
/* Byte size of the input buffer allocated in go(); the buffer indices there
   are compared against it after each increment. */
47 #define INFBUF_SIZE 16384
51 static INLINE void set_bit (MatchContext *mc, MatchWord *m, int ch, int state)
53 int off = state & (WORD_BITS-1);
54 int wno = state / WORD_BITS;
56 m[mc->n * ch + wno] |= 1<<off;
59 static INLINE void reset_bit (MatchContext *mc, MatchWord *m, int ch,
62 int off = state & (WORD_BITS-1);
63 int wno = state / WORD_BITS;
65 m[mc->n * ch + wno] &= ~(1<<off);
68 static INLINE MatchWord get_bit (MatchContext *mc, MatchWord *m, int ch,
71 int off = state & (WORD_BITS-1);
72 int wno = state / WORD_BITS;
74 return m[mc->n * ch + wno] & (1<<off);
/*
 * mk_MatchContext: allocate a MatchContext for dfa and fill its mask table.
 *
 * n becomes (no_states + WORD_BITS) / WORD_BITS words per bit vector, and Sc
 * is allocated with icalloc as 256 rows of n words.  For every state i and
 * every transition leaving it, bit i is set in the row of each character in
 * the transition's range ch[0]..ch[1].
 *
 * NOTE(review): the local declarations, the use of the `range` argument
 * (presumably stored in mc->range) and the return statement are not visible
 * in this excerpt.
 */
77 static MatchContext *mk_MatchContext (struct DFA *dfa, int range)
79 MatchContext *mc = imalloc (sizeof(*mc));
/* one bit per state, rounded up to whole words */
82 mc->n = (dfa->no_states+WORD_BITS) / WORD_BITS;
/* 256 rows, indexed by character code in set_bit below */
84 mc->Sc = icalloc (sizeof(*mc->Sc) * 256 * mc->n);
86 for (i=0; i<dfa->no_states; i++)
89 struct DFA_state *state = dfa->states[i];
91 for (j=0; j<state->tran_no; j++)
/* inclusive character range of transition j */
94 int ch0 = state->trans[j].ch[0];
95 int ch1 = state->trans[j].ch[1];
96 assert (ch0 >= 0 && ch1 >= 0);
/* mark state i in the row of every character of the range */
98 for (ch = ch0; ch <= ch1; ch++)
99 set_bit (mc, mc->Sc, ch, i);
/*
 * mask_shift: compute into Rdst the states reached from the states in Rsrc
 * by transitions whose character range contains ch.
 *
 * States are handled four at a time (s, s+1, s+2, s+3) inside a loop of
 * WORD_BITS/4 steps per source word.  For each handled state, a transition
 * with ch[0] <= ch <= ch[1] sets the bit of its target state in Rdst.
 *
 * NOTE(review): the initialisation of Rdst, the tests on `mask` that select
 * which states are handled, the loops that step `i` over the transitions,
 * and the action taken when s >= dfa->no_states are not visible in this
 * excerpt.
 */
106 static void mask_shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
107 struct DFA *dfa, int ch)
110 MatchWord *Rsrc_p = Rsrc, mask;
/* covers words 1..n-1 of the vector; the loop body is not visible here */
113 for (j = 1; j<mc->n; j++)
/* one step per group of four bits of the current word */
118 for (j = 0; j<WORD_BITS/4; j++)
/* state s */
124 struct DFA_state *state = dfa->states[s];
125 int i = state->tran_no;
127 if (ch >= state->trans[i].ch[0] &&
128 ch <= state->trans[i].ch[1])
129 set_bit (mc, Rdst, 0, state->trans[i].to);
/* state s+1 */
133 struct DFA_state *state = dfa->states[s+1];
134 int i = state->tran_no;
136 if (ch >= state->trans[i].ch[0] &&
137 ch <= state->trans[i].ch[1])
138 set_bit (mc, Rdst, 0, state->trans[i].to);
/* state s+2 */
142 struct DFA_state *state = dfa->states[s+2];
143 int i = state->tran_no;
145 if (ch >= state->trans[i].ch[0] &&
146 ch <= state->trans[i].ch[1])
147 set_bit (mc, Rdst, 0, state->trans[i].to);
/* state s+3 */
151 struct DFA_state *state = dfa->states[s+3];
152 int i = state->tran_no;
154 if (ch >= state->trans[i].ch[0] &&
155 ch <= state->trans[i].ch[1])
156 set_bit (mc, Rdst, 0, state->trans[i].to);
/* all states have been visited */
160 if (s >= dfa->no_states)
/*
 * shift: compute into Rdst the states reached from the states in Rsrc by
 * any transition, regardless of the input character.
 *
 * Same four-states-at-a-time layout as mask_shift, but no character test is
 * applied before the target bit of a transition is set in Rdst.
 *
 * NOTE(review): the remaining parameter line, the initialisation of Rdst,
 * the tests on `mask`, the loops that step `i` over the transitions, and
 * the action taken when s >= dfa->no_states are not visible in this excerpt.
 */
167 static void shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
171 MatchWord *Rsrc_p = Rsrc, mask;
/* covers every word of the vector; the loop body is not visible here */
172 for (j = 0; j<mc->n; j++)
/* one step per group of four bits of the current word */
177 for (j = 0; j<WORD_BITS/4; j++)
/* state s */
183 struct DFA_state *state = dfa->states[s];
184 int i = state->tran_no;
186 set_bit (mc, Rdst, 0, state->trans[i].to);
/* state s+1 */
190 struct DFA_state *state = dfa->states[s+1];
191 int i = state->tran_no;
193 set_bit (mc, Rdst, 0, state->trans[i].to);
/* state s+2 */
197 struct DFA_state *state = dfa->states[s+2];
198 int i = state->tran_no;
200 set_bit (mc, Rdst, 0, state->trans[i].to);
/* state s+3 */
204 struct DFA_state *state = dfa->states[s+3];
205 int i = state->tran_no;
207 set_bit (mc, Rdst, 0, state->trans[i].to);
/* all states have been visited */
211 if (s >= dfa->no_states)
218 static void or (MatchContext *mc, MatchWord *Rdst,
219 MatchWord *Rsrc1, MatchWord *Rsrc2)
222 for (i = 0; i<mc->n; i++)
223 Rdst[i] = Rsrc1[i] | Rsrc2[i];
/*
 * go: run the matcher over the stream inf, one character at a time.
 *
 * Rj and Rj1 each hold mc->range+1 bit vectors of mc->n words (row d lives
 * at offset d*mc->n); Rj_a, Rj_b and Rj_c are single-vector scratch buffers.
 *
 * NOTE(review): many lines of this function are not visible in this excerpt
 * (local declarations, the action taken on a match, the hand-over between
 * Rj and Rj1, clean-up and the return value).  The comments below describe
 * only the statements that are shown.
 */
227 static int go (MatchContext *mc, struct DFA *dfa, FILE *inf)
229 MatchWord *Rj, *Rj1, *Rj_a, *Rj_b, *Rj_c;
236 infbuf = imalloc (INFBUF_SIZE);
238 Rj = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
239 Rj1 = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
240 Rj_a = icalloc (mc->n * sizeof(*Rj));
241 Rj_b = icalloc (mc->n * sizeof(*Rj));
242 Rj_c = icalloc (mc->n * sizeof(*Rj));
/* initial vector: bit 0 (state 0) set in row 0 */
244 set_bit (mc, Rj, 0, 0);
245 for (d = 1; d<=mc->range; d++)
/* row d starts as a copy of row d-1 ... */
248 memcpy (Rj + mc->n * d, Rj + mc->n * (d-1), mc->n * sizeof(*Rj));
/* ... and gains transition targets of the states set in row d-1 */
249 for (s = 0; s<dfa->no_states; s++)
251 if (get_bit (mc, Rj, d-1, s))
253 struct DFA_state *state = dfa->states[s];
254 int i = state->tran_no;
256 set_bit (mc, Rj, d, state->trans[i].to);
/* main loop: one iteration per input character until EOF */
260 while ((ch = getc (inf)) != EOF)
/* keep the character in the input buffer at position inf_ptr */
264 infbuf[inf_ptr] = ch;
/* line number, width 5, followed by a colon */
271 printf ("%5d:", lineno);
276 } while (infbuf[i] != '\n');
279 if (++i == INFBUF_SIZE)
282 } while (infbuf[i] != '\n');
287 if (++inf_ptr == INFBUF_SIZE)
/* row 0 of the next step: transitions on ch from row 0 of Rj */
289 mask_shift (mc, Rj1, Rj, dfa, ch);
/* rows 1..range: each is the OR of the four terms numbered below */
290 for (d = 1; d <= mc->range; d++)
292 mask_shift (mc, Rj_b, Rj+d*mc->n, dfa, ch); /* 1 */
294 or (mc, Rj_a, Rj+(d-1)*mc->n, Rj1+(d-1)*mc->n); /* 2,3 */
296 shift (mc, Rj_c, Rj_a, dfa);
298 or (mc, Rj_a, Rj_b, Rj_c); /* 1,2,3*/
300 or (mc, Rj1+d*mc->n, Rj_a, Rj+(d-1)*mc->n); /* 1,2,3,4 */
/* look for a state with a non-zero rule_no whose bit is set in the
   last row (row mc->range) of Rj1 */
302 for (s = 0; s<dfa->no_states; s++)
304 if (dfa->states[s]->rule_no)
305 if (get_bit (mc, Rj1+mc->range*mc->n, 0, s))
/* clear bit number dfa->no_states in every row of Rj1 */
308 for (d = 0; d <= mc->range; d++)
309 reset_bit (mc, Rj1+d*mc->n, 0, dfa->no_states);
/*
 * grep_file: open fname for reading, log a fatal error (with errno) if it
 * cannot be opened, and build a MatchContext for dfa with the given range.
 *
 * NOTE(review): main() also calls this with fname == NULL; the handling of
 * that case, the call that runs the match, clean-up and the return value
 * are not visible in this excerpt.
 */
323 static int grep_file (struct DFA *dfa, const char *fname, int range)
330 inf = fopen (fname, "r");
333 yaz_log (YLOG_FATAL|YLOG_ERRNO, "cannot open `%s'", fname);
340 mc = mk_MatchContext (dfa, range);
/*
 * main: command-line entry point.
 *
 * Arguments are scanned with options() using the spec "nr:dsv:" until it
 * returns -2.  The visible handlers parse the pattern into the DFA with
 * dfa_parse, call grep_file for a file argument, initialise yaz logging,
 * set debug_dfa_followpos, and report unknown options.  If no file was
 * given, grep_file is called with a NULL file name.
 *
 * NOTE(review): the switch/if structure that ties each statement below to a
 * specific option or argument is not visible in this excerpt.
 */
349 int main (int argc, char **argv)
354 const char *pattern = NULL;
356 struct DFA *dfa = dfa_init();
359 while ((ret = options ("nr:dsv:", argv, argc, &arg)) != -2)
/* compile the pattern into the DFA; the result is kept in i */
367 i = dfa_parse (dfa, &pattern);
370 fprintf (stderr, "%s: illegal pattern\n", prog);
/* search the file named by arg */
378 grep_file (dfa, arg, range);
383 yaz_log_init (yaz_log_mask_str(arg), prog, NULL);
392 debug_dfa_followpos = 1;
405 yaz_log (YLOG_FATAL, "Unknown option '-%s'", arg);
411 fprintf (stderr, "usage:\n "
412 " %s [-d] [-n] [-r n] [-s] [-v n] pattern file ..\n", prog);
/* no file arguments: call grep_file without a file name */
415 else if (no_files == 0)
417 grep_file (dfa, NULL, range);
425 * indent-tabs-mode: nil
427 * vim: shiftwidth=4 tabstop=8 expandtab