1 /* This file is part of the Zebra server.
2 Copyright (C) 1995-2008 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
26 #include <yaz/tpath.h>
27 #include <idzebra/util.h>
29 #include <idzebra/recgrs.h>
34 #if MAJOR_VERSION >= 8
35 #define HAVE_TCL_OBJECTS
41 #define F_WIN_EOF 2000000000
45 #define REGX_PATTERN 1
50 #define REGX_CONTEXT 6
/* One action in a rule's action list; nodes are chained through `next`.
 * Elsewhere in this file the two pointers below are reached as
 * u.pattern.dfa and u.code, and each one's trailing comment names the
 * action kind it is used for. */
60 struct lexRuleAction {
64 struct DFA *dfa; /* REGX_PATTERN */
67 struct regxCode *code; /* REGX_CODE */
69 struct lexRuleAction *next;
74 struct lexRuleAction *actionList;
78 struct lexRuleInfo info;
85 struct lexRule *rules;
86 struct lexRuleInfo **fastRule;
90 struct lexRuleAction *beginActionList;
91 struct lexRuleAction *endActionList;
92 struct lexRuleAction *initActionList;
93 struct lexContext *next;
103 struct lexContext *context;
105 struct lexContext **context_stack;
106 int context_stack_size;
107 int context_stack_top;
113 Tcl_Interp *tcl_interp;
115 struct ZebraRecStream *stream;
116 off_t (*f_win_ef)(struct ZebraRecStream *s, off_t *);
118 int f_win_start; /* first byte of buffer is this file offset */
119 int f_win_end; /* last byte of buffer is this offset - 1 */
120 int f_win_size; /* size of buffer */
121 char *f_win_buf; /* buffer itself */
122 int (*f_win_rf)(struct ZebraRecStream *, char *, size_t);
123 off_t (*f_win_sf)(struct ZebraRecStream *, off_t);
125 struct lexConcatBuf *concatBuf;
127 data1_node **d1_stack;
138 struct lexSpec *spec;
/* Return a pointer to the bytes of the input stream starting at
 * start_pos, served from the window buffer spec->f_win_buf, and store in
 * *size how many bytes are available (never more than
 * end_pos - start_pos).
 *
 * Three cases are handled, in this order:
 *   1. the whole range is already inside the window;
 *   2. start_pos lies outside the window: seek and refill from start_pos;
 *   3. the range overlaps the tail of the window: move the tail to the
 *      front of the buffer and read more data behind it.
 *
 * NOTE(review): `off` is an int while start_pos is an off_t and
 * f_win_start is an int - offsets beyond INT_MAX would be truncated;
 * confirm inputs stay below that. */
142 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
145 int i, r, off = start_pos - spec->f_win_start;
/* case 1: requested range is fully buffered */
147 if (off >= 0 && end_pos <= spec->f_win_end)
149 *size = end_pos - start_pos;
150 return spec->f_win_buf + off;
/* case 2: start is before the window or at/after its end */
152 if (off < 0 || start_pos >= spec->f_win_end)
154 (*spec->f_win_sf)(spec->stream, start_pos);
155 spec->f_win_start = start_pos;
/* the buffer is allocated lazily, on first refill */
157 if (!spec->f_win_buf)
158 spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
159 *size = (*spec->f_win_rf)(spec->stream, spec->f_win_buf,
161 spec->f_win_end = spec->f_win_start + *size;
/* never report more than the caller asked for */
163 if (*size > end_pos - start_pos)
164 *size = end_pos - start_pos;
165 return spec->f_win_buf;
/* case 3: keep the bytes from start_pos onwards by shifting them to
 * the front of the buffer, then read into the space behind them */
167 for (i = 0; i<spec->f_win_end - start_pos; i++)
168 spec->f_win_buf[i] = spec->f_win_buf[i + off];
169 r = (*spec->f_win_rf)(spec->stream,
171 spec->f_win_size - i);
172 spec->f_win_start = start_pos;
173 spec->f_win_end += r;
175 if (*size > end_pos - start_pos)
176 *size = end_pos - start_pos;
177 return spec->f_win_buf;
/* Read the byte at stream offset *pos and advance *pos by one.
 * When *pos is inside the current window the byte is taken straight
 * from the buffer; otherwise a one-byte range is requested through
 * f_win_get. F_WIN_EOF in *pos is tested as a special position.
 * NOTE(review): the return value for the end-of-input case is decided
 * around the F_WIN_EOF test and the f_win_get call - confirm there. */
180 static int f_win_advance (struct lexSpec *spec, int *pos)
/* fast path: byte is already buffered */
185 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
186 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
187 if (*pos == F_WIN_EOF)
189 buf = f_win_get (spec, *pos, *pos+1, &size);
/* Dispose of the regxCode object referenced through *pp. The reference
 * on its Tcl object (taken with Tcl_IncrRefCount in regxCodeMk) is
 * dropped here. */
199 static void regxCodeDel (struct regxCode **pp)
201 struct regxCode *p = *pp;
206 Tcl_DecrRefCount (p->tcl_obj);
/* Build a regxCode object from the len bytes at buf.
 * The text is copied into a freshly allocated buffer of len+1 bytes
 * (NOTE(review): the extra byte is presumably for a terminating NUL -
 * confirm it is written). The same bytes are also wrapped in a Tcl
 * string object, on which a reference is taken. */
214 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
218 p = (struct regxCode *) xmalloc (sizeof(*p));
219 p->str = (char *) xmalloc (len+1);
220 memcpy (p->str, buf, len);
223 p->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
225 Tcl_IncrRefCount (p->tcl_obj);
/* Create a DFA set up for the pattern syntax used in filter spec files:
 * the character-map entries for space and tab are removed and an entry
 * for '/' with value 0 is added.
 * NOTE(review): presumably this makes blanks ordinary pattern characters
 * and lets '/' end a pattern (readOneSpec expects '/' after a pattern) -
 * confirm against the dfa_parse_cmap_* documentation. */
230 static struct DFA *lexSpecDFA (void)
235 dfa_parse_cmap_del (dfa, ' ');
236 dfa_parse_cmap_del (dfa, '\t');
237 dfa_parse_cmap_add (dfa, '/', 0);
/* Destroy the action list referenced through *rap.
 * The list is walked with a look-ahead pointer (ra1) so that each node
 * can be released while iterating. Pattern actions release their DFA
 * and code actions release their regxCode. */
241 static void actionListDel (struct lexRuleAction **rap)
243 struct lexRuleAction *ra1, *ra;
245 for (ra = *rap; ra; ra = ra1)
251 dfa_delete (&ra->u.pattern.dfa);
254 regxCodeDel (&ra->u.code);
/* Allocate a new lexer context called `name`.
 * The name is duplicated, a fresh spec-file DFA is attached and the
 * begin/end/init action lists start out empty. */
262 static struct lexContext *lexContextCreate (const char *name)
264 struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p));
266 p->name = xstrdup (name);
269 p->dfa = lexSpecDFA ();
272 p->beginActionList = NULL;
273 p->endActionList = NULL;
274 p->initActionList = NULL;
/* Release a lexer context: its DFA, the action list of every rule in
 * p->rules, and the context-level begin/end/init action lists. */
279 static void lexContextDestroy (struct lexContext *p)
281 struct lexRule *rp, *rp1;
283 dfa_delete (&p->dfa);
/* rp1 holds the successor so the current rule can be released */
285 for (rp = p->rules; rp; rp = rp1)
288 actionListDel (&rp->info.actionList);
291 actionListDel (&p->beginActionList);
292 actionListDel (&p->endActionList);
293 actionListDel (&p->initActionList);
/* Allocate a lexSpec named `name`.
 * Sets up a context stack with room for 100 entries, and one concat
 * buffer (initially empty) plus one data1 node slot per nesting level,
 * sized by p->maxLevel. */
298 static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh)
303 p = (struct lexSpec *) xmalloc (sizeof(*p));
/* private copy of the name (same effect as the xstrdup used in
 * lexContextCreate) */
304 p->name = (char *) xmalloc (strlen(name)+1);
305 strcpy (p->name, name);
312 p->context_stack_size = 100;
313 p->context_stack = (struct lexContext **)
314 xmalloc (sizeof(*p->context_stack) * p->context_stack_size);
318 p->concatBuf = (struct lexConcatBuf *)
319 xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
320 for (i = 0; i < p->maxLevel; i++)
322 p->concatBuf[i].max = 0;
323 p->concatBuf[i].buf = 0;
325 p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
/* Destroy the lexSpec referenced through *pp: the per-level concat
 * buffers, every context in the context list, the Tcl interpreter, the
 * stream window buffer and the context stack. */
330 static void lexSpecDestroy (struct lexSpec **pp)
333 struct lexContext *lt;
341 for (i = 0; i < p->maxLevel; i++)
342 xfree (p->concatBuf[i].buf);
343 xfree (p->concatBuf);
/* remember the successor before the context itself is destroyed */
348 struct lexContext *lt_next = lt->next;
349 lexContextDestroy (lt);
354 Tcl_DeleteInterp (p->tcl_interp);
357 xfree (p->f_win_buf);
358 xfree (p->context_stack);
/* Read the next token of a spec line, starting at *cpp.
 * Leading blanks, tabs and line breaks are skipped. Keyword characters
 * are gathered into `cmd` with upper-case letters folded to lower case,
 * and the keywords "begin", "end", "body", "context" and "init" are
 * recognised; anything else is logged as a bad command. An unexpected
 * character is logged and the rest of that word is skipped. */
364 static int readParseToken (const char **cpp, int *len)
366 const char *cp = *cpp;
/* skip leading white space */
370 while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
399 if (*cp >= 'a' && *cp <= 'z')
401 else if (*cp >= 'A' && *cp <= 'Z')
/* fold to lower case so keywords match case-insensitively */
402 cmd[i] = *cp + 'a' - 'A';
/* stop growing the index before `cmd` is full */
405 if (i < (int) sizeof(cmd)-2)
412 yaz_log (YLOG_WARN, "bad character %d %c", *cp, *cp);
/* skip to the next white space */
414 while (*cp && *cp != ' ' && *cp != '\t' &&
415 *cp != '\n' && *cp != '\r')
421 if (!strcmp (cmd, "begin"))
423 else if (!strcmp (cmd, "end"))
425 else if (!strcmp (cmd, "body"))
427 else if (!strcmp (cmd, "context"))
429 else if (!strcmp (cmd, "init"))
433 yaz_log (YLOG_WARN, "bad command %s", cmd);
/* Parse the action part of a rule from `s` and append one
 * lexRuleAction node per action through `ap`.
 * Code tokens become regxCode actions; pattern tokens get their own DFA
 * which is parsed from the text and then compiled with dfa_mkstate.
 * BEGIN and INIT are rejected with a warning in this position.
 * NOTE(review): the printf at original line 479 writes to stdout while
 * the rest of the file logs through yaz_log - confirm whether it is
 * debug-only. */
439 static int actionListMk (struct lexSpec *spec, const char *s,
440 struct lexRuleAction **ap)
446 while ((tok = readParseToken (&s, &len)))
454 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
456 regxCodeMk (&(*ap)->u.code, s, len);
460 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
/* remember whether the pattern was preceded by the BODY keyword */
462 (*ap)->u.pattern.body = bodyMark;
464 (*ap)->u.pattern.dfa = lexSpecDFA ();
466 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
472 yaz_log(YLOG_WARN, "regular expression error '%.*s'", pos, s0);
479 printf("pattern: %.*s\n", pos, s0);
480 dfa_mkstate((*ap)->u.pattern.dfa);
485 yaz_log (YLOG_WARN, "cannot use BEGIN here");
488 yaz_log (YLOG_WARN, "cannot use INIT here");
491 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
/* Parse one logical line `s` of a filter spec and add it to `spec`.
 *   CONTEXT name  - creates a context and puts it at the head of
 *                   spec->context;
 *   BEGIN/END/INIT - replaces the corresponding action list of the
 *                   context at the head of the list;
 *   otherwise     - the line is a rule: the pattern is added to the
 *                   context DFA, must be followed by '/', and its
 *                   actions are parsed into a new lexRule that is
 *                   prepended to the context's rule list.
 * A context called "main" is created here as well.
 * NOTE(review): presumably "main" is only created when no context
 * exists yet - confirm at the call.
 * NOTE(review): context_name holds 32 bytes and is filled with `len`
 * bytes plus a NUL - confirm len is limited to 31 before the memcpy. */
501 int readOneSpec (struct lexSpec *spec, const char *s)
505 struct lexContext *lc;
507 tok = readParseToken (&s, &len);
508 if (tok == REGX_CONTEXT)
510 char context_name[32];
/* the context name is delivered as a code token */
511 tok = readParseToken (&s, &len);
512 if (tok != REGX_CODE)
514 yaz_log (YLOG_WARN, "missing name after CONTEXT keyword");
519 memcpy (context_name, s, len);
520 context_name[len] = '\0';
521 lc = lexContextCreate (context_name);
522 lc->next = spec->context;
527 spec->context = lexContextCreate ("main");
/* a later BEGIN/END/INIT line replaces the earlier list */
532 actionListDel (&spec->context->beginActionList);
533 actionListMk (spec, s, &spec->context->beginActionList);
536 actionListDel (&spec->context->endActionList);
537 actionListMk (spec, s, &spec->context->endActionList);
540 actionListDel (&spec->context->initActionList);
541 actionListMk (spec, s, &spec->context->initActionList);
545 yaz_log (YLOG_LOG, "rule %d %s", spec->context->ruleNo, s);
547 r = dfa_parse (spec->context->dfa, &s);
550 yaz_log (YLOG_WARN, "regular expression error. r=%d", r);
555 yaz_log (YLOG_WARN, "expects / at end of pattern. got %c", *s);
/* new rule gets the next rule number and goes to the list head */
559 rp = (struct lexRule *) xmalloc (sizeof(*rp));
560 rp->info.no = spec->context->ruleNo++;
561 rp->next = spec->context->rules;
562 spec->context->rules = rp;
563 actionListMk (spec, s, &rp->info.actionList);
/* Load the filter spec file for `spec`.
 * The file name is built from spec->name with the suffix ".tflt" (tried
 * when a Tcl interpreter is attached) or ".flt", and opened through
 * data1_path_fopen. Logical lines are collected in a WRBUF and handed
 * to readOneSpec; lines starting with '#', white space or a line break
 * are treated specially (comment/blank handling). Afterwards every
 * context gets a fastRule table mapping rule number to rule info, and
 * its DFA is compiled with dfa_mkstate.
 * NOTE(review): sprintf into `fname` is unbounded - confirm fname is
 * large enough for spec->name plus suffix.
 * NOTE(review): the "cannot read spec file" message prints spec->name,
 * not the file name that was actually tried.
 * NOTE(review): the result of readOneSpec is not used at the call
 * below although an `errors` counter is declared. */
568 int readFileSpec (struct lexSpec *spec)
570 struct lexContext *lc;
571 int c, i, errors = 0;
577 if (spec->tcl_interp)
579 sprintf (fname, "%s.tflt", spec->name);
580 spec_inf = data1_path_fopen (spec->dh, fname, "r");
585 sprintf (fname, "%s.flt", spec->name);
586 spec_inf = data1_path_fopen (spec->dh, fname, "r");
590 yaz_log (YLOG_ERRNO|YLOG_WARN, "cannot read spec file %s", spec->name);
593 yaz_log (YLOG_LOG, "reading regx filter %s", fname);
595 if (spec->tcl_interp)
596 yaz_log (YLOG_LOG, "Tcl enabled");
602 debug_dfa_followpos = 0;
606 lineBuf = wrbuf_alloc();
/* start a new logical line */
611 wrbuf_rewind (lineBuf);
612 if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r')
/* consume the remainder of the physical line */
614 while (c != '\n' && c != EOF)
627 wrbuf_putc(lineBuf, c);
/* a following line that does not begin with blank/tab is not a
 * continuation of the current logical line */
635 if (c != ' ' && c != '\t')
/* NUL-terminate so the buffer can be parsed as a C string */
640 wrbuf_putc(lineBuf, '\0');
641 readOneSpec (spec, wrbuf_buf(lineBuf));
642 spec->lineNo += addLine;
646 wrbuf_destroy(lineBuf);
/* build the rule-number lookup table for each context */
648 for (lc = spec->context; lc; lc = lc->next)
651 lc->fastRule = (struct lexRuleInfo **)
652 xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
653 for (i = 0; i < lc->ruleNo; i++)
654 lc->fastRule[i] = NULL;
655 for (rp = lc->rules; rp; rp = rp->next)
656 lc->fastRule[rp->info.no] = &rp->info;
657 dfa_mkstate (lc->dfa);
/* File-scope pointer to a lexSpec, initially NULL.
 * NOTE(review): grs_read_regx and grs_read_tcl each declare a local
 * variable with this same name (of type struct lexSpec **), which
 * shadows this one - confirm this global is still needed. */
666 static struct lexSpec *curLexSpec = NULL;
/* Emit `elen` bytes of data at `ebuf` into the record being built.
 *
 * With an attribute name given (the branch that requires the current
 * node to be a tag), the data becomes the value of that attribute on
 * the tag: a new attribute is created, or the bytes are appended to the
 * value of an existing attribute with the same name.
 *
 * Otherwise the data is text: it is appended to the concat buffer of
 * the current nesting level, creating a DATA1N_data text node first if
 * the node at this level is not already a data node. The bytes only
 * reach the node itself later, in tagDataRelease.
 *
 * formatted_text is stored on a newly created data node. */
669 static void execData (struct lexSpec *spec,
670 const char *ebuf, int elen, int formatted_text,
671 const char *attribute_str, int attribute_len)
673 struct data1_node *res, *parent;
676 if (elen == 0) /* shouldn't happen, but it does! */
/* long data: log only the first and the last 40 bytes */
680 yaz_log (YLOG_LOG, "data(%d bytes) %.40s ... %.*s", elen,
681 ebuf, 40, ebuf + elen-40);
682 else if (elen == 1 && ebuf[0] == '\n')
684 yaz_log (YLOG_LOG, "data(new line)");
687 yaz_log (YLOG_LOG, "data(%d bytes) %.*s", elen, elen, ebuf);
689 yaz_log (YLOG_LOG, "data(%d bytes)", elen);
/* levels 0 and 1 are treated specially here; data1 nodes are only
 * built below when d1_level is at least 2 */
692 if (spec->d1_level <= 1)
695 parent = spec->d1_stack[spec->d1_level -1];
702 if (res->which != DATA1N_tag)
704 /* sweep through existing attributes.. */
705 for (ap = &res->u.tag.attributes; *ap; ap = &(*ap)->next)
706 if (strlen((*ap)->name) == attribute_len &&
707 !memcmp((*ap)->name, attribute_str, attribute_len))
711 /* new attribute. Create it with name + value */
712 *ap = nmem_malloc(spec->m, sizeof(**ap));
714 (*ap)->name = nmem_malloc(spec->m, attribute_len+1);
715 memcpy((*ap)->name, attribute_str, attribute_len);
716 (*ap)->name[attribute_len] = '\0';
718 (*ap)->value = nmem_malloc(spec->m, elen+1);
719 memcpy((*ap)->value, ebuf, elen);
720 (*ap)->value[elen] = '\0';
725 /* append to value if attribute already exists */
726 char *nv = nmem_malloc(spec->m, elen + 1 + strlen((*ap)->value));
727 strcpy(nv, (*ap)->value);
728 memcpy (nv + strlen(nv), ebuf, elen);
729 nv[strlen(nv)+elen] = '\0';
/* text data: continue an existing data node at this level ... */
735 if ((res = spec->d1_stack[spec->d1_level]) &&
736 res->which == DATA1N_data)
737 org_len = res->u.data.len;
/* ... or start a new text node under the parent */
742 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent);
743 res->u.data.what = DATA1I_text;
745 res->u.data.formatted_text = formatted_text;
746 res->u.data.data = 0;
/* link the new node after the previous sibling at this level */
748 if (spec->d1_stack[spec->d1_level])
749 spec->d1_stack[spec->d1_level]->next = res;
750 spec->d1_stack[spec->d1_level] = res;
/* grow the concat buffer (with 256 bytes of slack) when the new
 * bytes do not fit, keeping what was collected so far */
752 if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
754 char *old_buf, *new_buf;
756 spec->concatBuf[spec->d1_level].max = org_len + elen + 256;
757 new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max);
758 if ((old_buf = spec->concatBuf[spec->d1_level].buf))
760 memcpy (new_buf, old_buf, org_len);
763 spec->concatBuf[spec->d1_level].buf = new_buf;
765 memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen);
766 res->u.data.len += elen;
/* Convenience wrapper: emit plain data, i.e. execData with no attribute
 * name (attribute_str 0, attribute_len 0). */
770 static void execDataP (struct lexSpec *spec,
771 const char *ebuf, int elen, int formatted_text)
773 execData (spec, ebuf, elen, formatted_text, 0, 0);
/* Finalise the text data node at the current level, if there is one:
 * the bytes collected in this level's concat buffer are copied into the
 * node. Data longer than DATA1_LOCALDATA gets its own nmem allocation;
 * shorter data is stored in the node's built-in lbuf.
 * Asserts that the node has no data attached yet and that its length is
 * positive. */
776 static void tagDataRelease (struct lexSpec *spec)
780 if ((res = spec->d1_stack[spec->d1_level]) &&
781 res->which == DATA1N_data &&
782 res->u.data.what == DATA1I_text)
784 assert (!res->u.data.data);
785 assert (res->u.data.len > 0);
786 if (res->u.data.len > DATA1_LOCALDATA)
787 res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len);
789 res->u.data.data = res->lbuf;
790 memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf,
/* Open a variant node described by class, type and value strings (each
 * given as pointer + length).
 * Class and type are copied into local buffers, truncated to
 * DATA1_MAX_SYMBOL-1 characters, and looked up through
 * data1_getvartypeby_absyn on the record's abstract syntax. If the
 * parent is not already a variant node, a variant node is pushed first.
 * The existing variant nodes on the stack are then scanned for one with
 * the same type before the new node is created; the value is stored in
 * the node's lbuf, truncated to DATA1_LOCALDATA-1 characters.
 *
 * NOTE(review): `parent` is initialised from
 * d1_stack[d1_level - 1] before the d1_level == 0 check below, so with
 * d1_level == 0 the initialiser reads d1_stack[-1]. Consider assigning
 * parent only after the guard. */
795 static void variantBegin (struct lexSpec *spec,
796 const char *class_str, int class_len,
797 const char *type_str, int type_len,
798 const char *value_str, int value_len)
800 struct data1_node *parent = spec->d1_stack[spec->d1_level -1];
801 char tclass[DATA1_MAX_SYMBOL], ttype[DATA1_MAX_SYMBOL];
806 if (spec->d1_level == 0)
808 yaz_log (YLOG_WARN, "in variant begin. No record type defined");
/* clamp so the copy plus NUL fits the local buffer */
811 if (class_len >= DATA1_MAX_SYMBOL)
812 class_len = DATA1_MAX_SYMBOL-1;
813 memcpy (tclass, class_str, class_len);
814 tclass[class_len] = '\0';
816 if (type_len >= DATA1_MAX_SYMBOL)
817 type_len = DATA1_MAX_SYMBOL-1;
818 memcpy (ttype, type_str, type_len);
819 ttype[type_len] = '\0';
822 yaz_log (YLOG_LOG, "variant begin(%s,%s,%d)", tclass, ttype,
827 data1_getvartypeby_absyn(spec->dh, parent->root->u.root.absyn,
/* first variant below a non-variant parent: push a variant node */
831 if (parent->which != DATA1N_variant)
833 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
834 if (spec->d1_stack[spec->d1_level])
835 tagDataRelease (spec);
836 spec->d1_stack[spec->d1_level] = res;
837 spec->d1_stack[++(spec->d1_level)] = NULL;
/* walk up through the enclosing variant nodes looking for the same
 * variant type */
839 for (i = spec->d1_level-1; spec->d1_stack[i]->which == DATA1N_variant; i--)
840 if (spec->d1_stack[i]->u.variant.type == tp)
847 yaz_log (YLOG_LOG, "variant node(%d)", spec->d1_level);
849 parent = spec->d1_stack[spec->d1_level-1];
850 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
851 res->u.variant.type = tp;
/* the value lives in the node's own lbuf, truncated if needed */
853 if (value_len >= DATA1_LOCALDATA)
854 value_len =DATA1_LOCALDATA-1;
855 memcpy (res->lbuf, value_str, value_len);
856 res->lbuf[value_len] = '\0';
858 res->u.variant.value = res->lbuf;
860 if (spec->d1_stack[spec->d1_level])
861 tagDataRelease (spec);
862 spec->d1_stack[spec->d1_level] = res;
863 spec->d1_stack[++(spec->d1_level)] = NULL;
/* Trim white space from both ends of the tag given by *tag / *len.
 * The first loop finds the end of the tag without trailing white space,
 * the second counts leading white space. Neither loop dereferences *tag
 * when *len is 0.
 * NOTE(review): isspace is called with a plain char; for bytes with the
 * high bit set on platforms where char is signed the argument should be
 * cast to unsigned char. */
866 static void tagStrip (const char **tag, int *len)
870 for (i = *len; i > 0 && isspace((*tag)[i-1]); --i)
873 for (i = 0; i < *len && isspace((*tag)[i]); i++)
/* Open an element named by the `len` bytes at `tag`.
 * Warns when no record has been started (d1_level == 0). The name is
 * stripped of surrounding white space, pending text at the current
 * level is finalised, and a tag node is created under the node one
 * level up. The stack then grows by one level whose slot starts
 * empty. */
879 static void tagBegin (struct lexSpec *spec,
880 const char *tag, int len)
882 if (spec->d1_level == 0)
884 yaz_log (YLOG_WARN, "in element begin. No record type defined");
887 tagStrip (&tag, &len);
888 if (spec->d1_stack[spec->d1_level])
889 tagDataRelease (spec);
892 yaz_log (YLOG_LOG, "begin tag(%.*s, %d)", len, tag, spec->d1_level);
895 spec->d1_stack[spec->d1_level] = data1_mk_tag_n (
896 spec->dh, spec->m, tag, len, 0, spec->d1_stack[spec->d1_level -1]);
897 spec->d1_stack[++(spec->d1_level)] = NULL;
/* Close elements while the nesting level is above min_level.
 * Pending text is finalised at each level, and a tag node whose name
 * equals `tag` (same length, same bytes) is tested for.
 * NOTE(review): presumably a match, or an empty `tag`, ends the loop -
 * confirm in the loop body.
 * Callers in this file pass tag == NULL with len == 0 to close without
 * naming an element. */
900 static void tagEnd (struct lexSpec *spec, int min_level,
901 const char *tag, int len)
903 tagStrip (&tag, &len);
904 while (spec->d1_level > min_level)
906 tagDataRelease (spec);
908 if (spec->d1_level == 0)
910 if ((spec->d1_stack[spec->d1_level]->which == DATA1N_tag) &&
912 (strlen(spec->d1_stack[spec->d1_level]->u.tag.tag) ==
914 !memcmp (spec->d1_stack[spec->d1_level]->u.tag.tag, tag, len))))
918 yaz_log (YLOG_LOG, "end tag(%d)", spec->d1_level);
/* Run `dfa` over the input starting at *pptr, looking for a match.
 * On a match *mptr receives the offset where the match starts and
 * *pptr the offset just past its end. The machine is driven one byte at
 * a time via f_win_advance and restarts from state 0 when no transition
 * exists for the current byte. A state's rule_no is only taken when the
 * previous character was a newline; otherwise its rule_nno is used.
 * NOTE(review): the exact effect of `greedy` and the return values are
 * decided in the branches around the state-0 tests - confirm there. */
923 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
924 struct DFA *dfa, int greedy)
926 struct DFA_state *state = dfa->states[0];
929 unsigned char c_prev = 0;
930 int ptr = *pptr; /* current pointer */
931 int start_ptr = *pptr; /* first char of match */
932 int last_ptr = 0; /* last char of match */
933 int last_rule = 0; /* rule number of current match */
940 c = f_win_advance (spec, &ptr);
944 if (dfa->states[0] == state)
949 c = f_win_advance (spec, &ptr);
951 if (ptr == F_WIN_EOF)
965 if (--i < 0) /* no transition for character c */
969 *mptr = start_ptr; /* match starts here */
970 *pptr = last_ptr; /* match ends here (+1) */
973 state = dfa->states[0];
976 c = f_win_advance (spec, &ptr);
982 else if (c >= t->ch[0] && c <= t->ch[1])
984 state = dfa->states[t->to];
/* rule_no applies at the beginning of a line only */
985 if (state->rule_no && c_prev == '\n')
987 last_rule = state->rule_no;
990 else if (state->rule_nno)
992 last_rule = state->rule_nno;
/* Fetch the next token of an action's code text from *src; the token
 * text is reported through *tokBuf / *tokLen.
 * Token forms recognised, after skipping blanks and tabs:
 *   $N       - the text of match argument N, fetched from the stream
 *              window with f_win_get (N is range-checked against
 *              spec->arg_no);
 *   "..."    - a quoted string, up to the closing quote;
 *   newline or ';' - a separator;
 *   anything else - a word ending at white space.
 * NOTE(review): callers compare the result with 3 for "ordinary
 * argument token"; the other return codes are set at the returns of
 * each branch - confirm there. */
1003 static int execTok (struct lexSpec *spec, const char **src,
1004 const char **tokBuf, int *tokLen)
1006 const char *s = *src;
1008 while (*s == ' ' || *s == '\t')
1012 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
/* parse the decimal argument number */
1016 while (*s >= '0' && *s <= '9')
1017 n = n*10 + (*s++ -'0');
1018 if (spec->arg_no == 0)
1025 if (n >= spec->arg_no)
1027 *tokBuf = f_win_get (spec, spec->arg_start[n], spec->arg_end[n],
1031 else if (*s == '\"')
1034 while (*s && *s != '\"')
1036 *tokLen = s - *tokBuf;
1041 else if (*s == '\n' || *s == ';')
1049 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1052 *tokLen = s - *tokBuf;
1059 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1062 *tokLen = s - *tokBuf;
/* Copy `len` bytes from `src` into the caller-supplied buffer `str`.
 * NOTE(review): the name and the callers (which pass the result to
 * strcmp) suggest the copy is NUL-terminated and length-limited -
 * confirm against the size of the buffer callers pass. */
1068 static char *regxStrz (const char *src, int len, char *str)
1072 memcpy (str, src, len);
/* Tcl command "begin"; clientData is the struct lexSpec.
 * Sub-commands, selected by argv[1]:
 *   record  <absyn>            - create the record root and a top-level
 *                                tag named after the abstract syntax,
 *                                pushing both on the node stack;
 *   element <tag>              - tagBegin;
 *   variant <class> <type> <value> - variantBegin;
 *   context <name>             - push the named context on the context
 *                                stack, or warn if it is unknown.
 * NOTE(review): confirm argc is checked before argv[1] is read, and that
 * context_stack_top is checked against context_stack_size before the
 * push. */
1078 static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
1079 int argc, const char **argv)
1081 struct lexSpec *spec = (struct lexSpec *) clientData;
1084 if (!strcmp(argv[1], "record") && argc == 3)
1086 const char *absynName = argv[2];
1090 yaz_log (YLOG_LOG, "begin record %s", absynName);
1092 res = data1_mk_root (spec->dh, spec->m, absynName);
1096 spec->d1_stack[spec->d1_level++] = res;
1098 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1100 spec->d1_stack[spec->d1_level++] = res;
1102 spec->d1_stack[spec->d1_level] = NULL;
1104 else if (!strcmp(argv[1], "element") && argc == 3)
1106 tagBegin (spec, argv[2], strlen(argv[2]));
1108 else if (!strcmp (argv[1], "variant") && argc == 5)
1110 variantBegin (spec, argv[2], strlen(argv[2]),
1111 argv[3], strlen(argv[3]),
1112 argv[4], strlen(argv[4]));
1114 else if (!strcmp (argv[1], "context") && argc == 3)
1116 struct lexContext *lc = spec->context;
1118 yaz_log (YLOG_LOG, "begin context %s",argv[2]);
/* linear search of the context list by name */
1120 while (lc && strcmp (argv[2], lc->name))
1124 spec->context_stack[++(spec->context_stack_top)] = lc;
1127 yaz_log (YLOG_WARN, "unknown context %s", argv[2]);
/* Tcl command "end"; clientData is the struct lexSpec.
 * Sub-commands, selected by argv[1]:
 *   record  - finalise pending text on every open level and set
 *             stop_flag;
 *   element [-record] [name] - tagEnd; once the level has dropped to 1
 *             or below, stop_flag is set as well;
 *   context - pop the context stack unless it is already at the
 *             bottom. */
1134 static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
1135 int argc, const char **argv)
1137 struct lexSpec *spec = (struct lexSpec *) clientData;
1141 if (!strcmp (argv[1], "record"))
1143 while (spec->d1_level)
1145 tagDataRelease (spec);
1149 yaz_log (YLOG_LOG, "end record");
1151 spec->stop_flag = 1;
1153 else if (!strcmp (argv[1], "element"))
1156 const char *element = 0;
1157 if (argc >= 3 && !strcmp(argv[2], "-record"))
1166 tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
1167 if (spec->d1_level <= 1)
1170 yaz_log (YLOG_LOG, "end element end records");
1172 spec->stop_flag = 1;
1175 else if (!strcmp (argv[1], "context"))
1178 yaz_log (YLOG_LOG, "end context");
1180 if (spec->context_stack_top)
1181 (spec->context_stack_top)--;
/* Tcl command "data"; clientData is the struct lexSpec.
 * Options -text, -element <name> and -attribute <name> are parsed from
 * argv, then the data argument is passed to execData. With an element
 * name given, the data is wrapped: tagBegin before and tagEnd (down to
 * level 2) after.
 * For Tcl newer than 8.0 the argument is first converted from UTF-8 to
 * the external encoding with Tcl_UtfToExternalDString. */
1188 static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
1189 int argc, const char **argv)
1193 const char *element = 0;
1194 const char *attribute = 0;
1195 struct lexSpec *spec = (struct lexSpec *) clientData;
1199 if (!strcmp("-text", argv[argi]))
1204 else if (!strcmp("-element", argv[argi]))
1208 element = argv[argi++];
1210 else if (!strcmp("-attribute", argv[argi]))
1214 attribute = argv[argi++];
1220 tagBegin (spec, element, strlen(element));
1224 #if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)
1226 char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);
1227 execData (spec, native, strlen(native), textFlag, attribute,
1228 attribute ? strlen(attribute) : 0);
/* `native` points into ds, so ds is freed only after use */
1229 Tcl_DStringFree (&ds);
1231 execData (spec, argv[argi], strlen(argv[argi]), textFlag, attribute,
1232 attribute ? strlen(attribute) : 0);
1237 tagEnd (spec, 2, NULL, 0);
/* Tcl command "unread" [-offset N] <index>; clientData is the struct
 * lexSpec. Moves the read position (spec->ptr) back to the start of
 * match argument <index>, plus the optional offset. An index at or
 * beyond arg_no is clamped to the last argument.
 * NOTE(review): atoi gives no error indication and no lower bound on
 * `no` is visible - a negative index (or arg_no == 0) would index
 * arg_start[] below zero; confirm. */
1241 static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
1242 int argc, const char **argv)
1244 struct lexSpec *spec = (struct lexSpec *) clientData;
1251 if (!strcmp("-offset", argv[argi]))
1256 offset = atoi(argv[argi]);
1265 no = atoi(argv[argi]);
1266 if (no >= spec->arg_no)
1267 no = spec->arg_no - 1;
1268 spec->ptr = spec->arg_start[no] + offset;
/* Run a code action through the Tcl interpreter.
 * Each match argument i is first exported as the Tcl variable named by
 * its decimal index ("0", "1", ...). The argument text is taken from
 * the stream window and NUL-terminated in place for the duration of the
 * Tcl_SetVar call; the overwritten byte is restored afterwards.
 * The script is then evaluated (as a Tcl object when HAVE_TCL_OBJECTS
 * is set, else from the string) and a failure is logged together with
 * errorInfo.
 * NOTE(review): var_buf[var_len] may be one past the window buffer when
 * an argument extends to the end of it - confirm.
 * NOTE(review): HAVE_TCL_OBJECTS is defined without a value near the
 * top of the file, under `#if MAJOR_VERSION >= 8`; `#if` on an empty
 * macro does not evaluate, and TCL_MAJOR_VERSION is the macro used
 * elsewhere in this file - confirm the intended guard.
 * NOTE(review): direct access to interp->errorLine and interp->result
 * is deprecated in newer Tcl releases in favour of accessor
 * functions. */
1272 static void execTcl (struct lexSpec *spec, struct regxCode *code)
1276 for (i = 0; i < spec->arg_no; i++)
1278 char var_name[10], *var_buf;
1281 sprintf (var_name, "%d", i);
1282 var_buf = f_win_get (spec, spec->arg_start[i], spec->arg_end[i],
/* temporarily terminate the argument text inside the window */
1286 ch = var_buf[var_len];
1287 var_buf[var_len] = '\0';
1288 Tcl_SetVar (spec->tcl_interp, var_name, var_buf, 0);
1289 var_buf[var_len] = ch;
1292 #if HAVE_TCL_OBJECTS
1293 ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
1295 ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
1299 const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);
1300 yaz_log(YLOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s",
1301 spec->tcl_interp->errorLine,
1302 spec->tcl_interp->result,
1303 err ? err : "[NO ERRORINFO]");
/* Interpret a code action written in the built-in (non-Tcl) command
 * language. The text is tokenised with execTok and executed command by
 * command:
 *   begin record <absyn> | element <tag> | variant <class> <type> <value>
 *         | context <name>
 *   end   record | element [-record] [<tag>] | context
 *   data  [-text] [-element <tag>] [-attribute <name>] <item>...
 *   unread [-offset <n>] <index>
 *   context <name>       (replaces the top of the context stack)
 * Unknown commands and stray tokens are logged and skipped.
 * NOTE(review): absynName is a 64-byte static buffer filled with
 * cmd_len bytes plus a NUL - confirm cmd_len is limited to 63 before
 * the memcpy.
 * NOTE(review): "begin context" pushes without a visible check against
 * context_stack_size - confirm. */
1309 static void execCode (struct lexSpec *spec, struct regxCode *code)
1311 const char *s = code->str;
1313 const char *cmd_str;
1315 r = execTok (spec, &s, &cmd_str, &cmd_len);
1322 r = execTok (spec, &s, &cmd_str, &cmd_len);
/* p is the NUL-terminated copy of the command word */
1325 p = regxStrz (cmd_str, cmd_len, ptmp);
1326 if (!strcmp (p, "begin"))
1328 r = execTok (spec, &s, &cmd_str, &cmd_len);
1331 yaz_log (YLOG_WARN, "missing keyword after 'begin'");
1334 p = regxStrz (cmd_str, cmd_len, ptmp);
1335 if (!strcmp (p, "record"))
1337 r = execTok (spec, &s, &cmd_str, &cmd_len);
/* a record is only started when none is open yet */
1340 if (spec->d1_level <= 1)
1342 static char absynName[64];
1347 memcpy (absynName, cmd_str, cmd_len);
1348 absynName[cmd_len] = '\0';
1350 yaz_log (YLOG_LOG, "begin record %s", absynName);
1352 res = data1_mk_root (spec->dh, spec->m, absynName);
1356 spec->d1_stack[spec->d1_level++] = res;
1358 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1360 spec->d1_stack[spec->d1_level++] = res;
1362 spec->d1_stack[spec->d1_level] = NULL;
1364 r = execTok (spec, &s, &cmd_str, &cmd_len);
1366 else if (!strcmp (p, "element"))
1368 r = execTok (spec, &s, &cmd_str, &cmd_len);
1371 tagBegin (spec, cmd_str, cmd_len);
1372 r = execTok (spec, &s, &cmd_str, &cmd_len);
1374 else if (!strcmp (p, "variant"))
1377 const char *class_str = NULL;
1379 const char *type_str = NULL;
1381 const char *value_str = NULL;
/* three arguments in order: class, type, value */
1382 r = execTok (spec, &s, &cmd_str, &cmd_len);
1385 class_str = cmd_str;
1386 class_len = cmd_len;
1387 r = execTok (spec, &s, &cmd_str, &cmd_len);
1393 r = execTok (spec, &s, &cmd_str, &cmd_len);
1396 value_str = cmd_str;
1397 value_len = cmd_len;
1399 variantBegin (spec, class_str, class_len,
1400 type_str, type_len, value_str, value_len);
1403 r = execTok (spec, &s, &cmd_str, &cmd_len);
1405 else if (!strcmp (p, "context"))
1409 struct lexContext *lc = spec->context;
1410 r = execTok (spec, &s, &cmd_str, &cmd_len);
1411 p = regxStrz (cmd_str, cmd_len, ptmp);
1413 yaz_log (YLOG_LOG, "begin context %s", p);
1415 while (lc && strcmp (p, lc->name))
/* "begin context" pushes a new stack entry */
1418 spec->context_stack[++(spec->context_stack_top)] = lc;
1420 yaz_log (YLOG_WARN, "unknown context %s", p);
1423 r = execTok (spec, &s, &cmd_str, &cmd_len);
1427 yaz_log (YLOG_WARN, "bad keyword '%s' after begin", p);
1430 else if (!strcmp (p, "end"))
1432 r = execTok (spec, &s, &cmd_str, &cmd_len);
1435 yaz_log (YLOG_WARN, "missing keyword after 'end'");
1438 p = regxStrz (cmd_str, cmd_len, ptmp);
1439 if (!strcmp (p, "record"))
1441 while (spec->d1_level)
1443 tagDataRelease (spec);
1446 r = execTok (spec, &s, &cmd_str, &cmd_len);
1448 yaz_log (YLOG_LOG, "end record");
1450 spec->stop_flag = 1;
1452 else if (!strcmp (p, "element"))
1455 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1457 if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))
1462 tagEnd (spec, min_level, cmd_str, cmd_len);
1463 r = execTok (spec, &s, &cmd_str, &cmd_len);
1466 tagEnd (spec, min_level, NULL, 0);
/* closing the outermost element also ends the record */
1467 if (spec->d1_level <= 1)
1470 yaz_log (YLOG_LOG, "end element end records");
1472 spec->stop_flag = 1;
1476 else if (!strcmp (p, "context"))
1479 yaz_log (YLOG_LOG, "end context");
1481 if (spec->context_stack_top)
1482 (spec->context_stack_top)--;
1483 r = execTok (spec, &s, &cmd_str, &cmd_len);
1486 yaz_log (YLOG_WARN, "bad keyword '%s' after end", p);
1488 else if (!strcmp (p, "data"))
1492 const char *element_str = NULL;
1494 const char *attribute_str = NULL;
/* options first, then the data items */
1496 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1498 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1500 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1502 r = execTok (spec, &s, &element_str, &element_len);
1506 else if (cmd_len==10 && !memcmp ("-attribute", cmd_str,
1509 r = execTok (spec, &s, &attribute_str, &attribute_len);
1514 yaz_log (YLOG_WARN, "bad data option: %.*s",
1519 yaz_log (YLOG_WARN, "missing data item after data");
1523 tagBegin (spec, element_str, element_len);
1526 execData (spec, cmd_str, cmd_len, textFlag,
1527 attribute_str, attribute_len);
1528 r = execTok (spec, &s, &cmd_str, &cmd_len);
1531 tagEnd (spec, 2, NULL, 0);
1533 else if (!strcmp (p, "unread"))
1536 r = execTok (spec, &s, &cmd_str, &cmd_len);
1537 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1539 r = execTok (spec, &s, &cmd_str, &cmd_len);
1542 yaz_log (YLOG_WARN, "missing number after -offset");
1545 p = regxStrz (cmd_str, cmd_len, ptmp);
1547 r = execTok (spec, &s, &cmd_str, &cmd_len);
1553 yaz_log (YLOG_WARN, "missing index after unread command");
/* the index must be a single decimal digit */
1556 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1558 yaz_log (YLOG_WARN, "bad index after unread command");
1563 no = *cmd_str - '0';
1564 if (no >= spec->arg_no)
1565 no = spec->arg_no - 1;
1566 spec->ptr = spec->arg_start[no] + offset;
1568 r = execTok (spec, &s, &cmd_str, &cmd_len);
1570 else if (!strcmp (p, "context"))
1574 struct lexContext *lc = spec->context;
1575 r = execTok (spec, &s, &cmd_str, &cmd_len);
1576 p = regxStrz (cmd_str, cmd_len, ptmp);
1578 while (lc && strcmp (p, lc->name))
/* plain "context" replaces the current entry instead of pushing */
1581 spec->context_stack[spec->context_stack_top] = lc;
1583 yaz_log (YLOG_WARN, "unknown context %s", p);
1586 r = execTok (spec, &s, &cmd_str, &cmd_len);
1590 yaz_log (YLOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str);
1591 r = execTok (spec, &s, &cmd_str, &cmd_len);
1596 yaz_log (YLOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
1598 r = execTok (spec, &s, &cmd_str, &cmd_len);
/* Execute the action list `ap` for a match that started at start_ptr;
 * *pptr is the current read position and is advanced by pattern
 * actions.
 * Argument 0 starts at start_ptr. Each pattern action tries to match
 * its DFA at the current position and records the matched range as a
 * further argument (arg_start[] / arg_end[]): BODY patterns are
 * searched for with tryMatch(greedy = 0) and yield the text before the
 * match as well as the match; other patterns use tryMatch(greedy = 1)
 * and must match exactly at the current position. When a BODY pattern
 * is not found the argument extends to F_WIN_EOF.
 * Code actions run with the arguments collected so far, through Tcl
 * when an interpreter is attached and through execCode otherwise.
 * Processing of the list stops when stop_flag is set. */
1605 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1606 int start_ptr, int *pptr)
1615 arg_start[0] = start_ptr;
/* make the local argument arrays visible to the code executors */
1617 spec->arg_start = arg_start;
1618 spec->arg_end = arg_end;
1625 if (ap->u.pattern.body)
1627 arg_start[arg_no] = *pptr;
1628 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 0))
1630 arg_end[arg_no] = F_WIN_EOF;
1632 arg_start[arg_no] = F_WIN_EOF;
1633 arg_end[arg_no] = F_WIN_EOF;
1634 yaz_log(YLOG_DEBUG, "Pattern match rest of record");
/* body text ends where the pattern match begins */
1639 arg_end[arg_no] = sptr;
1641 arg_start[arg_no] = sptr;
1642 arg_end[arg_no] = *pptr;
1647 arg_start[arg_no] = *pptr;
1648 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 1))
/* a non-body pattern has to match right at the current position */
1650 if (sptr != arg_start[arg_no])
1652 arg_end[arg_no] = *pptr;
1657 spec->arg_no = arg_no;
1660 if (spec->tcl_interp)
1661 execTcl(spec, ap->u.code);
1663 execCode (spec, ap->u.code);
1665 execCode (spec, ap->u.code);
1668 if (spec->stop_flag)
1672 arg_start[arg_no] = *pptr;
1673 arg_end[arg_no] = F_WIN_EOF;
/* Execute the actions of rule number ruleNo of `context`, looked up
 * through the context's fastRule table, and return execAction's
 * result. */
1682 static int execRule (struct lexSpec *spec, struct lexContext *context,
1683 int ruleNo, int start_ptr, int *pptr)
1686 yaz_log (YLOG_LOG, "exec rule %d", ruleNo);
1688 return execAction (spec, context->fastRule[ruleNo]->actionList,
/* Main scanning loop: run the DFA of the context on top of the context
 * stack over the input starting at *ptr.
 * Input that is not part of any rule match (the run from skip_ptr up to
 * the start of a match) is emitted as plain data via execDataP. When
 * the machine has no transition for the current character and a rule
 * has matched, that rule is executed with execRule; the context is
 * re-read from the context stack afterwards because an action may have
 * switched it. If execRule returns 0 and an end-of-record callback
 * (f_win_ef) is present, it is told the current offset.
 * c_prev starts as '\n' so that line-anchored rules can match at the
 * very first position. */
1692 int lexNode (struct lexSpec *spec, int *ptr)
1694 struct lexContext *context = spec->context_stack[spec->context_stack_top];
1695 struct DFA_state *state = context->dfa->states[0];
1698 unsigned char c_prev = '\n';
1700 int last_rule = 0; /* rule number of current match */
1701 int last_ptr = *ptr; /* last char of match */
1702 int start_ptr = *ptr; /* first char of match */
1703 int skip_ptr = *ptr; /* first char of run */
1708 c = f_win_advance (spec, ptr);
1709 if (*ptr == F_WIN_EOF)
1711 /* end of file met */
1714 /* there was a match */
1715 if (skip_ptr < start_ptr)
1717 /* deal with chars that didn't match */
1720 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1721 execDataP (spec, buf, size, 0);
1723 /* restore pointer */
1726 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1728 /* restore skip pointer */
1732 else if (skip_ptr < *ptr)
1734 /* deal with chars that didn't match */
1737 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1738 execDataP (spec, buf, size, 0);
1740 state = context->dfa->states[0];
1741 if (*ptr == F_WIN_EOF)
1748 { /* no transition for character c ... */
1751 if (skip_ptr < start_ptr)
1753 /* deal with chars that didn't match */
1756 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1757 execDataP (spec, buf, size, 0);
1759 /* restore pointer */
1761 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1763 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1765 off_t end_offset = *ptr;
1767 yaz_log (YLOG_LOG, "regx: endf ptr=%d", *ptr);
1769 (*spec->f_win_ef)(spec->stream, &end_offset);
/* an action may have changed the active context */
1773 context = spec->context_stack[spec->context_stack_top];
1776 last_ptr = start_ptr = *ptr;
1780 c_prev = f_win_advance (spec, &start_ptr);
1785 c_prev = f_win_advance (spec, &start_ptr);
1788 state = context->dfa->states[0];
1791 else if (c >= t->ch[0] && c <= t->ch[1])
1792 { /* transition ... */
1793 state = context->dfa->states[t->to];
1798 last_rule = state->rule_no;
1801 else if (state->rule_nno)
1803 last_rule = state->rule_nno;
/* Parse one record starting at stream offset `offset` using the context
 * called context_name, and return the root of the resulting data1 tree
 * (slot 0 of the node stack).
 * Steps: reset stop_flag and the context stack, locate the context by
 * name (warning if it does not exist), run the context's init and begin
 * actions, scan the input with lexNode, finalise pending text on every
 * level still open, and run the end actions. */
1816 static data1_node *lexRoot (struct lexSpec *spec, off_t offset,
1817 const char *context_name)
1819 struct lexContext *lt = spec->context;
1823 spec->stop_flag = 0;
1825 spec->context_stack_top = 0;
1828 if (!strcmp (lt->name, context_name))
1834 yaz_log (YLOG_WARN, "cannot find context %s", context_name);
1837 spec->context_stack[spec->context_stack_top] = lt;
1838 spec->d1_stack[spec->d1_level] = NULL;
1843 execAction (spec, lt->initActionList, ptr, &ptr);
1846 execAction (spec, lt->beginActionList, ptr, &ptr);
1848 ret = lexNode (spec, &ptr);
/* close whatever the rules left open */
1849 while (spec->d1_level)
1851 tagDataRelease (spec);
1856 execAction (spec, lt->endActionList, ptr, &ptr);
1857 return spec->d1_stack[0];
/* Filter destructor: clientData is the struct lexSpecs created by
 * grs_init; the lexSpec it holds is destroyed. */
1860 void grs_destroy(void *clientData)
1862 struct lexSpecs *specs = (struct lexSpecs *) clientData;
1865 lexSpecDestroy(&specs->spec);
/* Filter constructor: allocates the per-filter struct lexSpecs with an
 * empty type name. */
1870 void *grs_init(Res res, RecType recType)
1872 struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs));
1874 strcpy(specs->type, "");
/* Filter configuration: `args` names the filter spec to use. It is
 * stored in specs->type only when it fits, terminator included. */
1879 ZEBRA_RES grs_config(void *clientData, Res res, const char *args)
1881 struct lexSpecs *specs = (struct lexSpecs *) clientData;
1882 if (strlen(args) < sizeof(specs->type))
1883 strcpy(specs->type, args);
/* Read one record with the regx filter and return its data1 tree.
 * The lexSpec is cached in the filter's lexSpecs: it is (re)built from
 * the spec file whenever none exists or its name differs from the
 * configured type, and destroyed again if loading fails. The stream
 * callbacks (read, seek, end) and a window size of 500000 bytes are
 * installed on the spec, and parsing starts in context "main" at the
 * stream's current offset.
 * Note that the local curLexSpec below is a pointer to the cached spec
 * pointer and hides the file-scope variable of the same name. */
1887 data1_node *grs_read_regx (struct grs_read_info *p)
1890 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1891 struct lexSpec **curLexSpec = &specs->spec;
1895 yaz_log (YLOG_LOG, "grs_read_regx");
1897 if (!*curLexSpec || strcmp ((*curLexSpec)->name, specs->type))
1900 lexSpecDestroy (curLexSpec);
1901 *curLexSpec = lexSpecCreate (specs->type, p->dh);
1902 res = readFileSpec (*curLexSpec);
1905 lexSpecDestroy (curLexSpec);
1909 (*curLexSpec)->dh = p->dh;
1910 start_offset = p->stream->tellf(p->stream);
1911 if (start_offset == 0)
1913 (*curLexSpec)->f_win_start = 0;
1914 (*curLexSpec)->f_win_end = 0;
1915 (*curLexSpec)->f_win_rf = p->stream->readf;
1916 (*curLexSpec)->f_win_sf = p->stream->seekf;
1917 (*curLexSpec)->stream = p->stream;
1918 (*curLexSpec)->f_win_ef = p->stream->endf;
1919 (*curLexSpec)->f_win_size = 500000;
1921 (*curLexSpec)->m = p->mem;
1922 return lexRoot (*curLexSpec, start_offset, "main");
/* Extract entry point of the regx filter: delegates to
 * zebra_grs_extract with grs_read_regx as the record reader. */
1925 static int extract_regx(void *clientData, struct recExtractCtrl *ctrl)
1927 return zebra_grs_extract(clientData, ctrl, grs_read_regx);
/* Retrieve entry point of the regx filter: delegates to
 * zebra_grs_retrieve with grs_read_regx as the record reader. */
1930 static int retrieve_regx(void *clientData, struct recRetrieveCtrl *ctrl)
1932 return zebra_grs_retrieve(clientData, ctrl, grs_read_regx);
1935 static struct recType regx_type = {
/* Read one record with the Tcl-enabled filter and return its data1
 * tree. Works like grs_read_regx, except that building the lexSpec also
 * creates and initialises a Tcl interpreter and registers the commands
 * "begin", "end", "data" and "unread" on it, before the spec file is
 * read. The stream callbacks and a window size of 500000 bytes are
 * installed on the spec, and parsing starts in context "main" at the
 * stream's current offset.
 * NOTE(review): the result of Tcl_Init is not checked at the call
 * below. */
1947 data1_node *grs_read_tcl (struct grs_read_info *p)
1950 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1951 struct lexSpec **curLexSpec = &specs->spec;
1955 yaz_log (YLOG_LOG, "grs_read_tcl");
1957 if (!*curLexSpec || strcmp ((*curLexSpec)->name, specs->type))
1959 Tcl_Interp *tcl_interp;
1961 lexSpecDestroy (curLexSpec);
1962 *curLexSpec = lexSpecCreate (specs->type, p->dh);
1963 Tcl_FindExecutable("");
1964 tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp();
1965 Tcl_Init(tcl_interp);
/* the lexSpec is handed to every command as its clientData */
1966 Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0);
1967 Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0);
1968 Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);
1969 Tcl_CreateCommand (tcl_interp, "unread", cmd_tcl_unread,
1971 res = readFileSpec (*curLexSpec);
1974 lexSpecDestroy (curLexSpec);
1978 (*curLexSpec)->dh = p->dh;
1979 start_offset = p->stream->tellf(p->stream);
1980 if (start_offset == 0)
1982 (*curLexSpec)->f_win_start = 0;
1983 (*curLexSpec)->f_win_end = 0;
1984 (*curLexSpec)->f_win_rf = p->stream->readf;
1985 (*curLexSpec)->f_win_sf = p->stream->seekf;
1986 (*curLexSpec)->stream = p->stream;
1987 (*curLexSpec)->f_win_ef = p->stream->endf;
1988 (*curLexSpec)->f_win_size = 500000;
1990 (*curLexSpec)->m = p->mem;
1991 return lexRoot (*curLexSpec, start_offset, "main");
/* Extract entry point of the Tcl filter: delegates to
 * zebra_grs_extract with grs_read_tcl as the record reader. */
1994 static int extract_tcl(void *clientData, struct recExtractCtrl *ctrl)
1996 return zebra_grs_extract(clientData, ctrl, grs_read_tcl);
/* Retrieve entry point of the Tcl filter: delegates to
 * zebra_grs_retrieve with grs_read_tcl as the record reader. */
1999 static int retrieve_tcl(void *clientData, struct recRetrieveCtrl *ctrl)
2001 return zebra_grs_retrieve(clientData, ctrl, grs_read_tcl);
2004 static struct recType tcl_type = {
2017 #ifdef IDZEBRA_STATIC_GRS_REGX
2018 idzebra_filter_grs_regx
2033 * indent-tabs-mode: nil
2035 * vim: shiftwidth=4 tabstop=8 expandtab