/* Copyright (C) 2006, Index Data ApS
* See the file LICENSE for details.
- * $Id: nfa.h,v 1.5 2006-05-05 09:14:42 heikki Exp $
+ * $Id: nfa.h,v 1.6 2006-05-05 14:02:27 heikki Exp $
*/
/**
yaz_nfa *n, int backref_no );
+/** \brief Create a charcater range converter
+ * \param n the nfa
+ * \param backref_no The backreference to reproduce
+ * \param from_char the first character of the original range
+ * \param to_char the first character of the target range
+ *
+ * This converter takes a backreference, and shifts the characters
+ * by a constant value. For example, translating a-z to A-Z.
+ * Note that backref 0 is always the last character that matched a
+ * range, even if no backrefs were defined in teh nfa. This makes
+ * it pretty useful with this converter.
+ *
+ */
+yaz_nfa_converter *yaz_nfa_create_range_converter (
+ yaz_nfa *n, int backref_no,
+ yaz_nfa_char from_char,
+ yaz_nfa_char to_char);
+
/** \brief Connects converters in a chain.
* \param n the nfa (mostly for nmem access)
/* Copyright (C) 2006, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: nfa.c,v 1.6 2006-05-05 09:14:42 heikki Exp $
+ * $Id: nfa.c,v 1.7 2006-05-05 14:02:27 heikki Exp $
*/
/**
typedef enum {
conv_none,
conv_string,
- conv_backref
+ conv_backref,
+ conv_range
} yaz_nfa_converter_type;
struct yaz_nfa_converter {
struct matcher {
yaz_nfa *n;
yaz_nfa_char *longest;
- int bestnode;
+ int bestnode;
void *result;
int errorcode;
int empties; /* count how many consecutive empty transitions */
static void match_state(
yaz_nfa_state *s,
- yaz_nfa_char *inchar,
- size_t incharsleft,
- struct matcher *m ) {
+ yaz_nfa_char *prevmatch,
+ yaz_nfa_char *inchar,
+ size_t incharsleft,
+ struct matcher *m ) {
yaz_nfa_transition *t = s->lasttrans;
if (s->backref_start)
m->n->curr_backrefs[s->backref_start].start = inchar;
if (s->backref_end)
- m->n->curr_backrefs[s->backref_end].end = inchar;
+ m->n->curr_backrefs[s->backref_end].end = prevmatch;
if (t) {
if (incharsleft) {
do {
m->n->curr_backrefs[0].start = inchar;
m->n->curr_backrefs[0].end = inchar;
}
- match_state(t->to_state, inchar+1, incharsleft-1, m);
- /* yes, descent to all matching nodes, even if overrun, */
- /* since we can find a better one later */
- } else if (( t->range_start==EMPTY_START) && (t->range_end==EMPTY_END)) {
+ match_state(t->to_state, inchar,
+ inchar+1, incharsleft-1, m);
+ } else if (( t->range_start==EMPTY_START) &&
+ (t->range_end==EMPTY_END)) {
if ( m->empties++ > LOOP_LIMIT )
m->errorcode= YAZ_NFA_LOOP;
else
- match_state(t->to_state, inchar, incharsleft, m);
+ match_state(t->to_state, prevmatch,
+ inchar, incharsleft, m);
}
} while (t != s->lasttrans );
} else {
n->best_backrefs[i].end = 0;
}
- match_state(n->firststate, *inbuff, *incharsleft, &m);
+ match_state(n->firststate, *inbuff, *inbuff, *incharsleft, &m);
if (m.result) {
*incharsleft -= (m.longest-*inbuff);
*result = m.result;
return c;
}
+yaz_nfa_converter *yaz_nfa_create_range_converter (
+ yaz_nfa *n, int backref_no,
+ yaz_nfa_char from_char,
+ yaz_nfa_char to_char){
+ yaz_nfa_converter *c;
+ c=create_null_converter(n);
+ c->type=conv_range;
+ c->backref_no=backref_no;
+ c->char_diff=to_char - from_char;
+ return c;
+
+}
+
void yaz_nfa_append_converter (
yaz_nfa *n,
return 0;
if (i==1) /* no match in dfa */
return 1; /* should not happen */
- while (cp2>cp1) {
+ while (cp2>=cp1) {
if ((*outcharsleft)-- <= 0)
return 2;
**outbuff=*cp1++;
return 0;
}
+static int range_convert (
+ yaz_nfa *n,
+ yaz_nfa_converter *c,
+ yaz_nfa_char **outbuff,
+ size_t *outcharsleft){
+ yaz_nfa_char *cp1,*cp2;
+ int i;
+ i = yaz_nfa_get_backref(n,c->backref_no, &cp1, &cp2);
+ printf ("range_convert: i=%d d=%d, cp1=%p cp2=%p \n",i,c->char_diff,cp1,cp2);
+ if (i == 2) /* no backref, produce no output, not ok */
+ return 1; /* should not happen */
+ if (i == 1) /* no match in dfa */
+ return 1; /* should not happen */
+ while (cp2 >= cp1) {
+ if ((*outcharsleft)-- <= 0)
+ return 2;
+ printf(" range_convert: %d '%c' -> ",*cp1,*cp1);
+ **outbuff=(*cp1++) + c->char_diff ;
+ printf("%d '%c'\n",**outbuff, **outbuff);
+ (*outbuff)++;
+ }
+ return 0;
+}
+
int yaz_nfa_run_converters(
yaz_nfa *n,
yaz_nfa_char **outbuff,
size_t *outcharsleft){
int rc=0;
- // yaz_nfa_char *bufstart=*outbuff;
while (c && !rc) {
switch(c->type) {
case conv_string:
case conv_backref:
rc=backref_convert(n,c,outbuff,outcharsleft);
break;
+ case conv_range:
+ rc=range_convert(n,c,outbuff,outcharsleft);
+ break;
default:
rc=3; /* internal error */
}
/* Copyright (C) 2006, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: nfatest1.c,v 1.4 2006-05-05 09:14:42 heikki Exp $
+ * $Id: nfatest1.c,v 1.5 2006-05-05 14:02:27 heikki Exp $
*
*/
#include <yaz/nmem.h>
#include <yaz/test.h>
-#define VERBOSE 1
+#define VERBOSE 0
char *printfunc(void *result) {
static char buf[200];
test_match(n, tst4, 9, YAZ_NFA_LOOP, "loop");
test_match(n, tst5, 9, YAZ_NFA_SUCCESS, "y k+ d");
- cp = tst6;
+ cp = tst6; /* xzkab */
sz = 8;
i = yaz_nfa_match(n, &cp, &sz, &p);
YAZ_CHECK_EQ(i, YAZ_NFA_SUCCESS);
i = yaz_nfa_get_backref(n, 2, &cp1, &cp2 );
YAZ_CHECK_EQ(i, 0);
+ YAZ_CHECK_EQ(cp2-cp1+1,2);
+ YAZ_CHECK_EQ(*cp1, 'z' );
+ YAZ_CHECK_EQ(*cp2, 'k' );
#if VERBOSE
- printf("backref from %p to %p is %d long. sz is now %d\n",
- cp1,cp2, cp2-cp1,sz );
+ printf("backref from %p '%c' to %p '%c' is %d long. sz is now %d\n",
+ cp1, *cp1, cp2, *cp2, cp2-cp1+1, sz );
#endif
yaz_nfa_destroy(n);
void converter_test() {
yaz_nfa* n= yaz_nfa_init();
- yaz_nfa_converter *c1,*c2;
+ yaz_nfa_converter *c1, *c2, *c3;
yaz_nfa_char str1[]={'a','b','c'};
yaz_nfa_char seq1[]={'A','B','C',0};
yaz_nfa_char seq2[]={'k','m','n','m','x','P','Q','X',0};
yaz_nfa_char outbuf[1024];
- yaz_nfa_char *outp,*cp, *cp1, *cp2;
+ yaz_nfa_char *outp, *cp, *cp1, *cp2;
yaz_nfa_state *s, *s2;
void *vp;
int i;
yaz_nfa_set_result(n,s,c1);
yaz_nfa_set_backref_point(n,s,1,0);
- /* [k-o][m-n]*x -> m-n sequence */
+ /* ([k-o][m-n]*)x -> \1 */
s=yaz_nfa_add_state(n);
yaz_nfa_add_empty_transition(n,0,s);
yaz_nfa_set_backref_point(n,s,2,1);
c2=vp;
YAZ_CHECK_EQ(i,YAZ_NFA_SUCCESS);
i=yaz_nfa_get_backref(n, 2, &cp1, &cp2 );
+#if VERBOSE
+ printf("backref from %p '%c' to %p '%c' is %d long. sz is now %d\n",
+ cp1, *cp1, cp2, *cp2, cp2-cp1+1, sz );
+#endif
YAZ_CHECK_EQ(i,0);
- YAZ_CHECK_EQ((int)c1,(int)c2);
+ YAZ_CHECK_EQ((int)c1,(int)c2); /* got our pointer back from nfa */
for(i=0;i<1024;i++)
outbuf[i]=10000+i;
outp=outbuf;
YAZ_CHECK_EQ(outbuf[5],10000+5);
YAZ_CHECK_EQ(sz,11-5);
+ c3=yaz_nfa_create_range_converter(n,2, 'a', 'A' );
+ for(i=0;i<1024;i++)
+ outbuf[i]=10000+i;
+ outp=outbuf;
+ sz=11;
+ i=yaz_nfa_run_converters(n, c3, &outp, &sz);
+ YAZ_CHECK_EQ(i,0);
+ YAZ_CHECK_EQ(outbuf[0],'K');
+ YAZ_CHECK_EQ(outbuf[1],'M');
+ YAZ_CHECK_EQ(outbuf[2],'N');
+ YAZ_CHECK_EQ(outbuf[3],'M');
+ YAZ_CHECK_EQ(outbuf[4],'X');
+ YAZ_CHECK_EQ(outbuf[5],10000+5);
+ YAZ_CHECK_EQ(sz,11-5);
+
yaz_nfa_destroy(n);
}