Better defined return codes, doxygen comments, etc.
/* Copyright (C) 2006, Index Data ApS
* See the file LICENSE for details.
- * $Id: nfa.h,v 1.6 2006-05-05 14:02:27 heikki Exp $
+ * $Id: nfa.h,v 1.7 2006-05-10 13:58:46 heikki Exp $
*/
/**
YAZ_BEGIN_CDECL
-/** \brief Internal character type */
+/** \name return codes and data types*/
+/* \{ */
+/** \brief Success */
+#define YAZ_NFA_SUCCESS 0
+
+/** \brief no match found */
+#define YAZ_NFA_NOMATCH 1
+
+/** \brief Need more input */
+#define YAZ_NFA_OVERRUN 2
+
+/** \brief The NFA is looping */
+#define YAZ_NFA_LOOP 3
+
+/** \brief no room in output buffer */
+#define YAZ_NFA_NOSPACE 4
+
+/** \brief Trying to set a result when one already exists */
+#define YAZ_NFA_ALREADY 5
+
+/** \brief Attempting to set an end to a backref that has not been started */
+#define YAZ_NFA_NOSTART 6
+
+/** \brief Asking for a non-existing backref */
+#define YAZ_NFA_NOSUCHBACKREF 7
+
+/** \brief Internal error, should never happen */
+#define YAZ_NFA_INTERNAL 8
+
+
+/** \brief Internal character type. 32-bit unicode! */
typedef unsigned int yaz_nfa_char;
/** \brief Transition from one state to another */
typedef struct yaz_nfa_transition yaz_nfa_transition;
-
-/** brief Simple character range converter */
+/** \brief A converter produces some output to a buffer */
typedef struct yaz_nfa_converter yaz_nfa_converter;
+/* \} */
+/** \name Low-level interface to building the NFA */
+/* \{ */
/** \brief Initialize the NFA without any states in it
*
/** \brief Sets the result pointer to a state
*
- * Call with null to clear the pointer.
+ * \param n the NFA itself
+ * \param s the state to which the result will be added
+ * \param result the result pointer
+ *
+ * Sets the result pointer of a state. If already set, returns an error. Call
+ * with a NULL pointer to clear the result, before setting a new one.
*
- * \retval 0 ok
- * \retval 1 The state already has a result!
+ * \retval YAZ_NFA_SUCCESS ok
+ * \retval YAZ_NFA_ALREADY The state already has a result!
*/
int yaz_nfa_set_result(
- /** The NFA itsef */
yaz_nfa *n,
- /** The state to which the result is added */
yaz_nfa_state *s,
- /** The result. The NFA does not care what it is, just stores it */
void *result
);
*
* \param n the nfa
* \param s the state to add to
- * \param backref_number is the number of the back reference. 0 for clearing
+ * \param backref_number is the number of the back reference.
* \param is_start is 1 for start of the backref, 0 for end
- * \retval 0 for OK
- * \retval 1 if the backref is already set
- * \retval 2 for ending a backref that has not been started
+ *
+ * \retval YAZ_NFA_SUCCESS for OK
+ * \retval YAZ_NFA_ALREADY if the backref is already set
+ * \retval YAZ_NFA_NOSTART for ending a backref that has not been started
*
*/
/** \brief Add a sequence of transitions and states.
*
+ * \param n the nfa
+ * \param s the state to add this to. If null, adds to the initial state
+ * \param seq is a sequence of yaz_nfa_chars.
+ * \param seq_len is the length of the sequence
+ * \return the final state
+ *
* Starting from state s (or from the initial state, if s is
* null), finds as much of seq as possible and inserts the rest.
- * \Return the final state
*/
yaz_nfa_state *yaz_nfa_add_sequence( yaz_nfa *n,
yaz_nfa_state *s,
- yaz_nfa_char *seq );
+ yaz_nfa_char *seq,
+ size_t seq_len );
+/** \} */
+
+/** \name Low-level interface for matching the NFA. */
+/*
+ * These do the actual matching. They know nothing of
+ * the type of the result pointers
+ */
+/** \{ */
/** \brief Find the longest possible match.
*
* In case of errors, returns the best match so far,
* which the caller is free to ignore.
*
- * \retval 0 success
- * \retval 1 no match found
- * \retval 2 overrun'of input buffer
- * \retval 3 looping too far
+ * \retval YAZ_NFA_SUCCESS success
+ * \retval YAZ_NFA_NOMATCH no match found
+ * \retval YAZ_NFA_OVERRUN overrun of input buffer
+ * \retval YAZ_NFA_LOOP looping too far
*
*/
int yaz_nfa_match(yaz_nfa *n, yaz_nfa_char **inbuff, size_t *incharsleft,
void **result );
-/** yaz_nfa_match return codes */
-#define YAZ_NFA_SUCCESS 0
-#define YAZ_NFA_NOMATCH 1
-#define YAZ_NFA_OVERRUN 2
-#define YAZ_NFA_LOOP 3
-
/** \brief Get a back reference after a successfull match.
*
* \param n the nfa
* not the end of a backref. It is up to the caller to decide how
* to handle such a situation.
*
- * \retval 0 OK
- * \retval 1 no match
- * \retval 2 no such backref
+ * \retval YAZ_NFA_SUCCESS OK
+ * \retval YAZ_NFA_NOMATCH The NFA hasn't matched anything, no backref
+ * \retval YAZ_NFA_NOSUCHBACKREF no such backref
*/
int yaz_nfa_get_backref( yaz_nfa *n,
yaz_nfa_char **start,
yaz_nfa_char **end );
+/* \} */
+
+/** \name Low-level interface to the converters */
+/* These produce some output text into a buffer. There are a few
+ * kinds of converters, each producing different type of output.
+ */
+/* \{ */
+
/** \brief Create a string converter.
* \param n the nfa
* \param string the string to output
* Runs the converters in the chain, placing output into outbuff
* (and incrementing the pointer).
*
- * \retval 0 OK
- * \retval 1 no match to get backrefs from
- * \retval 2 no room in outbuf
+ * \retval YAZ_NFA_SUCCESS OK
+ * \retval YAZ_NFA_NOMATCH no match to get backrefs from
+ * \retval YAZ_NFA_NOSPACE no room in outbuf
+ * \retval YAZ_NFA_INTERNAL Should never happen
*
*/
int yaz_nfa_run_converters(
yaz_nfa_char **outbuff,
size_t *outcharsleft);
+/** \} */
+
+/** \name High-level interface to the NFA */
+/* This interface combines the NFA and converters, for ease of
+ * access. There are a few calls to build a complete system, and a call
+ * to do the actual conversion.
+ */
+/* \{ */
+
+/** \brief Add a rule that converts one string to another ('IX' -> '9')
+ *
+ * \param n The nfa itself
+ * \param from_string the string to match
+ * \param from_length length of the from_string
+ * \param to_string the string to write in the output
+ * \param to_length length of the to_string
+ *
+ * Adds a matching rule and a string converter to the NFA.
+ * Can be used for converting strings into nothing, for example,
+ * to remove markup.
+ *
+ * \retval YAZ_NFA_SUCCESS OK
+ * \retval YAZ_NFA_ALREADY Conflict with some other rule
+ *
+ */
+int yaz_nfa_add_string_rule( yaz_nfa *n,
+ yaz_nfa_char *from_string,
+ size_t from_length,
+ yaz_nfa_char *to_string,
+ size_t to_length);
+
+/** \brief Just like yaz_nfa_add_string_rule, but takes the strings in ascii
+ *
+ * \param n The nfa itself
+ * \param from_string the string to match
+ * \param to_string the string to write in the output
+ *
+ * Like yaz_nfa_add_string_rule, this adds a rule to translate a string
+ * into another. The only difference is that this one takes the strings as
+ * normal char *, which means that no high-valued unicodes can be handled,
+ * and that this one uses null-terminated strings. In short, this is a
+ * simplified version mostly intended for tests and other small uses.
+ *
+ * \retval YAZ_NFA_SUCCESS OK
+ * \retval YAZ_NFA_ALREADY Conflict with some other rule
+ */
+int yaz_nfa_add_ascii_string_rule( yaz_nfa *n,
+ char *from_string,
+ char *to_string);
+
+
+/** \brief Add a rule that converts a character range
+ *
+ * \param n The nfa itself
+ * \param range_start Where the matching range starts
+ * \param range_end Where the matching range ends
+ * \param output_range_start Where the resulting range starts
+ *
+ *
+ * Adds a character range rule to the NFA. The range to be converted
+ * is defined by the range_start and range_end parameters. The output
+ * range starts at output_range_start, and is automatically as long
+ * as the input range.
+ *
+ * Useful for alphabet normalizing [a-z] -> [A-Z]
+ *
+ * \retval YAZ_NFA_SUCCESS OK
+ * \retval YAZ_NFA_ALREADY Conflict with some other rule
+ */
+int yaz_nfa_add_char_range_rule (yaz_nfa *n,
+ yaz_nfa_char range_start,
+ yaz_nfa_char range_end,
+ yaz_nfa_char output_range_start);
+
+/** \brief Add a rule that converts a character range to a string
+ *
+ * \param n The nfa itself
+ * \param range_start Where the matching range starts
+ * \param range_end Where the matching range ends
+ * \param to_string the string to write in the output
+ * \param to_length length of the to_string
+ *
+ * \retval YAZ_NFA_SUCCESS OK
+ * \retval YAZ_NFA_ALREADY Conflict with some other rule
+ *
+ * Adds a character range match rule, and a string converter.
+ *
+ * Useful in converting a range of special characters into (short?)
+ * strings of whitespace, or even to nothing at all.
+ */
+int yaz_nfa_add_char_string_rule (yaz_nfa *n,
+ yaz_nfa_char range_start,
+ yaz_nfa_char range_end,
+ yaz_nfa_char* to_string,
+ size_t to_length);
+
+/** \brief Converts one 'slice' that is, the best matching rule.
+ *
+ * \param n the nfa itself
+ * \param inbuff buffer of input data. Will be incremented when match
+ * \param incharsleft max number of inchars to use from inbuff. decrements.
+ * \param outbuff buffer for output data. Will be incremented when match
+ * \param outcharsleft max number of chars to write to outbuff.
+ *
+ * \retval YAZ_NFA_SUCCESS OK
+ * \retval YAZ_NFA_OVERRUN No more input data, some pattern could match
+ * \retval YAZ_NFA_NOSPACE No room in the output buffer
+ * \retval YAZ_NFA_NOSUCHBACKREF NFA refers to a non-existing backref
+ *
+ * Finds the best match at the beginning of inbuf, and fires its converter(s)
+ * to produce output in outbuff. Increments both inbuf and outbuf pointers and
+ * decrements the *charsleft values, so all is ready for calling again, until
+ * the buffer is exhausted. That loop is left to the caller, so he can load
+ * more data in the buffer in good time.
+ *
+ * If no match is found, converts one character into itself. If the matcher
+ * returns any sort of error, leaves the pointers where they were.
+ */
+int yaz_nfa_convert_slice (yaz_nfa *n,
+ yaz_nfa_char **inbuff,
+ size_t *incharsleft,
+ yaz_nfa_char **outbuff,
+ size_t *outcharsleft);
+
+
+/* \} */
+
+/** \name Debug routines */
+/* These provide a method for traversing all the states defined
+ * in the NFA, for example to release memory allocated in the results,
+ * and a simple debug routine to dump the NFA */
+/* \{ */
+
/** \brief Get the first state of the NFA.
*
*/
void yaz_nfa_dump(FILE *F, yaz_nfa *n, char *(*strfunc)(void *) );
-
+/* \} */
/* Copyright (C) 2006, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: nfa.c,v 1.8 2006-05-05 14:04:03 heikki Exp $
+ * $Id: nfa.c,v 1.9 2006-05-10 13:58:46 heikki Exp $
*/
/**
* strings with it.
*/
+
#include <stdio.h>
+#include <string.h>
#include <yaz/nfa.h>
#include <yaz/nmem.h>
NMEM my_nmem = nmem_create();
yaz_nfa *n = nmem_malloc(my_nmem, sizeof(yaz_nfa));
n->nmem = my_nmem;
- n->nstates = 0;
- n->laststate = 0;
- n->firststate = 0;
- n->nbackrefs = 0;
+ n->nbackrefs = 1; /* we always have #0, last range match */
n->curr_backrefs = 0;
n->best_backrefs = 0;
n->lastmatch = YAZ_NFA_NOMATCH;
+ n->nstates = 0;
+ n->laststate = 0;
+ n->firststate = n->laststate ;
return n;
}
int yaz_nfa_set_result(yaz_nfa *n, yaz_nfa_state *s, void *result) {
if ((s->result)&&result)
- return 1;
+ return YAZ_NFA_ALREADY;
s->result = result;
return 0;
}
int is_start ){
if (is_start) {
if (s->backref_start)
- return 1;
+ return YAZ_NFA_ALREADY;
s->backref_start = backref_number;
if (n->nbackrefs<=backref_number) {
n->nbackrefs = backref_number+1;
}
} else {
if (s->backref_end)
- return 1;
- if (n->nbackrefs<backref_number)
- return 2; /* can't end a backref that has not been started */
+ return YAZ_NFA_ALREADY;
+ if (n->nbackrefs<=backref_number)
+ return YAZ_NFA_NOSTART;
s->backref_end = backref_number;
}
return 0; /* ok */
yaz_nfa_state *s,
yaz_nfa_char range_start,
yaz_nfa_char range_end) {
- yaz_nfa_state *nextstate;
+ yaz_nfa_state *nextstate=0;
if (!s) /* default to top-level of the nfa */
s = n->firststate;
- nextstate = find_single_trans(s, range_start, range_end);
+ if (s)
+ nextstate = find_single_trans(s, range_start, range_end);
+ else
+ s = yaz_nfa_add_state(n); /* create initial state */
if (!nextstate) {
nextstate = yaz_nfa_add_state(n);
yaz_nfa_add_transition(n, s, nextstate, range_start, range_end);
yaz_nfa_state *yaz_nfa_add_sequence(yaz_nfa *n,
yaz_nfa_state *s,
- yaz_nfa_char *seq ){
- yaz_nfa_state *nextstate;
+ yaz_nfa_char *seq,
+ size_t seq_len){
+ yaz_nfa_state *nextstate=0;
if (!s) /* default to top-level of the nfa */
s = n->firststate;
- nextstate = find_single_trans(s, *seq, *seq);
+ if (s)
+ nextstate = find_single_trans(s, *seq, *seq);
if (nextstate) {
seq++;
- if (!*seq) /* whole sequence matched */
+ seq_len--;
+ if (!seq_len) /* whole sequence matched */
return nextstate;
else
- return yaz_nfa_add_sequence(n, nextstate, seq);
+ return yaz_nfa_add_sequence(n, nextstate, seq,seq_len);
} else { /* no next state, build the rest */
- while (*seq) {
+ while (seq_len) {
s = yaz_nfa_add_range(n, s, *seq, *seq);
seq++;
+ seq_len--;
}
return s;
}
if (incharsleft) {
do {
t = t->next;
- if ( (( t->range_start <= *inchar ) && ( t->range_end >= *inchar )) ){
+ if ( (( t->range_start <= *inchar ) &&
+ ( t->range_end >= *inchar )) ){
m->empties = 0;
if (t->range_start!=t->range_end){
/* backref 0 is special: the last range operation */
m.longest=*inbuff;
m.bestnode = n->nstates;
m.result = 0;
- m.errorcode = 0;
+ m.errorcode = YAZ_NFA_SUCCESS;
m.empties = 0;
sz = sizeof( struct yaz_nfa_backref_info) * n->nbackrefs;
if (!n->curr_backrefs) {
}
match_state(n->firststate, *inbuff, *inbuff, *incharsleft, &m);
- if (m.result) {
- *incharsleft -= (m.longest-*inbuff);
- *result = m.result;
- *inbuff = m.longest;
- if (m.errorcode)
- n->lastmatch = m.errorcode;
- else
- n->lastmatch= YAZ_NFA_SUCCESS;
- return n->lastmatch;
+ if (m.errorcode==YAZ_NFA_SUCCESS) {
+ if (!m.result)
+ m.errorcode=YAZ_NFA_NOMATCH;
+ else {
+ *incharsleft -= (m.longest-*inbuff);
+ *result = m.result;
+ *inbuff = m.longest;
+ }
}
- n->lastmatch = YAZ_NFA_NOMATCH;
- return n->lastmatch;
+ n->lastmatch=m.errorcode;
+ return m.errorcode;
}
int backref_no,
yaz_nfa_char **start,
yaz_nfa_char **end) {
- if (backref_no>=n->nbackrefs)
- return 2;
- if (backref_no<0)
- return 2;
- if (n->lastmatch== YAZ_NFA_NOMATCH)
- return 1; /* accept other errors, they return partial matches*/
+ if (backref_no >= n->nbackrefs)
+ return YAZ_NFA_NOSUCHBACKREF;
+ if (backref_no < 0)
+ return YAZ_NFA_NOSUCHBACKREF;
+ if (n->lastmatch != YAZ_NFA_SUCCESS)
+ return YAZ_NFA_NOMATCH;
*start = n->best_backrefs[backref_no].start;
*end = n->best_backrefs[backref_no].end;
yaz_nfa_char *p=c->string;
while (sz--) {
if ((*outcharsleft)-- <= 0)
- return 2;
+ return YAZ_NFA_NOSPACE;
**outbuff=*p++;
(*outbuff)++;
}
- return 0;
+ return YAZ_NFA_SUCCESS;
}
static int backref_convert (
yaz_nfa *n,
size_t *outcharsleft){
yaz_nfa_char *cp1,*cp2;
int i;
- i=yaz_nfa_get_backref(n,c->backref_no, &cp1, &cp2);
- if (i==2) /* no backref, produce no output, that's ok */
- return 0;
- if (i==1) /* no match in dfa */
+ i = yaz_nfa_get_backref(n,c->backref_no, &cp1, &cp2);
+ if ( i == YAZ_NFA_NOSUCHBACKREF) /* no backref, produce no output */
+ return YAZ_NFA_SUCCESS;
+ if ( i == YAZ_NFA_NOMATCH ) /* no match in dfa */
return 1; /* should not happen */
- while (cp2>=cp1) {
+ while (cp2 >= cp1) {
if ((*outcharsleft)-- <= 0)
- return 2;
+ return YAZ_NFA_NOSPACE;
**outbuff=*cp1++;
(*outbuff)++;
}
- return 0;
+ return YAZ_NFA_SUCCESS;
}
static int range_convert (
yaz_nfa_converter *c,
yaz_nfa_char **outbuff,
size_t *outcharsleft){
- yaz_nfa_char *cp1,*cp2;
+ yaz_nfa_char *cp1=0, *cp2=0;
int i;
i = yaz_nfa_get_backref(n,c->backref_no, &cp1, &cp2);
- if (i == 2) /* no backref, produce no output, not ok */
- return 1; /* should not happen */
- if (i == 1) /* no match in dfa */
- return 1; /* should not happen */
+ if (i == YAZ_NFA_NOSUCHBACKREF) /* no backref, produce no output, not ok */
+ return YAZ_NFA_NOSUCHBACKREF; /* should not happen */
+ if (i == YAZ_NFA_NOMATCH) /* no match in dfa */
+ return YAZ_NFA_NOMATCH; /* should not happen */
while (cp2 >= cp1) {
if ((*outcharsleft)-- <= 0)
- return 2;
+ return YAZ_NFA_NOSPACE;
**outbuff=(*cp1++) + c->char_diff ;
(*outbuff)++;
}
- return 0;
+ return YAZ_NFA_SUCCESS;
}
rc=range_convert(n,c,outbuff,outcharsleft);
break;
default:
- rc=3; /* internal error */
+ rc=YAZ_NFA_INTERNAL; /* should never happen */
}
c=c->next;
}
return rc;
}
+/* * * * * * * *
+ * High-level interface
+ * These routines build the nfa and add converters, all
+ * in one go.
+ * * * * * * * */
+
+/* Add a rule that rewrites from_string (from_length chars) as to_string
+ * (to_length chars): the sequence is added from the top level of the NFA
+ * and a string converter is stored as the result of its final state.
+ * Returns whatever yaz_nfa_set_result returns. */
+int yaz_nfa_add_string_rule( yaz_nfa *n,
+        yaz_nfa_char *from_string,
+        size_t from_length,
+        yaz_nfa_char *to_string,
+        size_t to_length ) {
+    yaz_nfa_state *final_state;
+    yaz_nfa_converter *conv;
+
+    final_state = yaz_nfa_add_sequence(n, 0, from_string, from_length);
+    conv = yaz_nfa_create_string_converter(n, to_string, to_length);
+    return yaz_nfa_set_result(n, final_state, conv);
+}
+
+/* Convenience wrapper around yaz_nfa_add_string_rule for plain,
+ * NUL-terminated char strings. Both strings are widened into yaz_nfa_char
+ * buffers allocated from n->nmem and passed on with their strlen() lengths.
+ * Returns whatever yaz_nfa_add_string_rule returns. */
+int yaz_nfa_add_ascii_string_rule( yaz_nfa *n,
+        char *from_string,
+        char *to_string) {
+    size_t from_len = strlen(from_string);
+    size_t to_len = strlen(to_string);
+    yaz_nfa_char *from_buf=
+        nmem_malloc(n->nmem, from_len*sizeof(yaz_nfa_char));
+    yaz_nfa_char *to_buf=
+        nmem_malloc(n->nmem, to_len*sizeof(yaz_nfa_char));
+    size_t i; /* same type as the lengths: no signed/unsigned comparison */
+    /* go through unsigned char so that bytes >= 0x80 are not sign-extended
+     * into huge yaz_nfa_char values where plain char is signed */
+    for (i=0;i<from_len;i++)
+        from_buf[i]=(unsigned char)from_string[i];
+    for (i=0;i<to_len;i++)
+        to_buf[i]=(unsigned char)to_string[i];
+    return yaz_nfa_add_string_rule(n,from_buf, from_len,
+            to_buf, to_len);
+}
+
+/* Add a rule for the character range range_start..range_end: the range is
+ * added from the top level of the NFA and a range converter (created with
+ * backref 0, range_start and output_range_start) is stored as the result.
+ * Returns whatever yaz_nfa_set_result returns. */
+int yaz_nfa_add_char_range_rule (yaz_nfa *n,
+        yaz_nfa_char range_start,
+        yaz_nfa_char range_end,
+        yaz_nfa_char output_range_start) {
+    yaz_nfa_state *target;
+    yaz_nfa_converter *conv;
+
+    target = yaz_nfa_add_range(n, 0, range_start, range_end);
+    conv = yaz_nfa_create_range_converter(n, 0, range_start,
+                                          output_range_start);
+    return yaz_nfa_set_result(n, target, conv);
+}
+
+/* Add a rule that replaces any character in range_start..range_end with
+ * to_string (to_length chars): the range is added from the top level of
+ * the NFA and a string converter is stored as the result.
+ * Returns whatever yaz_nfa_set_result returns. */
+int yaz_nfa_add_char_string_rule (yaz_nfa *n,
+        yaz_nfa_char range_start,
+        yaz_nfa_char range_end,
+        yaz_nfa_char* to_string,
+        size_t to_length) {
+    yaz_nfa_state *target;
+    yaz_nfa_converter *conv;
+
+    target = yaz_nfa_add_range(n, 0, range_start, range_end);
+    conv = yaz_nfa_create_string_converter(n, to_string, to_length);
+    return yaz_nfa_set_result(n, target, conv);
+}
+
+
+/* Convert one slice of input.
+ *  - no room left in the output: YAZ_NFA_NOSPACE, nothing else is done
+ *  - no input left: YAZ_NFA_SUCCESS, nothing else is done
+ *  - otherwise run yaz_nfa_match; on success run the converters stored as
+ *    the match result, on YAZ_NFA_NOMATCH copy one character unchanged,
+ *    and on any other code hand that code back to the caller. */
+int yaz_nfa_convert_slice (yaz_nfa *n,
+        yaz_nfa_char **inbuff,
+        size_t *incharsleft,
+        yaz_nfa_char **outbuff,
+        size_t *outcharsleft) {
+    void *matched;
+    int status;
+
+    if (*outcharsleft == 0)
+        return YAZ_NFA_NOSPACE;  /* no room in outbuff */
+    if (*incharsleft == 0)
+        return YAZ_NFA_SUCCESS;  /* all done */
+
+    status = yaz_nfa_match(n, inbuff, incharsleft, &matched);
+    switch (status) {
+    case YAZ_NFA_SUCCESS:
+        /* the result pointer of a matching state is its converter chain */
+        return yaz_nfa_run_converters(n, (yaz_nfa_converter *)matched,
+                                      outbuff, outcharsleft);
+    case YAZ_NFA_NOMATCH:
+        /* no rule applies: pass one character through as it is */
+        **outbuff = **inbuff;
+        ++*outbuff;
+        ++*inbuff;
+        --*outcharsleft;
+        --*incharsleft;
+        return YAZ_NFA_SUCCESS;
+    default:
+        return status;           /* just return the error code */
+    }
+}
+
/* * * * * * * * *
* Debug routines
* * * * * * */
resultstring = s->result;
}
fprintf(F, " state [%d] %s %s",
- s->num, s->result?"(FINAL)":"", resultstring );
+ s->num, s->result?"(final)":"", resultstring );
if (s->backref_start) {
fprintf(F, " start-%d", s->backref_start);
}
/* Copyright (C) 2006, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: nfatest1.c,v 1.5 2006-05-05 14:02:27 heikki Exp $
+ * $Id: nfatest1.c,v 1.6 2006-05-10 13:58:47 heikki Exp $
*
*/
#include <yaz/nmem.h>
#include <yaz/test.h>
-#define VERBOSE 0
char *printfunc(void *result) {
static char buf[200];
int i, bi;
size_t buflen2 = buflen;
i = yaz_nfa_match(n,&c, &buflen2,&resptr);
-#if VERBOSE
- printf("\n'%s' returned %d. Moved c by %d, and resulted in '%s'\n",
+ if (yaz_test_get_verbosity()>3)
+ printf("\n'%s' returned %d. Moved c by %d, and resulted in '%s'\n",
expstr, i, (c-buf),(char*)resptr);
-#endif
YAZ_CHECK_EQ(buflen-buflen2, c-buf);
YAZ_CHECK_EQ(i, expcode);
- if (i!=1)
+ if (i==0)
YAZ_CHECK_EQ(strcmp(expstr,(char*)resptr), 0);
i = 0;
bi = 0;
- while(bi!=2){
+ while((bi!=2) && (yaz_test_get_verbosity()>3)){
bi = yaz_nfa_get_backref(n, i,&cp1,&cp2);
if (bi==0 && ( cp1 || cp2 ) ) {
-#if VERBOSE
printf(" got backref %d of %d chars (%p to %p): '",
i, cp2-cp1+1, cp1, cp2);
while (cp2-cp1 >= 0 )
printf("%c", *cp1++);
printf("'\n");
-#endif
}
i++;
}
YAZ_CHECK_EQ(i, 0);
i = yaz_nfa_set_result(n, s1, "DUPLICATE");
- YAZ_CHECK_EQ(i, 1);
+ YAZ_CHECK_EQ(i, YAZ_NFA_ALREADY);
p = yaz_nfa_get_result(n, s1);
YAZ_CHECK(p);
yaz_nfa_set_result(n, s, "y k+ d");
yaz_nfa_set_backref_point(n, s, 1, 0);
- s = yaz_nfa_add_sequence(n, 0, seq1 );
+ s = yaz_nfa_add_sequence(n, 0, seq1,6 );
yaz_nfa_set_result(n, s, "PREFIX");
- s = yaz_nfa_add_sequence(n, 0, seq2 );
+ s = yaz_nfa_add_sequence(n, 0, seq2,6 );
yaz_nfa_set_result(n, s, "PRELIM");
s = yaz_nfa_add_range(n, 0, 'x', 'x' );
- yaz_nfa_set_backref_point(n, s, 2, 1);
- s1 = yaz_nfa_add_sequence(n, s, tst4);
+ i=yaz_nfa_set_backref_point(n, s, 2, 0);
+ YAZ_CHECK_EQ(i,YAZ_NFA_NOSTART);
+ i=yaz_nfa_set_backref_point(n, s, 2, 1);
+ YAZ_CHECK_EQ(i,YAZ_NFA_SUCCESS);
+ i=yaz_nfa_set_backref_point(n, s, 2, 1);
+ YAZ_CHECK_EQ(i,YAZ_NFA_ALREADY);
+ s1 = yaz_nfa_add_sequence(n, s, tst4,2);
yaz_nfa_set_backref_point(n, s1, 2, 0);
yaz_nfa_set_result(n, s1, "xzk");
/* check return codes before doing any matches */
i = yaz_nfa_get_backref(n, 0, &cp1, &cp2 );
- YAZ_CHECK_EQ(i, 1);
+ YAZ_CHECK_EQ(i, YAZ_NFA_NOMATCH);
i = yaz_nfa_get_backref(n, 3, &cp1, &cp2 );
- YAZ_CHECK_EQ(i, 2);
+ YAZ_CHECK_EQ(i, YAZ_NFA_NOSUCHBACKREF );
i = yaz_nfa_get_backref(n, 1, &cp1, &cp2 );
- YAZ_CHECK_EQ(i, 1);
+ YAZ_CHECK_EQ(i, YAZ_NFA_NOMATCH );
-#if VERBOSE
- yaz_nfa_dump(0, n, printfunc);
-#endif
+ if (yaz_test_get_verbosity()>3)
+ yaz_nfa_dump(0, n, printfunc);
test_match(n, seq2, 3, YAZ_NFA_OVERRUN, "K-S");
test_match(n, seq2, 6, YAZ_NFA_SUCCESS, "PRELIM");
YAZ_CHECK_EQ(cp2-cp1+1,2);
YAZ_CHECK_EQ(*cp1, 'z' );
YAZ_CHECK_EQ(*cp2, 'k' );
-#if VERBOSE
- printf("backref from %p '%c' to %p '%c' is %d long. sz is now %d\n",
+ if (yaz_test_get_verbosity()>3)
+ printf("backref from %p '%c' to %p '%c' is %d long. sz is now %d\n",
cp1, *cp1, cp2, *cp2, cp2-cp1+1, sz );
-#endif
yaz_nfa_destroy(n);
}
yaz_nfa* n= yaz_nfa_init();
yaz_nfa_converter *c1, *c2, *c3;
yaz_nfa_char str1[]={'a','b','c'};
- yaz_nfa_char seq1[]={'A','B','C',0};
+ yaz_nfa_char seq1[]={'A','B','C'};
yaz_nfa_char seq2[]={'k','m','n','m','x','P','Q','X',0};
yaz_nfa_char outbuf[1024];
yaz_nfa_char *outp, *cp, *cp1, *cp2;
outp=outbuf;
sz=1;
i=yaz_nfa_run_converters(n, c1, &outp, &sz);
- YAZ_CHECK_EQ(i,2); /* overrun */
+ YAZ_CHECK_EQ(i,4); /* overrun */
YAZ_CHECK_EQ(outbuf[0],'a');
YAZ_CHECK_EQ(outbuf[1],10000+1);
s=yaz_nfa_add_state(n);
yaz_nfa_add_empty_transition(n,0,s);
yaz_nfa_set_backref_point(n,s,1,1);
- s=yaz_nfa_add_sequence(n, s, seq1 );
+ s=yaz_nfa_add_sequence(n, s, seq1,3 );
yaz_nfa_set_result(n,s,c1);
yaz_nfa_set_backref_point(n,s,1,0);
c1=yaz_nfa_create_backref_converter(n,2);
yaz_nfa_set_result(n,s,c1);
-#if VERBOSE
- yaz_nfa_dump(0,n, printfunc2);
-#endif
+ if (yaz_test_get_verbosity()>3)
+ yaz_nfa_dump(0,n, printfunc2);
cp=seq2;
sz=18;
c2=vp;
YAZ_CHECK_EQ(i,YAZ_NFA_SUCCESS);
i=yaz_nfa_get_backref(n, 2, &cp1, &cp2 );
-#if VERBOSE
- printf("backref from %p '%c' to %p '%c' is %d long. sz is now %d\n",
+ if (yaz_test_get_verbosity()>3)
+ printf("backref from %p '%c' to %p '%c' is %d long. sz is now %d\n",
cp1, *cp1, cp2, *cp2, cp2-cp1+1, sz );
-#endif
YAZ_CHECK_EQ(i,0);
YAZ_CHECK_EQ((int)c1,(int)c2); /* got our pointer back from nfa */
for(i=0;i<1024;i++)
yaz_nfa_destroy(n);
}
+/* Copy the NUL-terminated string `in` into a buffer of yaz_nfa_char
+ * allocated from nmem, one element per input char, including the
+ * terminating 0. Returns the start of the new buffer. */
+yaz_nfa_char *makebuff(NMEM nmem, char *in) {
+    /* +1: the copy loop below also stores the terminating 0, so the
+     * buffer needs one element more than strlen(in) */
+    yaz_nfa_char *buff = nmem_malloc(nmem, (strlen(in)+1)*sizeof(yaz_nfa_char));
+    yaz_nfa_char *op=buff;
+    while ( (*op++ = *in++) )
+        ;
+    return buff;
+}
+
+/* Print msg followed by the characters in [start, end) inside double
+ * quotes. Prints nothing unless the test verbosity is above 3. */
+void dumpbuff(char *msg, yaz_nfa_char *start, yaz_nfa_char *end) {
+    yaz_nfa_char *cursor;
+
+    if (yaz_test_get_verbosity() <= 3)
+        return;
+    printf("%s\"", msg);
+    for (cursor = start; cursor != end; cursor++)
+        printf("%c", *cursor);
+    printf("\"\n");
+}
+
+/* Compare the converted characters in [start, end) with the NUL-terminated
+ * string exp. Records a test failure at the first differing character, or
+ * when the converted text and exp have different lengths. */
+void chkbuff( yaz_nfa_char *start, yaz_nfa_char *end, char *exp) {
+    char *orig_exp=exp;
+    while (start!=end) {
+        /* hitting the 0 of exp means the output is longer than expected;
+         * treat it as a mismatch so we never read past the end of exp */
+        if ( *exp == 0 || *start != *exp ) {
+            if (yaz_test_get_verbosity()>3) {
+                printf ("chkbuff: unexpected conversion '%c' != '%c' \n"
+                        "\"%s\"\n", *start, *exp, orig_exp );
+            }
+            YAZ_CHECK(!"conversion differs! ");
+            return;
+        }
+        start++;
+        exp++;
+    }
+    /* output ended early if part of exp is still left over */
+    YAZ_CHECK(*exp == 0);
+}
+
+void high_level_test() { /* end-to-end check: builds rules with the yaz_nfa_add_*_rule calls, then converts fromtext slice by slice */
+ NMEM nmem=nmem_create();
+ yaz_nfa_char from1[] = {'f','o','o','b','a','r'};
+ yaz_nfa_char to1[] = {'f','u','b','a','r'};
+ yaz_nfa_char tospace[] = {' '};
+ yaz_nfa_char todot[] = {'.'};
+ char *fromtext =
+ "It was a Dark and Rainy Night, when alpha and beta "
+ "fixme - FIND better names ?? !! ## - "
+ "went out to fix the foobar "
+ "that was all foo.";
+ char *expected =
+ "IT WAS A DARK AND RAINY NIGHT. WHEN ALPHA AND b "
+ "to-be-fixed-later . FIND BETTER NAMES .. .. .. . "
+ "WENT OUT TO (fix) THE fubar "
+ "THAT WAS ALL FOO.";
+ yaz_nfa_char *from3 = makebuff(nmem,fromtext); /* fromtext as yaz_nfa_char */
+ yaz_nfa_char *to3 = nmem_malloc(nmem, 1024*sizeof(yaz_nfa_char)); /* output buffer, 1024 chars */
+ yaz_nfa_char *fromp=from3; /* read position, advanced by yaz_nfa_convert_slice */
+ yaz_nfa_char *top=to3; /* write position, advanced by yaz_nfa_convert_slice */
+ size_t insize=strlen(fromtext);
+ size_t outsize=1024;
+ size_t prev_insize=0;
+
+ yaz_nfa *n = yaz_nfa_init();
+ int i;
+ i = yaz_nfa_add_string_rule(n, from1, 6, to1, 5); /* "foobar" -> "fubar" */
+ YAZ_CHECK_EQ(i,0);
+ i = yaz_nfa_add_string_rule(n, from1, 6, to1, 5); /* same rule again: expected to be refused */
+ YAZ_CHECK_EQ(i,YAZ_NFA_ALREADY);
+ i = yaz_nfa_add_ascii_string_rule(n,"beta","b");
+ YAZ_CHECK_EQ(i,0);
+ i = yaz_nfa_add_ascii_string_rule(n,"fixme","to-be-fixed-later");
+ YAZ_CHECK_EQ(i,0);
+ i = yaz_nfa_add_ascii_string_rule(n,"fix","(fix)"); /* shares its prefix with the "fixme" rule */
+ YAZ_CHECK_EQ(i,0);
+ i = yaz_nfa_add_char_range_rule(n, 'a','z','A'); /* a-z onto the range starting at 'A' */
+ YAZ_CHECK_EQ(i,0);
+ i = yaz_nfa_add_char_string_rule(n, 0,' ', tospace,1); /* chars 0..' ' -> " " */
+ YAZ_CHECK_EQ(i,0);
+ i = yaz_nfa_add_char_string_rule(n, '!','/', todot,1); /* chars '!'..'/' -> "." */
+ YAZ_CHECK_EQ(i,0);
+ i = yaz_nfa_add_char_string_rule(n, ':','?', todot,1); /* chars ':'..'?' -> "." */
+ YAZ_CHECK_EQ(i,0);
+ if (yaz_test_get_verbosity()>3)
+ yaz_nfa_dump(0,n, printfunc2);
+
+ YAZ_CHECK_EQ( *from3, 'I' ); /* just to be sure my copy func works */
+ for (i=0;i<100;i++) /* pre-fill the first 100 output slots with marker values */
+ to3[i]=10000+i;
+ i=yaz_nfa_convert_slice(n, &fromp, &insize, &top, &outsize); /* first slice: the checks below expect one char in, 'I' out */
+ YAZ_CHECK_EQ(i,YAZ_NFA_SUCCESS);
+ YAZ_CHECK_EQ(*to3,'I');
+ YAZ_CHECK_EQ(insize, strlen(fromtext)-1);
+ YAZ_CHECK_EQ(outsize, 1024-1);
+
+ while ( (i==YAZ_NFA_SUCCESS) && (insize > 0) && (prev_insize!=insize) ) {
+ prev_insize=insize; /* detect dead loops if something goes wrong */
+ i=yaz_nfa_convert_slice(n, &fromp, &insize, &top, &outsize);
+ }
+ YAZ_CHECK_EQ(i,YAZ_NFA_SUCCESS);
+ YAZ_CHECK_EQ(insize,0);
+ YAZ_CHECK(prev_insize != insize); /* the loop would have been endless */
+
+ dumpbuff("Original text: ",from3, fromp);
+ dumpbuff("Converted text: ",to3, top);
+
+ chkbuff(to3, top, expected); /* compare everything written so far with the expected text */
+
+ yaz_nfa_destroy(n);
+ nmem_destroy(nmem);
+}
int main(int argc, char **argv)
{
nmem_init ();
construction_test();
converter_test();
+ high_level_test();
nmem_exit ();
YAZ_CHECK_TERM;
}