-## $Id: Makefile.am,v 1.28 2006-05-02 20:47:45 adam Exp $
+## $Id: Makefile.am,v 1.29 2006-05-03 11:09:59 heikki Exp $
pkginclude_HEADERS= backend.h ccl.h cql.h comstack.h \
- diagbib1.h diagsrw.h sortspec.h log.h logrpn.h marcdisp.h nmem.h odr.h \
+ diagbib1.h diagsrw.h sortspec.h log.h logrpn.h marcdisp.h nfa.h \
+ nmem.h odr.h \
oid.h options.h otherinfo.h pquery.h prt-ext.h querytowrbuf.h \
readconf.h record_conv.h statserv.h \
tcpip.h test.h unix.h tpath.h wrbuf.h xmalloc.h \
/* Copyright (C) 2006, Index Data ApS
* See the file LICENSE for details.
- * $Id: nfa.h,v 1.1 2006-05-03 09:04:33 heikki Exp $
+ * $Id: nfa.h,v 1.2 2006-05-03 11:09:59 heikki Exp $
*/
/**
* Each state can be the beginning and/or ending of a backref
* sequence. This call sets those flags in the states. After matching,
* we can get hold of the backrefs that matched, and use them in our
- * translations.
+ * translations. The backrefs start at 1, not zero!
*
* \param n the nfa
* \param s the state to add to
* \param backref_number is the number of the back reference. 0 for clearing
* \param is_start is 1 for start of the backref, 0 for end
- * \return
- * 0 for OK
- * 1 if the backref is already set
- *
+ * \retval 0 for OK
+ * \retval 1 if the backref is already set
+ * \retval 2 for ending a backref that has not been started
*
*/
/* Copyright (C) 2006, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: nfa.c,v 1.1 2006-05-03 09:04:33 heikki Exp $
+ * $Id: nfa.c,v 1.2 2006-05-03 11:10:00 heikki Exp $
*
* This is a simple NFA-based system for character set normalizing
* in yaz and zebra. Unlike standard NFA's, this operates on ranges of
n->nmem=my_nmem;
n->nstates=0;
n->laststate=0;
+ n->firststate=0;
+ n->nbackrefs=0;
+ n->curr_backrefs=0;
+ n->best_backrefs=0;
return n;
}
if (s->backref_start)
return 1;
s->backref_start=backref_number;
- if (n->nbackrefs<backref_number)
+ if (n->nbackrefs<backref_number) {
n->nbackrefs=backref_number;
+ n->curr_backrefs=0;
+ n->best_backrefs=0;
+ /* clear them just in case we have already matched on */
+ /* with this nfa, and created a too small backref table */
+ /* we will reallocate when matching. */
+ }
} else {
if (s->backref_end)
return 1;
+ if (n->nbackrefs<backref_number)
+ return 2; /* can't end a backref that has not been started */
s->backref_end=backref_number;
}
return 0; /* ok */
* * * * * * */
struct matcher {
+ yaz_nfa *n;
yaz_nfa_char *longest;
int bestnode;
void *result;
};
/* Finds the longest match. In case of ties, chooses the one with the
- * lowest numbered state */
-static void match_state(yaz_nfa_state *s,
+ * lowest numbered state. Keep track of the back references. Recursively
+ * traverses the NFA. Keeps track of the best hit it has found. */
+
+static void match_state(
+ yaz_nfa_state *s,
yaz_nfa_char *inchar,
size_t incharsleft,
struct matcher *m ) {
yaz_nfa_transition *t=s->lasttrans;
-
+ if (s->backref_start)
+ m->n->curr_backrefs[s->backref_start].start=inchar;
+ if (s->backref_end)
+ m->n->curr_backrefs[s->backref_end].end=inchar;
if (t) {
if (incharsleft) {
do {
if ( (m->longest < inchar) || /* longer result */
((m->longest == inchar)&&(m->bestnode<s->num)) ){
/* or as long, but with lower node number. Still better */
+ int i;
m->longest=inchar;
m->bestnode=s->num;
m->result=s->result;
+ if (m->n->curr_backrefs)
+ for (i=0; i<m->n->nbackrefs;i++)
+ m->n->best_backrefs[i]=m->n->curr_backrefs[i];
}
}
+ if (s->backref_start)
+ m->n->curr_backrefs[s->backref_start].start=0;
+ if (s->backref_end)
+ m->n->curr_backrefs[s->backref_end].end=0;
} /* match_state */
int yaz_nfa_match(yaz_nfa *n,
struct matcher m;
if (!n->firststate)
return YAZ_NFA_NOMATCH;
+ m.n=n;
m.longest=*inbuff;
m.bestnode=n->nstates;
m.result=0;
m.errorcode=0;
- if (!n->curr_backrefs) {
- int sz=sizeof( struct yaz_nfa_backref_info);
- n->curr_backrefs=nmem_malloc( n->nmem, n->nbackrefs* sz);
- n->best_backrefs=nmem_malloc( n->nmem, n->nbackrefs* sz);
+ if ((!n->curr_backrefs) && (n->nbackrefs)){
+ int sz=sizeof( struct yaz_nfa_backref_info) * (n->nbackrefs+1);
+ n->curr_backrefs=nmem_malloc( n->nmem, sz);
+ n->best_backrefs=nmem_malloc( n->nmem, sz);
}