From d61bd2d5c05bee9ef7acb0f5e817f10a2b3bd34e Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 29 Jun 2005 16:52:50 +0000 Subject: [PATCH] Fixed bug #317: xelm only indexes cdata at matching node (not children). --- data1/d1_absyn.c | 104 +++++++++++++++++++++++++++++------------------------- recctrl/recgrs.c | 67 +++++++++++++++++++---------------- 2 files changed, 92 insertions(+), 79 deletions(-) diff --git a/data1/d1_absyn.c b/data1/d1_absyn.c index af85c0a..4ab6d1a 100644 --- a/data1/d1_absyn.c +++ b/data1/d1_absyn.c @@ -1,4 +1,4 @@ -/* $Id: d1_absyn.c,v 1.9.2.2 2005-06-09 22:08:10 adam Exp $ +/* $Id: d1_absyn.c,v 1.9.2.3 2005-06-29 16:52:50 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -293,65 +293,73 @@ void fix_element_ref (data1_handle dh, data1_absyn *absyn, data1_element *e) */ -const char * mk_xpath_regexp (data1_handle dh, char *expr) +static const char * mk_xpath_regexp (data1_handle dh, const char *expr) { - char *p = expr; - char *pp; - char *s; + const char *p = expr; int abs = 1; int i; - int j; - int e=0; - int is_predicate = 0; + int e = 0; + char *stack[32]; + char *res_p, *res = 0; + size_t res_size = 1; - static char *stack[32]; - static char res[1024]; - char *r = ""; - - if (*p != '/') { return (""); } + if (*p != '/') + return (""); p++; - if (*p == '/') { abs=0; p++; } - - while (*p) { - i=0; - while (*p && !strchr("/",*p)) { - i++; p++; - } - stack[e] = (char *) nmem_malloc (data1_nmem_get (dh), i+1); + if (*p == '/') + { + abs =0; + p++; + } + while (*p) + { + int is_predicate = 0; + char *s; + int j; + for (i = 0; *p && !strchr("/",*p); i++, p++) + ; + res_size += (i+3); /* we'll add / between later .. */ + stack[e] = (char *) nmem_malloc(data1_nmem_get(dh), i+1); s = stack[e]; - for (j=0; j< i; j++) { - pp = p-i+j; - if (*pp == '[') { - is_predicate=1; - } - else if (*pp == ']') { - is_predicate=0; - } - else { - if (!is_predicate) { - if (*pp == '*') - *s++ = '.'; - *s++ = *pp; + for (j = 0; j < i; j++) + { + const char *pp = p-i+j; + if (*pp == '[') + is_predicate=1; + else if (*pp == ']') + is_predicate=0; + else + { + if (!is_predicate) { + if (*pp == '*') + *s++ = '.'; + *s++ = *pp; + } } - } } *s = 0; e++; - if (*p) {p++;} + if (*p) + p++; } - e--; p = &res[0]; i=0; - sprintf (p, "^"); p++; - while (e >= 0) { - /* !!! res size is not checked !!! */ - sprintf (p, "%s/",stack[e]); - p += strlen(stack[e]) + 1; - e--; + res_p = res = nmem_malloc(data1_nmem_get(dh), res_size + 10); + + i = 0; + sprintf(res_p, ".*/"); + res_p = res_p + strlen(res_p); + while (--e >= 0) { + sprintf(res_p, "%s/", stack[e]); + res_p += strlen(stack[e]) + 1; } - if (!abs) { sprintf (p, ".*"); p+=2; } - sprintf (p, "$"); p++; - r = nmem_strdup (data1_nmem_get (dh), res); - yaz_log(LOG_DEBUG,"Got regexp: %s",r); - return (r); + if (!abs) + { + sprintf(res_p, ".*"); + res_p += 2; + } + sprintf (res_p, "$"); + res_p++; + yaz_log(YLOG_DEBUG, "Got regexp: %s", res); + return res; } /* *ostrich* diff --git a/recctrl/recgrs.c b/recctrl/recgrs.c index 5565abb..7c886e8 100644 --- a/recctrl/recgrs.c +++ b/recctrl/recgrs.c @@ -1,4 +1,4 @@ -/* $Id: recgrs.c,v 1.86.2.2 2004-10-12 16:47:38 quinn Exp $ +/* $Id: recgrs.c,v 1.86.2.3 2005-06-29 16:52:50 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data Aps @@ -324,6 +324,33 @@ int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p) return 0; } +static int dfa_match_first(struct DFA_state **dfaar, const char *text) +{ + struct DFA_state *s = dfaar[0]; /* start state */ + struct DFA_tran *t; + int i; + const char *p = text; + unsigned char c; + + for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++) + if (c >= t->ch[0] && c <= t->ch[1]) + { + while (i >= 0) + { + /* move to next state and return if we get a match */ + s = dfaar[t->to]; + if (s->rule_no) + return 1; + /* next char */ + c = *p++; + for (t = s->trans, i = s->tran_no; --i >= 0; t++) + if (c >= t->ch[0] && c <= t->ch[1]) + break; + } + } + return 0; +} + /* *ostrich* @@ -351,43 +378,20 @@ data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n) struct xpath_location_step *xp; #endif - char *pexpr = xmalloc(strlen(tagpath)+2); + char *pexpr = xmalloc(strlen(tagpath)+5); int ok = 0; - - sprintf (pexpr, "%s\n", tagpath); + + sprintf (pexpr, "/%s\n", tagpath); yaz_log(LOG_DEBUG,"Checking tagpath %s",tagpath); while (xpe) { - struct DFA_state **dfaar = xpe->dfa->states; - struct DFA_state *s=dfaar[0]; - struct DFA_tran *t; - const char *p; - int i; - unsigned char c; - int start_line = 1; - - c = *pexpr++; t = s->trans; i = s->tran_no; - if ((c >= t->ch[0] && c <= t->ch[1]) || (!t->ch[0])) { - p = pexpr; - do { - if ((s = dfaar[t->to])->rule_no && - (start_line || s->rule_nno)) { - ok = 1; - break; - } - for (t=s->trans, i=s->tran_no; --i >= 0; t++) { - if ((unsigned) *p >= t->ch[0] && (unsigned) *p <= t->ch[1]) - break; - } - p++; - } while (i >= 0); - } + int i; + ok = dfa_match_first(xpe->dfa->states, pexpr); if (ok) - yaz_log(LOG_DEBUG," xpath match %s",xpe->xpath_expr); + yaz_log(YLOG_DEBUG, " xpath got match %s",xpe->xpath_expr); else - yaz_log(LOG_DEBUG," xpath no match %s",xpe->xpath_expr); + yaz_log(YLOG_DEBUG, " xpath no match %s",xpe->xpath_expr); - pexpr--; if (ok) { #ifdef ENHANCED_XELM /* we have to check the perdicates up to the root node */ @@ -1169,6 +1173,7 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) } tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank"); + if (tagname && p->score >= 0 && (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem))) { -- 1.7.10.4