summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
60e5ce8)
is reduced from 6073 sec to 2578 sec (on Zebra 1.3.38 though).
-/* $Id: d1_absyn.c,v 1.28 2006-08-14 10:40:06 adam Exp $
+/* $Id: d1_absyn.c,v 1.29 2006-09-28 18:38:44 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
Copyright (C) 1995-2006
Index Data ApS
data1_xpelement *xpe = abs->xp_elements;
while (xpe) {
yaz_log (YLOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr);
data1_xpelement *xpe = abs->xp_elements;
while (xpe) {
yaz_log (YLOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr);
- if (xpe->dfa) { dfa_delete (&xpe->dfa); }
+ if (xpe->dfa)
+ dfa_delete (&xpe->dfa);
/* we have multiple versions of data1_getelementbyname */
#define DATA1_GETELEMENTBYTAGNAME_VERSION 1
/* we have multiple versions of data1_getelementbyname */
#define DATA1_GETELEMENTBYTAGNAME_VERSION 1
-#if DATA1_GETELEMENTBYTAGNAME_VERSION==0
-/* straight linear search */
-data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs,
- data1_element *parent,
- const char *tagname)
-{
- data1_element *r;
-
- /* It's now possible to have a data1 tree with no abstract syntax */
- if ( !abs )
- return 0;
-
- if (!parent)
- r = abs->main_elements;
- else
- r = parent->children;
-
- for (; r; r = r->next)
- {
- data1_name *n;
-
- for (n = r->tag->names; n; n = n->next)
- if (!data1_matchstr(tagname, n->name))
- return r;
- }
- return 0;
-}
-#endif
-
-#if DATA1_GETELEMENTBYTAGNAME_VERSION==1
-/* using hash search */
data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs,
data1_element *parent,
const char *tagname)
data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs,
data1_element *parent,
const char *tagname)
else
r = parent->children;
else
r = parent->children;
+#if DATA1_GETELEMENTBYTAGNAME_VERSION==1
+ /* using hash search */
if (!r)
return 0;
ht = r->hash;
if (!ht)
{
if (!r)
return 0;
ht = r->hash;
if (!ht)
{
+ /* build hash table (the first time) */
ht = r->hash = data1_hash_open(29, data1_nmem_get(dh));
for (; r; r = r->next)
{
ht = r->hash = data1_hash_open(29, data1_nmem_get(dh));
for (; r; r = r->next)
{
}
}
return data1_hash_lookup(ht, tagname);
}
}
return data1_hash_lookup(ht, tagname);
+#else
+ /* using linear search */
+ for (; r; r = r->next)
+ {
+ data1_name *n;
+
+ for (n = r->tag->names; n; n = n->next)
+ if (!data1_matchstr(tagname, n->name))
+ return r;
+ }
+ return 0;
data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn,
const char *name)
data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn,
const char *name)
int i;
char *p, *xpath_expr, *termlists;
const char *regexp;
int i;
char *p, *xpath_expr, *termlists;
const char *regexp;
- struct DFA *dfa = dfa = dfa_init();
data1_termlist **tp;
char melm_xpath[128];
data1_termlist **tp;
char melm_xpath[128];
+ data1_xpelement *xp_old = 0;
}
termlists = argv[2];
regexp = mk_xpath_regexp(dh, xpath_expr);
}
termlists = argv[2];
regexp = mk_xpath_regexp(dh, xpath_expr);
- i = dfa_parse (dfa, ®exp);
- if (i || *regexp) {
- yaz_log(YLOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno);
- dfa_delete (&dfa);
- continue;
- }
-
+
+#if OPTIMIZE_MELM
+ for (xp_old = res->xp_elements; xp_old; xp_old = xp_old->next)
+ if (!strcmp(xp_old->regexp, regexp))
+ break;
+#endif
+ if (!xp_old)
+ {
+ const char *regexp_ptr = regexp;
+
+ dfa = dfa_init();
+ i = dfa_parse (dfa, ®exp_ptr);
+ if (i || *regexp_ptr) {
+ yaz_log(YLOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno);
+ dfa_delete (&dfa);
+ continue;
+ }
+ }
if (!cur_xpelement)
{
cur_xpelement = (data1_xpelement *)
if (!cur_xpelement)
{
cur_xpelement = (data1_xpelement *)
nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement));
cur_xpelement = cur_xpelement->next;
}
nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement));
cur_xpelement = cur_xpelement->next;
}
+#if OPTIMIZE_MELM
+ cur_xpelement->regexp = regexp;
+#endif
cur_xpelement->next = NULL;
cur_xpelement->xpath_expr = nmem_strdup(data1_nmem_get (dh),
xpath_expr);
cur_xpelement->next = NULL;
cur_xpelement->xpath_expr = nmem_strdup(data1_nmem_get (dh),
xpath_expr);
- dfa_mkstate (dfa);
- cur_xpelement->dfa = dfa;
+ if (dfa)
+ dfa_mkstate (dfa);
+ cur_xpelement->dfa = dfa;
#ifdef ENHANCED_XELM
cur_xpelement->xpath_len =
#ifdef ENHANCED_XELM
cur_xpelement->xpath_len =
-/* $Id: dfa.c,v 1.37 2006-08-14 10:40:08 adam Exp $
+/* $Id: dfa.c,v 1.38 2006-09-28 18:38:45 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
Copyright (C) 1995-2006
Index Data ApS
dfa->parse_info->cmap_data = vp;
}
dfa->parse_info->cmap_data = vp;
}
+int dfa_get_last_rule (struct DFA *dfa)
+{
+ return dfa->parse_info->rule;
+}
+
int dfa_parse (struct DFA *dfa, const char **pattern)
{
struct Tnode *top;
int dfa_parse (struct DFA *dfa, const char **pattern)
{
struct Tnode *top;
-/* $Id: d1_absyn.h,v 1.6 2006-08-14 10:40:12 adam Exp $
+/* $Id: d1_absyn.h,v 1.7 2006-09-28 18:38:46 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
Copyright (C) 1995-2006
Index Data ApS
#define D1_ABSYN_H 1
#define ENHANCED_XELM 1
#define D1_ABSYN_H 1
#define ENHANCED_XELM 1
#include <zebra_xpath.h>
#include <idzebra/data1.h>
#include <zebra_xpath.h>
#include <idzebra/data1.h>
struct DFA *dfa;
data1_termlist *termlists;
struct data1_xpelement *next;
struct DFA *dfa;
data1_termlist *termlists;
struct data1_xpelement *next;
+#if OPTIMIZE_MELM
+ const char *regexp;
+#endif
+ int match_state;
} data1_xpelement;
struct data1_absyn
} data1_xpelement;
struct data1_absyn
-/* $Id: dfa.h,v 1.14 2006-08-14 10:40:12 adam Exp $
+/* $Id: dfa.h,v 1.15 2006-09-28 18:38:46 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
Copyright (C) 1995-2006
Index Data ApS
int dfa_parse (struct DFA *, const char **);
void dfa_mkstate (struct DFA *);
void dfa_delete (struct DFA **);
int dfa_parse (struct DFA *, const char **);
void dfa_mkstate (struct DFA *);
void dfa_delete (struct DFA **);
+int dfa_get_last_rule (struct DFA *);
void dfa_parse_cmap_clean (struct DFA *d);
void dfa_parse_cmap_new (struct DFA *d, const int *cmap);
void dfa_parse_cmap_clean (struct DFA *d);
void dfa_parse_cmap_new (struct DFA *d, const int *cmap);
-/* $Id: recgrs.c,v 1.5 2006-08-22 13:39:27 adam Exp $
+/* $Id: recgrs.c,v 1.6 2006-09-28 18:38:47 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
Copyright (C) 1995-2006
Index Data ApS
data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
{
data1_absyn *abs = n->root->u.root.absyn;
data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
{
data1_absyn *abs = n->root->u.root.absyn;
- data1_xpelement *xpe = abs->xp_elements;
+
+ data1_xpelement *xpe = 0;
data1_node *nn;
#ifdef ENHANCED_XELM
struct xpath_location_step *xp;
#endif
char *pexpr = xmalloc(strlen(tagpath)+5);
data1_node *nn;
#ifdef ENHANCED_XELM
struct xpath_location_step *xp;
#endif
char *pexpr = xmalloc(strlen(tagpath)+5);
sprintf (pexpr, "/%s\n", tagpath);
sprintf (pexpr, "/%s\n", tagpath);
- for (; xpe; xpe = xpe->next)
+
+ for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
+ xpe->match_state = -1; /* don't know if it matches yet */
+
+ for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
- ok = dfa_match_first(xpe->dfa->states, pexpr);
+ int ok = xpe->match_state;
+ if (ok == -1)
+ { /* don't know whether there is a match yet */
+ data1_xpelement *xpe1;
+
+ assert(xpe->dfa);
+ ok = dfa_match_first(xpe->dfa->states, pexpr);
+#if OPTIMIZE_MELM
+ /* mark this and following ones with same regexp */
+ for (xpe1 = xpe; xpe1; xpe1 = xpe1->next)
+ {
+ if (!strcmp(xpe1->regexp, xpe->regexp))
+ xpe1->match_state = ok;
+ }
+#endif
+ }
+ assert (ok == 0 || ok == 1);
if (ok) {
#ifdef ENHANCED_XELM
/* we have to check the perdicates up to the root node */
if (ok) {
#ifdef ENHANCED_XELM
/* we have to check the perdicates up to the root node */
yaz_log(YLOG_DEBUG, "Got it");
return xpe->termlists;
} else {
yaz_log(YLOG_DEBUG, "Got it");
return xpe->termlists;
} else {