From 8e9d2cec97def1cd8b89b1fe427f1a232661ec6f Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 25 Jul 2008 09:00:04 +0200 Subject: [PATCH] First RPN to CQL conversion using actual attribute matching. --- include/yaz/rpn2cql.h | 10 ++++ include/yaz/yaz-iconv.h | 20 +++++++ src/cqltransform.c | 132 ++++++++++++++++++++++++++++++++--------------- src/matchstr.c | 16 +++--- src/pquery.c | 3 +- src/rpn2cql.c | 70 +++++++++++++++++++------ test/tst_rpn2cql.c | 47 ++++++++++++++++- 7 files changed, 226 insertions(+), 72 deletions(-) diff --git a/include/yaz/rpn2cql.h b/include/yaz/rpn2cql.h index 8491cee..26c72f7 100644 --- a/include/yaz/rpn2cql.h +++ b/include/yaz/rpn2cql.h @@ -64,6 +64,16 @@ int cql_transform_rpn2cql_wrbuf(cql_transform_t ct, WRBUF w, Z_RPNQuery *q); +/** \brief find a pattern that has a subset of attributes + \param ct CQL transform handle + \param category pattern category + \param attributes RPN attributes + \returns pattern name or NULL if not found + */ +const char *cql_lookup_reverse(cql_transform_t ct, + const char *category, + Z_AttributeList *attributes); + YAZ_END_CDECL #endif diff --git a/include/yaz/yaz-iconv.h b/include/yaz/yaz-iconv.h index 20d8a9e..4768805 100644 --- a/include/yaz/yaz-iconv.h +++ b/include/yaz/yaz-iconv.h @@ -73,8 +73,28 @@ YAZ_EXPORT int yaz_iconv_isbuiltin(yaz_iconv_t cd); */ YAZ_EXPORT int yaz_matchstr(const char *s1, const char *s2); +/** \brief match a and b with some delimiter for b + \param a first string + \param b second string + \param b_del delimiter for b + \retval 0 strings are similar + \retval !=0 strings are different +*/ YAZ_EXPORT int yaz_strcmp_del(const char *a, const char *b, const char *b_del); + +/** \brief compares two buffers of possibly different size + \param a first buffer + \param b second buffer + \param len_a length of first buffer + \param len_b length of second buffer + \retval 0 buffers are equal + \retval >0 a > b + \retval <0 a < b +*/ +int yaz_memcmp(const void *a, const void *b, 
size_t len_a, size_t len_b); + + /** \brief decodes UTF-8 sequence \param inp input buffer with UTF-8 bytes \param inbytesleft length of input buffer diff --git a/src/cqltransform.c b/src/cqltransform.c index cd735e4..dbdd8aa 100644 --- a/src/cqltransform.c +++ b/src/cqltransform.c @@ -31,15 +31,10 @@ #include #include -struct cql_rpn_value_entry { - Z_AttributeElement *elem; - struct cql_rpn_value_entry *next; -}; - struct cql_prop_entry { char *pattern; char *value; - struct cql_rpn_value_entry *attr_values; + Z_AttributeList attr_list; struct cql_prop_entry *next; }; @@ -69,11 +64,13 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, const char *pattern, yaz_tok_parse_t tp) { + int ae_num = 0; + Z_AttributeElement *ae[20]; int ret = 0; /* 0=OK, != 0 FAIL */ int t; t = yaz_tok_move(tp); - while (t == YAZ_TOK_STRING) + while (t == YAZ_TOK_STRING && ae_num < 20) { WRBUF type_str = wrbuf_alloc(); WRBUF set_str = 0; @@ -83,26 +80,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, elem = nmem_malloc(ct->nmem, sizeof(*elem)); elem->attributeSet = 0; -#if 0 - struct Z_ComplexAttribute { - int num_list; - Z_StringOrNumeric **list; - int num_semanticAction; - int **semanticAction; /* OPT */ - }; - - struct Z_AttributeElement { - Z_AttributeSetId *attributeSet; /* OPT */ - int *attributeType; - int which; - union { - int *numeric; - Z_ComplexAttribute *complex; -#define Z_AttributeValue_numeric 1 -#define Z_AttributeValue_complex 2 - } value; - }; -#endif + ae[ae_num] = elem; wrbuf_puts(ct->w, yaz_tok_parse_string(tp)); wrbuf_puts(type_str, yaz_tok_parse_string(tp)); t = yaz_tok_move(tp); @@ -182,6 +160,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, wrbuf_puts(ct->w, yaz_tok_parse_string(tp)); t = yaz_tok_move(tp); wrbuf_puts(ct->w, " "); + ae_num++; } if (ret == 0) /* OK? 
*/ { @@ -191,7 +170,29 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp)); (*pp)->pattern = xstrdup(pattern); (*pp)->value = xstrdup(wrbuf_cstr(ct->w)); + + (*pp)->attr_list.num_attributes = ae_num; + if (ae_num == 0) + (*pp)->attr_list.attributes = 0; + else + { + (*pp)->attr_list.attributes = + nmem_malloc(ct->nmem, + ae_num * sizeof(Z_AttributeElement *)); + memcpy((*pp)->attr_list.attributes, ae, + ae_num * sizeof(Z_AttributeElement *)); + } (*pp)->next = 0; + + if (0) + { + ODR pr = odr_createmem(ODR_PRINT); + Z_AttributeList *alp = &(*pp)->attr_list; + odr_setprint(pr, yaz_log_file()); + z_AttributeList(pr, &alp, 0, 0); + odr_setprint(pr, 0); + odr_destroy(pr); + } } return ret; } @@ -281,28 +282,75 @@ cql_transform_t cql_transform_open_fname(const char *fname) return ct; } -static const char *cql_lookup_reverse(cql_transform_t ct, - const char *category, - const char **attr_list, - int *matches) +#if 0 +struct Z_AttributeElement { + Z_AttributeSetId *attributeSet; /* OPT */ + int *attributeType; + int which; + union { + int *numeric; + Z_ComplexAttribute *complex; +#define Z_AttributeValue_numeric 1 +#define Z_AttributeValue_complex 2 + } value; +}; +#endif + +static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b) +{ + ODR odr_a = odr_createmem(ODR_ENCODE); + ODR odr_b = odr_createmem(ODR_ENCODE); + int len_a, len_b; + char *buf_a, *buf_b; + int ret; + + z_AttributeElement(odr_a, &a, 0, 0); + z_AttributeElement(odr_b, &b, 0, 0); + + buf_a = odr_getbuf(odr_a, &len_a, 0); + buf_b = odr_getbuf(odr_b, &len_b, 0); + + ret = yaz_memcmp(buf_a, buf_b, len_a, len_b); + + odr_destroy(odr_a); + odr_destroy(odr_b); + return ret; +} + +const char *cql_lookup_reverse(cql_transform_t ct, + const char *category, + Z_AttributeList *attributes) { struct cql_prop_entry *e; - size_t cat_len = strlen(category); - NMEM nmem = nmem_create(); + size_t clen = strlen(category); for (e = ct->entry; e; e = 
e->next) { - const char *dot_str = strchr(e->pattern, '.'); - int prefix_len = dot_str ? - prefix_len = dot_str - e->pattern : strlen(e->pattern); - if (cat_len == prefix_len && !memcmp(category, e->pattern, cat_len)) + if (!strncmp(e->pattern, category, clen)) { - char **attr_array; - int attr_num; - nmem_strsplit_blank(nmem, e->value, &attr_array, &attr_num); - nmem_reset(nmem); + /* category matches.. See if attributes in pattern value + are all listed in actual attributes */ + int i; + for (i = 0; i < e->attr_list.num_attributes; i++) + { + /* entry attribute */ + Z_AttributeElement *e_ae = e->attr_list.attributes[i]; + int j; + for (j = 0; j < attributes->num_attributes; j++) + { + /* actual attribute */ + Z_AttributeElement *a_ae = attributes->attributes[j]; + int r = compare_attr(e_ae, a_ae); + if (r == 0) + break; + } + if (j == attributes->num_attributes) + break; /* i was not found at all.. try next pattern */ + + } + if (i == e->attr_list.num_attributes) + return e->pattern; } } - nmem_destroy(nmem); return 0; } diff --git a/src/matchstr.c b/src/matchstr.c index b078150..e2a1da3 100644 --- a/src/matchstr.c +++ b/src/matchstr.c @@ -62,17 +62,15 @@ int yaz_strcmp_del(const char *a, const char *b, const char *b_del) return *a - *b; } -#ifdef __GNUC__ -#ifdef __CHECKER__ -void __assert_fail (const char *assertion, const char *file, - unsigned int line, const char *function) +int yaz_memcmp(const void *a, const void *b, size_t len_a, size_t len_b) { - fprintf (stderr, "%s in file %s line %d func %s\n", - assertion, file, line, function); - abort (); + size_t m_len = len_a < len_b ? 
len_a : len_b; + int r = memcmp(a, b, m_len); + if (r) + return r; + return len_a - len_b; } -#endif -#endif + /* * Local variables: * c-basic-offset: 4 diff --git a/src/pquery.c b/src/pquery.c index 23d7b64..36733a3 100644 --- a/src/pquery.c +++ b/src/pquery.c @@ -292,8 +292,7 @@ static Z_AttributesPlusTerm *rpn_term(struct yaz_pqf_parser *li, ODR o, Z_StringOrNumeric_string; elements[k]->value.complex->list[0]->u.string = attr_clist[i]; - elements[k]->value.complex->semanticAction = (int **) - odr_nullval(); + elements[k]->value.complex->semanticAction = 0; elements[k]->value.complex->num_semanticAction = 0; } else diff --git a/src/rpn2cql.c b/src/rpn2cql.c index 108c0ec..7a5b9ca 100644 --- a/src/rpn2cql.c +++ b/src/rpn2cql.c @@ -7,15 +7,6 @@ * \file * \brief Implements RPN to CQL conversion * - * Evaluation order of rules: - * - * always - * relation - * structure - * position - * truncation - * index - * relationModifier */ #include @@ -28,14 +19,41 @@ #include static int rpn2cql_attr(cql_transform_t ct, - void (*pr)(const char *buf, void *client_data), - void *client_data, Z_AttributeList *attributes, WRBUF w) { - int i; - for (i = 0; i < attributes->num_attributes; i++) + const char *relation = cql_lookup_reverse(ct, "relation.", attributes); + const char *index = cql_lookup_reverse(ct, "index.", attributes); + const char *structure = cql_lookup_reverse(ct, "structure.", attributes); + if (index && strcmp(index, "index.cql.serverChoice")) { - Z_AttributeElement *elem = attributes->attributes[i]; + wrbuf_puts(w, index+6); + if (relation) + { + relation += 9; + + if (!strcmp(relation, "exact")) + relation = "=="; + else if (!strcmp(relation, "eq")) + relation = "="; + else if (!strcmp(relation, "le")) + relation = "<="; + else if (!strcmp(relation, "ge")) + relation = ">="; + wrbuf_puts(w, relation); + } + else + wrbuf_puts(w, "="); + + if (structure) + { + structure += 10; + if (strcmp(structure, "*")) + { + wrbuf_puts(w, "/"); + wrbuf_puts(w, structure); + 
wrbuf_puts(w, " "); + } + } } return 0; } @@ -55,25 +73,43 @@ static int rpn2cql_simple(cql_transform_t ct, { Z_AttributesPlusTerm *apt = q->u.attributesPlusTerm; Z_Term *term = apt->term; + const char *sterm = 0; + size_t lterm = 0; wrbuf_rewind(w); - ret = rpn2cql_attr(ct, pr, client_data, apt->attributes, w); + ret = rpn2cql_attr(ct, apt->attributes, w); switch(term->which) { case Z_Term_general: - wrbuf_write(w, (const char *) term->u.general->buf, term->u.general->len); + lterm = term->u.general->len; + sterm = (const char *) term->u.general->buf; break; case Z_Term_numeric: wrbuf_printf(w, "%d", *term->u.numeric); break; case Z_Term_characterString: - wrbuf_puts(w, term->u.characterString); + sterm = term->u.characterString; + lterm = strlen(sterm); break; default: ret = -1; cql_transform_set_error(ct, YAZ_BIB1_TERM_TYPE_UNSUPP, 0); } + + if (term) + { + int i; + int must_quote = 0; + for (i = 0 ; i < lterm; i++) + if (sterm[i] == ' ') + must_quote = 1; + if (must_quote) + wrbuf_puts(w, "\""); + wrbuf_write(w, sterm, lterm); + if (must_quote) + wrbuf_puts(w, "\""); + } if (ret == 0) pr(wrbuf_cstr(w), client_data); } diff --git a/test/tst_rpn2cql.c b/test/tst_rpn2cql.c index 2ef5d0e..29e4616 100644 --- a/test/tst_rpn2cql.c +++ b/test/tst_rpn2cql.c @@ -34,7 +34,9 @@ static int compare(cql_transform_t ct, const char *pqf, const char *cql) { yaz_log(YLOG_LOG, "%s -> %s", pqf, wrbuf_cstr(w)); if (cql && !strcmp(wrbuf_cstr(w), cql)) + { ret = 1; + } } } wrbuf_destroy(w); @@ -42,19 +44,60 @@ static int compare(cql_transform_t ct, const char *pqf, const char *cql) return ret; } -static void tst(void) +static void tst1(void) { cql_transform_t ct = cql_transform_create(); + YAZ_CHECK(compare(ct, "abc", "abc")); + YAZ_CHECK(compare(ct, "\"a b c\"", "\"a b c\"")); YAZ_CHECK(compare(ct, "@and a b", "a and b")); + YAZ_CHECK(compare(ct, "@attr 1=4 abc", "abc")); + + cql_transform_define_pattern(ct, "index.title", "1=4"); + YAZ_CHECK(compare(ct, "@attr 1=4 abc", 
"title=abc")); + + cql_transform_define_pattern(ct, "index.foo", "1=bar"); + YAZ_CHECK(compare(ct, "@attr 1=bar abc", "foo=abc")); + + cql_transform_close(ct); +} + +static void tst2(void) +{ + WRBUF w = wrbuf_alloc(); + cql_transform_t ct = 0; + const char *srcdir = getenv("srcdir"); + if (srcdir) + { + wrbuf_puts(w, srcdir); + wrbuf_puts(w, "/"); + } + wrbuf_puts(w, "../etc/pqf.properties"); + + ct = cql_transform_open_fname(wrbuf_cstr(w)); + YAZ_CHECK(compare(ct, "@attr 1=4 abc", "dc.title=abc")); + YAZ_CHECK(compare(ct, "@attr 1=4 @attr 4=108 abc", "dc.title=/exact abc")); + YAZ_CHECK(compare(ct, "@attr 1=4 @attr 3=1 @attr 6=1 abc", "dc.title=abc")); + YAZ_CHECK(compare(ct, "@attr 1=4 @attr 4=1 @attr 6=1 abc", + "dc.title=abc")); + YAZ_CHECK(compare(ct, "@attr 1=1016 abc", "abc")); + YAZ_CHECK(compare(ct, "@attr 2=1 @attr 1=30 1980", "dc.date<1980")); + YAZ_CHECK(compare(ct, "@attr 1=30 @attr 2=3 1980", "dc.date=1980")); + YAZ_CHECK(compare(ct, "@attr 1=30 @attr 2=5 1980", "dc.date>1980")); + YAZ_CHECK(compare(ct, "@attr 1=30 @attr 2=2 1980", "dc.date<=1980")); + YAZ_CHECK(compare(ct, "@attr 1=30 @attr 2=4 1980", "dc.date>=1980")); + + YAZ_CHECK(compare(ct, "@attr 2=103 @attr 1=_ALLRECORDS 1", "cql.allRecords=1")); cql_transform_close(ct); + wrbuf_destroy(w); } int main (int argc, char **argv) { YAZ_CHECK_INIT(argc, argv); YAZ_CHECK_LOG(); - tst(); + tst1(); + tst2(); YAZ_CHECK_TERM; } -- 1.7.10.4