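@stop.field t1 t2 t3 .. All terms in field that match one of the listed
stop words t1, t2, .. are removed from the resulting RPN. Use @stop.* to
give a default list for fields that have no stop list of their own.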
# CCL field mappings
-# $Id: default.bib,v 1.10 2007-04-24 12:57:52 adam Exp $
+# $Id: default.bib,v 1.11 2007-04-30 19:55:39 adam Exp $
#
# The rule below is used when no fields are specified
term t=l,r s=al
clean t=l,r
#
# Rules for some BIB-1 fields
+
au u=1 s=pw
ti u=4 s=pw
isbn u=7
DateChanged exp1,1=10
DateExpires exp1,1=11
ElementSetName exp1,1=12
+
+# Define or operator
+@or or
+# Define and operator
+@and and
+# Define and not operator
+@not not andnot
+
+# Whether CCL is case sensitive or not.
+@case 1
+
+# default stop words
+# @stop.* the of a
+
+# stop words for ti
+# @stop.ti art
/*
* CCL - header file
*
- * $Id: ccl.h,v 1.27 2007-04-30 11:33:49 adam Exp $
+ * $Id: ccl.h,v 1.28 2007-04-30 19:55:39 adam Exp $
*
* Old Europagate Log:
*
YAZ_EXPORT
struct ccl_rpn_node *ccl_parser_find_str(CCL_parser cclp, const char *str);
-/** Set names for AND operator in parser */
-YAZ_EXPORT
-void ccl_parser_set_op_and(CCL_parser p, const char *op);
-
-/** Set names for OR operator in parser */
-YAZ_EXPORT
-void ccl_parser_set_op_or(CCL_parser p, const char *op);
-
-/** Set names for ANDNOT operator in parser */
-YAZ_EXPORT
-void ccl_parser_set_op_not(CCL_parser p, const char *op);
-
-/** Set names for ResultSet in parser */
-YAZ_EXPORT
-void ccl_parser_set_op_set(CCL_parser p, const char *op);
-
/** Set case sensitivity for parser */
YAZ_EXPORT
void ccl_parser_set_case(CCL_parser p, int case_sensitivity_flag);
/** Add special qualifier */
YAZ_EXPORT
-void ccl_qual_add_special(CCL_bibset bibset, const char *n, const char *v);
+void ccl_qual_add_special(CCL_bibset bibset, const char *n, const char *cp);
/** Add combo qualifier */
YAZ_EXPORT
-void ccl_qual_add_combi(CCL_bibset b, const char *n, const char *names);
+void ccl_qual_add_combi(CCL_bibset b, const char *n, const char **names);
/** Read CCL qualifier list spec from file inf */
YAZ_EXPORT
/** Search for special qualifier */
YAZ_EXPORT
-const char *ccl_qual_search_special(CCL_bibset b, const char *name);
+const char **ccl_qual_search_special(CCL_bibset b, const char *name);
/** Pretty-print CCL RPN node tree to WRBUF */
YAZ_EXPORT
void ccl_pquery(WRBUF w, struct ccl_rpn_node *p);
/* CCL find (to rpn conversion)
* Europagate, 1995
*
- * $Id: cclfind.c,v 1.12 2007-04-30 11:33:49 adam Exp $
+ * $Id: cclfind.c,v 1.13 2007-04-30 19:55:40 adam Exp $
*
* Old Europagate log:
*
int and_list = 0;
int or_list = 0;
char *attset;
- const char *truncation_aliases;
+ const char **truncation_aliases;
+ const char *t_default[2];
truncation_aliases =
ccl_qual_search_special(cclp->bibset, "truncation");
if (!truncation_aliases)
- truncation_aliases = "?";
+ {
+ truncation_aliases = t_default;
+ t_default[0] = "?";
+ t_default[1] = 0;
+ }
if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0))
and_list = 1;
for (i = 0; i<lookahead->len; i++)
if (lookahead->name[i] == ' ')
no_spaces++;
- else if (strchr(truncation_aliases, lookahead->name[i]))
+ else if (strchr(truncation_aliases[0], lookahead->name[i]))
{
if (no == 0 && i == 0 && lookahead->len >= 1)
left_trunc = 1;
if (len == 0)
break; /* no more terms . stop . */
-
-
- if (p_top)
- {
- if (or_list)
- p = ccl_rpn_node_create(CCL_RPN_OR);
- else if (and_list)
- p = ccl_rpn_node_create(CCL_RPN_AND);
- else
- p = ccl_rpn_node_create(CCL_RPN_AND);
- p->u.p[0] = p_top;
- p_top = p;
- }
/* create the term node, but wait a moment before adding the term */
p = ccl_rpn_node_create(CCL_RPN_TERM);
p->u.t.attr_list = NULL;
p->u.t.term = NULL;
- /* make the top node point to us.. */
- if (p_top)
- p_top->u.p[1] = p;
- else
- p_top = p;
-
-
/* go through all attributes and add them to the attribute list */
for (i=0; qa && qa[i]; i++)
{
for (i = 0; i<no; i++)
{
const char *src_str = cclp->look_token->name;
- int src_len = cclp->look_token->len;
+ size_t src_len = cclp->look_token->len;
if (i == 0 && left_trunc)
{
}
if (i == no-1 && right_trunc)
src_len--;
+ if (!ccl_qual_match_stop(cclp->bibset, qa, src_str, src_len))
+ {
#if 0
- fprintf(stderr, "[%s %.*s]",
- ccl_qual_get_name(qa[0]), src_len, src_str);
+ fprintf(stderr, "[%s %.*s]",
+ ccl_qual_get_name(qa[0]), src_len, src_str);
#endif
- if (i && cclp->look_token->ws_prefix_len)
- {
- size_t len = strlen(p->u.t.term);
- memcpy(p->u.t.term + len, cclp->look_token->ws_prefix_buf,
- cclp->look_token->ws_prefix_len);
- p->u.t.term[len + cclp->look_token->ws_prefix_len] = '\0';
+ if (p->u.t.term[0] && cclp->look_token->ws_prefix_len)
+ {
+ size_t len = strlen(p->u.t.term);
+ memcpy(p->u.t.term + len, cclp->look_token->ws_prefix_buf,
+ cclp->look_token->ws_prefix_len);
+ p->u.t.term[len + cclp->look_token->ws_prefix_len] = '\0';
+ }
+ strxcat(p->u.t.term, src_str, src_len);
}
- strxcat(p->u.t.term, src_str, src_len);
ADVANCE;
}
+
+ if (p->u.t.term[0] == 0)
+ {
+ ccl_rpn_delete(p);
+ continue;
+ }
+
+ /* make the top node point to us.. */
+ if (p_top)
+ {
+ struct ccl_rpn_node *tmp;
+
+ if (or_list)
+ tmp = ccl_rpn_node_create(CCL_RPN_OR);
+ else if (and_list)
+ tmp = ccl_rpn_node_create(CCL_RPN_AND);
+ else
+ tmp = ccl_rpn_node_create(CCL_RPN_AND);
+ tmp->u.p[0] = p_top;
+ tmp->u.p[1] = p;
+
+ p_top = tmp;
+ }
+ else
+ p_top = p;
+
+
if (left_trunc && right_trunc)
{
if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH,
* qa: Qualifier attributes already applied.
* return: pointer to node(s); NULL on error.
*/
-static struct ccl_rpn_node *qualifier_list(CCL_parser cclp, struct ccl_token *la,
- ccl_qualifier_t *qa)
+static struct ccl_rpn_node *qualifier_list(CCL_parser cclp,
+ struct ccl_token *la,
+ ccl_qualifier_t *qa)
{
struct ccl_token *lookahead = cclp->look_token;
struct ccl_token *look_start = cclp->look_token;
ccl_qualifier_t *ap;
struct ccl_rpn_node *node = 0;
- const char *field_str;
+ const char **field_str;
int no = 0;
int seq = 0;
int i;
field_str = ccl_qual_search_special(cclp->bibset, "field");
if (field_str)
{
- if (!strcmp(field_str, "or"))
+ if (!strcmp(field_str[0], "or"))
mode_merge = 0;
- else if (!strcmp(field_str, "merge"))
+ else if (!strcmp(field_str[0], "merge"))
mode_merge = 1;
}
if (!mode_merge)
* Copyright (C) 1995-2005, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: cclp.h,v 1.3 2007-04-30 11:33:49 adam Exp $
+ * $Id: cclp.h,v 1.4 2007-04-30 19:55:40 adam Exp $
*/
/**
CCL_bibset bibset;
/** names of and operator */
- char *ccl_token_and;
+ const char **ccl_token_and;
/** names of or operator */
- char *ccl_token_or;
+ const char **ccl_token_or;
/** names of not operator */
- char *ccl_token_not;
+ const char **ccl_token_not;
/** names of set operator */
- char *ccl_token_set;
+ const char **ccl_token_set;
/** 1=CCL parser is case sensitive, 0=case insensitive */
int ccl_case_sensitive;
};
YAZ_EXPORT
const char *ccl_qual_get_name(ccl_qualifier_t q);
+YAZ_EXPORT
+int ccl_qual_match_stop(CCL_bibset bibset, ccl_qualifier_t *qa,
+ const char *src_str, size_t src_len);
+
/*
* Local variables:
* c-basic-offset: 4
/* CCL qualifiers
* Europagate, 1995
*
- * $Id: cclqfile.c,v 1.10 2007-04-27 10:09:45 adam Exp $
+ * $Id: cclqfile.c,v 1.11 2007-04-30 19:55:40 adam Exp $
*
* Old Europagate Log:
*
{
/* lead is the first of a list of qualifier aliases */
/* qualifier alias: q1 q2 ... */
- xfree(lead_str);
+ char *qlist[10];
+ int i = 0;
+
+ qlist[i++] = lead_str;
+
+ while ((t=yaz_tok_move(tp)) == YAZ_TOK_STRING)
+ {
+ if (i < sizeof(qlist)/sizeof(*qlist)-1)
+ qlist[i++] = xstrdup(yaz_tok_parse_string(tp));
+ }
+ qlist[i] = 0;
yaz_tok_parse_destroy(tp);
- ccl_qual_add_combi (bibset, qual_name, cp);
+ ccl_qual_add_combi (bibset, qual_name, (const char **) qlist);
+ for (i = 0; qlist[i]; i++)
+ xfree(qlist[i]);
return 0;
}
while (1) /* comma separated attribute value list */
/* CCL qualifiers
* Europagate, 1995
*
- * $Id: cclqual.c,v 1.8 2007-04-30 11:33:49 adam Exp $
+ * $Id: cclqual.c,v 1.9 2007-04-30 19:55:40 adam Exp $
*
* Old Europagate Log:
*
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-
+#include <yaz/snprintf.h>
+#include <yaz/tokenizer.h>
#include "cclp.h"
/** CCL Qualifier */
/** CCL Qualifier special */
struct ccl_qualifier_special {
char *name;
- char *value;
+ const char **values;
struct ccl_qualifier_special *next;
};
return q;
}
-/** \brief specifies special qualifier
- \param bibset Bibset
- \param n name of special (without leading @)
- \param v value of special
-*/
-void ccl_qual_add_special(CCL_bibset bibset, const char *n, const char *v)
+void ccl_qual_add_special_ar(CCL_bibset bibset, const char *n,
+ const char **values)
{
struct ccl_qualifier_special *p;
- const char *pe;
-
for (p = bibset->special; p && strcmp(p->name, n); p = p->next)
;
if (p)
- xfree(p->value);
+ {
+ if (p->values)
+ {
+ int i;
+ for (i = 0; p->values[i]; i++)
+ xfree((char *) p->values[i]);
+ xfree(p->values);
+ }
+ }
else
{
p = (struct ccl_qualifier_special *) xmalloc(sizeof(*p));
p->name = xstrdup(n);
- p->value = 0;
p->next = bibset->special;
bibset->special = p;
}
- while (strchr(" \t", *v))
- ++v;
- for (pe = v + strlen(v); pe != v; --pe)
- if (!strchr(" \n\r\t", pe[-1]))
- break;
- p->value = (char*) xmalloc(pe - v + 1);
- if (pe - v)
- memcpy(p->value, v, pe - v);
- p->value[pe - v] = '\0';
+ p->values = values;
}
-static int next_token(const char **cpp, const char **dst)
+void ccl_qual_add_special(CCL_bibset bibset, const char *n, const char *cp)
{
- int len = 0;
- const char *cp = *cpp;
- while (*cp && strchr(" \r\n\t\f", *cp))
- cp++;
- if (dst)
- *dst = cp;
- len = 0;
- while (*cp && !strchr(" \r\n\t\f", *cp))
+ size_t no = 2;
+ char **vlist = xmalloc(no * sizeof(*vlist));
+ yaz_tok_cfg_t yt = yaz_tok_cfg_create();
+ int t;
+ int i = 0;
+
+ yaz_tok_parse_t tp = yaz_tok_parse_buf(yt, cp);
+
+ yaz_tok_cfg_destroy(yt);
+
+ t = yaz_tok_move(tp);
+ while (t == YAZ_TOK_STRING)
{
- cp++;
- len++;
+ if (i >= no-1)
+ vlist = xrealloc(vlist, (no = no * 2) * sizeof(*vlist));
+ vlist[i++] = xstrdup(yaz_tok_parse_string(tp));
+ t = yaz_tok_move(tp);
}
- *cpp = cp;
- return len;
+ vlist[i] = 0;
+ ccl_qual_add_special_ar(bibset, n, (const char **) vlist);
+
+ yaz_tok_parse_destroy(tp);
}
+
/** \brief specifies qualifier aliases
\param b bibset
\param n qualifier name
\param names list of qualifier aliases
*/
-void ccl_qual_add_combi(CCL_bibset b, const char *n, const char *names)
+void ccl_qual_add_combi(CCL_bibset b, const char *n, const char **names)
{
- const char *cp, *cp1;
- int i, len;
+ int i;
struct ccl_qualifier *q;
for (q = b->list; q && strcmp(q->name, n); q = q->next)
;
q->next = b->list;
b->list = q;
- cp = names;
- for (i = 0; next_token(&cp, 0); i++)
+ for (i = 0; names[i]; i++)
;
q->no_sub = i;
- q->sub = (struct ccl_qualifier **) xmalloc(sizeof(*q->sub) *
- (1+q->no_sub));
- cp = names;
- for (i = 0; (len = next_token(&cp, &cp1)); i++)
- {
- q->sub[i] = ccl_qual_lookup(b, cp1, len);
- }
+ q->sub = (struct ccl_qualifier **)
+ xmalloc(sizeof(*q->sub) * (1+q->no_sub));
+ for (i = 0; names[i]; i++)
+ q->sub[i] = ccl_qual_lookup(b, names[i], strlen(names[i]));
}
/** \brief adds specifies attributes for qualifier
{
sp1 = sp->next;
xfree(sp->name);
- xfree(sp->value);
+ if (sp->values)
+ {
+ int i;
+ for (i = 0; sp->values[i]; i++)
+ xfree((char*) sp->values[i]);
+ xfree(sp->values);
+ }
xfree(sp);
}
xfree(*b);
size_t name_len, int seq)
{
struct ccl_qualifier *q = 0;
- const char *aliases;
+ const char **aliases;
int case_sensitive = cclp->ccl_case_sensitive;
ccl_assert(cclp);
aliases = ccl_qual_search_special(cclp->bibset, "case");
if (aliases)
- case_sensitive = atoi(aliases);
+ case_sensitive = atoi(aliases[0]);
for (q = cclp->bibset->list; q; q = q->next)
if (strlen(q->name) == name_len)
return q->name;
}
-const char *ccl_qual_search_special(CCL_bibset b, const char *name)
+const char **ccl_qual_search_special(CCL_bibset b, const char *name)
{
struct ccl_qualifier_special *q;
if (!b)
for (q = b->special; q && strcmp(q->name, name); q = q->next)
;
if (q)
- return q->value;
+ return q->values;
return 0;
}
+
+/** \brief checks whether a term is a stop word for a qualifier
+    \param bibset Bibset whose "stop.<name>" / "stop.*" specials are searched
+    \param qa qualifier list; only the first entry is consulted. May be
+              NULL or empty, in which case nothing matches.
+    \param src_str term to test (length given by src_len)
+    \param src_len length of src_str in bytes
+    \retval 1 term equals one of the stop words
+    \retval 0 no stop list applies or term is not in it
+*/
+int ccl_qual_match_stop(CCL_bibset bibset, ccl_qualifier_t *qa,
+                        const char *src_str, size_t src_len)
+{
+    char qname[80];
+    const char **slist;
+    int i;
+
+    /* the term parser treats qa as possibly NULL, so do not
+       dereference it blindly */
+    if (!qa || !qa[0])
+        return 0;
+
+    /* a list for the specific qualifier takes precedence .. */
+    yaz_snprintf(qname, sizeof(qname)-1, "stop.%s",
+                 ccl_qual_get_name(qa[0]));
+    slist = ccl_qual_search_special(bibset, qname);
+    /* .. otherwise fall back to the default list */
+    if (!slist)
+        slist = ccl_qual_search_special(bibset, "stop.*");
+    if (!slist)
+        return 0;
+
+    for (i = 0; slist[i]; i++)
+        if (src_len == strlen(slist[i])
+            && ccl_memicmp(slist[i], src_str, src_len) == 0)
+            return 1;
+    return 0;
+}
+
+
+
+
/*
* Local variables:
* c-basic-offset: 4
/* CCL - lexical analysis
* Europagate, 1995
*
- * $Id: ccltoken.c,v 1.11 2007-04-26 09:11:56 adam Exp $
+ * $Id: ccltoken.c,v 1.12 2007-04-30 19:55:40 adam Exp $
*
* Old Europagate Log:
*
* return: 1 if token string matches one of the keywords in list;
* 0 otherwise.
*/
-static int token_cmp(CCL_parser cclp, const char *kw, struct ccl_token *token)
+static int token_cmp(CCL_parser cclp, const char **kw, struct ccl_token *token)
{
- const char *cp1 = kw;
- const char *cp2;
- const char *aliases;
+ const char **aliases;
int case_sensitive = cclp->ccl_case_sensitive;
+ int i;
aliases = ccl_qual_search_special(cclp->bibset, "case");
if (aliases)
- case_sensitive = atoi(aliases);
- if (!kw)
- return 0;
- while ((cp2 = strchr(cp1, ' ')))
+ case_sensitive = atoi(aliases[0]);
+
+ for (i = 0; kw[i]; i++)
{
- if (token->len == (size_t) (cp2-cp1))
+ if (token->len == strlen(kw[i]))
{
if (case_sensitive)
{
- if (!memcmp(cp1, token->name, token->len))
+ if (!memcmp(kw[i], token->name, token->len))
return 1;
}
else
{
- if (!ccl_memicmp(cp1, token->name, token->len))
+ if (!ccl_memicmp(kw[i], token->name, token->len))
return 1;
}
}
- cp1 = cp2+1;
}
- if (case_sensitive)
- return token->len == strlen(cp1)
- && !memcmp(cp1, token->name, token->len);
- return token->len == strlen(cp1) &&
- !ccl_memicmp(cp1, token->name, token->len);
+ return 0;
}
/*
*/
struct ccl_token *ccl_parser_tokenize(CCL_parser cclp, const char *command)
{
- const char *aliases;
+ const char **aliases;
const unsigned char *cp = (const unsigned char *) command;
struct ccl_token *first = NULL;
struct ccl_token *last = NULL;
}
}
+/* Builds a heap-allocated, NULL-terminated array holding copies of v1
+   and, when v2 is non-NULL, v2. Release it with destroy_ar. */
+static const char **create_ar(const char *v1, const char *v2)
+{
+    int n = 0;
+    const char **ar = xmalloc(3 * sizeof(*ar));
+
+    ar[n++] = xstrdup(v1);
+    if (v2)
+        ar[n++] = xstrdup(v2);
+    ar[n] = 0;
+    return ar;
+}
+
+/* Frees a NULL-terminated string array: every element, then the array
+   itself. A NULL array is ignored. */
+static void destroy_ar(const char **a)
+{
+    const char **cur;
+
+    if (!a)
+        return;
+    for (cur = a; *cur; cur++)
+        xfree((char *) *cur);
+    xfree(a);
+}
+
CCL_parser ccl_parser_create(CCL_bibset bibset)
{
CCL_parser p = (CCL_parser)xmalloc(sizeof(*p));
p->error_pos = NULL;
p->bibset = bibset;
- p->ccl_token_and = xstrdup("and");
- p->ccl_token_or = xstrdup("or");
- p->ccl_token_not = xstrdup("not andnot");
- p->ccl_token_set = xstrdup("set");
+ p->ccl_token_and = create_ar("and", 0);
+ p->ccl_token_or = create_ar("or", 0);
+ p->ccl_token_not = create_ar("not", "andnot");
+ p->ccl_token_set = create_ar("set", 0);
p->ccl_case_sensitive = 1;
return p;
{
if (!p)
return;
- xfree(p->ccl_token_and);
- xfree(p->ccl_token_or);
- xfree(p->ccl_token_not);
- xfree(p->ccl_token_set);
+ destroy_ar(p->ccl_token_and);
+ destroy_ar(p->ccl_token_or);
+ destroy_ar(p->ccl_token_not);
+ destroy_ar(p->ccl_token_set);
xfree(p);
}
-void ccl_parser_set_op_and(CCL_parser p, const char *op)
-{
- if (p && op)
- {
- xfree(p->ccl_token_and);
- p->ccl_token_and = xstrdup(op);
- }
-}
-
-void ccl_parser_set_op_or(CCL_parser p, const char *op)
-{
- if (p && op)
- {
- xfree(p->ccl_token_or);
- p->ccl_token_or = xstrdup(op);
- }
-}
-void ccl_parser_set_op_not(CCL_parser p, const char *op)
-{
- if (p && op)
- {
- xfree(p->ccl_token_not);
- p->ccl_token_not = xstrdup(op);
- }
-}
-void ccl_parser_set_op_set(CCL_parser p, const char *op)
-{
- if (p && op)
- {
- xfree(p->ccl_token_set);
- p->ccl_token_set = xstrdup(op);
- }
-}
-
void ccl_parser_set_case(CCL_parser p, int case_sensitivity_flag)
{
if (p)