-## $Id: Makefile.am,v 1.44 2007-04-18 07:34:35 adam Exp $
+## $Id: Makefile.am,v 1.45 2007-04-26 21:45:16 adam Exp $
pkginclude_HEADERS= backend.h ccl.h ccl_xml.h cql.h comstack.h \
diagbib1.h diagsrw.h diagsru_update.h sortspec.h log.h logrpn.h marcdisp.h \
tcpip.h test.h timing.h unix.h tpath.h wrbuf.h xmalloc.h \
yaz-ccl.h yaz-iconv.h yaz-util.h yaz-version.h yconfig.h proto.h \
xmlquery.h libxml2_error.h xmltypes.h snprintf.h query-charset.h \
- mutex.h oid_db.h oid_util.h oid_std.h \
+ mutex.h oid_db.h oid_util.h oid_std.h tokenizer.h \
\
ill.h ill-core.h item-req.h oclc-ill-req-ext.h z-accdes1.h z-accform1.h \
z-acckrb1.h z-core.h z-date.h z-diag1.h z-espec1.h z-estask.h z-exp.h \
/*
* CCL - header file
*
- * $Id: ccl.h,v 1.25 2007-04-26 09:11:56 adam Exp $
+ * $Id: ccl.h,v 1.26 2007-04-26 21:45:16 adam Exp $
*
* Old Europagate Log:
*
YAZ_EXPORT
int ccl_parser_get_error(CCL_parser cclp, int *pos);
+/** \brief allocates a new RPN node of the given kind
+ *
+ * Nodes of kind CCL_RPN_TERM start out with an empty attribute list and no
+ * term. Callers fill in the child pointers or set name of other kinds.
+ */
+YAZ_EXPORT
+struct ccl_rpn_node *ccl_rpn_node_create(enum ccl_rpn_kind kind);
+
+/** \brief adds a numeric attribute to a node
+ *
+ * set is the attribute set name, type the attribute type and value the
+ * numeric attribute value.
+ * NOTE(review): p is presumably expected to be a CCL_RPN_TERM node - confirm.
+ */
+YAZ_EXPORT
+void ccl_add_attr_numeric(struct ccl_rpn_node *p, const char *set,
+ int type, int value);
+
+/** \brief adds a string attribute to a node
+ *
+ * String-valued counterpart of ccl_add_attr_numeric.
+ * NOTE(review): whether value is copied or stored as-is is not visible
+ * from this header - confirm against cclfind.c.
+ */
+YAZ_EXPORT
+void ccl_add_attr_string(struct ccl_rpn_node *p, const char *set,
+ int type, char *value);
+
+
#ifndef ccl_assert
#define ccl_assert(x) ;
#endif
--- /dev/null
+/*
+ * Copyright (c) 1995-2007, Index Data
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Index Data nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/* $Id: tokenizer.h,v 1.1 2007-04-26 21:45:16 adam Exp $ */
+
+/** \file tokenizer.h
+ \brief Header with public definitions about YAZ' tokenizer
+*/
+
+#ifndef YAZ_TOKENIZER
+#define YAZ_TOKENIZER
+#include <yaz/nmem.h>
+
+YAZ_BEGIN_CDECL
+
+/* Token codes returned by yaz_tokenizer_move. Any other return value is
+   the character code of a single-character token (see
+   yaz_tokenizer_single_tokens). */
+
+/** \brief end of input reached */
+#define YAZ_TOKENIZER_EOF 0
+/** \brief input ended inside a quoted string */
+#define YAZ_TOKENIZER_ERROR (-1)
+/** \brief unquoted string; text available via yaz_tokenizer_string */
+#define YAZ_TOKENIZER_STRING (-2)
+/** \brief quoted string; text available via yaz_tokenizer_string */
+#define YAZ_TOKENIZER_QSTRING (-3)
+
+/** \brief opaque tokenizer handle */
+typedef struct yaz_tokenizer *yaz_tokenizer_t;
+
+/** \brief creates a tokenizer
+ *
+ * Defaults: white space is space, tab, CR and LF; the double quote character
+ * opens and closes quoted strings; no single-character tokens are defined.
+ * An input must be attached with yaz_tokenizer_read_buf before
+ * yaz_tokenizer_move is called.
+ */
+YAZ_EXPORT
+yaz_tokenizer_t yaz_tokenizer_create(void);
+
+/** \brief destroys a tokenizer and releases everything it owns */
+YAZ_EXPORT
+void yaz_tokenizer_destroy(yaz_tokenizer_t t);
+
+/** \brief attaches a NUL-terminated buffer as the tokenizer input
+ *
+ * The buffer is not copied; it must stay valid while tokens are read.
+ */
+YAZ_EXPORT
+void yaz_tokenizer_read_buf(yaz_tokenizer_t t, const char *buf);
+
+/** \brief reads the next token and returns its code
+ *
+ * Returns one of the YAZ_TOKENIZER_ codes above or, for a single-character
+ * token, the character itself.
+ */
+YAZ_EXPORT
+int yaz_tokenizer_move(yaz_tokenizer_t t);
+
+/** \brief returns the text of the most recently read string token
+ *
+ * The pointer refers to storage owned by the tokenizer.
+ */
+YAZ_EXPORT
+const char *yaz_tokenizer_string(yaz_tokenizer_t t);
+
+/** \brief defines the characters that each form a token on their own
+ *
+ * The string is copied. These characters also terminate unquoted strings.
+ */
+YAZ_EXPORT
+void yaz_tokenizer_single_tokens(yaz_tokenizer_t t, const char *simple);
+
+YAZ_END_CDECL
+
+#endif
+/* YAZ_TOKENIZER */
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
## This file is part of the YAZ toolkit.
## Copyright (C) 1995-2007, Index Data, All rights reserved.
-## $Id: Makefile.am,v 1.64 2007-04-25 20:52:19 adam Exp $
+## $Id: Makefile.am,v 1.65 2007-04-26 21:45:17 adam Exp $
YAZ_VERSION_INFO=3:0:0
tcpdchk.c \
test.c timing.c \
xmlquery.c http.c \
- mime.c mime.h oid_util.c \
+ mime.c mime.h oid_util.c tokenizer.c \
record_conv.c retrieval.c elementset.c snprintf.c query-charset.c
libyaz_la_LDFLAGS=-version-info $(YAZ_VERSION_INFO)
/* CCL find (to rpn conversion)
* Europagate, 1995
*
- * $Id: cclfind.c,v 1.10 2007-04-26 09:11:56 adam Exp $
+ * $Id: cclfind.c,v 1.11 2007-04-26 21:45:17 adam Exp $
*
* Old Europagate log:
*
* kind: Type of node.
* return: pointer to allocated node.
*/
-static struct ccl_rpn_node *mk_node (int kind)
+struct ccl_rpn_node *ccl_rpn_node_create(enum ccl_rpn_kind kind)
{
struct ccl_rpn_node *p;
p = (struct ccl_rpn_node *)xmalloc (sizeof(*p));
ccl_assert (p);
p->kind = kind;
+
+ /* This constructor is public, so every kind gets a defined initial
+ state: a node that is only partially filled in by its caller never
+ carries indeterminate pointers. */
+ switch(kind)
+ {
+ case CCL_RPN_TERM:
+ p->u.t.attr_list = 0;
+ p->u.t.term = 0;
+ break;
+ case CCL_RPN_AND:
+ case CCL_RPN_OR:
+ case CCL_RPN_NOT:
+ case CCL_RPN_PROX:
+ /* operator nodes: callers attach operands afterwards */
+ p->u.p[0] = 0;
+ p->u.p[1] = 0;
+ p->u.p[2] = 0;
+ break;
+ case CCL_RPN_SET:
+ p->u.setname = 0;
+ break;
+ default:
+ break;
+ }
return p;
}
n->next = p->u.t.attr_list;
p->u.t.attr_list = n;
- n->kind = CCL_RPN_ATTR_NUMERIC;
- n->value.numeric = 0;
return n;
}
* value: Value of attribute
* set: Attribute set name
*/
-static void add_attr_numeric (struct ccl_rpn_node *p, const char *set,
- int type, int value)
+void ccl_add_attr_numeric(struct ccl_rpn_node *p, const char *set,
+ int type, int value)
{
struct ccl_rpn_attr *n;
n->value.numeric = value;
}
-static void add_attr_string (struct ccl_rpn_node *p, const char *set,
- int type, char *value)
+void ccl_add_attr_string(struct ccl_rpn_node *p, const char *set,
+ int type, char *value)
{
struct ccl_rpn_attr *n;
if (p_top)
{
if (or_list)
- p = mk_node (CCL_RPN_OR);
+ p = ccl_rpn_node_create(CCL_RPN_OR);
else if (and_list)
- p = mk_node (CCL_RPN_AND);
+ p = ccl_rpn_node_create(CCL_RPN_AND);
else
- p = mk_node (CCL_RPN_AND);
+ p = ccl_rpn_node_create(CCL_RPN_AND);
p->u.p[0] = p_top;
p_top = p;
}
/* create the term node, but wait a moment before adding the term */
- p = mk_node (CCL_RPN_TERM);
+ p = ccl_rpn_node_create(CCL_RPN_TERM);
p->u.t.attr_list = NULL;
p->u.t.term = NULL;
switch(attr->kind)
{
case CCL_RPN_ATTR_STRING:
- add_attr_string(p, attr->set, attr->type,
- attr->value.str);
+ ccl_add_attr_string(p, attr->set, attr->type,
+ attr->value.str);
break;
case CCL_RPN_ATTR_NUMERIC:
if (attr->value.numeric > 0)
completeness_value = attr->value.numeric;
break;
}
- add_attr_numeric(p, attr->set, attr->type,
- attr->value.numeric);
+ ccl_add_attr_numeric(p, attr->set, attr->type,
+ attr->value.numeric);
}
}
}
{ /* no structure attribute met. Apply either structure attribute
WORD or PHRASE depending on number of CCL tokens */
if (no == 1 && no_spaces == 0)
- add_attr_numeric (p, attset, CCL_BIB1_STR, 2);
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 2);
else
- add_attr_numeric (p, attset, CCL_BIB1_STR, 1);
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1);
}
/* make the RPN token */
ccl_rpn_delete (p);
return NULL;
}
- add_attr_numeric (p, attset, CCL_BIB1_TRU, 3);
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 3);
}
else if (right_trunc)
{
ccl_rpn_delete (p);
return NULL;
}
- add_attr_numeric (p, attset, CCL_BIB1_TRU, 1);
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 1);
}
else if (left_trunc)
{
ccl_rpn_delete (p);
return NULL;
}
- add_attr_numeric (p, attset, CCL_BIB1_TRU, 2);
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2);
}
else
{
if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
&attset))
- add_attr_numeric (p, attset, CCL_BIB1_TRU, 100);
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 100);
}
if (!multi)
break;
ccl_rpn_delete (p1);
return NULL;
}
- p = mk_node (CCL_RPN_AND);
+ p = ccl_rpn_node_create(CCL_RPN_AND);
p->u.p[0] = p1;
- add_attr_numeric (p1, attset, CCL_BIB1_REL, 4);
+ ccl_add_attr_numeric(p1, attset, CCL_BIB1_REL, 4);
p->u.p[1] = p2;
- add_attr_numeric (p2, attset, CCL_BIB1_REL, 2);
+ ccl_add_attr_numeric(p2, attset, CCL_BIB1_REL, 2);
return p;
}
else /* = term - */
{
- add_attr_numeric (p1, attset, CCL_BIB1_REL, 4);
+ ccl_add_attr_numeric(p1, attset, CCL_BIB1_REL, 4);
return p1;
}
}
ADVANCE;
if (!(p = search_term (cclp, ap)))
return NULL;
- add_attr_numeric (p, attset, CCL_BIB1_REL, 2);
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, 2);
return p;
}
else if (KIND == CCL_TOK_LP)
{
if (!(p = search_terms (cclp, ap)))
return NULL;
- add_attr_numeric (p, attset, CCL_BIB1_REL, rel);
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, rel);
return p;
}
cclp->error_code = CCL_ERR_TERM_EXPECTED;
}
if (node)
{
- struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
+ struct ccl_rpn_node *node_this =
+ ccl_rpn_node_create(CCL_RPN_OR);
node_this->u.p[0] = node;
node_this->u.p[1] = node_sub;
node = node_this;
}
if (node)
{
- struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
+ struct ccl_rpn_node *node_this =
+ ccl_rpn_node_create(CCL_RPN_OR);
node_this->u.p[0] = node;
node_this->u.p[1] = node_sub;
node = node_this;
struct ccl_rpn_node *p_prox = 0;
/* ! word order specified */
/* % word order not specified */
- p_prox = mk_node(CCL_RPN_TERM);
+ p_prox = ccl_rpn_node_create(CCL_RPN_TERM);
p_prox->u.t.term = (char *) xmalloc(1 + cclp->look_token->len);
memcpy(p_prox->u.t.term, cclp->look_token->name,
cclp->look_token->len);
ccl_rpn_delete (p1);
return NULL;
}
- pn = mk_node (CCL_RPN_PROX);
+ pn = ccl_rpn_node_create(CCL_RPN_PROX);
pn->u.p[0] = p1;
pn->u.p[1] = p2;
pn->u.p[2] = p_prox;
ccl_rpn_delete (p1);
return NULL;
}
- pn = mk_node (CCL_RPN_PROX);
+ pn = ccl_rpn_node_create(CCL_RPN_PROX);
pn->u.p[0] = p1;
pn->u.p[1] = p2;
pn->u.p[2] = 0;
cclp->error_code = CCL_ERR_SETNAME_EXPECTED;
return NULL;
}
- p1 = mk_node (CCL_RPN_SET);
+ p1 = ccl_rpn_node_create(CCL_RPN_SET);
p1->u.setname = copy_token_name (cclp->look_token);
ADVANCE;
return p1;
}
if (node)
{
- struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
+ struct ccl_rpn_node *node_this =
+ ccl_rpn_node_create(CCL_RPN_OR);
node_this->u.p[0] = node;
node_this->u.p[1] = node_sub;
node_this->u.p[2] = 0;
ccl_rpn_delete (p1);
return NULL;
}
- pn = mk_node (CCL_RPN_AND);
+ pn = ccl_rpn_node_create(CCL_RPN_AND);
pn->u.p[0] = p1;
pn->u.p[1] = p2;
pn->u.p[2] = 0;
ccl_rpn_delete (p1);
return NULL;
}
- pn = mk_node (CCL_RPN_OR);
+ pn = ccl_rpn_node_create(CCL_RPN_OR);
pn->u.p[0] = p1;
pn->u.p[1] = p2;
pn->u.p[2] = 0;
ccl_rpn_delete (p1);
return NULL;
}
- pn = mk_node (CCL_RPN_NOT);
+ pn = ccl_rpn_node_create(CCL_RPN_NOT);
pn->u.p[0] = p1;
pn->u.p[1] = p2;
pn->u.p[2] = 0;
/* CCL qualifiers
* Europagate, 1995
*
- * $Id: cclqfile.c,v 1.8 2007-04-25 20:52:19 adam Exp $
+ * $Id: cclqfile.c,v 1.9 2007-04-26 21:45:17 adam Exp $
*
* Old Europagate Log:
*
#include <stdlib.h>
#include <string.h>
+#include <yaz/tokenizer.h>
#include <yaz/ccl.h>
+#include <yaz/log.h>
#define MAX_QUAL 128
-void ccl_qual_field (CCL_bibset bibset, const char *cp, const char *qual_name)
+/**
+ * Parses one qualifier specification and registers it in a bibset.
+ *
+ * bibset: bibset that receives the qualifier
+ * cp: specification text; either a list of qualifier aliases
+ * (q1 q2 ...) or a sequence of [set,]type=value[,value...]
+ * qual_name: name of the qualifier being defined
+ * addinfo: set to 0 on entry; on failure points to a static message
+ * return: 0 on success, -1 on a parse error (nothing is registered)
+ *
+ * Ownership: every attsets[]/svalue_ar[] slot below pair_no holds either 0
+ * or its own heap string. The attribute set and type of the spec being
+ * parsed live in set_str/type_str until the function returns.
+ */
+int ccl_qual_field2(CCL_bibset bibset, const char *cp, const char *qual_name,
+ const char **addinfo)
{
- char qual_spec[128];
+ yaz_tokenizer_t yt = yaz_tokenizer_create();
+
int type_ar[MAX_QUAL];
int value_ar[MAX_QUAL];
char *svalue_ar[MAX_QUAL];
char *attsets[MAX_QUAL];
int pair_no = 0;
+ char *type_str = 0;
+ char *set_str = 0; /* attribute set of the current spec, 0 if omitted */
+ int t;
- while (pair_no < MAX_QUAL)
+ yaz_tokenizer_single_tokens(yt, ",=");
+ yaz_tokenizer_read_buf(yt, cp);
+ *addinfo = 0;
+
+ t = yaz_tokenizer_move(yt);
+ while (t == YAZ_TOKENIZER_STRING)
{
- char *qual_value, *qual_type;
- char *split, *setp;
- int no_scan = 0;
-
- if (sscanf (cp, "%100s%n", qual_spec, &no_scan) < 1)
- break;
+ /* we don't know what lead is yet */
+ char *lead_str = xstrdup(yaz_tokenizer_string(yt));
+ const char *value_str = 0;
+ int type = 0, value = 0; /* indicates attribute value UNSET */
- if (!(split = strchr (qual_spec, '=')))
+ t = yaz_tokenizer_move(yt);
+ if (t == ',')
{
- /* alias specification .. */
- if (pair_no == 0)
+ /* full attribute spec: set, type = value */
+ /* lead is attribute set */
+ /* keep it in set_str so that the error paths below release it */
+ xfree(set_str);
+ set_str = lead_str;
+ t = yaz_tokenizer_move(yt);
+ if (t != YAZ_TOKENIZER_STRING)
{
- ccl_qual_add_combi (bibset, qual_name, cp);
- return;
+ *addinfo = "token expected";
+ goto out;
+ }
+ xfree(type_str);
+ type_str = xstrdup(yaz_tokenizer_string(yt));
+ if (yaz_tokenizer_move(yt) != '=')
+ {
+ *addinfo = "= expected";
+ goto out;
}
- break;
}
- /* [set,]type=value ... */
- cp += no_scan;
-
- *split++ = '\0';
-
- setp = strchr (qual_spec, ',');
- if (setp)
+ else if (t == '=')
{
- /* set,type=value ... */
- *setp++ = '\0';
- qual_type = setp;
+ /* lead is attribute type */
+ /* attribute set omitted: type = value */
+ xfree(set_str);
+ set_str = 0;
+ xfree(type_str);
+ type_str = lead_str;
}
else
{
- /* type=value ... */
- qual_type = qual_spec;
+ /* lead is first of a list of qualifier aliases */
+ /* qualifier alias: q1 q2 ... */
+ int i;
+ /* nothing collected so far is handed on; release it all */
+ for (i = 0; i<pair_no; i++)
+ {
+ xfree(attsets[i]);
+ xfree(svalue_ar[i]);
+ }
+ xfree(set_str);
+ xfree(type_str);
+ xfree(lead_str);
+ yaz_tokenizer_destroy(yt);
+ ccl_qual_add_combi (bibset, qual_name, cp);
+ return 0;
}
- while (pair_no < MAX_QUAL)
+ while (1) /* comma separated attribute value list */
{
- int type, value;
-
- qual_value = split;
- if ((split = strchr (qual_value, ',')))
- *split++ = '\0';
-
- value = 0;
- switch (qual_type[0])
+ t = yaz_tokenizer_move(yt);
+ /* must have a value now */
+ if (t != YAZ_TOKENIZER_STRING)
{
- case 'u':
- case 'U':
- type = CCL_BIB1_USE;
- break;
- case 'r':
- case 'R':
- type = CCL_BIB1_REL;
- if (!ccl_stricmp (qual_value, "o"))
- value = CCL_BIB1_REL_ORDER;
- else if (!ccl_stricmp (qual_value, "r"))
- value = CCL_BIB1_REL_PORDER;
- break;
- case 'p':
- case 'P':
- type = CCL_BIB1_POS;
- break;
- case 's':
- case 'S':
- type = CCL_BIB1_STR;
- if (!ccl_stricmp (qual_value, "pw"))
- value = CCL_BIB1_STR_WP;
- if (!ccl_stricmp (qual_value, "al"))
- value = CCL_BIB1_STR_AND_LIST;
- if (!ccl_stricmp (qual_value, "ol"))
- value = CCL_BIB1_STR_OR_LIST;
- break;
- case 't':
- case 'T':
- type = CCL_BIB1_TRU;
- if (!ccl_stricmp (qual_value, "l"))
- value = CCL_BIB1_TRU_CAN_LEFT;
- else if (!ccl_stricmp (qual_value, "r"))
- value = CCL_BIB1_TRU_CAN_RIGHT;
- else if (!ccl_stricmp (qual_value, "b"))
- value = CCL_BIB1_TRU_CAN_BOTH;
- else if (!ccl_stricmp (qual_value, "n"))
- value = CCL_BIB1_TRU_CAN_NONE;
- break;
- case 'c':
- case 'C':
- type = CCL_BIB1_COM;
- break;
- default:
- type = atoi (qual_type);
+ *addinfo = "value token expected";
+ goto out;
+ }
+ value_str = yaz_tokenizer_string(yt);
+ /* each list element starts out UNSET; otherwise a symbolic value
+ matched earlier would be stored again for a later element */
+ value = 0;
+
+ if (sscanf(type_str, "%d", &type) == 1)
+ ;
+ else if (strlen(type_str) != 1)
+ {
+ *addinfo = "bad attribute type";
+ goto out;
+ }
+ else
+ {
+ switch (*type_str)
+ {
+ case 'u':
+ case 'U':
+ type = CCL_BIB1_USE;
+ break;
+ case 'r':
+ case 'R':
+ type = CCL_BIB1_REL;
+ if (!ccl_stricmp (value_str, "o"))
+ value = CCL_BIB1_REL_ORDER;
+ else if (!ccl_stricmp (value_str, "r"))
+ value = CCL_BIB1_REL_PORDER;
+ break;
+ case 'p':
+ case 'P':
+ type = CCL_BIB1_POS;
+ break;
+ case 's':
+ case 'S':
+ type = CCL_BIB1_STR;
+ if (!ccl_stricmp (value_str, "pw"))
+ value = CCL_BIB1_STR_WP;
+ if (!ccl_stricmp (value_str, "al"))
+ value = CCL_BIB1_STR_AND_LIST;
+ if (!ccl_stricmp (value_str, "ol"))
+ value = CCL_BIB1_STR_OR_LIST;
+ break;
+ case 't':
+ case 'T':
+ type = CCL_BIB1_TRU;
+ if (!ccl_stricmp (value_str, "l"))
+ value = CCL_BIB1_TRU_CAN_LEFT;
+ else if (!ccl_stricmp (value_str, "r"))
+ value = CCL_BIB1_TRU_CAN_RIGHT;
+ else if (!ccl_stricmp (value_str, "b"))
+ value = CCL_BIB1_TRU_CAN_BOTH;
+ else if (!ccl_stricmp (value_str, "n"))
+ value = CCL_BIB1_TRU_CAN_NONE;
+ break;
+ case 'c':
+ case 'C':
+ type = CCL_BIB1_COM;
+ break;
+ }
+ }
+ if (type == 0)
+ {
+ /* type was not set in switch above */
+ *addinfo = "bad attribute type";
+ goto out;
}
-
type_ar[pair_no] = type;
-
+ /* every pair gets its own copy of the set name, so that each
+ slot can be freed independently */
+ attsets[pair_no] = set_str ? xstrdup(set_str) : 0;
if (value)
{
value_ar[pair_no] = value;
svalue_ar[pair_no] = 0;
}
- else if (*qual_value >= '0' && *qual_value <= '9')
+ else if (*value_str >= '0' && *value_str <= '9')
{
- value_ar[pair_no] = atoi (qual_value);
+ value_ar[pair_no] = atoi (value_str);
svalue_ar[pair_no] = 0;
}
else
{
- size_t len;
- if (split)
- len = split - qual_value;
- else
- len = strlen(qual_value);
- svalue_ar[pair_no] = (char *) xmalloc(len+1);
- memcpy(svalue_ar[pair_no], qual_value, len);
- svalue_ar[pair_no][len] = '\0';
+ value_ar[pair_no] = 0;
+ svalue_ar[pair_no] = xstrdup(value_str);
}
- if (setp)
+ pair_no++;
+ if (pair_no == MAX_QUAL)
{
- attsets[pair_no] = xstrdup (qual_spec);
+ *addinfo = "too many attribute values";
+ goto out;
}
- else
- attsets[pair_no] = 0;
- pair_no++;
- if (!split)
+ t = yaz_tokenizer_move(yt);
+ if (t != ',')
break;
}
}
- ccl_qual_add_set (bibset, qual_name, pair_no, type_ar, value_ar, svalue_ar,
- attsets);
+ out:
+ xfree(type_str);
+ type_str = 0;
+ xfree(set_str);
+ set_str = 0;
+
+ yaz_tokenizer_destroy(yt);
+
+ if (*addinfo)
+ {
+ int i;
+ for (i = 0; i<pair_no; i++)
+ {
+ xfree(attsets[i]);
+ xfree(svalue_ar[i]);
+ }
+ return -1;
+ }
+ ccl_qual_add_set(bibset, qual_name, pair_no, type_ar, value_ar, svalue_ar,
+ attsets);
+ return 0;
+}
+
+/* Compatibility wrapper around ccl_qual_field2 for callers that cannot
+   receive an error: a failure is only reported in the log. */
+void ccl_qual_field(CCL_bibset bibset, const char *cp, const char *qual_name)
+{
+    const char *addinfo = 0;
+
+    ccl_qual_field2(bibset, cp, qual_name, &addinfo);
+    if (!addinfo)
+        return;
+    yaz_log(YLOG_WARN, "ccl_qual_field2 fail: %s", addinfo);
}
void ccl_qual_fitem (CCL_bibset bibset, const char *cp, const char *qual_name)
--- /dev/null
+/*
+ * Copyright (C) 1995-2007, Index Data ApS
+ * See the file LICENSE for details.
+ *
+ * $Id: tokenizer.c,v 1.1 2007-04-26 21:45:17 adam Exp $
+ */
+
+/**
+ * \file tokenizer.c
+ * \brief Implements a simple tokenizer (strings, quoted strings, single-character tokens)
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <yaz/log.h>
+#include <yaz/wrbuf.h>
+#include <yaz/tokenizer.h>
+
+/* State of one tokenizer instance. */
+struct yaz_tokenizer {
+ /* input callback: returns the next byte, or 0 when the input is exhausted;
+ it is handed the address of get_byte_data and may advance it */
+ int (*get_byte_func)(const void **vp);
+ /* cursor/state owned by get_byte_func */
+ const void *get_byte_data;
+
+ /* one byte pushed back by unget_byte; 0 means nothing is pending */
+ int unget_byte;
+ /* characters skipped before a token; they also end an unquoted string */
+ char *white_space;
+ /* characters that are tokens by themselves */
+ char *single_tokens;
+ /* parallel strings: a quote opened by quote_tokens_begin[i] is closed by
+ quote_tokens_end[i] */
+ char *quote_tokens_begin;
+ char *quote_tokens_end;
+ /* text of the most recent string or quoted string token */
+ WRBUF wr_string;
+ /* token code most recently returned by yaz_tokenizer_move */
+ int look;
+};
+
+/* Replaces the set of single-character tokens with a private copy of
+   simple; the previous set is released. */
+void yaz_tokenizer_single_tokens(yaz_tokenizer_t t, const char *simple)
+{
+    char *previous = t->single_tokens;
+
+    t->single_tokens = xstrdup(simple);
+    xfree(previous);
+}
+
+/* Allocates a tokenizer with the default lexical classes and no input. */
+yaz_tokenizer_t yaz_tokenizer_create(void)
+{
+    yaz_tokenizer_t tok = xmalloc(sizeof(*tok));
+
+    /* no input source until yaz_tokenizer_read_buf is called */
+    tok->get_byte_func = 0;
+    tok->get_byte_data = 0;
+    tok->unget_byte = 0;
+    tok->look = YAZ_TOKENIZER_ERROR;
+
+    /* defaults: blank/tab/CR/LF separate tokens, '"' quotes strings and
+       no character is a token by itself */
+    tok->white_space = xstrdup(" \t\r\n");
+    tok->single_tokens = xstrdup("");
+    tok->quote_tokens_begin = xstrdup("\"");
+    tok->quote_tokens_end = xstrdup("\"");
+
+    tok->wr_string = wrbuf_alloc();
+    return tok;
+}
+
+/* Releases the token buffer, the character class strings and finally the
+   tokenizer itself. The input buffer is not owned and is left alone. */
+void yaz_tokenizer_destroy(yaz_tokenizer_t t)
+{
+    wrbuf_destroy(t->wr_string);
+
+    xfree(t->quote_tokens_end);
+    xfree(t->quote_tokens_begin);
+    xfree(t->single_tokens);
+    xfree(t->white_space);
+
+    xfree(t);
+}
+
+/* Byte source for NUL-terminated buffers: *vp is the read position.
+   Returns the byte at that position and steps past it. At the terminating
+   NUL it returns 0 and stays put, so end of input repeats. */
+static int read_buf(const void **vp)
+{
+    const char **cursor = (const char **) vp;
+    int ch = **cursor;
+
+    if (ch != 0)
+        ++*cursor;
+    return ch;
+}
+
+/* Returns the next input byte: the pushed-back byte if one is pending,
+   otherwise a fresh byte from the input callback. 0 means end of input. */
+static int get_byte(yaz_tokenizer_t t)
+{
+    int pushed_back = t->unget_byte;
+
+    assert(t->get_byte_func);
+    if (!pushed_back)
+        return t->get_byte_func(&t->get_byte_data);
+    t->unget_byte = 0;
+    return pushed_back;
+}
+
+/* Pushes ch back so that the next get_byte call returns it. Only one byte
+   is held; pushing back 0 leaves nothing pending, because get_byte treats
+   0 as "no pushed-back byte". */
+static void unget_byte(yaz_tokenizer_t t, int ch)
+{
+ t->unget_byte = ch;
+}
+
+/* Attaches the NUL-terminated string buf as the tokenizer input.
+   The buffer is not copied, so it must stay valid while tokens are read.
+   May be called again to tokenize another buffer with the same settings. */
+void yaz_tokenizer_read_buf(yaz_tokenizer_t t, const char *buf)
+{
+    assert(t);
+    t->get_byte_func = read_buf;
+    t->get_byte_data = buf;
+    /* a byte pushed back while reading the previous input must not be
+       delivered as the first byte of the new one */
+    t->unget_byte = 0;
+}
+
+/* Reads the next token from the input and returns its code:
+     YAZ_TOKENIZER_EOF      end of input
+     YAZ_TOKENIZER_STRING   unquoted string (ends at white space, a single
+                            token character or end of input)
+     YAZ_TOKENIZER_QSTRING  quoted string, quotes not included
+     YAZ_TOKENIZER_ERROR    input ended before the closing quote
+     any other value        the single token character that was read
+   For the two string codes the text is available through
+   yaz_tokenizer_string. The returned code is also kept in t->look. */
+int yaz_tokenizer_move(yaz_tokenizer_t t)
+{
+    const char *cp;
+    int ch = get_byte(t);
+
+    /* skip white space */
+    while (ch && strchr(t->white_space, ch))
+        ch = get_byte(t);
+    if (!ch)
+    {
+        ch = YAZ_TOKENIZER_EOF;
+    }
+    else if ((cp = strchr(t->single_tokens, ch)))
+        ch = *cp;  /* single token match */
+    else if ((cp = strchr(t->quote_tokens_begin, ch)))
+    {   /* quoted string */
+        /* the closing quote is the one paired with the opening quote */
+        int end_ch = t->quote_tokens_end[cp - t->quote_tokens_begin];
+        ch = get_byte(t);
+        wrbuf_rewind(t->wr_string);
+        while (ch && ch != end_ch)
+        {
+            wrbuf_putc(t->wr_string, ch);
+            /* fetch the next byte; without this the loop never ends */
+            ch = get_byte(t);
+        }
+        if (!ch)
+            ch = YAZ_TOKENIZER_ERROR;
+        else
+            ch = YAZ_TOKENIZER_QSTRING;
+    }
+    else
+    {   /* unquoted string */
+        wrbuf_rewind(t->wr_string);
+        while (ch && !strchr(t->white_space, ch)
+               && !strchr(t->single_tokens, ch))
+        {
+            wrbuf_putc(t->wr_string, ch);
+            ch = get_byte(t);
+        }
+        /* the terminating byte belongs to the next token */
+        unget_byte(t, ch);
+        ch = YAZ_TOKENIZER_STRING;
+    }
+    t->look = ch;
+    /* NOTE(review): this logs every token at YLOG_LOG level; it looks like
+       leftover debugging output - confirm whether it should stay */
+    yaz_log(YLOG_LOG, "tokenizer returns %d (%s)", ch,
+            wrbuf_cstr(t->wr_string));
+
+    return ch;
+}
+
+/* Returns the text collected for the most recent string or quoted string
+   token. The buffer is only reset when a new string token starts, so after
+   EOF or a single-character token it still holds the previous text.
+   The pointer refers to the tokenizer's own buffer.
+   NOTE(review): presumably invalidated by the next yaz_tokenizer_move -
+   confirm against the WRBUF implementation. */
+const char *yaz_tokenizer_string(yaz_tokenizer_t t)
+{
+ return wrbuf_cstr(t->wr_string);
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+