1 /* $Id: index_types.c,v 1.3 2007-10-29 08:20:16 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 \brief Implementation of Zebra's index types system
33 #include "index_types.h"
35 #include <yaz/icu_I18N.h>
37 #include <yaz/match_glob.h>
38 #include <yaz/xmalloc.h>
39 #include <yaz/wrbuf.h>
/** \brief Container holding the list of index type rules.
 *
 * NOTE(review): only part of this definition is visible in this listing;
 * members on lines that are not shown are not documented here.
 */
42 struct zebra_index_types_s {
/* head of the singly linked list of rules; rules are chained through
   their next member and searched front to back by zebra_index_type_get */
44 zebra_index_type_t rules;
/** \brief One index type rule, filled in by parse_index_type.
 *
 * NOTE(review): this listing shows only some of the members. Other members
 * that the rest of the file reads or writes (id, locale, position,
 * index_flag, sort_flag, staticrank_flag, simple_chain, simple_buf,
 * simple_off) are presumably declared on lines not shown here; confirm
 * against the complete file.
 */
50 struct zebra_index_type_s {
/* text of the alwaysmatches attribute, or 0 when the attribute is absent */
55 const char *alwaysmatches;
/* text of the firstinfield attribute, or 0 when the attribute is absent */
56 const char *firstinfield;
/* ICU chain obtained from icu_chain_xml_config and released with
   icu_chain_destroy in index_type_destroy */
62 struct icu_chain *chain;
/* next rule in the list whose head is the rules member of the container */
64 zebra_index_type_t next;
/* Forward declaration: parse_index_type calls index_type_destroy on its
   error paths, and the definition only appears further down in the file. */
69 static void index_type_destroy(zebra_index_type_t t);
/** \brief Builds one index type rule from an XML element.
 *
 * \param ptr XML element whose attributes and content describe the rule
 *
 * The rule is allocated with xmalloc and gets a WRBUF from wrbuf_alloc.
 * Attributes recognised: id, locale, position, alwaysmatches, firstinfield,
 * index, sort and staticrank. After the attributes, the mapping element is
 * inspected: icu_chain and simple are the two names handled; anything else
 * is logged as unsupported and the rule is destroyed.
 *
 * NOTE(review): several lines of this function (remaining initialisers,
 * else branches, return statements, the statements that advance ptr) are
 * not visible in this listing, so the exact return values are not
 * documented here.
 */
71 zebra_index_type_t parse_index_type(const xmlNode *ptr)
73 struct _xmlAttr *attr;
74 struct zebra_index_type_s *rule;
76 rule = xmalloc(sizeof(*rule));
/* optional settings start out cleared so that absent attributes read as 0 */
85 rule->alwaysmatches = 0;
86 rule->firstinfield = 0;
89 rule->staticrank_flag = 0;
90 rule->simple_chain = 0;
/* working buffer used later by the simple tokenizer */
91 rule->simple_buf = wrbuf_alloc();
/* walk every attribute of the element */
92 for (attr = ptr->properties; attr; attr = attr->next)
/* only attributes whose first child is a text node are examined */
94 if (attr->children && attr->children->type == XML_TEXT_NODE)
/* string settings keep a pointer to the attribute text itself; nothing is
   copied, so the XML document has to stay alive as long as the rule does */
96 if (!strcmp((const char *) attr->name, "id"))
97 rule->id = (const char *) attr->children->content;
98 else if (!strcmp((const char *) attr->name, "locale"))
99 rule->locale = (const char *) attr->children->content;
100 else if (!strcmp((const char *) attr->name, "position"))
101 rule->position = (const char *) attr->children->content;
102 else if (!strcmp((const char *) attr->name, "alwaysmatches"))
103 rule->alwaysmatches = (const char *) attr->children->content;
104 else if (!strcmp((const char *) attr->name, "firstinfield"))
105 rule->firstinfield = (const char *) attr->children->content;
/* boolean settings: the flag becomes 1 only when the value starts with the
   character 1, and 0 for any other first character */
106 else if (!strcmp((const char *) attr->name, "index"))
108 const char *v = (const char *) attr->children->content;
110 rule->index_flag = *v == '1';
112 else if (!strcmp((const char *) attr->name, "sort"))
114 const char *v = (const char *) attr->children->content;
116 rule->sort_flag = *v == '1';
118 else if (!strcmp((const char *) attr->name, "staticrank"))
120 const char *v = (const char *) attr->children->content;
122 rule->staticrank_flag = *v == '1';
/* any other attribute name is reported and the half-built rule is released */
126 yaz_log(YLOG_WARN, "Unsupport attribute '%s' for indextype",
128 index_type_destroy(rule);
/* look for the first element node that describes the mapping
   NOTE(review): the statements that move ptr are not visible here */
134 while (ptr && ptr->type != XML_ELEMENT_NODE)
/* no mapping element available: warn and release the rule */
138 yaz_log(YLOG_WARN, "Missing rules for indexrule");
139 index_type_destroy(rule);
/* ICU mapping: the chain is built from the XML configuration */
142 else if (!strcmp((const char *) ptr->name, "icu_chain"))
146 rule->chain = icu_chain_xml_config(ptr,
152 index_type_destroy(rule);
/* NOTE(review): presumably the branch used when built without ICU; confirm */
156 yaz_log(YLOG_WARN, "ICU unsupported (must be part of YAZ)");
/* simple mapping: selects the built-in tokenizer, see tokenize_simple */
161 else if (!strcmp((const char *) ptr->name, "simple"))
163 rule->simple_chain = 1;
/* unknown mapping element: warn and release the rule */
167 yaz_log(YLOG_WARN, "Unsupported mapping %s for indexrule", ptr->name);
168 index_type_destroy(rule);
/** \brief Creates the index types container from an XML file.
 *
 * \param fname name of the XML file, parsed with xmlParseFile
 * \return the result of zebra_index_types_create_doc for the parsed document
 *
 * NOTE(review): what happens when xmlParseFile fails is decided on lines
 * that are not visible in this listing.
 */
176 zebra_index_types_t zebra_index_types_create(const char *fname)
178 xmlDocPtr doc = xmlParseFile(fname);
181 return zebra_index_types_create_doc(doc);
/** \brief Creates the index types container from a parsed XML document.
 *
 * \param doc parsed XML document; its root element must be named indextypes
 *
 * Each indextype child element of the root is handed to parse_index_type.
 *
 * NOTE(review): the conditions guarding the two zebra_index_types_destroy
 * calls, the list linking statements and the return statements are on lines
 * not visible in this listing.
 */
184 zebra_index_types_t zebra_index_types_create_doc(xmlDocPtr doc)
187 zebra_index_types_t r = xmalloc(sizeof(*r));
/* rp addresses the slot that receives the next parsed rule, starting at
   the list head */
188 zebra_index_type_t *rp = &r->rules;
189 const xmlNode *top = xmlDocGetRootElement(doc);
/* accept the document only when the root is an element called indextypes */
193 if (top && top->type == XML_ELEMENT_NODE
194 && !strcmp((const char *) top->name, "indextypes"))
196 const xmlNode *ptr = top->children;
/* visit every child of the root; non-element nodes and elements with other
   names do not match the test below */
197 for (; ptr; ptr = ptr->next)
199 if (ptr->type == XML_ELEMENT_NODE
200 && !strcmp((const char *) ptr->name, "indextype"))
202 *rp = parse_index_type(ptr);
/* NOTE(review): presumably reached when parse_index_type fails; confirm */
205 zebra_index_types_destroy(r);
/* NOTE(review): presumably reached when the root element check fails; confirm */
214 zebra_index_types_destroy(r);
/* NOTE(review): presumably the build without XML support; confirm */
219 yaz_log(YLOG_WARN, "XML unsupported. Cannot read index rules");
/** \brief Releases the resources held by one index type rule.
 *
 * \param t rule to release
 *
 * The visible statements destroy the ICU chain and the WRBUF used by the
 * simple tokenizer.
 *
 * NOTE(review): any guard on t and the release of the rule structure itself
 * are on lines not visible in this listing.
 */
225 static void index_type_destroy(zebra_index_type_t t)
231 icu_chain_destroy(t->chain);
233 wrbuf_destroy(t->simple_buf);
/** \brief Destroys the index types container and the rules it holds.
 *
 * \param r container to destroy
 *
 * NOTE(review): the loop header, the assignment of rule and the final
 * release of r are on lines not visible in this listing.
 */
238 void zebra_index_types_destroy(zebra_index_types_t r)
243 zebra_index_type_t rule;
/* unlink the rule from the head of the list before it is destroyed, so the
   list never refers to released memory */
247 r->rules = rule->next;
248 index_type_destroy(rule);
/** \brief Finds the first rule whose id matches the given identifier.
 *
 * \param types container whose rule list is searched; dereferenced without
 *              a check
 *
 * NOTE(review): the remaining parameter, the loop body and the return
 * statement are on lines not visible in this listing.
 */
257 zebra_index_type_t zebra_index_type_get(zebra_index_types_t types,
261 zebra_index_type_t rule = types->rules;
/* keep going while there is a rule and yaz_match_glob reports no match;
   the rule id is passed as first argument and the looked-up id as second
   (presumably glob pattern and text respectively; confirm against YAZ) */
263 while (rule && !yaz_match_glob(rule->id, id))
/** \brief Looks up an index type by identifier and returns a string for it.
 *
 * \param types container to search, passed on to zebra_index_type_get
 *
 * NOTE(review): the remaining parameter and the statements that derive the
 * returned string from the lookup result are on lines not visible in this
 * listing.
 */
270 const char *zebra_index_type_lookup_str(zebra_index_types_t types,
273 zebra_index_type_t t = zebra_index_type_get(types, id);
279 int zebra_index_type_is_index(zebra_index_type_t type)
281 return type->index_flag;
284 int zebra_index_type_is_sort(zebra_index_type_t type)
286 return type->sort_flag;
289 int zebra_index_type_is_staticrank(zebra_index_type_t type)
291 return type->staticrank_flag;
/* Characters that tokenize_simple treats as token separators. */
294 #define SE_CHARS ";,.()-/?<> \r\n\t"
/** \brief Extracts the next token from the simple buffer of an index type.
 *
 * \param type       index type whose simple_buf holds the text and whose
 *                   simple_off holds the position reached so far
 * \param result_buf receives a pointer to the token inside simple_buf; the
 *                   token is not copied
 * \param result_len receives the token length in bytes
 *
 * Separator characters are skipped, then bytes are consumed up to the next
 * separator or the end of the buffer. The reached position is stored back
 * in simple_off, so a following call continues from there.
 *
 * NOTE(review): the declaration and assignment of start, the increments of
 * i and the return statements are on lines not visible in this listing.
 */
296 int tokenize_simple(zebra_index_type_t type,
297 const char **result_buf, size_t *result_len)
299 char *buf = wrbuf_buf(type->simple_buf);
300 size_t len = wrbuf_len(type->simple_buf);
301 size_t i = type->simple_off;
/* skip leading separators; strchr also matches the terminating NUL of
   SE_CHARS, so a zero byte in the buffer counts as a separator too */
304 while (i < len && strchr(SE_CHARS, buf[i]))
/* consume the token itself */
307 while (i < len && !strchr(SE_CHARS, buf[i]))
/* lower-case in place, restricted to byte values 33 through 126; every
   other byte is left untouched */
309 if (buf[i] > 32 && buf[i] < 127)
310 buf[i] = tolower(buf[i]);
/* remember where scanning stopped */
314 type->simple_off = i;
/* hand back the token as a pointer and length into the working buffer */
317 *result_buf = buf + start;
318 *result_len = i - start;
/** \brief Tokenizes text according to an index type.
 *
 * \param type       index type selecting the tokenizer
 * \param buf        input text
 * \param len        number of bytes in buf
 * \param result_buf receives a pointer to the token
 * \param result_len receives the token length in bytes
 *
 * For a type configured with the simple mapping, the token comes from
 * tokenize_simple and its return value is passed through.
 *
 * NOTE(review): the condition guarding the buffer reset and the value
 * returned for types without the simple mapping are on lines not visible in
 * this listing. Presumably the reset happens only when new input is
 * supplied, so that repeated calls step through one input; confirm.
 */
324 int zebra_index_type_tokenize(zebra_index_type_t type,
325 const char *buf, size_t len,
326 const char **result_buf, size_t *result_len)
328 if (type->simple_chain)
/* load the input into the working buffer and restart scanning at offset 0 */
332 wrbuf_rewind(type->simple_buf);
333 wrbuf_write(type->simple_buf, buf, len);
334 type->simple_off = 0;
336 return tokenize_simple(type, result_buf, result_len);
344 * indent-tabs-mode: nil
346 * vim: shiftwidth=4 tabstop=8 expandtab