1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2010 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 #include <yaz/diagbib1.h>
25 #include <yaz/tpath.h>
26 #include <yaz/oid_db.h>
28 #include <libxml/xmlversion.h>
29 #include <libxml/parser.h>
30 #include <libxml/tree.h>
31 #include <libxml/xmlIO.h>
32 #include <libxml/xmlreader.h>
33 #include <libxslt/transform.h>
34 #include <libxslt/xsltutils.h>
37 #include <libexslt/exslt.h>
40 #include <idzebra/util.h>
41 #include <idzebra/recctrl.h>
43 struct filter_schema {
45 const char *identifier;
46 const char *stylesheet;
47 struct filter_schema *next;
48 const char *default_schema;
49 /* char default_schema; */
50 xsltStylesheetPtr stylesheet_xsp;
57 const char *profile_path;
59 const char *split_path;
61 struct filter_schema *schemas;
62 xmlTextReaderPtr reader;
/* XML namespace URI. index_node and index_record compare an element's
   namespace href against it before treating the element as filter markup. */
65 #define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1"
/* strcmp/strlen wrappers that cast their first argument to char*; they are
   applied to libxml2 string fields such as node names, hrefs and content. */
67 #define XML_STRCMP(a,b) strcmp((char*)a, b)
68 #define XML_STRLEN(a) strlen((char*)a)
/* Pointer form of the namespace URI; extract_doc passes it to lookup_schema
   and set_param_str. */
70 static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
/* Formats value wrapped in single quotes into a buffer obtained with
   odr_malloc from odr.  The buffer is strlen(value) + 3 bytes: the two
   quotes plus the terminating NUL.
   NOTE(review): the statements that store name and the quoted string into
   params are not visible in this listing.
   NOTE(review): a single quote inside value is copied through unescaped;
   confirm that callers never pass one. */
72 static void set_param_str(const char **params, const char *name,
73 const char *value, ODR odr)
75 char *quoted = odr_malloc(odr, 3 + strlen(value));
76 sprintf(quoted, "'%s'", value);
/* Integer counterpart of set_param_str: formats value with ZINT_FORMAT
   between single quotes into a fixed 30-byte odr_malloc buffer.
   NOTE(review): the rest of the parameter list and the statements that
   store the result into params are not visible in this listing. */
84 static void set_param_int(const char **params, const char *name,
87 char *quoted = odr_malloc(odr, 30); /* 25 digits enough for 2^64 */
90 sprintf(quoted, "'" ZINT_FORMAT "'", value);
/* Compile-time switch: with the value 0 the input callbacks guarded by
   "#if ENABLE_INPUT_CALLBACK" here and in filter_init are left out. */
96 #define ENABLE_INPUT_CALLBACK 0
98 #if ENABLE_INPUT_CALLBACK
/* Match callback; logs the filename it is asked about.
   NOTE(review): its return value is not visible in this listing. */
99 static int zebra_xmlInputMatchCallback (char const *filename)
101 yaz_log(YLOG_LOG, "match %s", filename);
/* Open, read and close callbacks registered together with the match
   callback in filter_init.  Their bodies are not visible in this listing. */
105 static void * zebra_xmlInputOpenCallback (char const *filename)
110 static int zebra_xmlInputReadCallback (void * context, char * buffer, int len)
115 static int zebra_xmlInputCloseCallback (void * context)
/* Allocates a filter_info with xmalloc, clears the name/path/split fields
   shown below and creates an ODR handle with odr_createmem(ODR_ENCODE).
   When ENABLE_INPUT_CALLBACK is non-zero it also registers libxml2's default
   input callbacks followed by the zebra_xmlInput* callbacks.
   NOTE(review): res and recType are not used in the visible lines, and the
   return statement is not visible in this listing. */
121 static void *filter_init(Res res, RecType recType)
123 struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
126 tinfo->full_name = 0;
127 tinfo->profile_path = 0;
128 tinfo->split_level = 0;
129 tinfo->split_path = 0;
130 tinfo->odr = odr_createmem(ODR_ENCODE);
138 #if ENABLE_INPUT_CALLBACK
139 xmlRegisterDefaultInputCallbacks();
140 xmlRegisterInputCallbacks(zebra_xmlInputMatchCallback,
141 zebra_xmlInputOpenCallback,
142 zebra_xmlInputReadCallback,
143 zebra_xmlInputCloseCallback);
/* If attr is called name and its first child is a text node, stores a
   pointer to that child's content in *dst_content.  Nothing is copied: the
   stored pointer aliases the text held by the attribute node.
   NOTE(review): the return statements are not visible in this listing. */
148 static int attr_content(struct _xmlAttr *attr, const char *name,
149 const char **dst_content)
151 if (!XML_STRCMP(attr->name, name) && attr->children
152 && attr->children->type == XML_TEXT_NODE)
154 *dst_content = (const char *)(attr->children->content);
/* Releases the schema list hanging off tinfo->schemas and the parsed
   configuration document tinfo->doc.
   NOTE(review): the loop header, the release of each list node and the
   resetting of tinfo fields are not visible in this listing. */
160 static void destroy_schemas(struct filter_info *tinfo)
162 struct filter_schema *schema = tinfo->schemas;
    /* remember the successor before the current entry is dealt with */
165 struct filter_schema *schema_next = schema->next;
    /* a compiled stylesheet exists only for entries that had one parsed */
166 if (schema->stylesheet_xsp)
167 xsltFreeStylesheet(schema->stylesheet_xsp);
169 schema = schema_next;
174 xmlFreeDoc(tinfo->doc);
/* Loads the filter configuration file fname into tinfo.
   Visible steps: duplicate fname into tinfo->fname, resolve it against
   tinfo->profile_path, parse it with xmlParseFile, require a <schemaInfo>
   root element, then walk the root's child elements:
     <schema>  allocates a filter_schema, links it into tinfo->schemas,
               reads the attributes identifier, name, stylesheet and default,
               and compiles the stylesheet with xsltParseStylesheetFile;
     <split>   reads the attribute level;
     others    are reported with a "Bad element" warning.
   The attribute strings are stored through attr_content, which stores
   pointers into the parsed document rather than copies.
   NOTE(review): the return statements and several error-path lines are not
   visible in this listing. */
178 static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
180 char tmp_full_name[1024];
182 tinfo->fname = xstrdup(fname);
    /* Use the resolved path when resolution succeeds; the second xstrdup
       keeps the name as given (the else line itself is not visible here). */
184 if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path,
185 NULL, tmp_full_name))
186 tinfo->full_name = xstrdup(tmp_full_name);
188 tinfo->full_name = xstrdup(tinfo->fname);
190 yaz_log(YLOG_LOG, "alvis filter: loading config file %s", tinfo->full_name);
192 tinfo->doc = xmlParseFile(tinfo->full_name);
196 yaz_log(YLOG_WARN, "alvis filter: could not parse config file %s",
202 ptr = xmlDocGetRootElement(tinfo->doc);
203 if (!ptr || ptr->type != XML_ELEMENT_NODE
204 || XML_STRCMP(ptr->name, "schemaInfo"))
207 "alvis filter: config file %s :"
208 " expected root element <schemaInfo>",
213 for (ptr = ptr->children; ptr; ptr = ptr->next)
215 if (ptr->type != XML_ELEMENT_NODE)
217 if (!XML_STRCMP(ptr->name, "schema"))
219 struct _xmlAttr *attr;
220 struct filter_schema *schema = xmalloc(sizeof(*schema));
222 schema->identifier = 0;
223 schema->stylesheet = 0;
224 schema->default_schema = 0;
    /* the new entry becomes the list head, so the list ends up in reverse
       order of appearance in the file */
225 schema->next = tinfo->schemas;
226 schema->stylesheet_xsp = 0;
227 tinfo->schemas = schema;
228 for (attr = ptr->properties; attr; attr = attr->next)
230 attr_content(attr, "identifier", &schema->identifier);
231 attr_content(attr, "name", &schema->name);
232 attr_content(attr, "stylesheet", &schema->stylesheet);
233 attr_content(attr, "default", &schema->default_schema);
235 /*yaz_log(YLOG_LOG, "XSLT add %s %s %s",
236 schema->name, schema->identifier, schema->stylesheet); */
238 /* find requested schema */
240 if (schema->stylesheet)
242 char tmp_xslt_full_name[1024];
243 if (!yaz_filepath_resolve(schema->stylesheet, tinfo->profile_path,
244 NULL, tmp_xslt_full_name))
247 "alvis filter: stylesheet %s not found in path %s",
248 schema->stylesheet, tinfo->profile_path);
251 schema->stylesheet_xsp
252 = xsltParseStylesheetFile((const xmlChar*) tmp_xslt_full_name);
253 if (!schema->stylesheet_xsp)
256 "alvis filter: could not parse xslt stylesheet %s",
262 else if (!XML_STRCMP(ptr->name, "split"))
264 struct _xmlAttr *attr;
265 for (attr = ptr->properties; attr; attr = attr->next)
    /* NOTE(review): split_level_str is reset for every attribute, so the
       expression below yields 0 for any attribute other than "level".  If
       the assignment target (not visible here) is tinfo->split_level, a
       later non-level attribute would overwrite an earlier level - confirm. */
267 const char *split_level_str = 0;
268 attr_content(attr, "level", &split_level_str);
270 split_level_str ? atoi(split_level_str) : 0;
275 yaz_log(YLOG_WARN, "Bad element %s in %s", ptr->name, fname);
/* Searches tinfo->schemas for the schema to use for the request string est.
   An entry is compared on its identifier and on its name (each only when
   set); an entry with default_schema set is handled by the "default" branch.
   The final visible return hands back tinfo->schemas, the head of the list.
   NOTE(review): the rest of the parameter list, the guarding conditions and
   the per-branch return statements are not visible in this listing. */
282 static struct filter_schema *lookup_schema(struct filter_info *tinfo,
285 struct filter_schema *schema;
287 for (schema = tinfo->schemas; schema; schema = schema->next)
289 /* find requested schema */
292 if (schema->identifier && !strcmp(schema->identifier, est))
295 if (schema->name && !strcmp(schema->name, est))
298 /* or return default schema if defined */
299 else if (schema->default_schema)
303 /* return first schema if no default schema defined */
305 return tinfo->schemas;
/* Configures the filter from args, the name of the configuration file.
   A warning "need config file" is logged on a path whose condition is not
   visible here.  The check against tinfo->fname detects that the same file
   name was already given; what happens then is not visible in this listing.
   Otherwise profilePath is read from res, any existing schemas are destroyed
   and create_schemas is run on args, whose result is returned. */
310 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
312 struct filter_info *tinfo = clientData;
315 yaz_log(YLOG_WARN, "alvis filter: need config file");
319 if (tinfo->fname && !strcmp(args, tinfo->fname))
322 tinfo->profile_path = res_get(res, "profilePath");
323 yaz_log(YLOG_LOG, "alvis filter: profilePath %s", tinfo->profile_path);
325 destroy_schemas(tinfo);
326 return create_schemas(tinfo, args);
/* Tears down a filter instance: destroys the schemas, frees full_name,
   frees the text reader and destroys the ODR handle.
   NOTE(review): any guard around xmlFreeTextReader and the release of tinfo
   itself are not visible in this listing. */
329 static void filter_destroy(void *clientData)
331 struct filter_info *tinfo = clientData;
332 destroy_schemas(tinfo);
333 xfree(tinfo->full_name);
335 xmlFreeTextReader(tinfo->reader);
336 odr_destroy(tinfo->odr);
/* Read callback for extraction: context is the recExtractCtrl, and the
   request is forwarded to its stream's readf; that result is returned. */
340 static int ioread_ex(void *context, char *buffer, int len)
342 struct recExtractCtrl *p = context;
343 return p->stream->readf(p->stream, buffer, len);
/* Close callback paired with ioread_ex in xmlReaderForIO and xmlReadIO.
   Its body is not visible in this listing. */
346 static int ioclose_ex(void *context)
/* Walks ptr and its following siblings, recursing into each node's children
   first.  For a text node the content becomes the term (term_buf/term_len)
   of recWord, which is then handed to ctrl->tokenAdd.
   NOTE(review): the statement following the XML_TEXT_NODE check is not
   visible; presumably non-text nodes are skipped - confirm. */
351 static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
352 xmlNodePtr ptr, RecWord *recWord)
354 for(; ptr; ptr = ptr->next)
356 index_cdata(tinfo, ctrl, ptr->children, recWord);
357 if (ptr->type != XML_TEXT_NODE)
    /* term_buf points at the node's own content; no copy is made */
359 recWord->term_buf = (const char *)ptr->content;
360 recWord->term_len = XML_STRLEN(ptr->content);
361 (*ctrl->tokenAdd)(recWord);
/* Walks ptr and its following siblings, recursing into children first, and
   looks for elements named "index" in the zebra_xslt_ns namespace.  For such
   an element the attributes name, xpath and type are read; index_name is set
   from name, index_type is overridden when a non-empty type is given, the
   text below the element is indexed with index_cdata, and the previous
   index_type is restored afterwards.
   NOTE(review): the statement following the element/namespace check is not
   visible (presumably it skips the node), and xpath_str is not used in the
   visible lines. */
365 static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
366 xmlNodePtr ptr, RecWord *recWord)
368 for(; ptr; ptr = ptr->next)
370 index_node(tinfo, ctrl, ptr->children, recWord);
371 if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
372 XML_STRCMP(ptr->ns->href, zebra_xslt_ns))
374 if (!XML_STRCMP(ptr->name, "index"))
376 const char *name_str = 0;
377 const char *type_str = 0;
378 const char *xpath_str = 0;
379 struct _xmlAttr *attr;
380 for (attr = ptr->properties; attr; attr = attr->next)
382 attr_content(attr, "name", &name_str);
383 attr_content(attr, "xpath", &xpath_str);
384 attr_content(attr, "type", &type_str);
388 const char *prev_type = recWord->index_type; /* save default type */
390 if (type_str && *type_str)
391 recWord->index_type = (const char *) type_str; /* type was given */
392 recWord->index_name = name_str;
393 index_cdata(tinfo, ctrl, ptr->children, recWord);
395 recWord->index_type = prev_type; /* restore it again */
/* Handles the root of a transformed record.  When ptr is an element named
   "record" in the zebra_xslt_ns namespace, its attributes type, id and rank
   are read: id is scanned into ctrl->match_criteria (at most 255
   non-whitespace characters), rank is converted with atozint into
   ctrl->staticrank, and type (default "update") selects the action.
   "update" indexes the tree with index_node; "delete" only logs that it is
   not implemented; a warning about an unknown record type is also logged,
   presumably on the remaining else branch (the else line is not visible).
   NOTE(review): the guards around the sscanf and atozint calls are not
   visible in this listing. */
401 static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl,
402 xmlNodePtr ptr, RecWord *recWord)
404 const char *type_str = "update";
406 if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns &&
407 !XML_STRCMP(ptr->ns->href, zebra_xslt_ns)
408 && !XML_STRCMP(ptr->name, "record"))
410 const char *id_str = 0;
411 const char *rank_str = 0;
412 struct _xmlAttr *attr;
413 for (attr = ptr->properties; attr; attr = attr->next)
415 attr_content(attr, "type", &type_str);
416 attr_content(attr, "id", &id_str);
417 attr_content(attr, "rank", &rank_str);
420 sscanf(id_str, "%255s", ctrl->match_criteria);
423 ctrl->staticrank = atozint(rank_str);
427 if (!strcmp("update", type_str))
428 index_node(tinfo, ctrl, ptr, recWord);
429 else if (!strcmp("delete", type_str))
430 yaz_log(YLOG_WARN, "alvis filter delete: to be implemented");
432 yaz_log(YLOG_WARN, "alvis filter: unknown record type '%s'",
/* Indexes one parsed XML document.  The schema is looked up with
   zebra_xslt_ns as the request string and a "schema" stylesheet parameter is
   set to the same value.  When a compiled stylesheet is available it is
   applied; the result is optionally dumped to stdout (flagShowRecords) and
   its root element is passed to index_record, with a warning when there is
   no root.  The input document is serialised with xmlDocDumpMemory, echoed
   to stdout when flagShowRecords is set, and handed to p->setStoreData.
   NOTE(review): the rest of the parameter list, the release of documents and
   buffers, and the guards around setStoreData are not visible in this
   listing. */
436 static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
440 const char *params[10];
444 struct filter_schema *schema = lookup_schema(tinfo, zebra_xslt_ns);
447 set_param_str(params, "schema", zebra_xslt_ns, tinfo->odr);
449 (*p->init)(p, &recWord);
451 if (schema && schema->stylesheet_xsp)
455 xsltApplyStylesheet(schema->stylesheet_xsp,
457 if (p->flagShowRecords)
459 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
460 fwrite(buf_out, len_out, 1, stdout);
463 root_ptr = xmlDocGetRootElement(resDoc);
465 index_record(tinfo, p, root_ptr, &recWord);
468 yaz_log(YLOG_WARN, "No root for index XML record."
469 " split_level=%d stylesheet=%s",
470 tinfo->split_level, schema->stylesheet);
474 xmlDocDumpMemory(doc, &buf_out, &len_out);
475 if (p->flagShowRecords)
476 fwrite(buf_out, len_out, 1, stdout);
478 (*p->setStoreData)(p, buf_out, len_out);
482 return RECCTRL_EXTRACT_OK;
/* Extracts records from a stream that holds several of them.  A text reader
   over the stream (ioread_ex/ioclose_ex) is kept in tinfo->reader.  Nodes
   are read until an element at depth tinfo->split_level is found; that
   element is expanded, deep-copied into a new document and passed to
   extract_doc, whose result is returned.  RECCTRL_EXTRACT_EOF is returned
   once the reader has been freed after the read loop;
   RECCTRL_EXTRACT_ERROR_GENERIC is returned on the visible failure paths.
   NOTE(review): the conditions deciding when the reader is (re)created or
   freed, and the loop header, are not visible in this listing. */
485 static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
492 xmlFreeTextReader(tinfo->reader);
493 tinfo->reader = xmlReaderForIO(ioread_ex, ioclose_ex,
502 return RECCTRL_EXTRACT_ERROR_GENERIC;
504 ret = xmlTextReaderRead(tinfo->reader);
507 int type = xmlTextReaderNodeType(tinfo->reader);
508 int depth = xmlTextReaderDepth(tinfo->reader);
509 if (type == XML_READER_TYPE_ELEMENT && tinfo->split_level == depth)
511 xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
    /* the second argument 1 asks xmlCopyNode for a recursive copy, so the
       new document does not share nodes with the reader */
514 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
515 xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
517 xmlDocSetRootElement(doc, ptr2);
519 return extract_doc(tinfo, p, doc);
523 xmlFreeTextReader(tinfo->reader);
525 return RECCTRL_EXTRACT_ERROR_GENERIC;
528 ret = xmlTextReaderRead(tinfo->reader);
530 xmlFreeTextReader(tinfo->reader);
532 return RECCTRL_EXTRACT_EOF;
/* Treats the whole stream as a single record.  On the first call for a
   stream (p->first_record) the stream is parsed with xmlReadIO and the
   document is passed to extract_doc, whose result is returned;
   RECCTRL_EXTRACT_ERROR_GENERIC is returned on the visible failure paths.
   Any other call returns RECCTRL_EXTRACT_EOF.
   NOTE(review): the exact failure conditions (parse result, missing root)
   and the xmlReadIO options are not visible in this listing. */
535 static int extract_full(struct filter_info *tinfo, struct recExtractCtrl *p)
537 if (p->first_record) /* only one record per stream */
539 xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
546 return RECCTRL_EXTRACT_ERROR_GENERIC;
548 xmlNodePtr root = xmlDocGetRootElement(doc);
550 return RECCTRL_EXTRACT_ERROR_GENERIC;
553 return extract_doc(tinfo, p, doc);
556 return RECCTRL_EXTRACT_EOF;
/* Extraction entry point.  Resets the filter's ODR memory, then takes the
   one-record-per-stream path (extract_full) when no split level is
   configured or when p->setStoreData is null; otherwise the stream is split
   into records with extract_split. */
559 static int filter_extract(void *clientData, struct recExtractCtrl *p)
561 struct filter_info *tinfo = clientData;
563 odr_reset(tinfo->odr);
564 if (tinfo->split_level == 0 || p->setStoreData == 0)
565 return extract_full(tinfo, p);
567 return extract_split(tinfo, p);
/* Read callback for retrieval: context is the recRetrieveCtrl, and the
   request is forwarded to its stream's readf; that result is returned. */
570 static int ioread_ret(void *context, char *buffer, int len)
572 struct recRetrieveCtrl *p = context;
573 return p->stream->readf(p->stream, buffer, len);
/* Close callback paired with ioread_ret in xmlReadIO.  Its body is not
   visible in this listing. */
576 static int ioclose_ret(void *context)
/* Retrieval entry point.  Visible steps:
   1. take the element set name esn from the simple or complex record
      composition in p->comp and look up the matching schema;
   2. build stylesheet parameters: id, filename, rank (only when
      p->staticrank >= 0), schema, score and size;
   3. parse the stored record from the stream with xmlReadIO;
   4. apply the schema's compiled stylesheet when there is one;
   5. serialise the result as XML or SUTRS into a buffer allocated from
      p->odr, or set a diagnostic.
   Diagnostics set in the visible lines: element set name not valid, system
   error in presenting records, and record syntax unsupported.
   NOTE(review): declarations, return statements, the release of documents
   and buffers, and several branch conditions are not visible in this
   listing. */
581 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
583 /* const char *esn = zebra_xslt_ns; */
585 const char *params[32];
586 struct filter_info *tinfo = clientData;
589 struct filter_schema *schema;
593 if (p->comp->which == Z_RecordComp_simple
594 && p->comp->u.simple->which == Z_ElementSetNames_generic)
596 esn = p->comp->u.simple->u.generic;
598 else if (p->comp->which == Z_RecordComp_complex
599 && p->comp->u.complex->generic->elementSpec
600 && p->comp->u.complex->generic->elementSpec->which ==
601 Z_ElementSpec_elementSetName)
603 esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
606 schema = lookup_schema(tinfo, esn);
610 YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
615 set_param_int(params, "id", p->localno, p->odr);
617 set_param_str(params, "filename", p->fname, p->odr);
618 if (p->staticrank >= 0)
619 set_param_int(params, "rank", p->staticrank, p->odr);
    /* the "schema" parameter is taken from esn, the schema's name, its
       identifier or the empty string; the conditions choosing between the
       first two are not visible here */
622 set_param_str(params, "schema", esn, p->odr);
625 set_param_str(params, "schema", schema->name, p->odr);
626 else if (schema->identifier)
627 set_param_str(params, "schema", schema->identifier, p->odr);
629 set_param_str(params, "schema", "", p->odr);
632 set_param_int(params, "score", p->score, p->odr);
633 set_param_int(params, "size", p->recordSize, p->odr);
635 doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
638 XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET);
641 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
645 if (!schema->stylesheet_xsp)
649 resDoc = xsltApplyStylesheet(schema->stylesheet_xsp,
655 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
    /* XML is produced when no record syntax was requested or XML was asked
       for explicitly */
657 else if (!p->input_format
658 || !oid_oidcmp(p->input_format, yaz_oid_recsyn_xml))
    /* with a stylesheet the result is saved according to that stylesheet;
       otherwise the document is dumped as is */
663 if (schema->stylesheet_xsp)
664 xsltSaveResultToString(&buf_out, &len_out, resDoc,
665 schema->stylesheet_xsp);
667 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
669 p->output_format = yaz_oid_recsyn_xml;
670 p->rec_len = len_out;
671 p->rec_buf = odr_malloc(p->odr, p->rec_len);
672 memcpy(p->rec_buf, buf_out, p->rec_len);
    /* NOTE(review): this branch tests p->output_format, while the XML branch
       above tests p->input_format - confirm which field is intended. */
675 else if (!oid_oidcmp(p->output_format, yaz_oid_recsyn_sutrs))
680 if (schema->stylesheet_xsp)
681 xsltSaveResultToString(&buf_out, &len_out, resDoc,
682 schema->stylesheet_xsp);
684 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
686 p->output_format = yaz_oid_recsyn_sutrs;
687 p->rec_len = len_out;
688 p->rec_buf = odr_malloc(p->odr, p->rec_len);
689 memcpy(p->rec_buf, buf_out, p->rec_len);
695 p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
701 static struct recType filter_type = {
712 #ifdef IDZEBRA_STATIC_ALVIS
725 * c-file-style: "Stroustrup"
726 * indent-tabs-mode: nil
728 * vim: shiftwidth=4 tabstop=8 expandtab