-/* $Id: xslt.c,v 1.11 2005-06-23 06:45:47 adam Exp $
+/* $Id: xslt.c,v 1.27 2006-05-31 16:11:58 marc Exp $
Copyright (C) 1995-2005
Index Data ApS
#include <ctype.h>
#include <yaz/diagbib1.h>
+#include <yaz/tpath.h>
+
#include <libxml/xmlversion.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlIO.h>
#include <libxml/xmlreader.h>
#include <libxslt/transform.h>
+/* #include <libxslt/xsltutils.h> */
#include <idzebra/util.h>
#include <idzebra/recctrl.h>
-struct filter_schema {
+/*
+ * One stylesheet entry of the filter configuration.
+ *
+ * Entries are chained through `next`; the head of the chain is the
+ * `schemas` member of the filter info struct.  `name` and `identifier` are
+ * the two keys lookup_schema() compares a requested schema against,
+ * `stylesheet` is the stylesheet file name from the config and
+ * `stylesheet_xsp` the result of parsing that file with libxslt.
+ * `default_schema` and `include_snippet` hold the text of the "default"
+ * and "snippet" attributes when those are present.
+ */
+struct filter_xslt_schema {
const char *name;
const char *identifier;
const char *stylesheet;
- struct filter_schema *next;
+ struct filter_xslt_schema *next;
const char *default_schema;
const char *include_snippet;
xsltStylesheetPtr stylesheet_xsp;
};
-struct filter_info {
+/*
+ * Per-filter state, allocated by the init handler and passed back to every
+ * other handler as clientData.
+ *
+ * `fname` is a private copy of the config file name handed to
+ * filter_config(); `full_name` is that name after resolution against
+ * `profile_path` (or a plain copy of `fname` when resolution fails).
+ * `profile_path` is whatever the "profilePath" resource yields.
+ * `doc` is the parsed config document and `schemas` the list built from it.
+ */
+struct filter_xslt_info {
xmlDocPtr doc;
char *fname;
+ char *full_name;
+ const char *profile_path;
const char *split_level;
const char *split_path;
ODR odr;
- struct filter_schema *schemas;
+ struct filter_xslt_schema *schemas;
xmlTextReaderPtr reader;
};
+
#define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1"
+/*
+ * Thin wrappers that let the <string.h> routines be used on libxml2
+ * strings (xmlChar pointers) without a cast at every call site.  The
+ * arguments are parenthesized so that any expression can be passed, and
+ * the cast keeps the const qualifier instead of discarding it.
+ */
+#define XML_STRCMP(a,b) strcmp((const char *)(a), (b))
+#define XML_STRLEN(a) strlen((const char *)(a))
+
static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
static void set_param_xml(const char **params, const char *name,
params[2] = 0;
}
+/*
+ * Compile-time switch for the experimental libxml2 input callbacks below.
+ * With the value 0 the four callbacks are not compiled at all; the same
+ * macro also guards their registration in the init handler.
+ *
+ * All four callbacks are stubs: the match callback logs the file name and
+ * returns 0, the open callback returns a null context, and the read and
+ * close callbacks return 0.
+ * NOTE(review): a 0 from the match callback presumably tells libxml2 to
+ * fall through to its other input handlers - confirm against the xmlIO
+ * documentation before enabling.
+ */
+#define ENABLE_INPUT_CALLBACK 0
-int zebra_xmlInputMatchCallback (char const *filename)
+#if ENABLE_INPUT_CALLBACK
+static int zebra_xmlInputMatchCallback (char const *filename)
{
yaz_log(YLOG_LOG, "match %s", filename);
return 0;
}
-
-void * zebra_xmlInputOpenCallback (char const *filename)
+static void * zebra_xmlInputOpenCallback (char const *filename)
{
return 0;
}
-int zebra_xmlInputReadCallback (void * context, char * buffer, int len)
+static int zebra_xmlInputReadCallback (void * context, char * buffer, int len)
{
return 0;
}
-int zebra_xmlInputCloseCallback (void * context)
+static int zebra_xmlInputCloseCallback (void * context)
{
return 0;
}
+#endif
-
-
-
-
-static void *filter_init_xslt(Res res, RecType recType)
+static void *filter_init(Res res, RecType recType)
{
- struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
+ struct filter_xslt_info *tinfo
+ = (struct filter_xslt_info *) xmalloc(sizeof(*tinfo));
tinfo->reader = 0;
tinfo->fname = 0;
+ tinfo->full_name = 0;
+ tinfo->profile_path = 0;
tinfo->split_level = 0;
tinfo->split_path = 0;
tinfo->odr = odr_createmem(ODR_ENCODE);
tinfo->doc = 0;
tinfo->schemas = 0;
-#if 0
+#if ENABLE_INPUT_CALLBACK
xmlRegisterDefaultInputCallbacks();
xmlRegisterInputCallbacks(zebra_xmlInputMatchCallback,
zebra_xmlInputOpenCallback,
return tinfo;
}
-static void *filter_init_xslt1(Res res, RecType recType)
-{
- struct filter_info *tinfo = (struct filter_info *)
- filter_init_xslt(res, recType);
- tinfo->split_level = "1";
- return tinfo;
-}
-
+/*
+ * Fetch the text value of an attribute if it has the wanted name.
+ *
+ * attr:        attribute node to inspect
+ * name:        attribute name to match (exact, case-sensitive)
+ * dst_content: receives a pointer to the attribute's text; the pointer
+ *              refers to the node's own content, no copy is made
+ *
+ * Returns 1 and sets *dst_content when the attribute is called `name` and
+ * its first child is a text node.  Otherwise returns 0 and leaves
+ * *dst_content untouched, so callers can pre-load a default value.
+ */
static int attr_content(struct _xmlAttr *attr, const char *name,
const char **dst_content)
{
- if (!strcmp(attr->name, name) && attr->children &&
+ if (!XML_STRCMP(attr->name, name) && attr->children &&
attr->children->type == XML_TEXT_NODE)
{
- *dst_content = attr->children->content;
+ *dst_content = (const char *)(attr->children->content);
return 1;
}
return 0;
}
-static void destroy_schemas(struct filter_info *tinfo)
+static void destroy_schemas(struct filter_xslt_info *tinfo)
{
- struct filter_schema *schema = tinfo->schemas;
+ struct filter_xslt_schema *schema = tinfo->schemas;
while (schema)
{
- struct filter_schema *schema_next = schema->next;
+ struct filter_xslt_schema *schema_next = schema->next;
if (schema->stylesheet_xsp)
xsltFreeStylesheet(schema->stylesheet_xsp);
xfree(schema);
tinfo->doc = 0;
}
-static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
+static ZEBRA_RES create_schemas(struct filter_xslt_info *tinfo,
+ const char *fname)
{
+ char tmp_full_name[1024];
xmlNodePtr ptr;
tinfo->fname = xstrdup(fname);
- tinfo->doc = xmlParseFile(tinfo->fname);
- if (!tinfo->doc)
- return ZEBRA_FAIL;
+
+ if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path,
+ NULL, tmp_full_name))
+ tinfo->full_name = xstrdup(tmp_full_name);
+ else
+ tinfo->full_name = xstrdup(tinfo->fname);
+
+ yaz_log(YLOG_LOG, "xslt filter: loading config file %s", tinfo->full_name);
+
+ tinfo->doc = xmlParseFile(tinfo->full_name);
+ if (!tinfo->doc) {
+ yaz_log(YLOG_WARN, "xslt filter: could not parse config file %s",
+ tinfo->full_name);
+ return ZEBRA_FAIL;
+ }
+
ptr = xmlDocGetRootElement(tinfo->doc);
if (!ptr || ptr->type != XML_ELEMENT_NODE ||
- strcmp(ptr->name, "schemaInfo"))
- return ZEBRA_FAIL;
+ XML_STRCMP(ptr->name, "schemaInfo")){
+ yaz_log(YLOG_WARN,
+ "xslt filter: config file %s :"
+ " expected root element <schemaInfo>",
+ tinfo->full_name);
+ return ZEBRA_FAIL;
+ }
+
for (ptr = ptr->children; ptr; ptr = ptr->next)
{
if (ptr->type != XML_ELEMENT_NODE)
continue;
- if (!strcmp(ptr->name, "schema"))
+ if (!XML_STRCMP(ptr->name, "schema"))
{
+ char tmp_xslt_full_name[1024];
struct _xmlAttr *attr;
- struct filter_schema *schema = xmalloc(sizeof(*schema));
+ struct filter_xslt_schema *schema = xmalloc(sizeof(*schema));
schema->name = 0;
schema->identifier = 0;
schema->stylesheet = 0;
attr_content(attr, "default", &schema->default_schema);
attr_content(attr, "snippet", &schema->include_snippet);
}
- if (schema->stylesheet)
- schema->stylesheet_xsp =
- xsltParseStylesheetFile(
- (const xmlChar*) schema->stylesheet);
+
+ if (schema->stylesheet){
+ yaz_filepath_resolve(schema->stylesheet, tinfo->profile_path,
+ NULL, tmp_xslt_full_name);
+ schema->stylesheet_xsp
+ = xsltParseStylesheetFile((const xmlChar*) tmp_xslt_full_name);
+ if (!schema->stylesheet_xsp)
+ yaz_log(YLOG_WARN,
+ "xslt filter: could not parse xslt stylesheet %s",
+ tmp_xslt_full_name);
+ }
+
}
- else if (!strcmp(ptr->name, "split"))
+ else if (!XML_STRCMP(ptr->name, "split"))
{
struct _xmlAttr *attr;
for (attr = ptr->properties; attr; attr = attr->next)
return ZEBRA_OK;
}
-static struct filter_schema *lookup_schema(struct filter_info *tinfo,
+/*
+ * Pick the schema entry to use for a request.
+ *
+ * tinfo: filter state holding the schema list
+ * est:   requested schema identifier or name, or a null pointer when the
+ *        caller has no preference
+ *
+ * With a non-null `est` the list is searched for an entry whose
+ * identifier or name equals `est`.  With a null `est` the first entry
+ * that carries a "default" attribute is returned.  If neither search
+ * succeeds the first entry of the list is returned; the result is a null
+ * pointer only when the list is empty.
+ * NOTE(review): this means a non-null `est` that matches nothing also
+ * ends up with the first schema rather than a null result - confirm that
+ * callers expect this.
+ */
+static struct filter_xslt_schema *lookup_schema(struct filter_xslt_info *tinfo,
const char *est)
{
- struct filter_schema *schema;
+ struct filter_xslt_schema *schema;
for (schema = tinfo->schemas; schema; schema = schema->next)
{
- if (est)
- {
+ /* find requested schema */
+ if (est)
+ {
if (schema->identifier && !strcmp(schema->identifier, est))
- return schema;
+ return schema;
+
if (schema->name && !strcmp(schema->name, est))
return schema;
- }
- if (schema->default_schema)
+ }
+ /* or return default schema if defined */
+ else if (schema->default_schema)
return schema;
}
+
+ /* return first schema if no default schema defined */
+ if (tinfo->schemas)
+ return tinfo->schemas;
+
return 0;
}
-static void filter_config(void *clientData, Res res, const char *args)
+/*
+ * Configure the filter from the config file named by `args`.
+ *
+ * clientData: the filter state created by the init handler
+ * res:        resource set; its "profilePath" entry is used to locate the
+ *             config file and the stylesheets it refers to
+ * args:       config file name; must be non-empty
+ *
+ * Returns ZEBRA_FAIL when no config file name is given or when the config
+ * cannot be loaded, ZEBRA_OK otherwise.  Calling it again with the file
+ * name that is already loaded is a no-op.
+ */
+static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
{
- struct filter_info *tinfo = clientData;
- if (!args || !*args)
- args = "xsltfilter.xml";
+ struct filter_xslt_info *tinfo = clientData;
+ if (!args || !*args){
+ yaz_log(YLOG_WARN, "xslt filter: need config file");
+ return ZEBRA_FAIL;
+ }
+
if (tinfo->fname && !strcmp(args, tinfo->fname))
- return;
+ return ZEBRA_OK;
+
+    tinfo->profile_path = res_get(res, "profilePath");
+    /* profilePath may be unset; never hand a null pointer to %s */
+    yaz_log(YLOG_LOG, "xslt filter: profilePath %s",
+            tinfo->profile_path ? tinfo->profile_path : "(none)");
+
destroy_schemas(tinfo);
- create_schemas(tinfo, args);
+    /* report a config that failed to load instead of claiming success */
+    return create_schemas(tinfo, args);
}
static void filter_destroy(void *clientData)
{
- struct filter_info *tinfo = clientData;
+ struct filter_xslt_info *tinfo = clientData;
destroy_schemas(tinfo);
if (tinfo->reader)
xmlFreeTextReader(tinfo->reader);
return 0;
}
-static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
+static void index_cdata(struct filter_xslt_info *tinfo, struct recExtractCtrl *ctrl,
xmlNodePtr ptr, RecWord *recWord)
{
for(; ptr; ptr = ptr->next)
index_cdata(tinfo, ctrl, ptr->children, recWord);
if (ptr->type != XML_TEXT_NODE)
continue;
- recWord->term_buf = ptr->content;
- recWord->term_len = strlen(ptr->content);
+ recWord->term_buf = (const char *)ptr->content;
+ recWord->term_len = XML_STRLEN(ptr->content);
(*ctrl->tokenAdd)(recWord);
}
}
-static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
+static void index_node(struct filter_xslt_info *tinfo, struct recExtractCtrl *ctrl,
xmlNodePtr ptr, RecWord *recWord)
{
for(; ptr; ptr = ptr->next)
{
index_node(tinfo, ctrl, ptr->children, recWord);
if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
- strcmp(ptr->ns->href, zebra_xslt_ns))
+ XML_STRCMP(ptr->ns->href, zebra_xslt_ns))
continue;
- if (!strcmp(ptr->name, "index"))
+ if (!XML_STRCMP(ptr->name, "index"))
{
- char *name_str = 0;
+ const char *name_str = 0;
const char *type_str = 0;
const char *xpath_str = 0;
struct _xmlAttr *attr;
for (attr = ptr->properties; attr; attr = attr->next)
{
- if (!strcmp(attr->name, "name")
- && attr->children && attr->children->type == XML_TEXT_NODE)
- name_str = attr->children->content;
- if (!strcmp(attr->name, "xpath")
- && attr->children && attr->children->type == XML_TEXT_NODE)
- xpath_str = attr->children->content;
- if (!strcmp(attr->name, "type")
- && attr->children && attr->children->type == XML_TEXT_NODE)
- type_str = attr->children->content;
+ attr_content(attr, "name", &name_str);
+ attr_content(attr, "xpath", &xpath_str);
+ attr_content(attr, "type", &type_str);
}
if (name_str)
{
}
}
-static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
+/*
+ * Index one transformed record.
+ *
+ * If `ptr` is a <record> element in the Zebra XSLT namespace, its "id",
+ * "rank" and "type" attributes are read first:
+ *   - "id":   its first whitespace-delimited token (at most 255 characters)
+ *             is copied into ctrl->match_criteria
+ *   - "rank": converted with atoi() and stored in ctrl->staticrank
+ *   - "type": read (default "update") but not used further in this function
+ * and indexing continues with the children of the <record> element.
+ * For any other node, indexing starts at `ptr` itself.
+ */
+static void index_record(struct filter_xslt_info *tinfo,struct recExtractCtrl *ctrl,
+ xmlNodePtr ptr, RecWord *recWord)
+{
+ if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns &&
+ !XML_STRCMP(ptr->ns->href, zebra_xslt_ns)
+ && !XML_STRCMP(ptr->name, "record"))
+ {
+ const char *type_str = "update";
+ const char *id_str = 0;
+ const char *rank_str = 0;
+ struct _xmlAttr *attr;
+ for (attr = ptr->properties; attr; attr = attr->next)
+ {
+ attr_content(attr, "type", &type_str);
+ attr_content(attr, "id", &id_str);
+ attr_content(attr, "rank", &rank_str);
+ }
+ /* assumes ctrl->match_criteria has room for 256 bytes - TODO confirm */
+ if (id_str)
+ sscanf(id_str, "%255s", ctrl->match_criteria);
+ if (rank_str)
+ {
+ ctrl->staticrank = atoi(rank_str);
+ yaz_log(YLOG_LOG, "rank=%d",ctrl->staticrank);
+ }
+ else
+ yaz_log(YLOG_LOG, "no rank");
+
+ /* the index instructions live below the <record> wrapper */
+ ptr = ptr->children;
+ }
+ index_node(tinfo, ctrl, ptr, recWord);
+}
+
+static int extract_doc(struct filter_xslt_info *tinfo, struct recExtractCtrl *p,
xmlDocPtr doc)
{
RecWord recWord;
xmlChar *buf_out;
int len_out;
- struct filter_schema *schema = lookup_schema(tinfo, zebra_xslt_ns);
+ struct filter_xslt_schema *schema = lookup_schema(tinfo, zebra_xslt_ns);
params[0] = 0;
set_param_str(params, "schema", zebra_xslt_ns, tinfo->odr);
if (schema && schema->stylesheet_xsp)
{
+ xmlNodePtr root_ptr;
xmlDocPtr resDoc =
xsltApplyStylesheet(schema->stylesheet_xsp,
doc, params);
fwrite(buf_out, len_out, 1, stdout);
xmlFree(buf_out);
}
- index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord);
+ root_ptr = xmlDocGetRootElement(resDoc);
+ if (root_ptr)
+ index_record(tinfo, p, root_ptr, &recWord);
+ else
+ {
+ yaz_log(YLOG_WARN, "No root for index XML record."
+ " split_level=%s stylesheet=%s",
+ tinfo->split_level, schema->stylesheet);
+ }
xmlFreeDoc(resDoc);
}
xmlDocDumpMemory(doc, &buf_out, &len_out);
return RECCTRL_EXTRACT_OK;
}
-static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
+static int extract_split(struct filter_xslt_info *tinfo, struct recExtractCtrl *p)
{
int ret;
int split_depth = 0;
{
xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
- xmlDocPtr doc = xmlNewDoc("1.0");
+ xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
xmlDocSetRootElement(doc, ptr2);
- return extract_doc(tinfo, p, doc);
- }
+ return extract_doc(tinfo, p, doc);
+ }
ret = xmlTextReaderRead(tinfo->reader);
}
xmlFreeTextReader(tinfo->reader);
return RECCTRL_EXTRACT_EOF;
}
-static int extract_full(struct filter_info *tinfo, struct recExtractCtrl *p)
+static int extract_full(struct filter_xslt_info *tinfo, struct recExtractCtrl *p)
{
if (p->first_record) /* only one record per stream */
{
static int filter_extract(void *clientData, struct recExtractCtrl *p)
{
- struct filter_info *tinfo = clientData;
+ struct filter_xslt_info *tinfo = clientData;
odr_reset(tinfo->odr);
static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
{
- const char *esn = zebra_xslt_ns;
- const char *params[10];
- struct filter_info *tinfo = clientData;
+ const char *esn = 0;
+ const char *params[32];
+ struct filter_xslt_info *tinfo = clientData;
xmlDocPtr resDoc;
xmlDocPtr doc;
- struct filter_schema *schema;
+ struct filter_xslt_schema *schema;
int window_size = -1;
if (p->comp)
{
- if (p->comp->which != Z_RecordComp_simple
- || p->comp->u.simple->which != Z_ElementSetNames_generic)
+ if (p->comp->which == Z_RecordComp_simple
+ && p->comp->u.simple->which == Z_ElementSetNames_generic)
+ {
+ esn = p->comp->u.simple->u.generic;
+ }
+ else if (p->comp->which == Z_RecordComp_complex
+ && p->comp->u.complex->generic->elementSpec
+ && p->comp->u.complex->generic->elementSpec->which ==
+ Z_ElementSpec_elementSetName)
{
- p->diagnostic = YAZ_BIB1_PRESENT_COMP_SPEC_PARAMETER_UNSUPP;
- return 0;
+ esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
}
- esn = p->comp->u.simple->u.generic;
}
schema = lookup_schema(tinfo, esn);
if (!schema)
window_size = atoi(schema->include_snippet);
params[0] = 0;
- set_param_str(params, "schema", esn, p->odr);
+ set_param_int(params, "id", p->localno, p->odr);
if (p->fname)
set_param_str(params, "filename", p->fname, p->odr);
+ if (p->staticrank >= 0)
+ set_param_int(params, "rank", p->staticrank, p->odr);
+
+ if (esn)
+ set_param_str(params, "schema", esn, p->odr);
+ else
+ if (schema->name)
+ set_param_str(params, "schema", schema->name, p->odr);
+ else if (schema->identifier)
+ set_param_str(params, "schema", schema->identifier, p->odr);
+ else
+ set_param_str(params, "schema", "", p->odr);
+
if (p->score >= 0)
set_param_int(params, "score", p->score, p->odr);
set_param_int(params, "size", p->recordSize, p->odr);
-
+
if (window_size >= 0)
set_param_xml(params, "snippet", snippet_doc(p, 1, window_size),
p->odr);
{
xmlChar *buf_out;
int len_out;
- xmlDocDumpMemory(resDoc, &buf_out, &len_out);
+
+ xsltSaveResultToString(&buf_out, &len_out, resDoc,
+ schema->stylesheet_xsp);
p->output_format = VAL_TEXT_XML;
p->rec_len = len_out;
{
xmlChar *buf_out;
int len_out;
- xmlDocDumpMemory(resDoc, &buf_out, &len_out);
+
+ xsltSaveResultToString(&buf_out, &len_out, resDoc,
+ schema->stylesheet_xsp);
p->output_format = VAL_SUTRS;
p->rec_len = len_out;
return 0;
}
-static struct recType filter_type_xslt = {
+static struct recType filter_type = {
0,
"xslt",
- filter_init_xslt,
- filter_config,
- filter_destroy,
- filter_extract,
- filter_retrieve
-};
-
-static struct recType filter_type_xslt1 = {
- 0,
- "xslt1",
- filter_init_xslt1,
+ filter_init,
filter_config,
filter_destroy,
filter_extract,
#endif
[] = {
- &filter_type_xslt,
-#ifdef LIBXML_READER_ENABLED
- &filter_type_xslt1,
-#endif
+ &filter_type,
0,
};
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+