-/* $Id: xslt.c,v 1.1 2005-04-28 08:20:40 adam Exp $
+/* $Id: xslt.c,v 1.5 2005-05-01 07:38:51 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
#include <ctype.h>
#include <yaz/diagbib1.h>
+#include <libxml/xmlversion.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#ifdef LIBXML_READER_ENABLED
#include <libxml/xmlreader.h>
+#endif
#include <libxslt/transform.h>
#include <idzebra/util.h>
struct filter_info {
xsltStylesheetPtr stylesheet_xsp;
+#ifdef LIBXML_READER_ENABLED /* the reader member is compiled in only when LIBXML_READER_ENABLED is defined */
xmlTextReaderPtr reader;
+#endif
char *fname;
int split_depth;
+ ODR odr; /* allocator for XSLT parameter strings built in extract_doc; created in filter_init_xslt and reset at the start of every filter_extract call */
};
-static const char *zebra_index_ns = "http://indexdata.dk/zebra/indexing/1";
+#define ZEBRA_INDEX_NS "http://indexdata.dk/zebra/indexing/1" /* passed as the "schema" stylesheet parameter when a record is indexed (extract_doc) */
+#define ZEBRA_SCHEMA_IDENTITY_NS "http://indexdata.dk/zebra/identity/1" /* default element set name in filter_retrieve; when selected the stored document is used without applying the stylesheet */
+static const char *zebra_index_ns = ZEBRA_INDEX_NS;
+
+/* Append a string-valued XSLT parameter to a parameter list.
+ *
+ * params  NULL-terminated array of alternating name/value pointers.  The
+ *         new pair is written over the terminator and a new terminator is
+ *         placed after it.  The caller must provide room for three more
+ *         entries; the array size is not known here, so it cannot be checked.
+ * name    parameter name; stored by pointer, not copied.
+ * value   raw string value; must not be NULL.
+ * odr     memory context from which the quoted copy of value is allocated.
+ *
+ * libxslt evaluates parameter values as XPath expressions, so value has to
+ * be turned into an XPath string literal.  XPath 1.0 has no escape syntax
+ * inside a literal, therefore the delimiter is chosen so that it does not
+ * occur in value; a value containing both ' and " is expressed as a
+ * concat() of literals.  Values without an apostrophe are quoted with '
+ * exactly as before.
+ */
+static void set_param_str(const char **params, const char *name,
+                          const char *value, ODR odr)
+{
+    size_t len = 0, apos = 0, dquot = 0;
+    const char *src;
+    char *quoted, *dst;
+
+    /* one pass: length plus the number of each kind of quote character */
+    for (src = value; *src; src++, len++)
+    {
+        if (*src == '\'')
+            apos++;
+        else if (*src == '"')
+            dquot++;
+    }
+    if (!apos || !dquot)
+    {
+        /* a single literal is enough: delimit with the unused quote */
+        char delim = apos ? '"' : '\'';
+
+        quoted = odr_malloc(odr, len + 3);
+        sprintf(quoted, "%c%s%c", delim, value, delim);
+    }
+    else
+    {
+        /* concat('..',"'",'..'): 8 bytes of prefix, 2 of suffix, 1 NUL,
+           and every apostrophe grows from 1 to 7 bytes */
+        quoted = odr_malloc(odr, len + 6 * apos + 11);
+        dst = quoted + sprintf(quoted, "concat('");
+        for (src = value; *src; src++)
+        {
+            if (*src == '\'')
+                dst += sprintf(dst, "',\"'\",'");
+            else
+                *dst++ = *src;
+        }
+        sprintf(dst, "')");
+    }
+    while (*params)
+        params++;
+    params[0] = name;
+    params[1] = quoted;
+    params[2] = 0;
+}
+
+/* Append an integer-valued XSLT parameter to a parameter list.
+ *
+ * params  NULL-terminated array of alternating name/value pointers; the
+ *         new pair replaces the terminator and a new terminator follows.
+ *         The caller must provide room for three more entries.
+ * name    parameter name; stored by pointer, not copied.
+ * value   number, formatted with ZINT_FORMAT and wrapped in single quotes.
+ * odr     memory context from which the formatted text is allocated.
+ */
+static void set_param_int(const char **params, const char *name,
+                          zint value, ODR odr)
+{
+    const char **slot = params;
+    char *text = odr_malloc(odr, 30); /* 25 digits enough for 2^64 */
+
+    sprintf(text, "'" ZINT_FORMAT "'", value);
+    for (; *slot; slot++)
+        ; /* advance to the terminating NULL entry */
+    slot[0] = name;
+    slot[1] = text;
+    slot[2] = 0;
+}
-static void *filter_init (Res res, RecType recType)
+/* Allocate and initialise the filter state used by the "xslt" record type.
+ * The state starts without stylesheet, reader or file name.  split_depth 0
+ * makes filter_extract handle each input stream as one document
+ * (extract_full).  res and recType are not used in this function.
+ * Returns the new struct filter_info as an opaque pointer.
+ */
+static void *filter_init_xslt(Res res, RecType recType)
{
struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
tinfo->stylesheet_xsp = 0;
+#ifdef LIBXML_READER_ENABLED
tinfo->reader = 0;
+#endif
tinfo->fname = 0;
+ tinfo->split_depth = 0;
+ tinfo->odr = odr_createmem(ODR_ENCODE); /* holds XSLT parameter strings; released by odr_destroy in the destroy handler */
+ return tinfo;
+}
+/* Allocate the filter state used by the "xslt1" record type: identical to
+ * filter_init_xslt except that split_depth is 1, so filter_extract takes
+ * the extract_split path (or reports an error when built without
+ * LIBXML_READER_ENABLED).
+ */
+static void *filter_init_xslt1(Res res, RecType recType)
+{
+ struct filter_info *tinfo = (struct filter_info *)
+ filter_init_xslt(res, recType);
tinfo->split_depth = 1;
return tinfo;
}
struct filter_info *tinfo = clientData;
if (tinfo->stylesheet_xsp)
xsltFreeStylesheet(tinfo->stylesheet_xsp);
+#ifdef LIBXML_READER_ENABLED
+ if (tinfo->reader)
+ xmlFreeTextReader(tinfo->reader);
+#endif
xfree(tinfo->fname);
+ odr_destroy(tinfo->odr);
xfree(tinfo);
}
}
}
-static int filter_extract(void *clientData, struct recExtractCtrl *p)
+/* Index and store one XML record.
+ *
+ * tinfo  filter state; tinfo->stylesheet_xsp (if set) is applied to doc with
+ *        the "schema" parameter set to ZEBRA_INDEX_NS, and the result is
+ *        passed to index_node.
+ * p      extraction control; p->init prepares the word record,
+ *        p->flagShowRecords enables dumping of documents to stdout and
+ *        p->setStoreData receives the serialised source document.
+ * doc    the record; ownership passes to this function, which frees it.
+ *
+ * Returns RECCTRL_EXTRACT_OK.  A failed transformation does not abort the
+ * record: nothing is indexed for it, but the source document is still
+ * stored.
+ */
+static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
+ xmlDocPtr doc)
{
- static const char *params[] = {
- "schema", "'http://indexdata.dk/zebra/indexing/1'",
- 0
- };
- struct filter_info *tinfo = clientData;
RecWord recWord;
- int ret;
+    const char *params[10];
+    xmlChar *buf_out;
+    int len_out;
+
+    params[0] = 0;
+    set_param_str(params, "schema", ZEBRA_INDEX_NS, tinfo->odr);
+    (*p->init)(p, &recWord);
+    recWord.reg_type = 'w';
+
+    if (tinfo->stylesheet_xsp)
+    {
+        xmlDocPtr resDoc =
+            xsltApplyStylesheet(tinfo->stylesheet_xsp,
+                                doc, params);
+        /* xsltApplyStylesheet returns NULL when the transformation fails;
+           then there is nothing to dump or index, so skip straight to
+           storing the source record */
+        if (resDoc)
+        {
+            if (p->flagShowRecords)
+            {
+                xmlDocDumpMemory(resDoc, &buf_out, &len_out);
+                fwrite(buf_out, len_out, 1, stdout);
+                xmlFree(buf_out);
+            }
+            index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord);
+            xmlFreeDoc(resDoc);
+        }
+    }
+    /* the untransformed document is what gets stored for later retrieval */
+    xmlDocDumpMemory(doc, &buf_out, &len_out);
+    if (p->flagShowRecords)
+        fwrite(buf_out, len_out, 1, stdout);
+    (*p->setStoreData)(p, buf_out, len_out);
+    xmlFree(buf_out);
+
+    xmlFreeDoc(doc);
+    return RECCTRL_EXTRACT_OK;
+}
+
+#ifdef LIBXML_READER_ENABLED
+static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
+{
+ int ret;
if (p->first_record)
{
if (tinfo->reader)
if (!tinfo->stylesheet_xsp)
return RECCTRL_EXTRACT_ERROR_GENERIC;
- (*p->init)(p, &recWord);
- recWord.reg_type = 'w';
-
ret = xmlTextReaderRead(tinfo->reader);
while (ret == 1) {
int type = xmlTextReaderNodeType(tinfo->reader);
if (tinfo->split_depth == 0 ||
(type == XML_READER_TYPE_ELEMENT && tinfo->split_depth == depth))
{
- xmlChar *buf_out;
- int len_out;
-
xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
xmlDocPtr doc = xmlNewDoc("1.0");
xmlDocSetRootElement(doc, ptr2);
-
- if (tinfo->stylesheet_xsp)
- {
- xmlDocPtr resDoc =
- xsltApplyStylesheet(tinfo->stylesheet_xsp,
- doc, params);
- if (p->flagShowRecords)
- {
- xmlDocDumpMemory(resDoc, &buf_out, &len_out);
- fwrite(buf_out, len_out, 1, stdout);
- xmlFree(buf_out);
- }
- index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord);
- xmlFreeDoc(resDoc);
- }
- xmlDocDumpMemory(doc, &buf_out, &len_out);
- if (p->flagShowRecords)
- fwrite(buf_out, len_out, 1, stdout);
- (*p->setStoreData)(p, buf_out, len_out);
- xmlFree(buf_out);
- xmlFreeDoc(doc);
- return RECCTRL_EXTRACT_OK;
+ return extract_doc(tinfo, p, doc);
}
ret = xmlTextReaderRead(tinfo->reader);
}
tinfo->reader = 0;
return RECCTRL_EXTRACT_EOF;
}
+#endif
+
+/* Treat the whole input stream as a single XML record.
+ *
+ * On the first call for a stream (p->first_record set) the stream is parsed
+ * through the ioread_ex/ioclose_ex callbacks and handed to extract_doc.
+ * Returns RECCTRL_EXTRACT_ERROR_GENERIC when parsing fails and
+ * RECCTRL_EXTRACT_EOF on any later call, since a stream holds one record.
+ */
+static int extract_full(struct filter_info *tinfo, struct recExtractCtrl *p)
+{
+    xmlDocPtr whole;
+
+    /* only one record per stream */
+    if (!p->first_record)
+        return RECCTRL_EXTRACT_EOF;
+
+    whole = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
+                      0 /* URL */,
+                      0 /* encoding */,
+                      XML_PARSE_XINCLUDE);
+    return whole ? extract_doc(tinfo, p, whole)
+                 : RECCTRL_EXTRACT_ERROR_GENERIC;
+}
+
+/* Extract handler shared by both record types.
+ *
+ * clientData is the struct filter_info created by the init handler.  The
+ * per-call ODR memory is reset first.  With split_depth 0 the stream is one
+ * record (extract_full); otherwise records are split out with the xmlreader
+ * API (extract_split), which yields RECCTRL_EXTRACT_ERROR_GENERIC when
+ * LIBXML_READER_ENABLED is not defined.
+ */
+static int filter_extract(void *clientData, struct recExtractCtrl *p)
+{
+    struct filter_info *state = clientData;
+
+    odr_reset(state->odr);
+
+    if (state->split_depth != 0)
+    {
+#ifdef LIBXML_READER_ENABLED
+        return extract_split(state, p);
+#else
+        /* no xmlreader so we can't split it */
+        return RECCTRL_EXTRACT_ERROR_GENERIC;
+#endif
+    }
+    return extract_full(state, p);
+}
static int ioread_ret(void *context, char *buffer, int len)
{
static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
{
- static const char *params[] = {
- "schema", "'F'",
- 0
- };
+ const char *esn = ZEBRA_SCHEMA_IDENTITY_NS;
+ const char *params[10];
struct filter_info *tinfo = clientData;
xmlDocPtr resDoc;
xmlDocPtr doc;
if (p->comp)
{
- const char *esn;
- char *esn_quoted;
if (p->comp->which != Z_RecordComp_simple
|| p->comp->u.simple->which != Z_ElementSetNames_generic)
{
return 0;
}
esn = p->comp->u.simple->u.generic;
- esn_quoted = odr_malloc(p->odr, 3 + strlen(esn));
- sprintf(esn_quoted, "'%s'", esn);
- params[1] = esn_quoted;
}
+
+ params[0] = 0;
+ set_param_str(params, "schema", esn, p->odr);
+ if (p->fname)
+ set_param_str(params, "filename", p->fname, p->odr);
+ if (p->score >= 0)
+ set_param_int(params, "score", p->score, p->odr);
+ set_param_int(params, "size", p->recordSize, p->odr);
+
if (!tinfo->stylesheet_xsp)
{
p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
return 0;
}
- resDoc = xsltApplyStylesheet(tinfo->stylesheet_xsp,
- doc, params);
+
+ if (!strcmp(esn, ZEBRA_SCHEMA_IDENTITY_NS))
+ resDoc = doc;
+ else
+ {
+ resDoc = xsltApplyStylesheet(tinfo->stylesheet_xsp,
+ doc, params);
+ xmlFreeDoc(doc);
+ }
if (!resDoc)
{
p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
}
xmlFreeDoc(resDoc);
- xmlFreeDoc(doc);
return 0;
}
-static struct recType filter_type = {
+static struct recType filter_type_xslt = { /* descriptor for the "xslt" record type; its init handler sets split_depth 0 (one record per stream) */
0,
"xslt",
- filter_init,
+ filter_init_xslt, /* allocates the struct filter_info passed to the other handlers as clientData */
+ filter_config,
+ filter_destroy,
+ filter_extract, /* same handler as the "xslt1" type; its path depends on split_depth */
+ filter_retrieve
+};
+
+static struct recType filter_type_xslt1 = {
+ 0,
+ "xslt1",
+ filter_init_xslt1,
filter_config,
filter_destroy,
filter_extract,
#endif
[] = {
- &filter_type,
+ &filter_type_xslt,
+#ifdef LIBXML_READER_ENABLED
+ &filter_type_xslt1,
+#endif
0,
};