-/* $Id: mod_dom.c,v 1.11 2007-02-14 16:43:37 marc Exp $
+/* $Id: mod_dom.c,v 1.12 2007-02-15 13:01:00 marc Exp $
Copyright (C) 1995-2007
Index Data ApS
#include <idzebra/util.h>
#include <idzebra/recctrl.h>
+
+
+/* Alvis style indexing */
+#define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1"
+static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
+
+/* DOM filter style indexing */
+#define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0"
+static const char *zebra_dom_ns = ZEBRA_DOM_NS;
+
+/* DOM filter style indexing */
+#define ZEBRA_PI_NAME "zebra-2.0"
+static const char *zebra_pi_name = ZEBRA_PI_NAME;
+
+
+
struct convert_s {
const char *stylesheet;
xsltStylesheetPtr stylesheet_xsp;
}
-/* Alvis style indexing */
-#define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1"
-static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
/* Alvis style indexing */
static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
/* Alvis style indexing */
static void extract_doc_alvis(struct filter_info *tinfo,
- struct recExtractCtrl *recctr,
+ struct recExtractCtrl *extctr,
xmlDocPtr doc)
{
if (doc){
int len_out;
xmlNodePtr root_ptr;
- (*recctr->init)(recctr, &recWord);
+ (*extctr->init)(extctr, &recWord);
- if (recctr->flagShowRecords){
+ if (extctr->flagShowRecords){
xmlDocDumpMemory(doc, &buf_out, &len_out);
fwrite(buf_out, len_out, 1, stdout);
xmlFree(buf_out);
}
root_ptr = xmlDocGetRootElement(doc);
if (root_ptr)
- index_record(tinfo, recctr, root_ptr, &recWord);
+ index_record(tinfo, extctr, root_ptr, &recWord);
else
yaz_log(YLOG_WARN, "No root for index XML record");
}
return 0;
}
-/* DOM filter style indexing */
-/* #define ZEBRA_XSLT_NS "http://indexdata.com/zebra-2.0" */
-/* static const char *zebra_xslt_ns = ZEBRA_XSLT_NS; */
-
-/* DOM filter style indexing */
-#define ZEBRA_PI_NAME "zebra-2.0"
-static const char *zebra_pi_name = ZEBRA_PI_NAME;
-
/* DOM filter style indexing */
static void index_value_of(struct filter_info *tinfo,
- struct recExtractCtrl *recctr,
+ struct recExtractCtrl *extctr,
xmlNodePtr node,
xmlChar * index_p)
{
xmlChar *text = xmlNodeGetContent(node);
+ size_t text_len = strlen((const char *)text);
- xmlChar *look = index_p;
- xmlChar *bval;
- xmlChar *eval;
- xmlChar index[256];
- xmlChar type[256];
+ /* if there is no text, we do not need to proceed */
+ if (text_len)
+ {
+ xmlChar *look = index_p;
+ xmlChar *bval;
+ xmlChar *eval;
+
+ xmlChar index[256];
+ xmlChar type[256];
+
+ /* assingning text to be indexed */
+ RecWord recWord;
+ (*extctr->init)(extctr, &recWord);
+ recWord.term_buf = (const char *)text;
+ recWord.term_len = text_len;
- /* parsing all index name/type pairs - may not start with ' ' or ':' */
- while (*look && ' ' != *look && ':' != *look){
+ /* parsing all index name/type pairs */
+ /* may not start with ' ' or ':' */
+ while (*look && ' ' != *look && ':' != *look){
- /* setting name and type to zero */
- *index = '\0';
- *type = '\0';
+ /* setting name and type to zero */
+ *index = '\0';
+ *type = '\0';
- /* parsing one index name */
- bval = look;
- while (*look && ':' != *look && ' ' != *look){
- look++;
- }
- eval = look;
- strncpy((char *)index, (const char *)bval, eval - bval);
- index[eval - bval] = '\0';
+ /* parsing one index name */
+ bval = look;
+ while (*look && ':' != *look && ' ' != *look){
+ look++;
+ }
+ eval = look;
+ strncpy((char *)index, (const char *)bval, eval - bval);
+ index[eval - bval] = '\0';
- /* parsing one index type, if existing */
- if (':' == *look){
- look++;
+ /* parsing one index type, if existing */
+ if (':' == *look){
+ look++;
- bval = look;
- while (*look && ' ' != *look){
- look++;
- }
- eval = look;
- strncpy((char *)type, (const char *)bval, eval - bval);
- type[eval - bval] = '\0';
- }
+ bval = look;
+ while (*look && ' ' != *look){
+ look++;
+ }
+ eval = look;
+ strncpy((char *)type, (const char *)bval, eval - bval);
+ type[eval - bval] = '\0';
+ }
- printf("INDEX '%s:%s' '%s'\n", index, type, text);
-
- if (*look && ' ' == *look && *(look+1)){
- look++;
- }
- }
+ /* actually indexing the text given */
+ /* printf("INDEX '%s:%s' '%s'\n", index, type, text); */
- xmlFree(text);
+ recWord.index_name = (const char *)index;
+ if (type && *type)
+ recWord.index_type = *type;
+ (extctr->tokenAdd)(&recWord);
- /* //recWord->term_buf = (const char *)ptr->content; */
- /* //recWord->term_len = XML_STRLEN(ptr->content); */
- /* // if (type_str && *type_str) */
- /* // recWord->index_type = *type_str; /\* type was given *\/ */
- /* // recWord->index_name = name_str; */
- /* // recWord->index_type = prev_type; /\* restore it again *\/ */
+ /* eat whitespaces */
+ if (*look && ' ' == *look && *(look+1)){
+ look++;
+ }
+ }
+ }
+
+ xmlFree(text);
}
/* DOM filter style indexing */
static void set_record_info(struct filter_info *tinfo,
- struct recExtractCtrl *recctr,
+ struct recExtractCtrl *extctr,
xmlChar * id_p,
xmlChar * rank_p,
xmlChar * type_p)
{
printf("RECORD id=%s rank=%s type=%s\n", id_p, rank_p, type_p);
+
+ if (id_p)
+ sscanf((const char *)id_p, "%255s", extctr->match_criteria);
+
+ if (rank_p)
+ extctr->staticrank = atozint((const char *)rank_p);
+
+ /* if (!strcmp("update", type_str)) */
+ /* index_node(tinfo, ctrl, ptr, recWord); */
+ /* else if (!strcmp("delete", type_str)) */
+ /* yaz_log(YLOG_WARN, "dom filter delete: to be implemented"); */
+ /* else */
+ /* yaz_log(YLOG_WARN, "dom filter: unknown record type '%s'", */
+ /* type_str); */
+
}
/* DOM filter style indexing */
static void process_xml_element_zebra_node(struct filter_info *tinfo,
- struct recExtractCtrl *recctr,
+ struct recExtractCtrl *extctr,
xmlNodePtr node)
{
if (node->type == XML_ELEMENT_NODE
- && node->ns && 0 == XML_STRCMP(node->ns->href, zebra_xslt_ns)){
+ && node->ns && 0 == XML_STRCMP(node->ns->href, zebra_dom_ns)){
if (0 == XML_STRCMP(node->name, "index")){
xmlChar *index_p = 0;
struct _xmlAttr *attr;
for (attr = node->properties; attr; attr = attr->next){
if (attr_content_xml(attr, "name", &index_p)){
- index_value_of(tinfo, recctr, node, index_p);
+ index_value_of(tinfo, extctr, node, index_p);
}
else
// printf("%s: dom filter: s% bad attribute %s",
}
- set_record_info(tinfo, recctr, id_p, rank_p, type_p);
+ set_record_info(tinfo, extctr, id_p, rank_p, type_p);
} else {
// printf("%s: dom filter: s% bad attribute %s",
// tinfo->fname, xmlGetNodePath(node)), nodeattr->name);
printf("dom filter: %s bad element <%s>,"
" expected <record>|<index> in namespace '%s'\n",
- xmlGetNodePath(node), node->name, zebra_xslt_ns);
+ xmlGetNodePath(node), node->name, zebra_dom_ns);
}
}
/* DOM filter style indexing */
static void process_xml_pi_node(struct filter_info *tinfo,
- struct recExtractCtrl *recctr,
+ struct recExtractCtrl *extctr,
xmlNodePtr node,
xmlChar **index_pp)
{
xmlGetNodePath(node), pi_p, look);
} else {
/* set_record_info(id, rank, type); */
- set_record_info(tinfo, recctr, id, rank, 0);
+ set_record_info(tinfo, extctr, id, rank, 0);
}
}
/* DOM filter style indexing */
static void process_xml_element_node(struct filter_info *tinfo,
- struct recExtractCtrl *recctr,
+ struct recExtractCtrl *extctr,
xmlNodePtr node)
{
/* remember indexing instruction from PI to next element node */
/* check if we are an element node in the special zebra namespace
and either set record data or index value-of node content*/
- process_xml_element_zebra_node(tinfo, recctr, node);
+ process_xml_element_zebra_node(tinfo, extctr, node);
/* loop through kid nodes */
for (node = node->children; node; node = node->next)
{
/* check and set PI record and index index instructions */
if (node->type == XML_PI_NODE){
- process_xml_pi_node(tinfo, recctr, node, &index_p);
+ process_xml_pi_node(tinfo, extctr, node, &index_p);
}
else if (node->type == XML_ELEMENT_NODE){
/* if there was a PI index instruction before this element */
if (index_p){
- index_value_of(tinfo, recctr, node, index_p);
+ index_value_of(tinfo, extctr, node, index_p);
index_p = 0;
}
- process_xml_element_node(tinfo, recctr, node);
+ process_xml_element_node(tinfo, extctr, node);
}
else
continue;
/* DOM filter style indexing */
static void extract_dom_doc_node(struct filter_info *tinfo,
- struct recExtractCtrl *recctr,
+ struct recExtractCtrl *extctr,
xmlDocPtr doc)
{
/* printf("DOC %s\n", xmlGetNodePath((xmlNodePtr)doc)); */
- process_xml_element_node(tinfo, recctr, (xmlNodePtr)doc);
+ xmlChar *buf_out;
+ int len_out;
+ if (extctr->flagShowRecords){
+ xmlDocDumpMemory(doc, &buf_out, &len_out);
+ fwrite(buf_out, len_out, 1, stdout);
+ xmlFree(buf_out);
+ }
+
+ process_xml_element_node(tinfo, extctr, (xmlNodePtr)doc);
}
xmlDocPtr doc)
{
- /* RecWord recWord; */
xmlChar *buf_out;
int len_out;
const char *params[10];
xmlDocPtr store_doc = 0;
params[0] = 0;
- set_param_str(params, "schema", zebra_xslt_ns, tinfo->odr_record);
+ set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record);
/* input conversion */
perform_convert(tinfo, input->convert, params, &doc, 0);
/* finally, do the indexing */
if (doc){
extract_dom_doc_node(tinfo, p, doc);
- extract_doc_alvis(tinfo, p, doc);
+ /* extract_doc_alvis(tinfo, p, doc); */
xmlFreeDoc(doc);
}
static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
{
- /* const char *esn = zebra_xslt_ns; */
+ /* const char *esn = zebra_dom_ns; */
const char *esn = 0;
const char *params[32];
struct filter_info *tinfo = clientData;