-/* $Id: mod_dom.c,v 1.23 2007-02-28 14:46:41 marc Exp $
+
+/* $Id: mod_dom.c,v 1.30 2007-03-07 14:18:35 marc Exp $
Copyright (C) 1995-2007
Index Data ApS
struct filter_retrieve *retrieve_list;
struct filter_input *input_list;
struct filter_store *store;
+ int record_info_invoked;
};
tinfo->input_list = 0;
tinfo->store = 0;
tinfo->doc_config = 0;
+ tinfo->record_info_invoked = 0;
#if YAZ_HAVE_EXSLT
exsltRegisterAll();
}
static ZEBRA_RES perform_convert(struct filter_info *tinfo,
+ struct recExtractCtrl *extctr,
struct convert_s *convert,
const char **params,
xmlDocPtr *doc,
if (last_xsp)
*last_xsp = convert->stylesheet_xsp;
- xmlFreeDoc(*doc);
- /* *doc = res_doc; */
+ if (!res_doc)
+ break;
/* now saving into buffer and re-reading into DOM to avoid annoing
XSLT problem with thrown-out indentation text nodes */
- if (res_doc){
- xsltSaveResultToString(&buf_out, &len_out, res_doc,
- convert->stylesheet_xsp);
- xmlFreeDoc(res_doc);
- }
-
+ xsltSaveResultToString(&buf_out, &len_out, res_doc,
+ convert->stylesheet_xsp);
+ xmlFreeDoc(res_doc);
- *doc = xmlParseDoc(buf_out);
+ xmlFreeDoc(*doc);
- yaz_log(YLOG_DEBUG, "%s: %s \n %s",
- tinfo->fname ? tinfo->fname : "none ",
- convert->stylesheet,
- buf_out);
+ *doc = xmlParseMemory((const char *) buf_out, len_out);
+ /* writing debug info out */
+ if (extctr && extctr->flagShowRecords)
+ yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s",
+ tinfo->fname ? tinfo->fname : "(none)",
+ convert->stylesheet,
+ len_out, buf_out);
+
xmlFree(buf_out);
}
return ZEBRA_OK;
xmlNodePtr node,
xmlChar * index_p)
{
- xmlChar *text = xmlNodeGetContent(node);
- size_t text_len = strlen((const char *)text);
-
- /*dom_log(YLOG_DEBUG, tinfo, node, "Indexing: '%s' '%s'", index_p, text);*/
-
- /* if there is no text, we do not need to proceed */
- if (text_len)
- {
- xmlChar *look = index_p;
- xmlChar *bval;
- xmlChar *eval;
-
- xmlChar index[256];
- xmlChar type[256];
+ if (tinfo->record_info_invoked == 1)
+ {
+ xmlChar *text = xmlNodeGetContent(node);
+ size_t text_len = strlen((const char *)text);
+
+ /* if there is no text, we do not need to proceed */
+ if (text_len)
+ {
+ xmlChar *look = index_p;
+ xmlChar *bval;
+ xmlChar *eval;
+
+ xmlChar index[256];
+ xmlChar type[256];
- /* assingning text to be indexed */
- recword->term_buf = (const char *)text;
- recword->term_len = text_len;
+ /* assigning text to be indexed */
+ recword->term_buf = (const char *)text;
+ recword->term_len = text_len;
- /* parsing all index name/type pairs */
- /* may not start with ' ' or ':' */
- while (*look && ' ' != *look && ':' != *look)
- {
- /* setting name and type to zero */
- *index = '\0';
- *type = '\0';
-
- /* parsing one index name */
- bval = look;
- while (*look && ':' != *look && ' ' != *look)
+ /* parsing all index name/type pairs */
+ /* may not start with ' ' or ':' */
+ while (*look && ' ' != *look && ':' != *look)
{
- look++;
- }
- eval = look;
- strncpy((char *)index, (const char *)bval, eval - bval);
- index[eval - bval] = '\0';
-
+ /* setting name and type to zero */
+ *index = '\0';
+ *type = '\0';
- /* parsing one index type, if existing */
- if (':' == *look)
- {
- look++;
-
+ /* parsing one index name */
bval = look;
- while (*look && ' ' != *look)
+ while (*look && ':' != *look && ' ' != *look)
{
look++;
}
eval = look;
- strncpy((char *)type, (const char *)bval, eval - bval);
- type[eval - bval] = '\0';
- }
-
- /* actually indexing the text given */
- dom_log(YLOG_DEBUG, tinfo, 0,
- "INDEX '%s:%s' '%s'",
- index ? (const char *) index : "null",
- type ? (const char *) type : "null",
- text ? (const char *) text : "null");
-
- recword->index_name = (const char *)index;
- if (type && *type)
- recword->index_type = *type;
- (extctr->tokenAdd)(recword);
+ strncpy((char *)index, (const char *)bval, eval - bval);
+ index[eval - bval] = '\0';
+
+
+ /* parsing one index type, if existing */
+ if (':' == *look)
+ {
+ look++;
+
+ bval = look;
+ while (*look && ' ' != *look)
+ {
+ look++;
+ }
+ eval = look;
+ strncpy((char *)type, (const char *)bval, eval - bval);
+ type[eval - bval] = '\0';
+ }
- /* eat whitespaces */
- if (*look && ' ' == *look && *(look+1))
- {
- look++;
- }
+ /* actually indexing the text given */
+ dom_log(YLOG_DEBUG, tinfo, 0,
+ "INDEX '%s:%s' '%s'",
+ index ? (const char *) index : "null",
+ type ? (const char *) type : "null",
+ text ? (const char *) text : "null");
+
+ recword->index_name = (const char *)index;
+ if (type && *type)
+ recword->index_type = *type;
+
+ /* writing debug out */
+ if (extctr->flagShowRecords)
+ dom_log(YLOG_LOG, tinfo, 0,
+ "INDEX '%s:%s' '%s'",
+ index ? (const char *) index : "null",
+ type ? (const char *) type : "null",
+ text ? (const char *) text : "null");
+
+ /* actually indexing the text given */
+ recword->index_name = (const char *)index;
+ if (type && *type)
+ recword->index_type = *type;
+ (extctr->tokenAdd)(recword);
+
+ /* eat whitespaces */
+ if (*look && ' ' == *look && *(look+1))
+ {
+ look++;
+ }
+ }
}
+ xmlFree(text);
}
-
- xmlFree(text);
}
/* DOM filter style indexing */
static void set_record_info(struct filter_info *tinfo,
struct recExtractCtrl *extctr,
+ xmlNodePtr node,
xmlChar * id_p,
xmlChar * rank_p,
xmlChar * type_p)
{
- dom_log(YLOG_DEBUG, tinfo, 0,
- "RECORD id=%s rank=%s type=%s",
- id_p ? (const char *) id_p : "null",
- rank_p ? (const char *) rank_p : "null",
- type_p ? (const char *) type_p : "null");
+
+ /* writing debug info out */
+ if (extctr->flagShowRecords)
+ dom_log(YLOG_LOG, tinfo, 0,
+ "RECORD id=%s rank=%s type=%s",
+ id_p ? (const char *) id_p : "(null)",
+ rank_p ? (const char *) rank_p : "(null)",
+ type_p ? (const char *) type_p : "(null)");
+
if (id_p)
sscanf((const char *)id_p, "%255s", extctr->match_criteria);
/* else */
/* dom_log(YLOG_WARN, tinfo, ptr, "dom filter: unknown record type '%s'", */
/* type_str); */
+ if (tinfo->record_info_invoked == 1)
+ {
+ /* warn about multiple only once */
+ dom_log(YLOG_WARN, tinfo, node, "multiple record elements");
+ }
+ tinfo->record_info_invoked++;
}
attr->name);
}
}
- set_record_info(tinfo, extctr, id_p, rank_p, type_p);
+ set_record_info(tinfo, extctr, node, id_p, rank_p, type_p);
}
else
{
pi_p, look);
}
else
- set_record_info(tinfo, extctr, id, rank, 0);
+ set_record_info(tinfo, extctr, node, id, rank, 0);
}
/* parsing index instruction */
struct recExtractCtrl *extctr,
xmlDocPtr doc)
{
- xmlChar *buf_out;
- int len_out;
-
/* only need to do the initialization once, reuse recword for all terms */
RecWord recword;
(*extctr->init)(extctr, &recword);
- if (extctr->flagShowRecords)
- {
- xmlDocDumpMemory(doc, &buf_out, &len_out);
- fwrite(buf_out, len_out, 1, stdout);
- xmlFree(buf_out);
- }
-
process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
}
xsltStylesheetPtr last_xsp = 0;
xmlDocPtr store_doc = 0;
+ /* per default do not ingest record */
+ tinfo->record_info_invoked = 0;
+
+ /* exit if empty document given */
+ if (!doc)
+ return RECCTRL_EXTRACT_SKIP;
+
+ /* we actually have a document which needs to be processed further */
params[0] = 0;
set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record);
/* input conversion */
- perform_convert(tinfo, input->convert, params, &doc, 0);
+ perform_convert(tinfo, p, input->convert, params, &doc, 0);
if (tinfo->store)
{
/* store conversion */
store_doc = xmlCopyDoc(doc, 1);
- perform_convert(tinfo, tinfo->store->convert,
+ perform_convert(tinfo, p, tinfo->store->convert,
params, &store_doc, &last_xsp);
}
+ /* saving either store doc or original doc in case no store doc exists */
if (last_xsp)
xsltSaveResultToString(&buf_out, &len_out,
store_doc ? store_doc : doc, last_xsp);
else
xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
- if (p->flagShowRecords)
- fwrite(buf_out, len_out, 1, stdout);
+
(*p->setStoreData)(p, buf_out, len_out);
xmlFree(buf_out);
xmlFreeDoc(store_doc);
/* extract conversion */
- perform_convert(tinfo, tinfo->extract->convert, params, &doc, 0);
+ perform_convert(tinfo, p, tinfo->extract->convert, params, &doc, 0);
+
/* finally, do the indexing */
- if (doc)
+ if (doc){
extract_dom_doc_node(tinfo, p, doc);
-
- if (doc)
xmlFreeDoc(doc);
+ }
+
+ /* there was nothing to index, so there is no inserted/updated record */
+ if (tinfo->record_info_invoked == 0)
+ return RECCTRL_EXTRACT_SKIP;
return RECCTRL_EXTRACT_OK;
}
p /* I/O handler */,
0 /* URL */,
0 /* encoding */,
- XML_PARSE_XINCLUDE|
- XML_PARSE_NOENT);
+ XML_PARSE_XINCLUDE
+ | XML_PARSE_NOENT
+ | XML_PARSE_NONET);
}
if (!input->u.xmlreader.reader)
return RECCTRL_EXTRACT_ERROR_GENERIC;
{
int type = xmlTextReaderNodeType(input->u.xmlreader.reader);
int depth = xmlTextReaderDepth(input->u.xmlreader.reader);
+
if (type == XML_READER_TYPE_ELEMENT &&
input->u.xmlreader.split_level == depth)
{
- xmlNodePtr ptr
- = xmlTextReaderExpand(input->u.xmlreader.reader);
+ xmlNodePtr ptr;
+
+ /* per default do not ingest record */
+ tinfo->record_info_invoked = 0;
+
+ ptr = xmlTextReaderExpand(input->u.xmlreader.reader);
if (ptr)
- {
+ {
+ /* we have a new document */
+
xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
xmlDocSetRootElement(doc, ptr2);
+ /* writing debug info out */
+ if (p->flagShowRecords)
+ {
+ xmlChar *buf_out = 0;
+ int len_out = 0;
+ xmlDocDumpMemory(doc, &buf_out, &len_out);
+ yaz_log(YLOG_LOG, "%s: XMLREADER depth: %i\n%.*s",
+ tinfo->fname ? tinfo->fname : "(none)",
+ depth, len_out, buf_out);
+ xmlFree(buf_out);
+ }
+
return convert_extract_doc(tinfo, input, p, doc);
}
else
p /* I/O handler */,
0 /* URL */,
0 /* encoding */,
- XML_PARSE_XINCLUDE|XML_PARSE_NOENT);
+ XML_PARSE_XINCLUDE
+ | XML_PARSE_NOENT
+ | XML_PARSE_NONET);
if (!doc)
{
return RECCTRL_EXTRACT_ERROR_GENERIC;
doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
0 /* URL */,
0 /* encoding */,
- XML_PARSE_XINCLUDE|XML_PARSE_NOENT);
+ XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET);
if (!doc)
{
p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
}
/* retrieve conversion */
- perform_convert(tinfo, retrieve->convert, params, &doc, &last_xsp);
+ perform_convert(tinfo, 0, retrieve->convert, params, &doc, &last_xsp);
if (!doc)
{
p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;