fd5986669becc8c40b9135c4816f064388cfce66
[idzebra-moved-to-github.git] / index / mod_dom.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1994-2010 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #include <stdio.h>
21 #include <assert.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24
25 #include <yaz/diagbib1.h>
26 #include <yaz/tpath.h>
27 #include <yaz/snprintf.h>
28
29 #include <libxml/xmlversion.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
32 #include <libxml/xmlIO.h>
33 #include <libxml/xmlreader.h>
34 #include <libxslt/transform.h>
35 #include <libxslt/xsltutils.h>
36
37 #if YAZ_HAVE_EXSLT
38 #include <libexslt/exslt.h>
39 #endif
40
41 #include <idzebra/util.h>
42 #include <idzebra/recctrl.h>
43 #include <yaz/oid_db.h>
44
/* Namespace URI of the Zebra DOM filter vocabulary.  process_meta() and
   process_xml_element_zebra_node() compare an element's namespace href
   against this value to recognise Zebra control elements. */
#define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0"
static const char *zebra_dom_ns = ZEBRA_DOM_NS;

/* The name "zebra-2.0"; presumably the target of Zebra processing
   instructions -- its use lies outside this part of the file
   (TODO confirm). */
#define ZEBRA_PI_NAME "zebra-2.0"
static const char *zebra_pi_name = ZEBRA_PI_NAME;
52
/* Discriminator for the union in struct convert_s. */
enum convert_type {
    convert_xslt_type, /* step created from an <xslt> element */
    convert_meta_type  /* step created from a <process-meta> element */
};
57
/* Settings of one XSLT conversion step. */
struct convert_xslt {
    const char *stylesheet;           /* value of the @stylesheet attribute */
    xsltStylesheetPtr stylesheet_xsp; /* parsed stylesheet; released by
                                         destroy_xsp() */
};
62
/* Settings of a <process-meta> step; the step takes no options. */
struct convert_meta {
    int dummy; /* placeholder member; parse_convert() never assigns it */
};
66
/* One step in a singly linked chain of conversions, built by
   parse_convert() and executed in order by perform_convert(). */
struct convert_s {
    enum convert_type which; /* selects the valid member of u */
    union {
        struct convert_xslt xslt; /* valid when which == convert_xslt_type */
        struct convert_meta meta; /* valid when which == convert_meta_type */
    } u;
    struct convert_s *next; /* following step, or 0 at the end of the chain */
};
75
/* Configuration taken from an <extract> element. */
struct filter_extract {
    const char *name;          /* value of @name, or 0 when absent */
    struct convert_s *convert; /* conversion chain from the child elements */
};
80
/* Configuration taken from a <store> element. */
struct filter_store {
    struct convert_s *convert; /* conversion chain from the child elements */
};
84
/* Configuration taken from one <retrieve> element; entries are kept in
   document order in a singly linked list (see lookup_retrieve()). */
struct filter_retrieve {
    const char *name;             /* value of @name, or 0 when absent */
    const char *identifier;       /* value of @identifier, or 0 when absent */
    struct convert_s *convert;    /* conversion chain from the child elements */
    struct filter_retrieve *next; /* following entry, or 0 */
};
91
/* Values of filter_input.type; they select the valid member of
   filter_input.u. */
#define DOM_INPUT_XMLREADER 1
#define DOM_INPUT_MARC 2
/* One input source description, created by new_input() and linked into
   filter_info.input_list. */
struct filter_input {
    const char *syntax;        /* set to 0 by new_input(); parse_input() does
                                  not assign it */
    const char *name;          /* set to 0 by new_input(); parse_input() does
                                  not assign it */
    struct convert_s *convert; /* conversion chain applied to this input */
    int type;                  /* DOM_INPUT_XMLREADER or DOM_INPUT_MARC */
    union {
        struct {
            xmlTextReaderPtr reader; /* freed by destroy_dom() when non-null */
            int split_level;         /* atoi() of @level, 0 when absent */
        } xmlreader;
        struct {
            const char *input_charset; /* not assigned by parse_input() */
            yaz_marc_t handle;         /* created with yaz_marc_create() */
            yaz_iconv_t iconv;         /* converter from the input charset
                                          to utf-8 */
        } marc;
    } u;
    struct filter_input *next; /* following input, or 0 */
};
112   
/* Per-filter state, allocated by filter_init() and released by
   filter_destroy(). */
struct filter_info {
    char *fname;              /* config file name as passed to filter_config();
                                 stored in nmem_config */
    char *full_name;          /* fname resolved against profile_path, or a copy
                                 of fname when resolution fails */
    const char *profile_path; /* value of the "profilePath" resource */
    NMEM nmem_record;         /* created in filter_init(); its users are
                                 outside the code shown here */
    NMEM nmem_config;         /* backs all configuration allocations; reset by
                                 destroy_dom() */
    xmlDocPtr doc_config;     /* parsed config document; kept alive because
                                 attribute strings point into it */
    struct filter_extract *extract;        /* from <extract>, or 0 */
    struct filter_retrieve *retrieve_list; /* from <retrieve> elements */
    struct filter_input *input_list;       /* from <input> elements */
    struct filter_store *store;            /* from <store>, or 0 */
    int record_info_invoked;  /* incremented by set_record_info();
                                 index_value_of() indexes only while it is 1 */
};
126
127
128
/* Compare / measure libxml2 xmlChar strings with the plain C string
   functions.  Arguments are parenthesized so that any expression may be
   passed, and the cast keeps const-ness. */
#define XML_STRCMP(a,b)   strcmp((const char *)(a), (b))
#define XML_STRLEN(a) strlen((const char *)(a))


/* Iterate over ptr and its following siblings, running the statement that
   follows only for element nodes.  ptr itself is advanced and is null when
   the loop ends normally.  The expansion ends in an `if', so always follow
   the macro with a braced block to avoid capturing a stray `else'. */
#define FOR_EACH_ELEMENT(ptr) for (; (ptr); (ptr) = (ptr)->next) if ((ptr)->type == XML_ELEMENT_NODE)
134
135 static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
136                     const char *fmt, ...)
137 #ifdef __GNUC__
138     __attribute__ ((format (printf, 4, 5)))
139 #endif
140     ;
141
142 static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
143                     const char *fmt, ...)
144 {
145     va_list ap;
146     char buf[4096];
147
148     va_start(ap, fmt);
149     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
150     if (ptr)
151     {
152         yaz_log(level, "%s:%ld: %s", tinfo->fname ? tinfo->fname : "none", 
153                 xmlGetLineNo(ptr), buf);
154     }
155     else
156     {
157         yaz_log(level, "%s: %s", tinfo->fname ? tinfo->fname : "none", buf);
158     }
159     va_end(ap);
160 }
161
162
163 static void set_param_str(const char **params, const char *name,
164                           const char *value, NMEM nmem)
165 {
166     char *quoted = nmem_malloc(nmem, 3 + strlen(value));
167     sprintf(quoted, "'%s'", value);
168     while (*params)
169         params++;
170     params[0] = name;
171     params[1] = quoted;
172     params[2] = 0;
173 }
174
175 static void set_param_int(const char **params, const char *name,
176                           zint value, NMEM nmem)
177 {
178     char *quoted = nmem_malloc(nmem, 30); /* 25 digits enough for 2^64 */
179     while (*params)
180         params++;
181     sprintf(quoted, "'" ZINT_FORMAT "'", value);
182     params[0] = name;
183     params[1] = quoted;
184     params[2] = 0;
185 }
186
187 static void *filter_init(Res res, RecType recType)
188 {
189     struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
190     tinfo->fname = 0;
191     tinfo->full_name = 0;
192     tinfo->profile_path = 0;
193     tinfo->nmem_record = nmem_create();
194     tinfo->nmem_config = nmem_create();
195     tinfo->extract = 0;
196     tinfo->retrieve_list = 0;
197     tinfo->input_list = 0;
198     tinfo->store = 0;
199     tinfo->doc_config = 0;
200     tinfo->record_info_invoked = 0;
201
202 #if YAZ_HAVE_EXSLT
203     exsltRegisterAll(); 
204 #endif
205
206     return tinfo;
207 }
208
209 static int attr_content(struct _xmlAttr *attr, const char *name,
210                         const char **dst_content)
211 {
212     if (!XML_STRCMP(attr->name, name) && attr->children 
213         && attr->children->type == XML_TEXT_NODE)
214     {
215         *dst_content = (const char *)(attr->children->content);
216         return 1;
217     }
218     return 0;
219 }
220
221 static void destroy_xsp(struct convert_s *c)
222 {
223     while (c)
224     {
225         if (c->which == convert_xslt_type)
226         {
227             if (c->u.xslt.stylesheet_xsp)
228                 xsltFreeStylesheet(c->u.xslt.stylesheet_xsp);
229         }
230         c = c->next;
231     }
232 }
233
234 static void destroy_dom(struct filter_info *tinfo)
235 {
236     if (tinfo->extract)
237     {
238         destroy_xsp(tinfo->extract->convert);
239         tinfo->extract = 0;
240     }
241     if (tinfo->store)
242     {
243         destroy_xsp(tinfo->store->convert);
244         tinfo->store = 0;
245     }
246     if (tinfo->input_list)
247     {
248         struct filter_input *i_ptr;
249         for (i_ptr = tinfo->input_list; i_ptr; i_ptr = i_ptr->next)
250         {
251             switch(i_ptr->type)
252             {
253             case DOM_INPUT_XMLREADER:
254                 if (i_ptr->u.xmlreader.reader)
255                     xmlFreeTextReader(i_ptr->u.xmlreader.reader);
256                 break;
257             case DOM_INPUT_MARC:
258                 yaz_iconv_close(i_ptr->u.marc.iconv);
259                 yaz_marc_destroy(i_ptr->u.marc.handle);
260                 break;
261             }
262             destroy_xsp(i_ptr->convert);
263         }
264         tinfo->input_list = 0;
265     }
266     if (tinfo->retrieve_list)
267     {
268         struct filter_retrieve *r_ptr;
269         for (r_ptr = tinfo->retrieve_list; r_ptr; r_ptr = r_ptr->next)
270             destroy_xsp(r_ptr->convert);
271         tinfo->retrieve_list = 0;
272     }
273
274     if (tinfo->doc_config)
275     {
276         xmlFreeDoc(tinfo->doc_config);
277         tinfo->doc_config = 0;
278     }
279     nmem_reset(tinfo->nmem_config);
280 }
281
282 static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
283                                struct convert_s **l)
284 {
285     *l = 0;
286     FOR_EACH_ELEMENT(ptr) {
287         if (!XML_STRCMP(ptr->name, "xslt"))
288         {
289             struct _xmlAttr *attr;
290             struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
291             
292             p->next = 0;
293             p->which = convert_xslt_type;
294             p->u.xslt.stylesheet = 0;
295             p->u.xslt.stylesheet_xsp = 0;
296             
297             for (attr = ptr->properties; attr; attr = attr->next)
298                 if (attr_content(attr, "stylesheet", &p->u.xslt.stylesheet))
299                     ;
300                 else
301                 {
302                     dom_log(YLOG_WARN, tinfo, ptr,
303                             "bad attribute @%s", attr->name);
304                 }
305             if (p->u.xslt.stylesheet)
306             {
307                 char tmp_xslt_full_name[1024];
308                 if (!yaz_filepath_resolve(p->u.xslt.stylesheet, 
309                                           tinfo->profile_path,
310                                           NULL, 
311                                           tmp_xslt_full_name))
312                 {
313                     dom_log(YLOG_WARN, tinfo, 0,
314                             "stylesheet %s not found in "
315                             "path %s",
316                             p->u.xslt.stylesheet, 
317                             tinfo->profile_path);
318                     return ZEBRA_FAIL;
319                 }
320                 
321                 p->u.xslt.stylesheet_xsp
322                     = xsltParseStylesheetFile((const xmlChar*) 
323                                               tmp_xslt_full_name);
324                 if (!p->u.xslt.stylesheet_xsp)
325                 {
326                     dom_log(YLOG_WARN, tinfo, 0,
327                             "could not parse xslt stylesheet %s",
328                             tmp_xslt_full_name);
329                     return ZEBRA_FAIL;
330                 }
331             }
332             else
333             {
334                 dom_log(YLOG_WARN, tinfo, ptr,
335                         "missing attribute 'stylesheet'");
336                 return ZEBRA_FAIL;
337             }
338             *l = p;
339             l = &p->next;
340         }
341         else if (!XML_STRCMP(ptr->name, "process-meta"))
342         {
343             struct _xmlAttr *attr;
344             struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
345             
346             p->next = 0;
347             p->which = convert_meta_type;
348             
349             for (attr = ptr->properties; attr; attr = attr->next)
350                 dom_log(YLOG_WARN, tinfo, ptr,
351                         "bad attribute @%s", attr->name);
352             *l = p;
353             l = &p->next;
354         }
355         else
356         {
357             dom_log(YLOG_WARN, tinfo, ptr,
358                     "bad element '%s', expected <xslt>", ptr->name);
359             return ZEBRA_FAIL;
360         }
361     }
362     return ZEBRA_OK;
363 }
364
365 static int process_meta(struct filter_info *tinfo, xmlDocPtr doc, xmlNodePtr node, 
366                         struct recRetrieveCtrl *retctr)
367 {
368
369     if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href &&
370         0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
371     {
372         if (0 == XML_STRCMP(node->name, "meta"))
373         {
374             const char *element_set_name = 0;
375             
376             struct _xmlAttr *attr;      
377             for (attr = node->properties; attr; attr = attr->next)
378             {
379                 if (attr_content(attr, "name", &element_set_name))
380                     ;
381                 else
382                 {
383                     dom_log(YLOG_WARN, tinfo, node,
384                             "bad attribute @%s, expected @name", attr->name);
385                 }
386             }
387             if (element_set_name)
388             {
389                 WRBUF result = wrbuf_alloc();
390                 WRBUF addinfo = wrbuf_alloc();
391                 const Odr_oid *input_format = yaz_oid_recsyn_xml;
392                 const Odr_oid *output_format = 0;
393                 int ret;
394                 
395                 ret = retctr->special_fetch(retctr->handle,
396                                             element_set_name,
397                                             input_format, &output_format,
398                                             result, addinfo);
399                 if (ret == 0)
400                 {
401                     xmlDocPtr sub_doc = 
402                         xmlParseMemory(wrbuf_buf(result), wrbuf_len(result));
403                     if (sub_doc)
404                     {
405                         xmlNodePtr t = xmlDocGetRootElement(sub_doc);
406                         xmlReplaceNode(node, xmlCopyNode(t, 1));
407                         xmlFreeDoc(sub_doc);
408                     }
409                 }
410                 wrbuf_destroy(result);
411                 wrbuf_destroy(addinfo);
412             }
413         }
414     }
415     for (node = node->children; node; node = node->next)
416         process_meta(tinfo, doc, node, retctr);
417     return 0;
418 }
419
420 static ZEBRA_RES perform_convert(struct filter_info *tinfo, 
421                                  struct recExtractCtrl *extctr,
422                                  struct recRetrieveCtrl *retctr,
423                                  struct convert_s *convert,
424                                  const char **params,
425                                  xmlDocPtr *doc,
426                                  xsltStylesheetPtr *last_xsp)
427 {
428     for (; convert; convert = convert->next)
429     {
430         if (convert->which == convert_xslt_type)
431         {
432             xmlChar *buf_out = 0;
433             int len_out = 0;
434             xmlDocPtr res_doc = xsltApplyStylesheet(convert->u.xslt.stylesheet_xsp,
435                                                     *doc, params);
436             if (last_xsp)
437                 *last_xsp = convert->u.xslt.stylesheet_xsp;
438             
439             if (!res_doc)
440                 break;
441             
442             /* now saving into buffer and re-reading into DOM to avoid annoing
443                XSLT problem with thrown-out indentation text nodes */
444             xsltSaveResultToString(&buf_out, &len_out, res_doc,
445                                    convert->u.xslt.stylesheet_xsp); 
446             xmlFreeDoc(res_doc);
447             
448             xmlFreeDoc(*doc);
449             
450             *doc = xmlParseMemory((const char *) buf_out, len_out);
451             
452             /* writing debug info out */
453             if (extctr && extctr->flagShowRecords)
454                 yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s", 
455                         tinfo->fname ? tinfo->fname : "(none)", 
456                         convert->u.xslt.stylesheet,
457                         len_out, buf_out);
458             
459             xmlFree(buf_out);
460         }
461         else if (convert->which == convert_meta_type)
462         {
463             if (retctr) /* only execute meta on retrieval */
464             {
465                 process_meta(tinfo, *doc, xmlDocGetRootElement(*doc), retctr);
466
467                 /* last stylesheet absent */
468                 if (last_xsp)
469                     *last_xsp = 0;
470             }
471         }
472     }
473     return ZEBRA_OK;
474 }
475
476 static struct filter_input *new_input(struct filter_info *tinfo, int type)
477 {
478     struct filter_input *p;
479     struct filter_input **np = &tinfo->input_list;
480     for (;*np; np = &(*np)->next)
481         ;
482     p = *np = nmem_malloc(tinfo->nmem_config, sizeof(*p));
483     p->next = 0;
484     p->syntax = 0;
485     p->name = 0;
486     p->convert = 0;
487     p->type = type;
488     return p;
489 }
490
491 static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
492                              const char *syntax, const char *name)
493 {
494     FOR_EACH_ELEMENT(ptr) {
495         if (!XML_STRCMP(ptr->name, "marc"))
496         {
497             yaz_iconv_t iconv = 0;
498             const char *input_charset = "marc-8";
499             struct _xmlAttr *attr;
500             
501             for (attr = ptr->properties; attr; attr = attr->next)
502             {
503                 if (attr_content(attr, "inputcharset", &input_charset))
504                     ;
505                 else
506                 {
507                     dom_log(YLOG_WARN, tinfo, ptr,
508                             "bad attribute @%s, expected @inputcharset",
509                             attr->name);
510                 }
511             }
512             iconv = yaz_iconv_open("utf-8", input_charset);
513             if (!iconv)
514             {
515                 dom_log(YLOG_WARN, tinfo, ptr, 
516                         "unsupported @charset '%s'", input_charset);
517                 return ZEBRA_FAIL;
518             }
519             else
520             {
521                 struct filter_input *p 
522                     = new_input(tinfo, DOM_INPUT_MARC);
523                 p->u.marc.handle = yaz_marc_create();
524                 p->u.marc.iconv = iconv;
525                 
526                 yaz_marc_iconv(p->u.marc.handle, p->u.marc.iconv);
527                 
528                 ptr = ptr->next;
529                 
530                 parse_convert(tinfo, ptr, &p->convert);
531             }
532             break;
533
534         }
535         else if (!XML_STRCMP(ptr->name, "xmlreader"))
536         {
537             struct filter_input *p 
538                 = new_input(tinfo, DOM_INPUT_XMLREADER);
539             struct _xmlAttr *attr;
540             const char *level_str = 0;
541
542             p->u.xmlreader.split_level = 0;
543             p->u.xmlreader.reader = 0;
544
545             for (attr = ptr->properties; attr; attr = attr->next)
546             {
547                 if (attr_content(attr, "level", &level_str))
548                     ;
549                 else
550                 {
551                     dom_log(YLOG_WARN, tinfo, ptr,
552                             "bad attribute @%s, expected @level",
553                             attr->name);
554                 }
555             }
556             if (level_str)
557                 p->u.xmlreader.split_level = atoi(level_str);
558                 
559             ptr = ptr->next;
560
561             parse_convert(tinfo, ptr, &p->convert);
562             break;
563         }
564         else
565         {
566             dom_log(YLOG_WARN, tinfo, ptr,
567                     "bad element <%s>, expected <marc>|<xmlreader>",
568                     ptr->name);
569             return ZEBRA_FAIL;
570         }
571     }
572     return ZEBRA_OK;
573 }
574
575 static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
576 {
577     char tmp_full_name[1024];
578     xmlNodePtr ptr;
579     xmlDocPtr doc;
580
581     tinfo->fname = nmem_strdup(tinfo->nmem_config, fname);
582     
583     if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, 
584                              NULL, tmp_full_name))
585         tinfo->full_name = nmem_strdup(tinfo->nmem_config, tmp_full_name);
586     else
587         tinfo->full_name = nmem_strdup(tinfo->nmem_config, tinfo->fname);
588     
589     yaz_log(YLOG_LOG, "%s dom filter: "
590             "loading config file %s", tinfo->fname, tinfo->full_name);
591
592     doc = xmlParseFile(tinfo->full_name);
593     if (!doc)
594     {
595         yaz_log(YLOG_WARN, "%s: dom filter: "
596                 "failed to parse config file %s",
597                 tinfo->fname, tinfo->full_name);
598         return ZEBRA_FAIL;
599     }
600     /* save because we store ptrs to the content */ 
601     tinfo->doc_config = doc;
602     
603     ptr = xmlDocGetRootElement(doc);
604     if (!ptr || ptr->type != XML_ELEMENT_NODE 
605         || XML_STRCMP(ptr->name, "dom"))
606     {
607         dom_log(YLOG_WARN, tinfo, ptr,
608                 "bad root element <%s>, expected root element <dom>", 
609                 ptr->name);  
610         return ZEBRA_FAIL;
611     }
612
613     ptr = ptr->children;
614     FOR_EACH_ELEMENT(ptr) {
615         if (!XML_STRCMP(ptr->name, "extract"))
616         {
617             /*
618               <extract name="index">
619               <xslt stylesheet="first.xsl"/>
620               <xslt stylesheet="second.xsl"/>
621               </extract>
622             */
623             struct _xmlAttr *attr;
624             struct filter_extract *f =
625                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
626             
627             tinfo->extract = f;
628             f->name = 0;
629             f->convert = 0;
630             for (attr = ptr->properties; attr; attr = attr->next)
631             {
632                 if (attr_content(attr, "name", &f->name))
633                     ;
634                 else
635                 {
636                     dom_log(YLOG_WARN, tinfo, ptr,
637                             "bad attribute @%s, expected @name",
638                             attr->name);
639                 }
640             }
641             parse_convert(tinfo, ptr->children, &f->convert);
642         }
643         else if (!XML_STRCMP(ptr->name, "retrieve"))
644         {  
645             /* 
646                <retrieve name="F">
647                <xslt stylesheet="some.xsl"/>
648                <xslt stylesheet="some.xsl"/>
649                </retrieve>
650             */
651             struct _xmlAttr *attr;
652             struct filter_retrieve **fp = &tinfo->retrieve_list;
653             struct filter_retrieve *f =
654                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
655             
656             while (*fp)
657                 fp = &(*fp)->next;
658
659             *fp = f;
660             f->name = 0;
661             f->identifier = 0;
662             f->convert = 0;
663             f->next = 0;
664
665             for (attr = ptr->properties; attr; attr = attr->next)
666             {
667                 if (attr_content(attr, "identifier", 
668                                  &f->identifier))
669                     ;
670                 else if (attr_content(attr, "name", &f->name))
671                     ;
672                 else
673                 {
674                     dom_log(YLOG_WARN, tinfo, ptr,
675                             "bad attribute @%s,  expected @identifier|@name",
676                             attr->name);
677                 }
678             }
679             parse_convert(tinfo, ptr->children, &f->convert);
680         }
681         else if (!XML_STRCMP(ptr->name, "store"))
682         {
683             /*
684               <store name="F">
685               <xslt stylesheet="some.xsl"/>
686               <xslt stylesheet="some.xsl"/>
687               </retrieve>
688             */
689             struct filter_store *f =
690                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
691             
692             tinfo->store = f;
693             f->convert = 0;
694             parse_convert(tinfo, ptr->children, &f->convert);
695         }
696         else if (!XML_STRCMP(ptr->name, "input"))
697         {
698             /*
699               <input syntax="xml">
700               <xmlreader level="1"/>
701               </input>
702               <input syntax="usmarc">
703               <marc inputcharset="marc-8"/>
704               </input>
705             */
706             struct _xmlAttr *attr;
707             const char  *syntax = 0;
708             const char *name = 0;
709             for (attr = ptr->properties; attr; attr = attr->next)
710             {
711                 if (attr_content(attr, "syntax", &syntax))
712                     ;
713                 else if (attr_content(attr, "name", &name))
714                     ;
715                 else
716                 {
717                     dom_log(YLOG_WARN, tinfo, ptr,
718                             "bad attribute @%s,  expected @syntax|@name",
719                             attr->name);
720                 }
721             }
722             parse_input(tinfo, ptr->children, syntax, name);
723         }
724         else
725         {
726             dom_log(YLOG_WARN, tinfo, ptr,
727                     "bad element <%s>, "
728                     "expected <extract>|<input>|<retrieve>|<store>",
729                     ptr->name);
730             return ZEBRA_FAIL;
731         }
732     }
733     if (!tinfo->input_list)
734     {
735         struct filter_input *p 
736             = new_input(tinfo, DOM_INPUT_XMLREADER);
737         p->u.xmlreader.split_level = 0;
738         p->u.xmlreader.reader = 0;
739     }
740     return ZEBRA_OK;
741 }
742
743 static struct filter_retrieve *lookup_retrieve(struct filter_info *tinfo,
744                                                const char *est)
745 {
746     struct filter_retrieve *f = tinfo->retrieve_list;
747
748     /* return first schema if no est is provided */
749     if (!est)
750         return f;
751     for (; f; f = f->next)
752     { 
753         /* find requested schema */
754         if (est) 
755         {    
756             if (f->identifier && !strcmp(f->identifier, est))
757                 return f;
758             if (f->name && !strcmp(f->name, est))
759                 return f;
760         } 
761     }
762     return 0;
763 }
764
765 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
766 {
767     struct filter_info *tinfo = clientData;
768     if (!args || !*args)
769     {
770         yaz_log(YLOG_WARN, "dom filter: need config file");
771         return ZEBRA_FAIL;
772     }
773
774     if (tinfo->fname && !strcmp(args, tinfo->fname))
775         return ZEBRA_OK;
776     
777     tinfo->profile_path = res_get(res, "profilePath");
778
779     destroy_dom(tinfo);
780     return parse_dom(tinfo, args);
781 }
782
783 static void filter_destroy(void *clientData)
784 {
785     struct filter_info *tinfo = clientData;
786     destroy_dom(tinfo);
787     nmem_destroy(tinfo->nmem_config);
788     nmem_destroy(tinfo->nmem_record);
789     xfree(tinfo);
790 }
791
792 static int ioread_ex(void *context, char *buffer, int len)
793 {
794     struct recExtractCtrl *p = context;
795     return p->stream->readf(p->stream, buffer, len);
796 }
797
/* Close callback paired with ioread_ex(): there is nothing to close, so
   it ignores context and reports success (0). */
static int ioclose_ex(void *context)
{
    (void) context;
    return 0;
}
802
803
804
805 /* DOM filter style indexing */
806 static void index_value_of(struct filter_info *tinfo, 
807                            struct recExtractCtrl *extctr,
808                            RecWord* recword, 
809                            xmlNodePtr node, 
810                            const char *index_p)
811 {
812     if (tinfo->record_info_invoked == 1)
813     {
814         xmlChar *text = xmlNodeGetContent(node);
815         size_t text_len = strlen((const char *)text);
816         
817         /* if there is no text, we do not need to proceed */
818         if (text_len)
819         {            
820             /* keep seqno base so that all text will have
821                identical seqno's for multiple fields , e.g
822                <z:index name="title:w any:w title:p">.. */
823             
824             zint seqno_base = recword->seqno;
825             zint seqno_max = recword->seqno;
826        
827
828             const char *look = index_p;
829             const char *bval;
830             const char *eval;
831
832             xmlChar index[256];
833             xmlChar type[256];
834
835             /* assingning text to be indexed */
836             recword->term_buf = (const char *)text;
837             recword->term_len = text_len;
838
839             /* parsing all index name/type pairs */
840             /* may not start with ' ' or ':' */
841             while (*look && ' ' != *look && ':' != *look)
842             {
843                 /* setting name and type to zero */
844                 *index = '\0';
845                 *type = '\0';
846     
847                 /* parsing one index name */
848                 bval = look;
849                 while (*look && ':' != *look && ' ' != *look)
850                 {
851                     look++;
852                 }
853                 eval = look;
854                 strncpy((char *)index, (const char *)bval, eval - bval);
855                 index[eval - bval] = '\0';
856     
857     
858                 /* parsing one index type, if existing */
859                 if (':' == *look)
860                 {
861                     look++;
862       
863                     bval = look;
864                     while (*look && ' ' != *look)
865                     {
866                         look++;
867                     }
868                     eval = look;
869                     strncpy((char *)type, (const char *)bval, eval - bval);
870                     type[eval - bval] = '\0';
871                 }
872
873                 /* actually indexing the text given */
874
875                 recword->seqno = seqno_base;
876                 recword->index_name = (const char *)index;
877                 if (*type)
878                     recword->index_type = (const char *) type;
879
880                 /* writing debug out */
881                 if (extctr->flagShowRecords)
882                     dom_log(YLOG_LOG, tinfo, 0, 
883                             "INDEX '%s:%s' '%s'", 
884                             (const char *) index,
885                             (const char *) type, 
886                             (const char *) text);
887                 
888                 (extctr->tokenAdd)(recword);
889
890                 if (seqno_max < recword->seqno)
891                     seqno_max = recword->seqno;
892
893                 /* eat whitespaces */
894                 if (*look && ' ' == *look)
895                 {
896                     look++;
897                 } 
898             }
899             recword->seqno = seqno_max;
900         }
901         xmlFree(text); 
902     }
903 }
904
905
906 /* DOM filter style indexing */
907 static void set_record_info(struct filter_info *tinfo, 
908                             struct recExtractCtrl *extctr, 
909                             xmlNodePtr node, 
910                             const char * id_p, 
911                             const char * rank_p, 
912                             const char * type_p)
913 {
914     /* writing debug info out */
915     if (extctr && extctr->flagShowRecords)
916         dom_log(YLOG_LOG, tinfo, node,
917                 "RECORD id=%s rank=%s type=%s", 
918                 id_p ? (const char *) id_p : "(null)",
919                 rank_p ? (const char *) rank_p : "(null)",
920                 type_p ? (const char *) type_p : "(null)");
921     
922
923     if (id_p && *id_p)
924     {
925         size_t l = strlen(id_p);
926         if (l >= sizeof(extctr->match_criteria))
927             l = sizeof(extctr->match_criteria)-1;
928         memcpy(extctr->match_criteria, id_p, l);
929         extctr->match_criteria[l] = '\0';
930     }
931
932     if (rank_p && *rank_p)
933         extctr->staticrank = atozint((const char *)rank_p);
934
935     if (type_p && *type_p)
936     {
937         enum zebra_recctrl_action_t action = action_update;
938         if (!strcmp(type_p, "insert"))
939             action = action_insert;
940         else if (!strcmp(type_p, "delete"))
941             action = action_delete;
942         else if (!strcmp(type_p, "replace"))
943             action = action_replace;
944         else if (!strcmp(type_p, "update"))
945             action = action_update;
946         else
947             dom_log(YLOG_WARN, tinfo, node, "bad @type value: %s", type_p);
948         extctr->action = action;
949     }
950
951     if (tinfo->record_info_invoked == 1)
952     {
953         /* warn about multiple only once */
954         dom_log(YLOG_WARN, tinfo, node, "multiple record elements");
955     }
956     tinfo->record_info_invoked++;
957
958 }
959
960
961 /* DOM filter style indexing */
962 static void process_xml_element_zebra_node(struct filter_info *tinfo, 
963                                            struct recExtractCtrl *extctr, 
964                                            RecWord* recword, 
965                                            xmlNodePtr node)
966 {
967     if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href
968         && 0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
969     {
970         if (0 == XML_STRCMP(node->name, "index"))
971         {
972             const char *index_p = 0;
973
974             struct _xmlAttr *attr;      
975             for (attr = node->properties; attr; attr = attr->next)
976             {
977                 if (attr_content(attr, "name", &index_p))
978                 {
979                     index_value_of(tinfo, extctr, recword, node, index_p);
980                 }  
981                 else
982                 {
983                     dom_log(YLOG_WARN, tinfo, node,
984                             "bad attribute @%s, expected @name",
985                             attr->name);
986                 }
987             }
988         }
989         else if (0 == XML_STRCMP(node->name, "record"))
990         {
991             const char *id_p = 0;
992             const char *rank_p = 0;
993             const char *type_p = 0;
994
995             struct _xmlAttr *attr;
996             for (attr = node->properties; attr; attr = attr->next)
997             {
998                 if (attr_content(attr, "id", &id_p))
999                     ;
1000                 else if (attr_content(attr, "rank", &rank_p))
1001                     ;
1002                 else if (attr_content(attr, "type", &type_p))
1003                     ;
1004                 else
1005                 {
1006                     dom_log(YLOG_WARN, tinfo, node,
1007                             "bad attribute @%s, expected @id|@rank|@type",
1008                             attr->name);
1009                 }
1010             }
1011             set_record_info(tinfo, extctr, node, id_p, rank_p, type_p);
1012         } 
1013         else
1014         {
1015             dom_log(YLOG_WARN, tinfo, node,
1016                     "bad element <%s>,"
1017                     " expected <record>|<index> in namespace '%s'",
1018                     node->name, zebra_dom_ns);
1019         }
1020     }
1021 }
1022
/*
 * Parse one "name=value" token at the start of *c_ptr.
 *
 *   c_ptr      in/out cursor into a NUL-terminated string; advanced past
 *              the value (to the next ' ' or the terminating NUL) on a
 *              match, left unchanged otherwise.
 *   name       attribute name expected immediately before '='.
 *   value      destination buffer for the value.
 *   value_max  capacity of value in bytes, including the terminator.
 *
 * The value runs up to the next space or end of string.  Values that do
 * not fit are truncated but still consumed in full.  With value_max == 0
 * nothing is written to value at all.
 *
 * Returns 1 when the token matched, 0 otherwise.
 */
static int attr_content_pi(const char **c_ptr, const char *name,
                           char *value, size_t value_max)
{
    const size_t name_len = strlen(name);
    const char *look = *c_ptr;
    size_t i = 0;

    /* strncmp stops at look's terminator, so look[name_len] is only
       inspected when at least name_len characters are present */
    if (0 != strncmp(look, name, name_len) || look[name_len] != '=')
        return 0;

    look += name_len + 1;
    while (*look && ' ' != *look)
    {
        /* "i + 1 < value_max" cannot wrap, unlike "i < value_max - 1" */
        if (i + 1 < value_max)
            value[i++] = *look;
        look++;
    }
    if (value_max > 0)
        value[i] = '\0';

    *c_ptr = look;
    return 1;
}
1049
1050 /* DOM filter style indexing */
1051 static void process_xml_pi_node(struct filter_info *tinfo, 
1052                                 struct recExtractCtrl *extctr, 
1053                                 xmlNodePtr node,
1054                                 const char **index_pp)
1055 {
1056     /* if right PI name, continue parsing PI */
1057     if (0 == strcmp(zebra_pi_name, (const char *)node->name))
1058     {
1059         xmlChar *pi_p =  node->content;
1060         const char *look = (const char *) node->content;
1061     
1062         /* parsing PI record instructions */
1063         if (0 == strncmp((const char *)look, "record", 6))
1064         {
1065             char id[256];
1066             char rank[256];
1067             char type[256];
1068             
1069             *id = '\0';
1070             *rank = '\0';
1071             *type = '\0';
1072             look += 6;
1073             for (;;)
1074             {
1075                 /* eat whitespace */
1076                 while (' ' == *look)
1077                     look++;
1078                 if (*look == '\0')
1079                     break;
1080                 if (attr_content_pi(&look, "id", id, sizeof(id)))
1081                     ;
1082                 else if (attr_content_pi(&look, "rank", rank, sizeof(rank)))
1083                     ;
1084                 else if (attr_content_pi(&look, "type", type, sizeof(type)))
1085                     ;
1086                 else
1087                 {
1088                     dom_log(YLOG_WARN, tinfo, node,
1089                             "content '%s', can not parse '%s'",
1090                             pi_p, look);
1091                     break;
1092                 }
1093             }
1094             set_record_info(tinfo, extctr, node, id, rank, type);
1095         } 
1096         /* parsing index instruction */
1097         else if (0 == strncmp((const char *)look, "index", 5))
1098         {
1099             look += 5;
1100       
1101             /* eat whitespace */
1102             while (*look && ' ' == *look)
1103                 look++;
1104
1105             /* export index instructions to outside */
1106             *index_pp = look;
1107         } 
1108         else 
1109         {
1110             dom_log(YLOG_WARN, tinfo, node,
1111                     "content '%s', can not parse '%s'",
1112                     pi_p, look);
1113         }
1114     }
1115 }
1116
1117 /* DOM filter style indexing */
1118 static void process_xml_element_node(struct filter_info *tinfo, 
1119                                      struct recExtractCtrl *extctr, 
1120                                      RecWord* recword, 
1121                                      xmlNodePtr node)
1122 {
1123     /* remember indexing instruction from PI to next element node */
1124     const char *index_p = 0;
1125
1126     /* check if we are an element node in the special zebra namespace 
1127        and either set record data or index value-of node content*/
1128     process_xml_element_zebra_node(tinfo, extctr, recword, node);
1129   
1130     /* loop through kid nodes */
1131     for (node = node->children; node; node = node->next)
1132     {
1133         /* check and set PI record and index index instructions */
1134         if (node->type == XML_PI_NODE)
1135         {
1136             process_xml_pi_node(tinfo, extctr, node, &index_p);
1137         }
1138         else if (node->type == XML_ELEMENT_NODE)
1139         {
1140             /* if there was a PI index instruction before this element */
1141             if (index_p)
1142             {
1143                 index_value_of(tinfo, extctr, recword, node, index_p);
1144                 index_p = 0;
1145             }
1146             process_xml_element_node(tinfo, extctr, recword,node);
1147         }
1148         else
1149             continue;
1150     }
1151 }
1152
1153
1154 /* DOM filter style indexing */
1155 static void extract_dom_doc_node(struct filter_info *tinfo, 
1156                                  struct recExtractCtrl *extctr, 
1157                                  xmlDocPtr doc)
1158 {
1159     /* only need to do the initialization once, reuse recword for all terms */
1160     RecWord recword;
1161     (*extctr->init)(extctr, &recword);
1162
1163     process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
1164 }
1165
1166
1167 static int convert_extract_doc(struct filter_info *tinfo, 
1168                                struct filter_input *input,
1169                                struct recExtractCtrl *p, 
1170                                xmlDocPtr doc)
1171 {
1172     xmlChar *buf_out;
1173     int len_out;
1174     const char *params[10];
1175     xsltStylesheetPtr last_xsp = 0;
1176
1177     /* per default do not ingest record */
1178     tinfo->record_info_invoked = 0;
1179
1180     /* exit if empty document given */
1181     if (!doc)
1182         return RECCTRL_EXTRACT_SKIP;
1183
1184     /* we actuallu have a document which needs to be processed further */
1185     params[0] = 0;
1186     set_param_str(params, "schema", zebra_dom_ns, tinfo->nmem_record);
1187
1188     if (p && p->flagShowRecords)
1189     {
1190         xmlChar *buf_out;
1191         int len_out;
1192         xmlDocDumpMemory(doc, &buf_out, &len_out);
1193 #if 0 
1194         FILE *outf = fopen("extract.xml", "w");
1195         fwrite(buf_out, 1, len_out, outf);
1196         fclose(outf);
1197 #endif
1198         yaz_log(YLOG_LOG, "Extract Doc: %.*s", len_out, buf_out);
1199     }
1200
1201     if (p->setStoreData)
1202     {
1203         xmlDocPtr store_doc = 0;
1204
1205         /* input conversion */
1206         perform_convert(tinfo, p, 0, input->convert, params, &doc, 0);
1207         
1208         if (tinfo->store)
1209         {
1210             /* store conversion */
1211             store_doc = xmlCopyDoc(doc, 1);
1212             perform_convert(tinfo, p, 0, tinfo->store->convert,
1213                             params, &store_doc, &last_xsp);
1214         }
1215         
1216         /* saving either store doc or original doc in case no store doc exists */
1217         if (last_xsp)
1218             xsltSaveResultToString(&buf_out, &len_out, 
1219                                    store_doc ? store_doc : doc, last_xsp);
1220         else
1221             xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
1222         
1223         if (p->setStoreData)
1224             (*p->setStoreData)(p, buf_out, len_out);
1225         xmlFree(buf_out);
1226         if (store_doc)
1227             xmlFreeDoc(store_doc);
1228     }
1229
1230
1231     /* extract conversion */
1232     perform_convert(tinfo, p, 0, tinfo->extract->convert, params, &doc, 0);
1233
1234
1235     /* finally, do the indexing */
1236     if (doc){
1237         extract_dom_doc_node(tinfo, p, doc);
1238         xmlFreeDoc(doc);
1239     }
1240     
1241     /* there was nothing to index, so there is no inserted/updated record */
1242     if (tinfo->record_info_invoked == 0)
1243         return RECCTRL_EXTRACT_SKIP;
1244
1245     return RECCTRL_EXTRACT_OK;
1246 }
1247
1248 static int extract_xml_split(struct filter_info *tinfo,
1249                              struct filter_input *input,
1250                              struct recExtractCtrl *p)
1251 {
1252     int ret;
1253
1254     if (p->first_record)
1255     {
1256         if (input->u.xmlreader.reader)
1257             xmlFreeTextReader(input->u.xmlreader.reader);
1258         input->u.xmlreader.reader = xmlReaderForIO(ioread_ex, ioclose_ex,
1259                                                    p /* I/O handler */,
1260                                                    0 /* URL */, 
1261                                                    0 /* encoding */,
1262                                                    XML_PARSE_XINCLUDE
1263                                                    | XML_PARSE_NOENT
1264                                                    | XML_PARSE_NONET);
1265     }
1266     if (!input->u.xmlreader.reader)
1267         return RECCTRL_EXTRACT_ERROR_GENERIC;
1268
1269     ret = xmlTextReaderRead(input->u.xmlreader.reader);
1270     while (ret == 1)
1271     {
1272         int type = xmlTextReaderNodeType(input->u.xmlreader.reader);
1273         int depth = xmlTextReaderDepth(input->u.xmlreader.reader);
1274
1275         if (type == XML_READER_TYPE_ELEMENT && 
1276             input->u.xmlreader.split_level == depth)
1277         {
1278             xmlNodePtr ptr;
1279
1280             /* per default do not ingest record */
1281             tinfo->record_info_invoked = 0;
1282             
1283             ptr = xmlTextReaderExpand(input->u.xmlreader.reader);
1284             if (ptr)
1285             {
1286                 /* we have a new document */
1287
1288                 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
1289                 xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
1290                 
1291                 xmlDocSetRootElement(doc, ptr2);
1292                 
1293                 /* writing debug info out */
1294                 if (p->flagShowRecords)
1295                 {
1296                     xmlChar *buf_out = 0;
1297                     int len_out = 0;
1298                     xmlDocDumpMemory(doc, &buf_out, &len_out);
1299                     yaz_log(YLOG_LOG, "%s: XMLREADER level: %i\n%.*s", 
1300                             tinfo->fname ? tinfo->fname : "(none)",
1301                             depth, len_out, buf_out); 
1302                     xmlFree(buf_out);
1303                 }
1304                 
1305                 return convert_extract_doc(tinfo, input, p, doc);
1306             }
1307             else
1308             {
1309                 xmlFreeTextReader(input->u.xmlreader.reader);
1310                 input->u.xmlreader.reader = 0;
1311                 return RECCTRL_EXTRACT_ERROR_GENERIC;
1312             }
1313         }
1314         ret = xmlTextReaderRead(input->u.xmlreader.reader);
1315     }
1316     xmlFreeTextReader(input->u.xmlreader.reader);
1317     input->u.xmlreader.reader = 0;
1318     return RECCTRL_EXTRACT_EOF;
1319 }
1320
1321 static int extract_xml_full(struct filter_info *tinfo, 
1322                             struct filter_input *input,
1323                             struct recExtractCtrl *p)
1324 {
1325     if (p->first_record) /* only one record per stream */
1326     {
1327         xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, 
1328                                   p /* I/O handler */,
1329                                   0 /* URL */,
1330                                   0 /* encoding */,
1331                                   XML_PARSE_XINCLUDE
1332                                   | XML_PARSE_NOENT
1333                                   | XML_PARSE_NONET);
1334         if (!doc)
1335         {
1336             return RECCTRL_EXTRACT_ERROR_GENERIC;
1337         }
1338         return convert_extract_doc(tinfo, input, p, doc);
1339     }
1340     else
1341         return RECCTRL_EXTRACT_EOF;
1342 }
1343
1344 static int extract_iso2709(struct filter_info *tinfo,
1345                            struct filter_input *input,
1346                            struct recExtractCtrl *p)
1347 {
1348     char buf[100000];
1349     int record_length;
1350     int read_bytes, r;
1351
1352     if (p->stream->readf(p->stream, buf, 5) != 5)
1353         return RECCTRL_EXTRACT_EOF;
1354     while (*buf < '0' || *buf > '9')
1355     {
1356         int i;
1357
1358         dom_log(YLOG_WARN, tinfo, 0,
1359                 "MARC: Skipping bad byte %d (0x%02X)",
1360                 *buf & 0xff, *buf & 0xff);
1361         for (i = 0; i<4; i++)
1362             buf[i] = buf[i+1];
1363
1364         if (p->stream->readf(p->stream, buf+4, 1) != 1)
1365             return RECCTRL_EXTRACT_EOF;
1366     }
1367     record_length = atoi_n (buf, 5);
1368     if (record_length < 25)
1369     {
1370         dom_log(YLOG_WARN, tinfo, 0,
1371                 "MARC record length < 25, is %d",  record_length);
1372         return RECCTRL_EXTRACT_ERROR_GENERIC;
1373     }
1374     read_bytes = p->stream->readf(p->stream, buf+5, record_length-5);
1375     if (read_bytes < record_length-5)
1376     {
1377         dom_log(YLOG_WARN, tinfo, 0,
1378                 "couldn't read whole MARC record");
1379         return RECCTRL_EXTRACT_ERROR_GENERIC;
1380     }
1381     r = yaz_marc_read_iso2709(input->u.marc.handle,  buf, record_length);
1382     if (r < record_length)
1383     {
1384         dom_log (YLOG_WARN, tinfo, 0,
1385                  "parsing of MARC record failed r=%d length=%d",
1386                  r, record_length);
1387         return RECCTRL_EXTRACT_ERROR_GENERIC;
1388     }
1389     else
1390     {
1391         xmlDocPtr rdoc;
1392         xmlNode *root_ptr;
1393         yaz_marc_write_xml(input->u.marc.handle, &root_ptr, 
1394                            "http://www.loc.gov/MARC21/slim", 0, 0);
1395         rdoc = xmlNewDoc((const xmlChar*) "1.0");
1396         xmlDocSetRootElement(rdoc, root_ptr);
1397         return convert_extract_doc(tinfo, input, p, rdoc);        
1398     }
1399     return RECCTRL_EXTRACT_OK;
1400 }
1401
1402 static int filter_extract(void *clientData, struct recExtractCtrl *p)
1403 {
1404     struct filter_info *tinfo = clientData;
1405     struct filter_input *input = tinfo->input_list;
1406
1407     if (!input)
1408         return RECCTRL_EXTRACT_ERROR_GENERIC;
1409     
1410     nmem_reset(tinfo->nmem_record);
1411
1412     if (p->setStoreData == 0)
1413         return extract_xml_full(tinfo, input, p);
1414     switch(input->type)
1415     {
1416     case DOM_INPUT_XMLREADER:
1417         if (input->u.xmlreader.split_level == 0)
1418             return extract_xml_full(tinfo, input, p);
1419         else
1420             return extract_xml_split(tinfo, input, p);
1421         break;
1422     case DOM_INPUT_MARC:
1423         return extract_iso2709(tinfo, input, p);
1424     }
1425     return RECCTRL_EXTRACT_ERROR_GENERIC;
1426 }
1427
1428 static int ioread_ret(void *context, char *buffer, int len)
1429 {
1430     struct recRetrieveCtrl *p = context;
1431     int r = p->stream->readf(p->stream, buffer, len);
1432     return r;
1433 }
1434
/*
 * libxml2 close callback used during retrieval.  Nothing is closed
 * here; the function only reports success.
 */
static int ioclose_ret(void *context)
{
    (void) context;     /* intentionally unused */
    return 0;
}
1439
1440 static int filter_retrieve(void *clientData, struct recRetrieveCtrl *p)
1441 {
1442     /* const char *esn = zebra_dom_ns; */
1443     const char *esn = 0;
1444     const char *params[32];
1445     struct filter_info *tinfo = clientData;
1446     xmlDocPtr doc;
1447     struct filter_retrieve *retrieve;
1448     xsltStylesheetPtr last_xsp = 0;
1449
1450     if (p->comp)
1451     {
1452         if (p->comp->which == Z_RecordComp_simple
1453             && p->comp->u.simple->which == Z_ElementSetNames_generic)
1454         {
1455             esn = p->comp->u.simple->u.generic;
1456         }
1457         else if (p->comp->which == Z_RecordComp_complex 
1458                  && p->comp->u.complex->generic->elementSpec
1459                  && p->comp->u.complex->generic->elementSpec->which ==
1460                  Z_ElementSpec_elementSetName)
1461         {
1462             esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
1463         }
1464     }
1465     retrieve = lookup_retrieve(tinfo, esn);
1466     if (!retrieve)
1467     {
1468         p->diagnostic =
1469             YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
1470         p->addinfo = odr_strdup_null(p->odr, esn);
1471         return 0;
1472     }
1473
1474     params[0] = 0;
1475     set_param_int(params, "id", p->localno, p->odr->mem);
1476     if (p->fname)
1477         set_param_str(params, "filename", p->fname, p->odr->mem);
1478     if (p->staticrank >= 0)
1479         set_param_int(params, "rank", p->staticrank, p->odr->mem);
1480
1481     if (esn)
1482         set_param_str(params, "schema", esn, p->odr->mem);
1483     else
1484         if (retrieve->name)
1485             set_param_str(params, "schema", retrieve->name, p->odr->mem);
1486         else if (retrieve->identifier)
1487             set_param_str(params, "schema", retrieve->identifier, p->odr->mem);
1488         else
1489             set_param_str(params, "schema", "", p->odr->mem);
1490
1491     if (p->score >= 0)
1492         set_param_int(params, "score", p->score, p->odr->mem);
1493     set_param_int(params, "size", p->recordSize, p->odr->mem);
1494
1495     doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
1496                     0 /* URL */,
1497                     0 /* encoding */,
1498                     XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET);
1499     if (!doc)
1500     {
1501         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1502         return 0;
1503     }
1504
1505     /* retrieve conversion */
1506     perform_convert(tinfo, 0, p, retrieve->convert, params, &doc, &last_xsp);
1507     if (!doc)
1508     {
1509         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1510     }
1511     else if (!p->input_format
1512              || !oid_oidcmp(p->input_format, yaz_oid_recsyn_xml))
1513     {
1514         xmlChar *buf_out;
1515         int len_out;
1516
1517         if (last_xsp)
1518             xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1519         else
1520             xmlDocDumpMemory(doc, &buf_out, &len_out);            
1521
1522         p->output_format = yaz_oid_recsyn_xml;
1523         p->rec_len = len_out;
1524         p->rec_buf = odr_malloc(p->odr, p->rec_len);
1525         memcpy(p->rec_buf, buf_out, p->rec_len);
1526         xmlFree(buf_out);
1527     }
1528     else if (!oid_oidcmp(p->output_format, yaz_oid_recsyn_sutrs))
1529     {
1530         xmlChar *buf_out;
1531         int len_out;
1532
1533         if (last_xsp)
1534             xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1535         else
1536             xmlDocDumpMemory(doc, &buf_out, &len_out);            
1537         
1538         p->output_format = yaz_oid_recsyn_sutrs;
1539         p->rec_len = len_out;
1540         p->rec_buf = odr_malloc(p->odr, p->rec_len);
1541         memcpy(p->rec_buf, buf_out, p->rec_len);
1542         
1543         xmlFree(buf_out);
1544     }
1545     else
1546     {
1547         p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
1548     }
1549     xmlFreeDoc(doc);
1550     return 0;
1551 }
1552
/*
 * Descriptor for this filter.  The initializer is positional, so the
 * order below must follow the member order of struct recType (declared
 * outside this file section).
 */
static struct recType filter_type = {
    0,                  /* NOTE(review): meaning of the first member is not
                           visible here - confirm against struct recType */
    "dom",              /* filter name */
    filter_init,
    filter_config,
    filter_destroy,
    filter_extract,     /* indexing entry point */
    filter_retrieve     /* record presentation entry point */
};
1562
/*
 * Exported, zero-terminated table of the filters this file provides.
 * The symbol is named idzebra_filter_dom when IDZEBRA_STATIC_DOM is
 * defined and idzebra_filter otherwise (presumably static vs. loadable
 * module builds - confirm against the build setup).
 */
RecType
#ifdef IDZEBRA_STATIC_DOM
idzebra_filter_dom
#else
idzebra_filter
#endif

[] = {
    &filter_type,
    0,
};
1574 /*
1575  * Local variables:
1576  * c-basic-offset: 4
1577  * c-file-style: "Stroustrup"
1578  * indent-tabs-mode: nil
1579  * End:
1580  * vim: shiftwidth=4 tabstop=8 expandtab
1581  */
1582