1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2011 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
32 #include <idzebra/recgrs.h>
35 #include <yaz/xmalloc.h>
/* Size in bytes of each buffer requested from XML_GetBuffer() and the
   maximum amount read into that buffer at a time (used for both the main
   record stream and external entity files). */
39 #define XML_CHUNK 1024
/* Stack of data1 nodes, indexed by ui->level in the parser callbacks; slot 0
   receives the root node in zebra_read_xml().
   NOTE(review): the capacity is fixed at 256 and no bounds check on the
   level is visible in the callbacks that index it -- confirm how deeply
   nested input is handled. */
42 data1_node *d1_stack[256];
49 static void report_xml_error(XML_Parser parser)
51 zint line = XML_GetCurrentLineNumber(parser);
52 zint col = XML_GetCurrentColumnNumber(parser);
53 yaz_log (YLOG_WARN, ZINT_FORMAT ":" ZINT_FORMAT ":XML error: %s",
54 line, col, XML_ErrorString(XML_GetErrorCode(parser)));
/* Expat start-element handler (installed with XML_SetElementHandler).
 *   user - user data pointer, cast to struct user_info
 *   el   - element name
 *   attr - attribute list from Expat, forwarded unchanged to data1_mk_tag()
 */
57 static void cb_start (void *user, const char *el, const char **attr)
59 struct user_info *ui = (struct user_info*) user;
/* Record the element name on the root node held in d1_stack[0].
   NOTE(review): presumably this applies only to the outermost element; the
   guarding condition is not visible -- confirm. */
61 data1_set_root (ui->dh, ui->d1_stack[0], ui->nmem, el);
/* Create the tag node beneath the node one level up and remember it at the
   current level. */
62 ui->d1_stack[ui->level] = data1_mk_tag (ui->dh, ui->nmem, el, attr,
63 ui->d1_stack[ui->level-1]);
65 yaz_log (ui->loglevel, "cb_start %s", el);
/* Expat end-element handler (installed with XML_SetElementHandler); traces
   the closing element name at ui->loglevel.
   NOTE(review): presumably this also steps ui->level back to the parent
   level -- that statement is not visible; confirm. */
68 static void cb_end (void *user, const char *el)
70 struct user_info *ui = (struct user_info*) user;
73 yaz_log (ui->loglevel, "cb_end %s", el);
/* Expat character-data handler (installed with
 * XML_SetCharacterDataHandler).
 *   user - user data pointer, cast to struct user_info
 *   s    - text supplied by Expat
 *   len  - length of s; the text is always used together with this length
 */
76 static void cb_chardata (void *user, const char *s, int len)
78 struct user_info *ui = (struct user_info*) user;
/* NOTE(review): confirm whether this trace is actually compiled in; it would
   log every text chunk. */
80 yaz_log (ui->loglevel, "cb_chardata %.*s", len, s);
/* Create a text node beneath the node one level up and store it at the
   current level of the stack. */
82 ui->d1_stack[ui->level] = data1_mk_text_n (ui->dh, ui->nmem, s, len,
83 ui->d1_stack[ui->level -1]);
/* Expat XML-declaration handler (installed with XML_SetXmlDeclHandler).
 * Builds a name/value attribute list and creates an "xml" preprocess node
 * beneath the node one level up in the stack.
 *   user     - user data pointer, cast to struct user_info
 *   version  - version string from the declaration
 *   encoding - encoding named in the declaration; only logged, the node
 *              always records "UTF-8"
 */
86 static void cb_decl (void *user, const char *version, const char *encoding,
89 struct user_info *ui = (struct user_info*) user;
/* Seven slots: three name/value pairs are filled below.
   NOTE(review): presumably the last slot holds the list terminator -- the
   assignment is not visible; confirm. */
90 const char *attr_list[7];
92 attr_list[0] = "version";
/* NOTE(review): version is stored without a NULL check although the log
   call further down allows for it being NULL -- confirm. */
93 attr_list[1] = version;
95 attr_list[2] = "encoding";
96 attr_list[3] = "UTF-8"; /* internally it's always UTF-8 */
98 attr_list[4] = "standalone";
/* NOTE(review): Expat documents standalone as -1 when the declaration has no
   standalone attribute; this expression maps -1 to "yes" -- confirm that is
   intended. */
99 attr_list[5] = standalone ? "yes" : "no";
103 data1_mk_preprocess (ui->dh, ui->nmem, "xml", attr_list,
104 ui->d1_stack[ui->level-1]);
/* NOTE(review): logs at YLOG_LOG rather than ui->loglevel; confirm whether
   this trace is compiled in. */
106 yaz_log (YLOG_LOG, "decl version=%s encoding=%s",
107 version ? version : "null",
108 encoding ? encoding : "null");
/* Expat processing-instruction handler (installed with
 * XML_SetProcessingInstructionHandler).  Creates a preprocess node named
 * after the target beneath the node one level up, then attaches the
 * instruction data to `res` as text.
 *   user   - user data pointer, cast to struct user_info
 *   target - processing-instruction target
 * NOTE(review): `res` is presumably the node returned by
 * data1_mk_preprocess(); its declaration is not visible -- confirm.
 */
112 static void cb_processing (void *user, const char *target,
115 struct user_info *ui = (struct user_info*) user;
117 data1_mk_preprocess (ui->dh, ui->nmem, target, 0,
118 ui->d1_stack[ui->level-1]);
/* NOTE(review): strlen(data) is called unconditionally, yet the log call
   below allows for data being NULL -- confirm data can never be NULL here. */
119 data1_mk_text_nf (ui->dh, ui->nmem, data, strlen(data), res);
121 yaz_log (ui->loglevel, "decl processing target=%s data=%s",
122 target ? target : "null",
123 data ? data : "null");
126 static void cb_comment (void *user, const char *data)
128 struct user_info *ui = (struct user_info*) user;
129 yaz_log (ui->loglevel, "decl comment data=%s", data ? data : "null");
130 data1_mk_comment (ui->dh, ui->nmem, data, ui->d1_stack[ui->level-1]);
133 static void cb_doctype_start (void *userData, const char *doctypeName,
134 const char *sysid, const char *pubid,
135 int has_internal_subset)
137 struct user_info *ui = (struct user_info*) userData;
138 yaz_log (ui->loglevel, "doctype start doctype=%s sysid=%s pubid=%s",
139 doctypeName, sysid, pubid);
142 static void cb_doctype_end (void *userData)
144 struct user_info *ui = (struct user_info*) userData;
145 yaz_log (ui->loglevel, "doctype end");
149 static void cb_entity_decl (void *userData, const char *entityName,
150 int is_parameter_entity,
151 const char *value, int value_length,
152 const char *base, const char *systemId,
153 const char *publicId, const char *notationName)
155 struct user_info *ui = (struct user_info*) userData;
156 yaz_log (ui->loglevel,
157 "entity decl %s is_para_entry=%d value=%.*s base=%s systemId=%s"
158 " publicId=%s notationName=%s",
159 entityName, is_parameter_entity, value_length, value,
160 base, systemId, publicId, notationName);
/* Expat external-entity reference handler (installed with
 * XML_SetExternalEntityRefHandler).  Opens the entity's system identifier as
 * a file, parses it with a child parser created from pparser, and frees that
 * child parser afterwards.
 *   pparser  - parent parser; the struct user_info is fetched from it with
 *              XML_GetUserData()
 *   systemId - used directly as the path handed to fopen()
 *   publicId - only logged
 * NOTE(review): systemId comes from the document and is opened as a local
 * path as-is -- confirm this is acceptable for untrusted input.
 * NOTE(review): no fclose() of the opened file is visible -- confirm the
 * file is closed on every path.
 */
164 static int cb_external_entity(XML_Parser pparser,
167 const char *systemId,
168 const char *publicId)
170 struct user_info *ui = (struct user_info*) XML_GetUserData(pparser);
175 yaz_log (ui->loglevel,
176 "external entity context=%s base=%s systemid=%s publicid=%s",
177 context, base, systemId, publicId);
181 if (!(inf = fopen (systemId, "rb")))
183 yaz_log (YLOG_WARN|YLOG_ERRNO, "fopen %s", systemId);
/* The child parser is created with an empty context string rather than the
   context value that is logged above. */
187 parser = XML_ExternalEntityParserCreate (pparser, "", 0);
/* Feed the file to the child parser in XML_CHUNK sized pieces. */
191 void *buf = XML_GetBuffer (parser, XML_CHUNK);
194 yaz_log (YLOG_WARN, "XML_GetBuffer fail");
197 r = fread (buf, 1, XML_CHUNK, inf);
202 yaz_log (YLOG_WARN|YLOG_ERRNO, "fread %s", systemId);
207 if (!XML_ParseBuffer (parser, r, done))
210 report_xml_error(parser);
214 XML_ParserFree (parser);
/* Conversion callback stored in info->convert by cb_encoding_handler().
 *   data - the iconv descriptor, passed as an opaque pointer
 *   s    - input bytes to convert
 * Runs iconv() into a two-byte output buffer and copies that buffer into
 * `code` as a short.
 * NOTE(review): presumably `code` is the return value -- the return
 * statement is not visible; confirm.
 */
220 static int cb_encoding_convert (void *data, const char *s)
222 iconv_t t = (iconv_t) data;
225 char outbuf_[2], *outbuf = outbuf_;
227 char *inbuf = (char *) s;
/* NOTE(review): logs at YLOG_LOG on every call; confirm whether this trace
   is compiled in. */
231 yaz_log(YLOG_LOG, "------------------------- cb_encoding_convert --- ");
233 ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft);
/* Any failure other than E2BIG resets the descriptor's conversion state. */
234 if (ret == (size_t) (-1) && errno != E2BIG)
236 iconv (t, 0, 0, 0, 0);
241 memcpy (&code, outbuf_, sizeof(short));
/* Release callback stored in info->release by cb_encoding_handler(); data is
   the iconv descriptor passed as an opaque pointer.
   NOTE(review): presumably the descriptor is closed here -- that call is not
   visible; confirm. */
245 static void cb_encoding_release (void *data)
247 iconv_t t = (iconv_t) data;
/* Expat unknown-encoding handler (installed with
 * XML_SetUnknownEncodingHandler, which passes the struct user_info as
 * userData).  Opens an iconv descriptor converting from the named encoding
 * to "UNICODE", then probes every byte value 0..255 to fill info->map[]:
 * a byte that cannot be converted is marked -1, and a byte that starts a
 * multi-byte sequence is handled by the multi-byte branch.  When at least
 * one multi-byte lead byte is found, the convert and release callbacks are
 * installed and the descriptor is kept in info->data.
 *   userData - user data pointer, cast to struct user_info
 *   name     - encoding name reported by Expat
 * NOTE(review): the return values and the descriptor clean-up on the
 * single-byte-only path are not visible -- confirm.
 */
251 static int cb_encoding_handler (void *userData, const char *name,
256 struct user_info *ui = (struct user_info*) userData;
258 iconv_t t = iconv_open ("UNICODE", name);
259 if (t == (iconv_t) (-1))
262 info->data = 0; /* signal that multibyte is not in use */
263 yaz_log (ui->loglevel, "Encoding handler of %s", name);
/* Probe each possible first byte of the encoding. */
264 for (i = 0; i<256; i++)
269 char *inbuf = inbuf_;
270 char *outbuf = outbuf_;
275 iconv (t, 0, 0, 0, 0); /* reset iconv */
277 ret = iconv(t, &inbuf, &inleft, &outbuf, &outleft);
278 if (ret == (size_t) (-1))
282 yaz_log (ui->loglevel, "Encoding %d: invalid sequence", i);
283 info->map[i] = -1; /* invalid sequence */
286 { /* multi byte input */
/* NOTE(review): the enclosing loop runs i up to 255 but this assert only
   admits i < 255 -- confirm byte 255 can never reach this branch. */
303 assert (i >= 0 && i<255);
306 for (k = 0; k<len; k++)
/* NOTE(review): appends to sbuf without a visible bound; confirm sbuf is
   large enough for the longest probed sequence. */
308 sprintf (sbuf+strlen(sbuf), "%d ", inbuf_[k]&255);
310 ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft);
311 if (ret == (size_t) (-1))
313 if (errno == EILSEQ || errno == E2BIG)
319 else if (errno == EINVAL)
325 else if (outleft == 0)
328 info->data = t; /* signal that multibyte is in use */
336 if (info->map[i] < -1)
337 yaz_log (ui->loglevel, "Encoding %d: multibyte input %d",
340 yaz_log (ui->loglevel, "Encoding %d: multibyte input failed",
345 info->map[i] = -1; /* no room for output */
347 yaz_log (YLOG_WARN, "Encoding %d: no room for output",
351 else if (outleft == 0)
/* The two output bytes are reinterpreted as a short code value. */
354 memcpy (&code, outbuf_, sizeof(short));
359 { /* should never happen */
361 yaz_log (YLOG_DEBUG, "Encoding %d: bad state", i);
365 { /* at least one multi byte */
366 info->convert = cb_encoding_convert;
367 info->release = cb_encoding_release;
371 /* no multi byte - we no longer need iconv handler */
/* Expat namespace-declaration start handler (installed with
   XML_SetNamespaceDeclHandler); traces the prefix and URI at ui->loglevel.
   NOTE(review): Expat passes a NULL prefix for a default namespace
   declaration -- confirm the log call is guarded against NULL arguments. */
383 static void cb_ns_start(void *userData, const char *prefix, const char *uri)
385 struct user_info *ui = (struct user_info*) userData;
387 yaz_log(ui->loglevel, "cb_ns_start %s %s", prefix, uri);
/* Expat namespace-declaration end handler (installed with
   XML_SetNamespaceDeclHandler); traces the prefix at ui->loglevel.
   NOTE(review): Expat passes a NULL prefix for a default namespace
   declaration -- confirm the log call is guarded against a NULL prefix. */
390 static void cb_ns_end(void *userData, const char *prefix)
392 struct user_info *ui = (struct user_info*) userData;
394 yaz_log(ui->loglevel, "cb_ns_end %s", prefix);
/* Parse an XML record from a Zebra record stream into a data1 tree.
 *   dh     - data1 handle used to create the nodes
 *   stream - record stream; its readf member supplies the input bytes
 * Returns the root node (d1_stack[0]) once parsing has finished and the tree
 * has at least one child.
 * NOTE(review): the third parameter (referred to as `m` below, passed to
 * data1_mk_node2) and the value returned on failure are not visible --
 * confirm.
 */
397 data1_node *zebra_read_xml(data1_handle dh,
398 struct ZebraRecStream *stream,
402 struct user_info uinfo;
404 data1_node *first_node;
/* Callback tracing goes to the debug log level. */
407 uinfo.loglevel = YLOG_DEBUG;
411 uinfo.d1_stack[0] = data1_mk_node2 (dh, m, DATA1N_root, 0);
412 uinfo.d1_stack[1] = 0; /* indicate no children (see end of routine) */
414 parser = XML_ParserCreate (0 /* encoding */);
/* Install the callbacks; all of them receive &uinfo as their user data. */
416 XML_SetElementHandler (parser, cb_start, cb_end);
417 XML_SetCharacterDataHandler (parser, cb_chardata);
418 XML_SetXmlDeclHandler (parser, cb_decl);
419 XML_SetProcessingInstructionHandler (parser, cb_processing);
420 XML_SetUserData (parser, &uinfo);
421 XML_SetCommentHandler (parser, cb_comment);
422 XML_SetDoctypeDeclHandler (parser, cb_doctype_start, cb_doctype_end);
423 XML_SetEntityDeclHandler (parser, cb_entity_decl);
424 XML_SetExternalEntityRefHandler (parser, cb_external_entity);
425 XML_SetNamespaceDeclHandler(parser, cb_ns_start, cb_ns_end);
427 XML_SetUnknownEncodingHandler (parser, cb_encoding_handler, &uinfo);
/* Feed the stream to the parser in XML_CHUNK sized pieces. */
432 void *buf = XML_GetBuffer (parser, XML_CHUNK);
436 yaz_log (YLOG_WARN, "XML_GetBuffer fail");
439 r = stream->readf(stream, buf, XML_CHUNK);
443 yaz_log (YLOG_WARN, "XML read fail");
/* NOTE(review): no_read presumably tracks how much input has been read so
   that an empty stream is not reported as a parse error -- confirm. */
450 if (no_read && !XML_ParseBuffer (parser, r, done))
453 report_xml_error(parser);
456 XML_ParserFree (parser);
/* No node was stored at level 1, or parsing did not complete. */
459 if (!uinfo.d1_stack[1] || !done)
461 /* insert XML header if not present .. */
462 first_node = uinfo.d1_stack[0]->child;
463 if (first_node->which != DATA1N_preprocess ||
464 strcmp(first_node->u.preprocess.target, "xml"))
/* Five slots: two name/value pairs are filled below.
   NOTE(review): presumably the last slot holds the list terminator -- the
   assignment is not visible; confirm. */
466 const char *attr_list[5];
468 attr_list[0] = "version";
469 attr_list[1] = "1.0";
471 attr_list[2] = "encoding";
472 attr_list[3] = "UTF-8"; /* encoding */
476 data1_insert_preprocess (uinfo.dh, uinfo.nmem, "xml", attr_list,
479 return uinfo.d1_stack[0];
/* Expat library version, filled in by filter_init() from
   XML_ExpatVersionInfo(). */
483 XML_Expat_Version expat_version;
486 static data1_node *grs_read_xml(struct grs_read_info *p)
488 return zebra_read_xml(p->dh, p->stream, p->mem);
/* Filter initialisation: allocates a struct xml_info with xmalloc() and
   records the Expat library version in it.
   NOTE(review): presumably the allocated structure is returned as the
   filter's client data; the return statement and any use of res and recType
   are not visible -- confirm. */
491 static void *filter_init(Res res, RecType recType)
493 struct xml_info *p = (struct xml_info *) xmalloc (sizeof(*p));
495 p->expat_version = XML_ExpatVersionInfo();
/* Filter tear-down: clientData is the struct xml_info for this filter.
   NOTE(review): presumably the structure is freed here -- that call is not
   visible; confirm. */
500 static void filter_destroy(void *clientData)
502 struct xml_info *p = (struct xml_info *) clientData;
507 static int filter_extract(void *clientData, struct recExtractCtrl *ctrl)
509 return zebra_grs_extract(clientData, ctrl, grs_read_xml);
512 static int filter_retrieve(void *clientData, struct recRetrieveCtrl *ctrl)
514 return zebra_grs_retrieve(clientData, ctrl, grs_read_xml);
/* Record-type descriptor for this filter.
   NOTE(review): the initializer entries are not visible; presumably they
   name the filter and reference filter_init, filter_destroy, filter_extract
   and filter_retrieve -- confirm. */
517 static struct recType filter_type = {
/* Symbol name used when IDZEBRA_STATIC_GRS_XML is defined.
   NOTE(review): the alternative name and the definition this symbol belongs
   to are not visible -- confirm. */
528 #ifdef IDZEBRA_STATIC_GRS_XML
529 idzebra_filter_grs_xml
544 * c-file-style: "Stroustrup"
545 * indent-tabs-mode: nil
547 * vim: shiftwidth=4 tabstop=8 expandtab