From: Adam Dickmeiss Date: Wed, 19 Apr 2006 10:05:02 +0000 (+0000) Subject: New facilities for the MARC module. The reading - and writing of X-Git-Tag: YAZ.2.1.18~35 X-Git-Url: http://sru.miketaylor.org.uk/?a=commitdiff_plain;h=626ee9b09c95c3585c432623c042872b0d1fea92;p=yaz-moved-to-github.git New facilities for the MARC module. The reading - and writing of content are separate methods for the yaz_marc_t handle. The following read functions are available: yaz_marc_read_iso2709 (Reads MARC in ISO2709 format), yaz_marc_read_xml (reads MARC in MARCXML/MarcXchange format). Write functions have prefix yaz_marc_write_.. The existing utilities yaz_marc_decode_wrbuf and yaz_marc_decode_buf are still available. Removed support for OAI-MARC and simplexml (not the simplexml from PHP5). --- diff --git a/NEWS b/NEWS index 3597881..6781a21 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,16 @@ +New facilities for the MARC module. The reading - and writing of +content are separate methods for the yaz_marc_t handle. The following +read functions are available: yaz_marc_read_iso2709 (Reads MARC in +ISO2709 format), yaz_marc_read_xml (reads MARC in MARCXML/MarcXchange +format). Write functions have prefix yaz_marc_write_.. The existing +utilities yaz_marc_decode_wrbuf and yaz_marc_decode_buf are still +available. Removed support for OAI-MARC and simplexml (not the +simplexml from PHP5). + Added a new ZOOM event type ZOOM_EVENT_END which signals no more events to be returned for this connection. ---- 2.1.16 2006/03/31 +--- 2.1.16 2006/03/31 Allow multiple languages and charsets to be specified with yaz-client. Each item must be separated by comma (NO BLANKS). E.g. @@ -11,10 +20,10 @@ Translation of proximity nodes from CQL into PQF now works. Moved to automake 1.8, 1.9. -Added function yaz_log_set_handler which allows a log handler -to be installed. This handler will be called for all log messages. -Output to file is also produced; but that can be disabled by passing -NULL fname to yaz_log_init_file. +Added function yaz_log_set_handler which allows a log handler to be +installed. This handler will be called for all log messages. Output +to file is also produced; but that can be disabled by passing NULL +fname to yaz_log_init_file. Fixed another problem with MARC-8 -> ISO-8859-1 conversions. Bug #537. diff --git a/doc/yaz-marcdump-man.xml b/doc/yaz-marcdump-man.xml index e73d9be..257b0b1 100644 --- a/doc/yaz-marcdump-man.xml +++ b/doc/yaz-marcdump-man.xml @@ -1,5 +1,5 @@ - + yaz-marcdump @@ -18,7 +18,6 @@ - @@ -29,14 +28,26 @@ DESCRIPTION - yaz-marcdump reads ISO2709/MARC records from one or + yaz-marcdump reads MARC records from one or more files. - It validates each record and supports output in line-format, - MARCXML, OAIMARC as well as Hex output. + It parses each record and supports output in line-format, + ISO2709, MARCXML, MarcXchange as well as Hex output. - By default, each record is printed to standard output in a line + This utility parses records ISO2709(raw MARC) as well as XML + if that is structured as MARCXML/MarcXchange. + + + + As of YAZ 2.1.18, OAI-MARC is no longer supported. + OAI-MARC is deprecated. Use MARCXML instead. + + + + By default, each record is written to standard output in a line format with newline for each field, $x for each subfield x. + The output format may be changed with options -X, + -e, -I. yaz-marcdump can also be requested to perform @@ -50,16 +61,16 @@ -x - Print MARC records in a simple XML format. - This format is equivalent to YAZ_MARC_SIMPLEXML in - yaz/marcdisp.h. + Reads MARC records in MARCXML/MarcXchange format. Without + this option, yaz-marcdump reads records + in ISO2709 format. -X - Print MARC records in MARCXML. + Writes MARC records in MARCXML. This format is equivalent to YAZ_MARC_MARCXML in yaz/marcdisp.h. @@ -68,7 +79,7 @@ -e - Print MARC records in MarcXchange format. + Writes MARC records in MarcXchange format. This format is equivalent to YAZ_MARC_XCHANGE in yaz/marcdisp.h. @@ -77,22 +88,13 @@ -I - Print MARC records in ISO2709 format. + Writes MARC records in ISO2709 format. This format is equivalent to YAZ_MARC_ISO2709 in yaz/marcdisp.h. - -O - - Print MARC records in OAIMARC. Another XML variant. - This format is equivalent to YAZ_MARC_OAIMARC in - yaz/marcdisp.h. - - - - -ffrom] Specify the character set from @@ -113,7 +115,7 @@ -v - Print more information about the parsing process. + Writes more information about the parsing process. Useful if you have ill-formatted ISO2709 records as input. diff --git a/include/yaz/marcdisp.h b/include/yaz/marcdisp.h index 4c425f3..f6a35dc 100644 --- a/include/yaz/marcdisp.h +++ b/include/yaz/marcdisp.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 1995-2005, Index Data ApS + * Copyright (C) 1995-2006, Index Data ApS * * Permission to use, copy, modify, distribute, and sell this software and * its documentation, in whole or in part, for any purpose, is hereby granted, @@ -23,12 +23,12 @@ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. * - * $Id: marcdisp.h,v 1.14 2005-06-25 15:46:03 adam Exp $ + * $Id: marcdisp.h,v 1.15 2006-04-19 10:05:02 adam Exp $ */ /** * \file marcdisp.h - * \brief Header for MARC display - and conversion utilities + * \brief MARC conversion */ #ifndef MARCDISP_H @@ -40,59 +40,102 @@ YAZ_BEGIN_CDECL +/** \brief a yaz_marc_t handle (private content) */ typedef struct yaz_marc_t_ *yaz_marc_t; -/* create handler */ +/** \brief construct yaz_marc_t handle */ YAZ_EXPORT yaz_marc_t yaz_marc_create(void); -/* destroy */ + +/** \brief destroy yaz_marc_t handle */ YAZ_EXPORT void yaz_marc_destroy(yaz_marc_t mt); -/* set XML mode YAZ_MARC_LINE, YAZ_MARC_SIMPLEXML, ... */ +/** \brief set XML mode YAZ_MARC_LINE, YAZ_MARC_SIMPLEXML, ... */ YAZ_EXPORT void yaz_marc_xml(yaz_marc_t mt, int xmlmode); + +/** \brief Output format: Line-format */ #define YAZ_MARC_LINE 0 +/** \brief Output format: simplexml (no longer supported) */ #define YAZ_MARC_SIMPLEXML 1 +/** \brief Output format: OAI-MARC (no longer supported) */ #define YAZ_MARC_OAIMARC 2 +/** \brief Output format: MARCXML */ #define YAZ_MARC_MARCXML 3 +/** \brief Output format: ISO2709 */ #define YAZ_MARC_ISO2709 4 +/** \brief Output format: MarcXchange */ #define YAZ_MARC_XCHANGE 5 -/* supply iconv handle for character set conversion .. */ +/** \brief supply iconv handle for character set conversion .. */ YAZ_EXPORT void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd); -/* set debug level, 0=none, 1=more, 2=even more, .. */ +/** \brief set debug level + \param mt handle + \param level level, where 0=lowest, 1 more debug, 2 even more +*/ YAZ_EXPORT void yaz_marc_debug(yaz_marc_t mt, int level); -/* decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure. - On success, result in *result with size *rsize. */ -YAZ_EXPORT int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize, - char **result, int *rsize); - -/* decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure. - On success, result in WRBUF */ -YAZ_EXPORT int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, - int bsize, WRBUF wrbuf); - -/* old functions (depricated) */ -YAZ_EXPORT int marc_display (const char *buf, FILE *outf); -YAZ_EXPORT int marc_display_ex (const char *buf, FILE *outf, int debug); -YAZ_EXPORT int marc_display_exl (const char *buf, FILE *outf, int debug, - int length); -YAZ_EXPORT int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, - int bsize); +/** \brief decodes ISO2709 buffer using straight buffers + \param mt marc handle + \param buf input buffer + \param bsize size of buffer or (-1 if "any size") + \param result result to be stored here (allocate before use!) + \param rsize size of result (set before calling) + + decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure. + On success, result in *result with size *rsize. + Returns -1 on error, size of input record (>0) if OK +*/ +YAZ_EXPORT int yaz_marc_decode_buf(yaz_marc_t mt, const char *buf, int bsize, + char **result, int *rsize); + +/** \brief decodes ISO2709/MAC buffer and stores result in WRBUF */ +YAZ_EXPORT int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, + int bsize, WRBUF wrbuf); + +/** \brief depricated */ +YAZ_EXPORT int marc_display(const char *buf, FILE *outf); +/** \brief depricated */ +YAZ_EXPORT int marc_display_ex(const char *buf, FILE *outf, int debug); +/** \brief depricated */ +YAZ_EXPORT int marc_display_exl(const char *buf, FILE *outf, int debug, + int length); +/** \brief depricated */ +YAZ_EXPORT int marc_display_wrbuf(const char *buf, WRBUF wr, int debug, + int bsize); +/** \brief depricated */ YAZ_EXPORT int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml); YAZ_EXPORT void yaz_marc_subfield_str(yaz_marc_t mt, const char *s); YAZ_EXPORT void yaz_marc_endline_str(yaz_marc_t mt, const char *s); -/* like atoi except that it reads exactly len characters */ -YAZ_EXPORT int atoi_n (const char *buf, int len); +/** \brief like atoi except that it reads exactly len characters */ +YAZ_EXPORT int atoi_n(const char *buf, int len); -/* MARC control characters */ +/** \brief MARC control char: record separator (29 Dec, 1D Hex) */ #define ISO2709_RS 035 +/** \brief MARC control char: field separator (30 Dec, 1E Hex) */ #define ISO2709_FS 036 +/** \brief MARC control char: identifier-field separator (31 Dec, 1F Hex) */ #define ISO2709_IDFS 037 +/** \brief read ISO2709/MARC record from buffer */ +YAZ_EXPORT int yaz_marc_read_iso2709(yaz_marc_t mt, + const char *buf, int bsize); +/** \brief read MARCXML record from buffer */ +YAZ_EXPORT int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode); + +/** \brief writes record in line format */ +YAZ_EXPORT int yaz_marc_write_line(yaz_marc_t mt, WRBUF wrbuf); +/** \brief writes record in MARCXML format */ +YAZ_EXPORT int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wrbuf); +/** \brief writes record in MarcXchange format */ +YAZ_EXPORT int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wrbuf); +/** \brief writes record in ISO2709 format */ +YAZ_EXPORT int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wrbuf); +/** \brief writes record in mode - given by yaz_marc_xml mode */ +YAZ_EXPORT int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr); + YAZ_END_CDECL #endif diff --git a/include/yaz/wrbuf.h b/include/yaz/wrbuf.h index 44ab87e..0999a45 100644 --- a/include/yaz/wrbuf.h +++ b/include/yaz/wrbuf.h @@ -23,7 +23,7 @@ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. * - * $Id: wrbuf.h,v 1.16 2005-09-27 17:52:46 adam Exp $ + * $Id: wrbuf.h,v 1.17 2006-04-19 10:05:02 adam Exp $ */ /** * \file wrbuf.h @@ -53,11 +53,17 @@ YAZ_EXPORT int wrbuf_write(WRBUF b, const char *buf, int size); YAZ_EXPORT int wrbuf_xmlputs_n(WRBUF b, const char *cp, int size); YAZ_EXPORT int wrbuf_puts(WRBUF b, const char *buf); YAZ_EXPORT int wrbuf_xmlputs(WRBUF b, const char *cp); -YAZ_EXPORT void wrbuf_printf(WRBUF b, const char *fmt, ...); +YAZ_EXPORT void wrbuf_printf(WRBUF b, const char *fmt, ...) +#ifdef __GNUC__ + __attribute__ ((format (printf, 2, 3))) +#endif + ; + YAZ_EXPORT int wrbuf_iconv_write(WRBUF b, yaz_iconv_t cd, const char *buf, int size); YAZ_EXPORT int wrbuf_iconv_write_cdata(WRBUF b, yaz_iconv_t cd, const char *buf, int size); +YAZ_EXPORT int wrbuf_iconv_puts(WRBUF b, yaz_iconv_t cd, const char *strz); YAZ_EXPORT void wrbuf_chop_right(WRBUF b); diff --git a/src/marcdisp.c b/src/marcdisp.c index 6dba155..ea4b2bc 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -1,19 +1,24 @@ /* - * Copyright (C) 1995-2005, Index Data ApS + * Copyright (C) 1995-2006, Index Data ApS * See the file LICENSE for details. * - * $Id: marcdisp.c,v 1.25 2006-01-26 15:37:05 adam Exp $ + * $Id: marcdisp.c,v 1.26 2006-04-19 10:05:03 adam Exp $ */ /** * \file marcdisp.c - * \brief Implements MARC display - and conversion utilities + * \brief Implements MARC conversion utilities */ #if HAVE_CONFIG_H #include #endif +#ifdef WIN32 +#include +#endif + +#include #include #include #include @@ -21,13 +26,70 @@ #include #include +#if HAVE_XML2 +#include +#include +#endif + +static void yaz_marc_reset(yaz_marc_t mt); + +/** \brief node types for yaz_marc_node */ +enum YAZ_MARC_NODE_TYPE +{ + YAZ_MARC_DATAFIELD, + YAZ_MARC_CONTROLFIELD, + YAZ_MARC_COMMENT, + YAZ_MARC_LEADER +}; + +/** \brief represets a data field */ +struct yaz_marc_datafield { + char *tag; + char *indicator; + struct yaz_marc_subfield *subfields; +}; + +/** \brief represents a control field */ +struct yaz_marc_controlfield { + char *tag; + char *data; +}; + +/** \brief a comment node */ +struct yaz_marc_comment { + char *comment; +}; + +/** \brief MARC node */ +struct yaz_marc_node { + enum YAZ_MARC_NODE_TYPE which; + union { + struct yaz_marc_datafield datafield; + struct yaz_marc_controlfield controlfield; + char *comment; + char *leader; + } u; + struct yaz_marc_node *next; +}; + +/** \brief represents a subfield */ +struct yaz_marc_subfield { + char *code_data; + struct yaz_marc_subfield *next; +}; + +/** \brief the internals of a yaz_marc_t handle */ struct yaz_marc_t_ { WRBUF m_wr; + NMEM nmem; int xml; int debug; yaz_iconv_t iconv_cd; char subfield_str[8]; char endline_str[8]; + struct yaz_marc_node *nodes; + struct yaz_marc_node **nodes_pp; + struct yaz_marc_subfield **subfield_pp; }; yaz_marc_t yaz_marc_create(void) @@ -39,9 +101,263 @@ yaz_marc_t yaz_marc_create(void) mt->iconv_cd = 0; strcpy(mt->subfield_str, " $"); strcpy(mt->endline_str, "\n"); + + mt->nmem = nmem_create(); + yaz_marc_reset(mt); return mt; } +void yaz_marc_destroy(yaz_marc_t mt) +{ + if (!mt) + return ; + nmem_destroy(mt->nmem); + wrbuf_free (mt->m_wr, 1); + xfree (mt); +} + +struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt) +{ + struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n)); + n->next = 0; + *mt->nodes_pp = n; + mt->nodes_pp = &n->next; + return n; +} + +void yaz_marc_add_comment(yaz_marc_t mt, char *comment) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_COMMENT; + n->u.comment = nmem_strdup(mt->nmem, comment); +} + +#if HAVE_XML2 +static char *yaz_marc_get_xml_text(const xmlNode *ptr_cdata, NMEM nmem) +{ + char *cdata; + int len = 0; + const xmlNode *ptr; + + for (ptr = ptr_cdata; ptr; ptr = ptr->next) + if (ptr->type == XML_TEXT_NODE) + len += xmlStrlen(ptr->content); + cdata = (char *) nmem_malloc(nmem, len+1); + *cdata = '\0'; + for (ptr = ptr_cdata; ptr; ptr = ptr->next) + if (ptr->type == XML_TEXT_NODE) + strcat(cdata, (const char *) ptr->content); + return cdata; +} +#endif + +void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...) +{ + va_list ap; + char buf[200]; + va_start(ap, fmt); + +#ifdef WIN32 + _vsnprintf(buf, sizeof(buf)-1, fmt, ap); +#else +/* !WIN32 */ +#if HAVE_VSNPRINTF + vsnprintf(buf, sizeof(buf), fmt, ap); +#else + vsprintf(buf, fmt, ap); +#endif +#endif +/* WIN32 */ + yaz_marc_add_comment(mt, buf); + va_end (ap); +} + +void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_LEADER; + n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len); +} + +void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag, + const char *data, size_t data_len) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_CONTROLFIELD; + n->u.controlfield.tag = nmem_strdup(mt->nmem, tag); + n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len); + if (mt->debug) + { + size_t i; + char msg[80]; + + sprintf(msg, "controlfield:"); + for (i = 0; i < 16 && i < data_len; i++) + sprintf(msg + strlen(msg), " %02X", data[i] & 0xff); + if (i < data_len) + sprintf(msg + strlen(msg), " .."); + yaz_marc_add_comment(mt, msg); + } +} + +#if HAVE_XML2 +void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, + const xmlNode *ptr_data) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_CONTROLFIELD; + n->u.controlfield.tag = yaz_marc_get_xml_text(ptr_tag, mt->nmem); + n->u.controlfield.data = yaz_marc_get_xml_text(ptr_data, mt->nmem); +} +#endif + +void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag, + const char *indicator, size_t indicator_len) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_DATAFIELD; + n->u.datafield.tag = nmem_strdup(mt->nmem, tag); + n->u.datafield.indicator = + nmem_strdupn(mt->nmem, indicator, indicator_len); + n->u.datafield.subfields = 0; + + /* make subfield_pp the current (last one) */ + mt->subfield_pp = &n->u.datafield.subfields; +} + +#if HAVE_XML2 +void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, + const char *indicator, size_t indicator_len) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_DATAFIELD; + n->u.datafield.tag = yaz_marc_get_xml_text(ptr_tag, mt->nmem); + n->u.datafield.indicator = + nmem_strdupn(mt->nmem, indicator, indicator_len); + n->u.datafield.subfields = 0; + + /* make subfield_pp the current (last one) */ + mt->subfield_pp = &n->u.datafield.subfields; +} +#endif + +void yaz_marc_add_subfield(yaz_marc_t mt, + const char *code_data, size_t code_data_len) +{ + if (mt->debug) + { + size_t i; + char msg[80]; + + sprintf(msg, "subfield:"); + for (i = 0; i < 16 && i < code_data_len; i++) + sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff); + if (i < code_data_len) + sprintf(msg + strlen(msg), " .."); + yaz_marc_add_comment(mt, msg); + } + + if (mt->subfield_pp) + { + struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n)); + n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len); + n->next = 0; + /* mark subfield_pp to point to this one, so we append here next */ + *mt->subfield_pp = n; + mt->subfield_pp = &n->next; + } +} + +static int atoi_n_check(const char *buf, int size, int *val) +{ + if (!isdigit(*(const unsigned char *) buf)) + return 0; + *val = atoi_n(buf, size); + return 1; +} + +/** \brief reads the MARC 24 bytes leader and checks content + \param mt handle + \param leader of the 24 byte leader + \param indicator_length indicator length + \param identifier_length identifier length + \param base_address base address + \param length_data_entry length of data entry + \param length_starting length of starting + \param length_implementation length of implementation defined data +*/ +static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c, + int *indicator_length, + int *identifier_length, + int *base_address, + int *length_data_entry, + int *length_starting, + int *length_implementation) +{ + char leader[24]; + + memcpy(leader, leader_c, 24); + + if (!atoi_n_check(leader+10, 1, indicator_length)) + { + yaz_marc_cprintf(mt, + "Indicator length at offset 10 should hold a digit." + " Assuming 2"); + leader[10] = '2'; + *indicator_length = 2; + } + if (!atoi_n_check(leader+11, 1, identifier_length)) + { + yaz_marc_cprintf(mt, + "Identifier length at offset 11 should hold a digit." + " Assuming 2"); + leader[11] = '2'; + *identifier_length = 2; + } + if (!atoi_n_check(leader+12, 5, base_address)) + { + yaz_marc_cprintf(mt, + "Base address at offsets 12..16 should hold a number." + " Assuming 0"); + *base_address = 0; + } + if (!atoi_n_check(leader+20, 1, length_data_entry)) + { + yaz_marc_cprintf(mt, + "Length data entry at offset 20 should hold a digit." + " Assuming 4"); + *length_data_entry = 4; + leader[20] = '4'; + } + if (!atoi_n_check(leader+21, 1, length_starting)) + { + yaz_marc_cprintf(mt, + "Length starting at offset 21 should hold a digit." + " Assuming 5"); + *length_starting = 5; + leader[21] = '5'; + } + if (!atoi_n_check(leader+22, 1, length_implementation)) + { + yaz_marc_cprintf(mt, + "Length implementation at offset 22 should hold a digit." + " Assuming 0"); + *length_implementation = 0; + leader[22] = '0'; + } + + if (mt->debug) + { + yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length); + yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length); + yaz_marc_cprintf(mt, "Base address %5d", *base_address); + yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry); + yaz_marc_cprintf(mt, "Length starting %5d", *length_starting); + yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation); + } + yaz_marc_add_leader(mt, leader, 24); +} + void yaz_marc_subfield_str(yaz_marc_t mt, const char *s) { strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1); @@ -54,14 +370,6 @@ void yaz_marc_endline_str(yaz_marc_t mt, const char *s) mt->endline_str[sizeof(mt->endline_str)-1] = '\0'; } -void yaz_marc_destroy(yaz_marc_t mt) -{ - if (!mt) - return ; - wrbuf_free (mt->m_wr, 1); - xfree (mt); -} - static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr) { if (mt->xml == YAZ_MARC_ISO2709) @@ -96,189 +404,584 @@ static size_t cdata_one_character(yaz_marc_t mt, const char *buf) return 1; /* we don't know */ } -static int atoi_n_check(const char *buf, int size, int *val) +static void yaz_marc_reset(yaz_marc_t mt) { - if (!isdigit(*(const unsigned char *) buf)) - return 0; - *val = atoi_n(buf, size); - return 1; + nmem_reset(mt->nmem); + mt->nodes = 0; + mt->nodes_pp = &mt->nodes; + mt->subfield_pp = 0; } -int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) +int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) { - int entry_p; - int record_length; - int indicator_length; + struct yaz_marc_node *n; int identifier_length; - int end_of_directory; - int base_address; - int length_data_entry; - int length_starting; - int length_implementation; - char lead[24]; - int produce_warnings = 0; + const char *leader = 0; - if (mt->debug) - produce_warnings = 1; - if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_OAIMARC - || mt->xml == YAZ_MARC_MARCXML || mt->xml == YAZ_MARC_XCHANGE) - produce_warnings = 1; + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + break; + } + + if (!leader) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; - record_length = atoi_n (buf, 5); - if (record_length < 25) + for (n = mt->nodes; n; n = n->next) { - if (mt->debug) - wrbuf_printf(wr, "\n", - record_length); - return -1; + struct yaz_marc_subfield *s; + switch(n->which) + { + case YAZ_MARC_DATAFIELD: + wrbuf_printf(wr, "%s %s", n->u.datafield.tag, + n->u.datafield.indicator); + for (s = n->u.datafield.subfields; s; s = s->next) + { + /* if identifier length is 2 (most MARCs), + the code is a single character .. However we've + seen multibyte codes, so see how big it really is */ + size_t using_code_len = + (identifier_length != 2) ? identifier_length - 1 + : + cdata_one_character(mt, s->code_data); + + wrbuf_puts (wr, mt->subfield_str); + wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, + using_code_len); + wrbuf_printf(wr, " "); + wrbuf_iconv_puts(wr, mt->iconv_cd, + s->code_data + using_code_len); + } + wrbuf_puts (wr, mt->endline_str); + break; + case YAZ_MARC_CONTROLFIELD: + wrbuf_printf(wr, "%s ", n->u.controlfield.tag); + wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data); + wrbuf_puts (wr, mt->endline_str); + break; + case YAZ_MARC_COMMENT: + wrbuf_puts(wr, "("); + wrbuf_iconv_write(wr, mt->iconv_cd, + n->u.comment, strlen(n->u.comment)); + wrbuf_puts(wr, ")\n"); + break; + case YAZ_MARC_LEADER: + wrbuf_printf(wr, "%s\n", n->u.leader); + } } - memcpy(lead, buf, 24); /* se can modify the header for output */ + return 0; +} - /* ballout if bsize is known and record_length is less than that */ - if (bsize != -1 && record_length > bsize) +int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr) +{ + switch(mt->xml) + { + case YAZ_MARC_LINE: + return yaz_marc_write_line(mt, wr); + case YAZ_MARC_MARCXML: + return yaz_marc_write_marcxml(mt, wr); + case YAZ_MARC_XCHANGE: + return yaz_marc_write_marcxchange(mt, wr); + case YAZ_MARC_ISO2709: + return yaz_marc_write_iso2709(mt, wr); + } + return -1; +} + +static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, + const char *ns) +{ + struct yaz_marc_node *n; + int identifier_length; + const char *leader = 0; + + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + break; + } + + if (!leader) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) return -1; - if (!atoi_n_check(buf+10, 1, &indicator_length)) + + wrbuf_printf(wr, "\n", ns); + for (n = mt->nodes; n; n = n->next) { - if (produce_warnings) - wrbuf_printf(wr, "\n"); - lead[10] = '2'; - indicator_length = 2; + struct yaz_marc_subfield *s; + switch(n->which) + { + case YAZ_MARC_DATAFIELD: + wrbuf_printf(wr, " iconv_cd, n->u.datafield.tag, + strlen(n->u.datafield.tag)); + wrbuf_printf(wr, "\""); + if (n->u.datafield.indicator) + { + int i; + for (i = 0; n->u.datafield.indicator[i]; i++) + { + wrbuf_printf(wr, " ind%d=\"", i+1); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + n->u.datafield.indicator+i, 1); + wrbuf_printf(wr, "\""); + } + } + wrbuf_printf(wr, ">\n"); + for (s = n->u.datafield.subfields; s; s = s->next) + { + /* if identifier length is 2 (most MARCs), + the code is a single character .. However we've + seen multibyte codes, so see how big it really is */ + size_t using_code_len = + (identifier_length != 2) ? identifier_length - 1 + : + cdata_one_character(mt, s->code_data); + + wrbuf_puts(wr, " iconv_cd, + s->code_data, using_code_len); + wrbuf_puts(wr, "\">"); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + s->code_data + using_code_len, + strlen(s->code_data + using_code_len)); + wrbuf_puts(wr, "\n"); + } + wrbuf_printf(wr, " \n"); + break; + case YAZ_MARC_CONTROLFIELD: + wrbuf_printf(wr, " iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_printf(wr, "\">"); + wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data); + wrbuf_printf(wr, "\n"); + break; + case YAZ_MARC_COMMENT: + wrbuf_printf(wr, "\n", n->u.comment); + break; + case YAZ_MARC_LEADER: + wrbuf_printf(wr, " "); + wrbuf_iconv_write_cdata(wr, + 0 /* no charset conversion for leader */, + n->u.leader, strlen(n->u.leader)); + wrbuf_printf(wr, "\n"); + } } - if (!atoi_n_check(buf+11, 1, &identifier_length)) + wrbuf_puts(wr, "\n"); + return 0; +} + +int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr) +{ + return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim"); +} + +int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr) +{ + return yaz_marc_write_marcxml_ns(mt, wr, + "http://www.bs.dk/standards/MarcXchange"); +} + +int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) +{ + struct yaz_marc_node *n; + int indicator_length; + int identifier_length; + int length_data_entry; + int length_starting; + int length_implementation; + int data_offset = 0; + const char *leader = 0; + WRBUF wr_dir, wr_head; + int base_address; + + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + leader = n->u.leader; + + if (!leader) + return -1; + if (!atoi_n_check(leader+10, 1, &indicator_length)) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + if (!atoi_n_check(leader+20, 1, &length_data_entry)) + return -1; + if (!atoi_n_check(leader+21, 1, &length_starting)) + return -1; + if (!atoi_n_check(leader+22, 1, &length_implementation)) + return -1; + + wr_dir = wrbuf_alloc(); + for (n = mt->nodes; n; n = n->next) { - if (produce_warnings) - wrbuf_printf(wr, "\n"); - lead[11] = '2'; - identifier_length = 2; + int data_length = 0; + struct yaz_marc_subfield *s; + switch(n->which) + { + case YAZ_MARC_DATAFIELD: + wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag); + data_length += indicator_length; + for (s = n->u.datafield.subfields; s; s = s->next) + data_length += 1+strlen(s->code_data); + data_length++; + break; + case YAZ_MARC_CONTROLFIELD: + wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag); + data_length += strlen(n->u.controlfield.data); + data_length++; + break; + case YAZ_MARC_COMMENT: + break; + case YAZ_MARC_LEADER: + break; + } + if (data_length) + { + wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length); + wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset); + data_offset += data_length; + } } - if (!atoi_n_check(buf+12, 5, &base_address)) + /* mark end of directory */ + wrbuf_putc(wr_dir, ISO2709_FS); + + /* base address of data (comes after leader+directory) */ + base_address = 24 + wrbuf_len(wr_dir); + + wr_head = wrbuf_alloc(); + + /* write record length */ + wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1); + /* from "original" leader */ + wrbuf_write(wr_head, leader+5, 7); + /* base address of data */ + wrbuf_printf(wr_head, "%05d", base_address); + /* from "original" leader */ + wrbuf_write(wr_head, leader+17, 7); + + wrbuf_write(wr, wrbuf_buf(wr_head), 24); + wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir)); + wrbuf_free(wr_head, 1); + wrbuf_free(wr_dir, 1); + + for (n = mt->nodes; n; n = n->next) { - if (produce_warnings) - wrbuf_printf(wr, "\n"); - base_address = 0; + struct yaz_marc_subfield *s; + switch(n->which) + { + case YAZ_MARC_DATAFIELD: + wrbuf_printf(wr, "%.*s", indicator_length, + n->u.datafield.indicator); + for (s = n->u.datafield.subfields; s; s = s->next) + wrbuf_printf(wr, "%c%s", ISO2709_IDFS, s->code_data); + wrbuf_printf(wr, "%c", ISO2709_FS); + break; + case YAZ_MARC_CONTROLFIELD: + wrbuf_printf(wr, "%s%c", n->u.controlfield.data, ISO2709_FS); + break; + case YAZ_MARC_COMMENT: + break; + case YAZ_MARC_LEADER: + break; + } } - if (!atoi_n_check(buf+20, 1, &length_data_entry)) + wrbuf_printf(wr, "%c", ISO2709_RS); + return 0; +} + +#if HAVE_XML2 +int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr) +{ + for (; ptr; ptr = ptr->next) { - if (produce_warnings) - wrbuf_printf(wr, "\n"); - length_data_entry = 4; - lead[20] = '4'; + if (ptr->type == XML_ELEMENT_NODE) + { + if (!strcmp((const char *) ptr->name, "subfield")) + { + size_t ctrl_data_len = 0; + char *ctrl_data_buf = 0; + const xmlNode *p = 0, *ptr_code = 0; + struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + if (!strcmp((const char *)attr->name, "code")) + ptr_code = attr->children; + else + { + yaz_marc_cprintf( + mt, "Bad attribute '%.80s' for 'subfield'", + attr->name); + return -1; + } + if (!ptr_code) + { + yaz_marc_cprintf( + mt, "Missing attribute 'code' for 'subfield'" ); + return -1; + } + if (ptr_code->type == XML_TEXT_NODE) + { + ctrl_data_len = + strlen((const char *)ptr_code->content); + } + else + { + yaz_marc_cprintf( + mt, "Missing value for 'code' in 'subfield'" ); + return -1; + } + for (p = ptr->children; p ; p = p->next) + if (p->type == XML_TEXT_NODE) + ctrl_data_len += strlen((const char *)p->content); + ctrl_data_buf = nmem_malloc(mt->nmem, ctrl_data_len+1); + strcpy(ctrl_data_buf, (const char *)ptr_code->content); + for (p = ptr->children; p ; p = p->next) + if (p->type == XML_TEXT_NODE) + strcat(ctrl_data_buf, (const char *)p->content); + yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len); + } + else + { + yaz_marc_cprintf( + mt, "Expected element 'subfield', got '%.80s'", ptr->name); + return -1; + } + } } - if (!atoi_n_check(buf+21, 1, &length_starting)) + return 0; +} + +static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p) +{ + int indicator_length; + int identifier_length; + int base_address; + int length_data_entry; + int length_starting; + int length_implementation; + const char *leader = 0; + const xmlNode *ptr = *ptr_p; + + for(; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE) + { + if (!strcmp((const char *) ptr->name, "leader")) + { + xmlNode *p = ptr->children; + for(; p; p = p->next) + if (p->type == XML_TEXT_NODE) + leader = (const char *) p->content; + break; + } + else + { + yaz_marc_cprintf( + mt, "Expected element 'leader', got '%.80s'", ptr->name); + return -1; + } + } + if (!leader) { - if (produce_warnings) - wrbuf_printf(wr, "\n"); - length_starting = 5; - lead[21] = '5'; + yaz_marc_cprintf(mt, "Missing element 'leader'"); + return -1; } - if (!atoi_n_check(buf+22, 1, &length_implementation)) + if (strlen(leader) != 24) { - if (produce_warnings) - wrbuf_printf(wr, "\n"); - length_implementation = 0; - lead[22] = '0'; + yaz_marc_cprintf(mt, "Bad length %d of leader data." + " Must have length of 24 characters", strlen(leader)); + return -1; } + yaz_marc_read_leader(mt, leader, + &indicator_length, + &identifier_length, + &base_address, + &length_data_entry, + &length_starting, + &length_implementation); + *ptr_p = ptr; + return 0; +} - if (mt->xml != YAZ_MARC_LINE) - { - char str[80]; - int i; - switch(mt->xml) +static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr) +{ + for(; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE) { - case YAZ_MARC_ISO2709: - break; - case YAZ_MARC_SIMPLEXML: - wrbuf_puts (wr, "name, "controlfield")) + { + const xmlNode *ptr_tag = 0; + struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + if (!strcmp((const char *)attr->name, "tag")) + ptr_tag = attr->children; + else + { + yaz_marc_cprintf( + mt, "Bad attribute '%.80s' for 'controlfield'", + attr->name); + return -1; + } + if (!ptr_tag) + { + yaz_marc_cprintf( + mt, "Missing attribute 'tag' for 'controlfield'" ); + return -1; + } + yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children); + } + else if (!strcmp((const char *) ptr->name, "datafield")) { - sprintf (str, " ImplDefined%d=\"%c\"\n", i, buf[6+i]); - wrbuf_puts (wr, str); + char indstr[11]; /* 0(unused), 1,....9, + zero term */ + const xmlNode *ptr_tag = 0; + struct _xmlAttr *attr; + int i; + for (i = 0; i<11; i++) + indstr[i] = '\0'; + for (attr = ptr->properties; attr; attr = attr->next) + if (!strcmp((const char *)attr->name, "tag")) + ptr_tag = attr->children; + else if (strlen((const char *)attr->name) == 4 && + !memcmp(attr->name, "ind", 3)) + { + int no = atoi((const char *)attr->name+3); + if (attr->children + && attr->children->type == XML_TEXT_NODE) + indstr[no] = attr->children->content[0]; + } + else + { + yaz_marc_cprintf( + mt, "Bad attribute '%.80s' for 'datafield'", + attr->name); + return -1; + } + if (!ptr_tag) + { + yaz_marc_cprintf( + mt, "Missing attribute 'tag' for 'datafield'" ); + return -1; + } + /* note that indstr[0] is unused so we use indstr[1..] */ + yaz_marc_add_datafield_xml(mt, ptr_tag, + indstr+1, strlen(indstr+1)); + + if (yaz_marc_read_xml_subfields(mt, ptr->children)) + return -1; + } + else + { + yaz_marc_cprintf(mt, + "Expected element controlfield or datafield," + " got %.80s", ptr->name); + return -1; + } + } + return 0; +} + +int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode) +{ + const xmlNode *ptr = xmlnode; + for(; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE) + { + if (!strcmp((const char *) ptr->name, "record")) + break; + else + { + yaz_marc_cprintf( + mt, "Unknown element '%.80s' in MARC XML reader", + ptr->name); + return -1; } - wrbuf_puts (wr, ">\n"); - break; - case YAZ_MARC_OAIMARC: - wrbuf_puts( - wr, - "\n", - buf[5], buf[6], buf[7]); - wrbuf_puts (wr, str); - break; - case YAZ_MARC_MARCXML: - wrbuf_printf( - wr, - "\n" - " "); - lead[9] = 'a'; /* set leader to signal unicode */ - marc_cdata(mt, lead, 24, wr); - wrbuf_printf(wr, "\n"); - break; - case YAZ_MARC_XCHANGE: - wrbuf_printf( - wr, - "\n" - " "); - marc_cdata(mt, lead, 24, wr); - wrbuf_printf(wr, "\n"); - break; } + if (!ptr) + { + yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record"); + return -1; } - if (mt->debug) + /* ptr points to record node now */ + ptr = ptr->children; + if (yaz_marc_read_xml_leader(mt, &ptr)) + return -1; + return yaz_marc_read_xml_fields(mt, ptr->next); +} +#else +int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode) +{ + return -1; +} +#endif + +int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize) +{ + int entry_p; + int record_length; + int indicator_length; + int identifier_length; + int end_of_directory; + int base_address; + int length_data_entry; + int length_starting; + int length_implementation; + + yaz_marc_reset(mt); + + record_length = atoi_n (buf, 5); + if (record_length < 25) + { + yaz_marc_cprintf(mt, "Record length %d < 24", record_length); + return -1; + } + /* ballout if bsize is known and record_length is less than that */ + if (bsize != -1 && record_length > bsize) { - char str[40]; - - wrbuf_puts (wr, "\n"); + yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d", + record_length, bsize); + return -1; } + if (mt->debug) + yaz_marc_cprintf(mt, "Record length %5d", record_length); - /* first pass. determine length of directory & base of data */ + yaz_marc_read_leader(mt, buf, + &indicator_length, + &identifier_length, + &base_address, + &length_data_entry, + &length_starting, + &length_implementation); + + /* First pass. determine length of directory & base of data */ for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) { /* length of directory entry */ int l = 3 + length_data_entry + length_starting; if (entry_p + l >= record_length) { - wrbuf_printf (wr, "\n", entry_p); + yaz_marc_cprintf(mt, "Directory offset %d: end of record." + " Missing FS char", entry_p); return -1; } if (mt->debug) - wrbuf_printf (wr, "\n", - entry_p, buf+entry_p); - /* check for digits in length info */ + { + yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s", + entry_p, buf+entry_p); + } + /* Check for digits in length info */ while (--l >= 3) if (!isdigit(*(const unsigned char *) (buf + entry_p+l))) break; if (l >= 3) { - /* not all digits, so stop directory scan */ - wrbuf_printf (wr, "\n", entry_p); + /* Not all digits, so stop directory scan */ + yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data" + " length and/or length starting", entry_p); break; } entry_p += 3 + length_data_entry + length_starting; @@ -286,71 +989,17 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) end_of_directory = entry_p; if (base_address != entry_p+1) { - if (produce_warnings) - wrbuf_printf (wr,"\n", base_address, entry_p+1); + yaz_marc_cprintf(mt, "Base address not at end of directory," + " base %d, end %d", base_address, entry_p+1); } - if (mt->xml == YAZ_MARC_ISO2709) - { - WRBUF wr_head = wrbuf_alloc(); - WRBUF wr_dir = wrbuf_alloc(); - WRBUF wr_tmp = wrbuf_alloc(); - int data_p = 0; - /* second pass. create directory for ISO2709 output */ - for (entry_p = 24; entry_p != end_of_directory; ) - { - int data_length, data_offset, end_offset; - int i, sz1, sz2; - - wrbuf_write(wr_dir, buf+entry_p, 3); - entry_p += 3; - - data_length = atoi_n (buf+entry_p, length_data_entry); - entry_p += length_data_entry; - data_offset = atoi_n (buf+entry_p, length_starting); - entry_p += length_starting; - i = data_offset + base_address; - end_offset = i+data_length-1; - - if (data_length <= 0 || data_offset < 0 || end_offset >= record_length) - return -1; - - while (i < end_offset && - buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) - i++; - sz1 = 1+i - (data_offset + base_address); - if (mt->iconv_cd) - { - sz2 = wrbuf_iconv_write(wr_tmp, mt->iconv_cd, - buf + data_offset+base_address, sz1); - wrbuf_rewind(wr_tmp); - } - else - sz2 = sz1; - wrbuf_printf(wr_dir, "%0*d", length_data_entry, sz2); - wrbuf_printf(wr_dir, "%0*d", length_starting, data_p); - data_p += sz2; - } - wrbuf_putc(wr_dir, ISO2709_FS); - wrbuf_printf(wr_head, "%05d", data_p+1 + base_address); - wrbuf_write(wr_head, lead+5, 7); - wrbuf_printf(wr_head, "%05d", base_address); - wrbuf_write(wr_head, lead+17, 7); - - wrbuf_write(wr, wrbuf_buf(wr_head), 24); - wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir)); - wrbuf_free(wr_head, 1); - wrbuf_free(wr_dir, 1); - wrbuf_free(wr_tmp, 1); - } - /* third pass. create data output */ + /* Second pass. parse control - and datafields */ for (entry_p = 24; entry_p != end_of_directory; ) { int data_length; int data_offset; int end_offset; - int i, j; + int i; char tag[4]; int identifier_flag = 0; int entry_p0 = entry_p; @@ -358,9 +1007,9 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) memcpy (tag, buf+entry_p, 3); entry_p += 3; tag[3] = '\0'; - data_length = atoi_n (buf+entry_p, length_data_entry); + data_length = atoi_n(buf+entry_p, length_data_entry); entry_p += length_data_entry; - data_offset = atoi_n (buf+entry_p, length_starting); + data_offset = atoi_n(buf+entry_p, length_starting); entry_p += length_starting; i = data_offset + base_address; end_offset = i+data_length-1; @@ -370,15 +1019,14 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) if (mt->debug) { - wrbuf_printf(wr, "\n", - entry_p0, data_length, data_offset); + yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d," + " data-offset %d", + tag, entry_p0, data_length, data_offset); } if (end_offset >= record_length) { - wrbuf_printf (wr,"\n", - entry_p0, end_offset, record_length); + yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d", + entry_p0, end_offset, record_length); break; } @@ -393,200 +1041,60 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) identifier_flag = 2; } - if (mt->debug) - { - wrbuf_printf(wr, "\n", - identifier_flag); - } - - switch(mt->xml) - { - case YAZ_MARC_LINE: - wrbuf_puts (wr, tag); - wrbuf_puts (wr, " "); - break; - case YAZ_MARC_SIMPLEXML: - wrbuf_printf (wr, "xml) - { - case YAZ_MARC_ISO2709: - wrbuf_putc(wr, buf[i]); - break; - case YAZ_MARC_LINE: - wrbuf_putc(wr, buf[i]); - break; - case YAZ_MARC_SIMPLEXML: - wrbuf_printf(wr, " Indicator%d=\"", j+1); - marc_cdata(mt, buf+i, 1, wr); - wrbuf_printf(wr, "\""); - break; - case YAZ_MARC_OAIMARC: - wrbuf_printf(wr, " i%d=\"", j+1); - marc_cdata(mt, buf+i, 1, wr); - wrbuf_printf(wr, "\""); - break; - case YAZ_MARC_MARCXML: - case YAZ_MARC_XCHANGE: - wrbuf_printf(wr, " ind%d=\"", j+1); - marc_cdata(mt, buf+i, 1, wr); - wrbuf_printf(wr, "\""); - } - } - } - if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_MARCXML - || mt->xml == YAZ_MARC_OAIMARC || mt->xml == YAZ_MARC_XCHANGE) - { - wrbuf_puts (wr, ">"); - if (identifier_flag) - wrbuf_puts (wr, "\n"); - } - if (identifier_flag) - { + yaz_marc_add_datafield(mt, tag, buf+i, indicator_length); + i += indicator_length; + while (i < end_offset && buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) { - int i0; - - int sb_octet_length = identifier_length-1; - if (identifier_length == 2) - sb_octet_length = cdata_one_character(mt, buf+i); + int code_offset = i+1; - i++; - switch(mt->xml) - { - case YAZ_MARC_ISO2709: - --i; - wrbuf_iconv_write(wr, mt->iconv_cd, - buf+i, identifier_length); - i += identifier_length; - break; - case YAZ_MARC_LINE: - wrbuf_puts (wr, mt->subfield_str); - marc_cdata(mt, buf+i, sb_octet_length, wr); - i = i+sb_octet_length; - wrbuf_putc (wr, ' '); - break; - case YAZ_MARC_SIMPLEXML: - wrbuf_puts (wr, " "); - break; - case YAZ_MARC_OAIMARC: - wrbuf_puts (wr, " "); - break; - case YAZ_MARC_MARCXML: - case YAZ_MARC_XCHANGE: - wrbuf_puts (wr, " "); - break; - } - i0 = i; + i ++; while (i < end_offset && buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS && - buf[i] != ISO2709_FS) + buf[i] != ISO2709_FS) i++; - marc_cdata(mt, buf + i0, i - i0, wr); - - if (mt->xml == YAZ_MARC_ISO2709 && buf[i] != ISO2709_IDFS) - marc_cdata(mt, buf + i, 1, wr); - - if (mt->xml == YAZ_MARC_SIMPLEXML || - mt->xml == YAZ_MARC_MARCXML || - mt->xml == YAZ_MARC_XCHANGE || - mt->xml == YAZ_MARC_OAIMARC) - wrbuf_puts (wr, "\n"); + yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset); } } else { + /* controlfield */ int i0 = i; while (i < end_offset && buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) i++; - marc_cdata(mt, buf + i0, i - i0, wr); - if (mt->xml == YAZ_MARC_ISO2709) - marc_cdata(mt, buf + i, 1, wr); + yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0); } - if (mt->xml == YAZ_MARC_LINE) - wrbuf_puts (wr, mt->endline_str); if (i < end_offset) - wrbuf_printf(wr, "\n", data_length); + { + yaz_marc_cprintf(mt, "Separator but not at end of field length=%d", + data_length); + } if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) - wrbuf_printf(wr, "\n", data_length); - switch(mt->xml) { - case YAZ_MARC_SIMPLEXML: - wrbuf_puts (wr, "\n"); - break; - case YAZ_MARC_OAIMARC: - if (identifier_flag) - wrbuf_puts (wr, "\n"); - else - wrbuf_puts (wr, "\n"); - break; - case YAZ_MARC_MARCXML: - case YAZ_MARC_XCHANGE: - if (identifier_flag) - wrbuf_puts (wr, " \n"); - else - wrbuf_puts (wr, "\n"); - break; + yaz_marc_cprintf(mt, "No separator at end of field length=%d", + data_length); } } - switch (mt->xml) - { - case YAZ_MARC_LINE: - wrbuf_puts (wr, ""); - break; - case YAZ_MARC_SIMPLEXML: - wrbuf_puts (wr, "\n"); - break; - case YAZ_MARC_OAIMARC: - wrbuf_puts (wr, "\n"); - break; - case YAZ_MARC_MARCXML: - case YAZ_MARC_XCHANGE: - wrbuf_puts (wr, "\n"); - break; - case YAZ_MARC_ISO2709: - wrbuf_putc (wr, ISO2709_RS); - break; - } return record_length; } +int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) +{ + int s, r = yaz_marc_read_iso2709(mt, buf, bsize); + if (r <= 0) + return r; + s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */ + if (s != 0) + return -1; /* error */ + return r; /* OK, return length > 0 */ +} + int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize, char **result, int *rsize) { @@ -618,7 +1126,7 @@ void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd) mt->iconv_cd = cd; } -/* depricated */ +/* deprecated */ int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml) { yaz_marc_t mt = yaz_marc_create(); @@ -631,13 +1139,13 @@ int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml) return r; } -/* depricated */ +/* deprecated */ int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize) { return yaz_marc_decode(buf, wr, debug, bsize, 0); } -/* depricated */ +/* deprecated */ int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize) { yaz_marc_t mt = yaz_marc_create(); @@ -653,13 +1161,13 @@ int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize) return r; } -/* depricated */ +/* deprecated */ int marc_display_ex (const char *buf, FILE *outf, int debug) { return marc_display_exl (buf, outf, debug, -1); } -/* depricated */ +/* deprecated */ int marc_display (const char *buf, FILE *outf) { return marc_display_ex (buf, outf, 0); diff --git a/src/querytowrbuf.c b/src/querytowrbuf.c index 3791082..70e23df 100644 --- a/src/querytowrbuf.c +++ b/src/querytowrbuf.c @@ -2,12 +2,11 @@ * Copyright (C) 1995-2005, Index Data ApS * All rights reserved. * - * $Id: querytowrbuf.c,v 1.3 2006-01-20 14:44:55 adam Exp $ + * $Id: querytowrbuf.c,v 1.4 2006-04-19 10:05:03 adam Exp $ */ -/** - * \file querytostr.c - * \brief Query to WRBUF (to strings) +/** \file querytowrbuf.c + \brief Query to WRBUF (to strings) */ #include diff --git a/src/wrbuf.c b/src/wrbuf.c index 19c2ce2..47dbe9a 100644 --- a/src/wrbuf.c +++ b/src/wrbuf.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2005, Index Data ApS * See the file LICENSE for details. * - * $Id: wrbuf.c,v 1.9 2005-09-27 17:52:46 adam Exp $ + * $Id: wrbuf.c,v 1.10 2006-04-19 10:05:03 adam Exp $ */ /** @@ -195,6 +195,11 @@ int wrbuf_iconv_write(WRBUF b, yaz_iconv_t cd, const char *buf, int size) return wrbuf_iconv_write_x(b, cd, buf, size, 0); } +int wrbuf_iconv_puts(WRBUF b, yaz_iconv_t cd, const char *strz) +{ + return wrbuf_iconv_write(b, cd, strz, strlen(strz)); +} + int wrbuf_iconv_write_cdata(WRBUF b, yaz_iconv_t cd, const char *buf, int size) { return wrbuf_iconv_write_x(b, cd, buf, size, 1); diff --git a/src/xmlquery.c b/src/xmlquery.c index 9574be4..77184ee 100644 --- a/src/xmlquery.c +++ b/src/xmlquery.c @@ -2,13 +2,12 @@ * Copyright (C) 1995-2005, Index Data ApS * All rights reserved. * - * $Id: xmlquery.c,v 1.6 2006-02-23 13:09:54 adam Exp $ + * $Id: xmlquery.c,v 1.7 2006-04-19 10:05:03 adam Exp $ */ -/** - * \file querytostr.c - * \brief Query / XML conversions - */ +/** \file xmlquery.c + \brief Query / XML conversions +*/ #include #include diff --git a/src/zoom-c.c b/src/zoom-c.c index 4173682..148b54e 100644 --- a/src/zoom-c.c +++ b/src/zoom-c.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2005, Index Data ApS * See the file LICENSE for details. * - * $Id: zoom-c.c,v 1.68 2006-04-07 11:27:24 adam Exp $ + * $Id: zoom-c.c,v 1.69 2006-04-19 10:05:03 adam Exp $ */ /** * \file zoom-c.c @@ -1150,7 +1150,7 @@ static zoom_ret ZOOM_connection_send_init (ZOOM_connection c) ZOOM_options_get(c->options, "implementationName"), odr_prepend(c->odr_out, "ZOOM-C", ireq->implementationName)); - version = odr_strdup(c->odr_out, "$Revision: 1.68 $"); + version = odr_strdup(c->odr_out, "$Revision: 1.69 $"); if (strlen(version) > 10) /* check for unexpanded CVS strings */ version[strlen(version)-2] = '\0'; ireq->implementationVersion = odr_prepend(c->odr_out, @@ -1865,7 +1865,7 @@ ZOOM_record_get (ZOOM_record rec, const char *type_spec, int *len) } return 0; } - else if (!strcmp (type, "xml") || !strcmp(type, "oai")) + else if (!strcmp (type, "xml")) { Z_External *r = (Z_External *) npr->u.databaseRecord; oident *ent = oid_getentbyoid(r->direct_reference); @@ -1889,8 +1889,6 @@ ZOOM_record_get (ZOOM_record rec, const char *type_spec, int *len) const char *ret_buf; int marc_decode_type = YAZ_MARC_MARCXML; - if (!strcmp(type, "oai")) - marc_decode_type = YAZ_MARC_OAIMARC; switch (ent->value) { case VAL_SOIF: diff --git a/test/Makefile.am b/test/Makefile.am index fd5eda6..7a1e037 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -1,16 +1,21 @@ ## Copyright (C) 1994-2006, Index Data ## All rights reserved. -## $Id: Makefile.am,v 1.12 2006-04-01 11:45:23 adam Exp $ +## $Id: Makefile.am,v 1.13 2006-04-19 10:05:04 adam Exp $ check_PROGRAMS = tsticonv tstnmem tstmatchstr tstwrbuf tstodr tstccl tstlog \ tstsoap1 tstsoap2 tstodrstack tstlogthread tstxmlquery tstpquery -check_SCRIPTS = tstcql.sh tstmarc.sh +check_SCRIPTS = tstcql.sh tstmarciso.sh tstmarcxml.sh TESTS = $(check_PROGRAMS) $(check_SCRIPTS) EXTRA_DIST = tstodr.asn tstodrcodec.c tstodrcodec.h cqlsample \ $(check_SCRIPTS) \ - marc1 marc1.xml marc2 marc2.xml marc3 marc3.xml marc4 marc4.xml + marc1 marc1.xml marc1.chr marc1.xml.marc \ + marc2 marc2.xml marc2.chr marc2.xml.marc \ + marc3 marc3.xml marc3.chr marc3.xml.marc \ + marc4 marc4.xml marc4.chr marc4.xml.marc \ + marc5 marc5.xml marc5.chr marc5.xml.marc \ + marc6 marc6.xml marc6.chr marc6.xml.marc YAZCOMP = ../util/yaz-asncomp YAZCOMPLINE = $(YAZCOMP) -d z.tcl -i yaz -I../include $(YCFLAGS) diff --git a/test/marc1.chr b/test/marc1.chr new file mode 100644 index 0000000..328c81e --- /dev/null +++ b/test/marc1.chr @@ -0,0 +1 @@ +iso-8859-1 diff --git a/test/marc1.xml b/test/marc1.xml index d93b391..75b3a95 100644 --- a/test/marc1.xml +++ b/test/marc1.xml @@ -1,6 +1,6 @@ - - 00988nam0a32003011 450 + + 00988nam0 32003011 450 9 181 423 4 710100 diff --git a/test/marc1.xml.marc b/test/marc1.xml.marc new file mode 100644 index 0000000..e899276 --- /dev/null +++ b/test/marc1.xml.marc @@ -0,0 +1 @@ +00989nam0 32003011 450 001002800000004001000028008002800038009001100066021002700077032001500104100002100119245005400140250004100194260004000235300002700275504012300302512006100425520005000486652002400536652001600560666001600576666001900592666001500611666001800626666001900644666000900663666001500672000a9 181 423 4b710100fa000rnae000tmuua2002buslengv0000aagxx000a1-4000-4596-7d$14,00000&DBC2004390000aSlomanhLarry000aOn the road with Bob DylaneLarry "Ratso" Sloman000aRevised editionbThree Rivers Press000aNew YorkbThree Rivers Pressc2002000axv, 464 sider, tavler000aLarry "Ratso" Slomans meget personlige beretning om Bob Dylans koncertturne i USA i 1975: "The Rolling Thunder revue"000aPå omslaget: With a new introduction by Kinky Friedman000aTidligere: 1. udgave. New York, Bantam, 19780000m99.4aDylanhBob000p78.9064v5000ffolkemusik000ffolkemusikere000frockmusik000frockmusikere000frockkoncerter000eUSA000i1970-1979 \ No newline at end of file diff --git a/test/marc2.chr b/test/marc2.chr new file mode 100644 index 0000000..328c81e --- /dev/null +++ b/test/marc2.chr @@ -0,0 +1 @@ +iso-8859-1 diff --git a/test/marc2.xml b/test/marc2.xml index ee51723..6faad3e 100644 --- a/test/marc2.xml +++ b/test/marc2.xml @@ -1,6 +1,6 @@ - - 01116nam0a32002171 450 + + 01116nam0 32002171 450 9 182 502 3 710100 diff --git a/test/marc2.xml.marc b/test/marc2.xml.marc new file mode 100644 index 0000000..5580029 --- /dev/null +++ b/test/marc2.xml.marc @@ -0,0 +1 @@ +01121nam0 32002171 450 001002800000004001000028008002800038009001100066039000900077100001900086245005900105260003300164300001000197512007300207531001400280538001900294652001500313666004900328795050600377795002000883000a9 182 502 3b710100fa000rcae000tsufa1995bgblengv0000asgxc000abef000aMimmshGarnet000aCry babyaWarm and soulfuleGarnet Mimms ... [et al.]000aBury St. EdmundsbBGOc1995000n1 cd000aIndspilninger publiceret 1963 (Cry baby) og 1965 (Warm and soulful)000aIndhold:000fBGOgBGOCD268000m78.794v4000msoulmrhythm & bluesnvokalp1960-1969lUSA000å11aCry babyaNobody but youaUntil you were goneaAnytime you want meaSo closeaFor your precious loveaBaby don't you weepaA ¤quiet placeaCry to meaDon't change your heartaWanting youaThe ¤truth hurtsaI'll take good care of youaLooking for youaIt won't hurt (half as much)aIt was easier to hurt heraThinkin'aProve it to meaMore than a miracleaAs long as I have youaOne girlaThere goes my babyaIt's just a matter of timeaA ¤little bit of soapaLook awayaI'll make it up to you000å40y0a1 girl \ No newline at end of file diff --git a/test/marc3.chr b/test/marc3.chr new file mode 100644 index 0000000..328c81e --- /dev/null +++ b/test/marc3.chr @@ -0,0 +1 @@ +iso-8859-1 diff --git a/test/marc3.xml b/test/marc3.xml index 6407ec2..53f62b8 100644 --- a/test/marc3.xml +++ b/test/marc3.xml @@ -1,6 +1,6 @@ - - 00914naa a2200337 450 + + 00914naa 2200337 450 a00001508 a diff --git a/test/marc3.xml.marc b/test/marc3.xml.marc new file mode 100644 index 0000000..9d5c4b7 --- /dev/null +++ b/test/marc3.xml.marc @@ -0,0 +1 @@ +00914naa 2200337 450 001001700000004000900017008002400026009001000050041000800060041000800068097000700076245003300083300002600116557003300142630001600175630001300191633001000204633000900214648006100223648001600284648005400300J01000600354BAS000500360LKR004200365CAT003000407CAT003000437CAT003000467CAT003900497CAT002600536UID001400562 aa00001508fa airn a1991bxxlnortav9 aagxx anor deng00a06 aByfornyelse ved Ibsen-Ringen bfarvefotobplanbsnit aByggekunstj1991v1/2k41-45 fbyfornyelse fsanering fNorge fOslo aTelje Torp Aasen ArkitektkontorcKristian Augustsgate 7B aEng, Dagfin ranlund, TomcKristian AugustsgatecPilestredet 19 aa 0 aITMlARK50b0000145y1991i1/2k41-45 abc20020111lARK01h2002 abc20020111lARK01h2116 abc20021002lARK01h1000 aICLLOADb00c20021122lARK01h1948 c20030618lARK01h1330 aa00001508 \ No newline at end of file diff --git a/test/marc4.chr b/test/marc4.chr new file mode 100644 index 0000000..328c81e --- /dev/null +++ b/test/marc4.chr @@ -0,0 +1 @@ +iso-8859-1 diff --git a/test/marc4.xml b/test/marc4.xml index fc8e91b..c2f2702 100644 --- a/test/marc4.xml +++ b/test/marc4.xml @@ -1,9 +1,9 @@ + - - 009140091a22a 22003370 - + 009140091422a 22003370 + diff --git a/test/marc4.xml.marc b/test/marc4.xml.marc new file mode 100644 index 0000000..a8f9ac9 --- /dev/null +++ b/test/marc4.xml.marc @@ -0,0 +1 @@ +00026009142200025003370  \ No newline at end of file diff --git a/test/marc5.chr b/test/marc5.chr new file mode 100644 index 0000000..a524421 --- /dev/null +++ b/test/marc5.chr @@ -0,0 +1 @@ +utf-8 diff --git a/test/marc5.xml b/test/marc5.xml new file mode 100644 index 0000000..54323ee --- /dev/null +++ b/test/marc5.xml @@ -0,0 +1,31 @@ + + 00492nam a22001455a 4500 + 000277485 + 20051026111436.0 + 050413s1894 gr 000 0 gre d + + Μαρούδης, Κωνσταντίνος Ιω + + + Ελληνικόν κρυπτογραφικόν λεξικόν / + Κωνστ. Ι. Μαρούδης. + + + εκδ. + + + Αθήνα, + 1894. + + + 248 σελ. + + + Greek language, Modern + Dialects + Dictionaries + + + Cryptography. + + diff --git a/test/marc5.xml.marc b/test/marc5.xml.marc new file mode 100644 index 0000000..50102a7 --- /dev/null +++ b/test/marc5.xml.marc @@ -0,0 +1 @@ +00492nam a22001455a 450000100100000000500170001000800410002710000520006824501040012025000140022426000230023830000160026165000510027765000180032800027748520051026111436.0050413s1894 gr 000 0 gre d1 aΜαρούδης, Κωνσταντίνος Ιω10aΕλληνικόν κρυπτογραφικόν λεξικόν /cΚωνστ. Ι. Μαρούδης. η εκδ. aΑθήνα,c1894. a248 σελ. 0aGreek language, ModernxDialectsvDictionaries 0aCryptography. \ No newline at end of file diff --git a/test/marc6 b/test/marc6 new file mode 100644 index 0000000..c78fdce --- /dev/null +++ b/test/marc6 @@ -0,0 +1 @@ +00366nam 22001698a 4500001001300000003000400013005001700017008004100034010001700179040001300075050001200088100001700100245003000117260001200147263000900159300001100168 11224466 DLC00000000000000.0910710c19910701nju 00010 eng  aDLCcDLC00a123-xyz10aJack Collins10aHow to program a computer1 aPenguin a8710 ap. cm. a 11224466  \ No newline at end of file diff --git a/test/marc6.chr b/test/marc6.chr new file mode 100644 index 0000000..f51f8e4 --- /dev/null +++ b/test/marc6.chr @@ -0,0 +1 @@ +marc-8 diff --git a/test/marc6.xml b/test/marc6.xml new file mode 100644 index 0000000..2b8578d --- /dev/null +++ b/test/marc6.xml @@ -0,0 +1,32 @@ + + 00366nam 22001698a 4500 + 11224466 + DLC + 00000000000000.0 + 910710c19910701nju 00010 eng + + 11224466 + + + DLC + DLC + + + 123-xyz + + + Jack Collins + + + How to program a computer + + + Penguin + + + 8710 + + + p. cm. + + diff --git a/test/marc6.xml.marc b/test/marc6.xml.marc new file mode 100644 index 0000000..6f62ae2 --- /dev/null +++ b/test/marc6.xml.marc @@ -0,0 +1 @@ +00366nam 22001698a 4500001001300000003000400013005001700017008004100034010001700075040001300092050001200105100001700117245003000134260001200164263000900176300001100185 11224466 DLC00000000000000.0910710c19910701nju 00010 eng  a 11224466  aDLCcDLC00a123-xyz10aJack Collins10aHow to program a computer1 aPenguin a8710 ap. cm. \ No newline at end of file diff --git a/test/tstmarc.sh b/test/tstmarc.sh deleted file mode 100755 index 97b1ab4..0000000 --- a/test/tstmarc.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/sh -# $Id: tstmarc.sh,v 1.2 2004-11-16 17:12:28 adam Exp $ -srcdir=${srcdir:-.} -ecode=0 -for f in ${srcdir}/marc?; do - NEW=`basename ${f}`.new.xml - OLD=${f}.xml - DIFF=`basename ${f}`.diff - ../util/yaz-marcdump -f iso-8859-1 -t utf-8 -X $f > $NEW - if test $? != "0"; then - echo "Failed decode of $f" - ecode=1 - elif test -f $OLD; then - if diff $OLD $NEW >$DIFF; then - rm $DIFF - rm $NEW - else - echo "Differ in $f" - ecode=1 - fi - else - echo "Making test $f for the first time" - if test -x /usr/bin/xmllint; then - if xmllint --noout $NEW >out 2>stderr; then - echo "XML for $f is OK" - mv $NEW $OLD - else - echo "XML for $f is invalid" - ecode=1 - fi - else - echo "xmllint not found. install libxml2-utils" - ecode=1 - fi - fi -done -exit $ecode - diff --git a/test/tstmarciso.sh b/test/tstmarciso.sh new file mode 100755 index 0000000..9bf324d --- /dev/null +++ b/test/tstmarciso.sh @@ -0,0 +1,39 @@ +#!/bin/sh +# $Id: tstmarciso.sh,v 1.1 2006-04-19 10:05:04 adam Exp $ +# Tests reading of ISO2709 and checks that we get identical MARCXML +srcdir=${srcdir:-.} +ecode=0 +for f in ${srcdir}/marc?; do + NEW=`basename ${f}`.new.xml + OLD=${f}.xml + DIFF=`basename ${f}`.diff + ../util/yaz-marcdump -f `cat ${f}.chr` -t utf-8 -X $f > $NEW + if test $? != "0"; then + echo "Failed decode of $f" + ecode=1 + elif test -f $OLD; then + if diff $OLD $NEW >$DIFF; then + rm $DIFF + rm $NEW + else + echo "Differ in $f" + ecode=1 + fi + else + echo "Making test $f for the first time" + if test -x /usr/bin/xmllint; then + if xmllint --noout $NEW >out 2>stderr; then + echo "XML for $f is OK" + mv $NEW $OLD + else + echo "XML for $f is invalid" + ecode=1 + fi + else + echo "xmllint not found. install libxml2-utils" + ecode=1 + fi + fi +done +exit $ecode + diff --git a/test/tstmarcxml.sh b/test/tstmarcxml.sh new file mode 100755 index 0000000..7a970fb --- /dev/null +++ b/test/tstmarcxml.sh @@ -0,0 +1,33 @@ +#!/bin/sh +# $Id: tstmarcxml.sh,v 1.1 2006-04-19 10:05:04 adam Exp $ +# Tests reading of MARCXML and checks that we get identical ISO2709 output. +srcdir=${srcdir:-.} +ecode=0 +# Skip this test if Libxml2 support is not enabled +../util/yaz-marcdump -x >/dev/null 2>&1 +if test $? = "3"; then + exit 0 +fi +for f in ${srcdir}/marc?.xml; do + NEW=`basename ${f}`.new.marc + OLD=${f}.marc + DIFF=`basename ${f}`.diff + ../util/yaz-marcdump -f utf-8 -t utf-8 -x -I $f > $NEW + if test $? != "0"; then + echo "Failed decode of $f" + ecode=1 + elif test -f $OLD; then + if diff $OLD $NEW >$DIFF; then + rm $DIFF + rm $NEW + else + echo "Differ in $f" + ecode=1 + fi + else + echo "Making test $f for the first time" + mv $NEW $OLD + fi +done +exit $ecode + diff --git a/util/marcdump.c b/util/marcdump.c index 217feee..e056593 100644 --- a/util/marcdump.c +++ b/util/marcdump.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 1995-2005, Index Data ApS + * Copyright (C) 1995-2006, Index Data ApS * See the file LICENSE for details. * - * $Id: marcdump.c,v 1.35 2005-12-18 15:58:02 adam Exp $ + * $Id: marcdump.c,v 1.36 2006-04-19 10:05:04 adam Exp $ */ #define _FILE_OFFSET_BITS 64 @@ -45,76 +45,214 @@ #define SEEK_END 2 #endif + +static char *prog; + static void usage(const char *prog) { - fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-O] [-X] [-e] [-I] [-v] file...\n", + fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-X] [-e] [-I] [-v] [-s splitfname] file...\n", prog); } #if HAVE_XML2 -void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output) { - xmlNodePtr cur; - int size; - int i; - - assert(output); - size = (nodes) ? nodes->nodeNr : 0; - - fprintf(output, "Result (%d nodes):\n", size); - for(i = 0; i < size; ++i) { - assert(nodes->nodeTab[i]); +static void marcdump_read_xml(yaz_marc_t mt, const char *fname) +{ + xmlNodePtr ptr; + xmlDocPtr doc = xmlParseFile(fname); + if (!doc) + return; + + ptr = xmlDocGetRootElement(doc); + if (ptr) + { + int r; + WRBUF wrbuf = wrbuf_alloc(); + r = yaz_marc_read_xml(mt, ptr); + if (r) + fprintf(stderr, "yaz_marc_read_xml failed\n"); - if(nodes->nodeTab[i]->type == XML_NAMESPACE_DECL) + yaz_marc_write_mode(mt, wrbuf); + + fputs(wrbuf_buf(wrbuf), stdout); + + wrbuf_free(wrbuf, 1); + } + xmlFreeDoc(doc); +} +#endif + +static void dump(const char *fname, const char *from, const char *to, + int read_xml, int xml, + int print_offset, const char *split_fname, int verbose, + FILE *cfile) +{ + yaz_marc_t mt = yaz_marc_create(); + yaz_iconv_t cd = 0; + + if (from && to) + { + cd = yaz_iconv_open(to, from); + if (!cd) { - xmlNsPtr ns; - - ns = (xmlNsPtr)nodes->nodeTab[i]; - cur = (xmlNodePtr)ns->next; - if(cur->ns) { - fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n", - ns->prefix, ns->href, cur->ns->href, cur->name); - } else { - fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n", - ns->prefix, ns->href, cur->name); - } - } - else if(nodes->nodeTab[i]->type == XML_ELEMENT_NODE) + fprintf(stderr, "conversion from %s to %s " + "unsupported\n", from, to); + exit(2); + } + yaz_marc_iconv(mt, cd); + } + yaz_marc_xml(mt, xml); + yaz_marc_debug(mt, verbose); + + if (read_xml) + { +#if HAVE_XML2 + marcdump_read_xml(mt, fname); +#else + return; +#endif + } + else + { + FILE *inf = fopen(fname, "rb"); + int count = 0; + int num = 1; + if (!inf) { - cur = nodes->nodeTab[i]; - if(cur->ns) { - fprintf(output, "= element node \"%s:%s\"\n", - cur->ns->href, cur->name); - } - else - { - fprintf(output, "= element node \"%s\"\n", - cur->name); - } + fprintf (stderr, "%s: cannot open %s:%s\n", + prog, fname, strerror (errno)); + exit(1); } - else + if (cfile) + fprintf (cfile, "char *marc_records[] = {\n"); + if (1) { - cur = nodes->nodeTab[i]; - fprintf(output, "= node \"%s\": type %d\n", cur->name, cur->type); + int marc_no = 0; + for(;; marc_no++) + { + int len; + char *result = 0; + int rlen; + size_t r; + char buf[100001]; + + r = fread (buf, 1, 5, inf); + if (r < 5) + { + if (r && print_offset && verbose) + printf ("\n", r); + break; + } + while (*buf < '0' || *buf > '9') + { + int i; + long off = ftell(inf) - 5; + if (verbose || print_offset) + printf("\n", + *buf & 0xff, *buf & 0xff, + off, off); + for (i = 0; i<4; i++) + buf[i] = buf[i+1]; + r = fread(buf+4, 1, 1, inf); + if (r < 1) + break; + } + if (r < 1) + { + if (verbose || print_offset) + printf ("\n"); + break; + } + if (print_offset) + { + long off = ftell(inf) - 5; + printf ("\n", + num, off, off); + } + len = atoi_n(buf, 5); + if (len < 25 || len > 100000) + { + long off = ftell(inf) - 5; + printf("Bad Length %d read at offset %ld (%lx)\n", + len, (long) off, (long) off); + break; + } + rlen = len - 5; + r = fread (buf + 5, 1, rlen, inf); + if (r < rlen) + break; + if (split_fname) + { + char fname[256]; + FILE *sf; + sprintf(fname, "%.200s%07d", split_fname, marc_no); + sf = fopen(fname, "wb"); + if (!sf) + { + fprintf(stderr, "Could not open %s\n", fname); + split_fname = 0; + } + else + { + if (fwrite(buf, 1, len, sf) != len) + { + fprintf(stderr, "Could write content to %s\n", + fname); + split_fname = 0; + } + fclose(sf); + } + } + r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen); + if (r > 0 && result) + { + fwrite (result, rlen, 1, stdout); + } + if (r > 0 && cfile) + { + char *p = buf; + int i; + if (count) + fprintf (cfile, ","); + fprintf (cfile, "\n"); + for (i = 0; i < r; i++) + { + if ((i & 15) == 0) + fprintf (cfile, " \""); + fprintf (cfile, "\\x%02X", p[i] & 255); + + if (i < r - 1 && (i & 15) == 15) + fprintf (cfile, "\"\n"); + + } + fprintf (cfile, "\"\n"); + } + num++; + if (verbose) + printf("\n"); + } + count++; } + if (cfile) + fprintf (cfile, "};\n"); + fclose(inf); } + if (cd) + yaz_iconv_close(cd); + yaz_marc_destroy(mt); } -#endif int main (int argc, char **argv) { int r; - int libxml_dom_test = 0; int print_offset = 0; char *arg; int verbose = 0; - FILE *inf; - char buf[100001]; - char *prog = *argv; int no = 0; int xml = 0; FILE *cfile = 0; char *from = 0, *to = 0; - int num = 1; + int read_xml = 0; const char *split_fname = 0; #if HAVE_LOCALE_H @@ -126,9 +264,9 @@ int main (int argc, char **argv) #endif #endif - while ((r = options("pvc:xOeXIf:t:2s:", argv, argc, &arg)) != -2) + prog = *argv; + while ((r = options("pvc:xOeXIf:t:s:", argv, argc, &arg)) != -2) { - int count; no++; switch (r) { @@ -144,10 +282,18 @@ int main (int argc, char **argv) cfile = fopen(arg, "w"); break; case 'x': - xml = YAZ_MARC_SIMPLEXML; +#if HAVE_XML2 + read_xml = 1; +#else + fprintf(stderr, "%s: -x not supported." + " YAZ not compiled with Libxml2 support\n", prog); + exit(3); +#endif break; case 'O': - xml = YAZ_MARC_OAIMARC; + fprintf(stderr, "%s: OAI MARC no longer supported." + " Use MARCXML instead.\n", prog); + exit(1); break; case 'e': xml = YAZ_MARC_XCHANGE; @@ -161,187 +307,12 @@ int main (int argc, char **argv) case 'p': print_offset = 1; break; - case '2': - libxml_dom_test = 1; - break; case 's': split_fname = arg; break; case 0: - inf = fopen(arg, "rb"); - count = 0; - if (!inf) - { - fprintf (stderr, "%s: cannot open %s:%s\n", - prog, arg, strerror (errno)); - exit(1); - } - if (cfile) - fprintf (cfile, "char *marc_records[] = {\n"); - if (1) - { - yaz_marc_t mt = yaz_marc_create(); - yaz_iconv_t cd = 0; - int marc_no = 0; - - if (from && to) - { - cd = yaz_iconv_open(to, from); - if (!cd) - { - fprintf(stderr, "conversion from %s to %s " - "unsupported\n", from, to); - exit(2); - } - yaz_marc_iconv(mt, cd); - } - yaz_marc_xml(mt, xml); - yaz_marc_debug(mt, verbose); - for(;; marc_no++) - { - int len; - char *result = 0; - int rlen; - - r = fread (buf, 1, 5, inf); - if (r < 5) - { - if (r && print_offset && verbose) - printf ("\n", r); - break; - } - while (*buf < '0' || *buf > '9') - { - int i; - long off = ftell(inf) - 5; - if (verbose || print_offset) - printf("\n", - *buf & 0xff, *buf & 0xff, - off, off); - for (i = 0; i<4; i++) - buf[i] = buf[i+1]; - r = fread(buf+4, 1, 1, inf); - if (r < 1) - break; - } - if (r < 1) - { - if (verbose || print_offset) - printf ("\n"); - break; - } - if (print_offset) - { - long off = ftell(inf) - 5; - printf ("\n", - num, off, off); - } - len = atoi_n(buf, 5); - if (len < 25 || len > 100000) - { - long off = ftell(inf) - 5; - printf("Bad Length %d read at offset %ld (%lx)\n", - len, (long) off, (long) off); - break; - } - rlen = len - 5; - r = fread (buf + 5, 1, rlen, inf); - if (r < rlen) - break; - if (split_fname) - { - char fname[256]; - FILE *sf; - sprintf(fname, "%.200s%07d", split_fname, marc_no); - sf = fopen(fname, "wb"); - if (!sf) - { - fprintf(stderr, "Could not open %s\n", fname); - split_fname = 0; - } - else - { - if (fwrite(buf, 1, len, sf) != len) - { - fprintf(stderr, "Could write content to %s\n", - fname); - split_fname = 0; - } - fclose(sf); - } - } - r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen); - if (result) - fwrite (result, rlen, 1, stdout); -#if HAVE_XML2 - if (r > 0 && libxml_dom_test) - { - xmlDocPtr doc = xmlParseMemory(result, rlen); - if (!doc) - fprintf(stderr, "xmLParseMemory failed\n"); - else - { - int i; - xmlXPathContextPtr xpathCtx; - xmlXPathObjectPtr xpathObj; - static const char *xpathExpr[] = { - "/record/datafield[@tag='245']/subfield[@code='a']", - "/record/datafield[@tag='100']/subfield", - "/record/datafield[@tag='245']/subfield[@code='a']", - "/record/datafield[@tag='650']/subfield", - "/record/datafield[@tag='650']", - 0}; - - xpathCtx = xmlXPathNewContext(doc); - - for (i = 0; xpathExpr[i]; i++) { - xpathObj = xmlXPathEvalExpression(BAD_CAST xpathExpr[i], xpathCtx); - if(xpathObj == NULL) { - fprintf(stderr,"Error: unable to evaluate xpath expression \"%s\"\n", xpathExpr[i]); - } - else - { - print_xpath_nodes(xpathObj->nodesetval, stdout); - xmlXPathFreeObject(xpathObj); - } - } - xmlXPathFreeContext(xpathCtx); - xmlFreeDoc(doc); - } - } -#endif - if (r > 0 && cfile) - { - char *p = buf; - int i; - if (count) - fprintf (cfile, ","); - fprintf (cfile, "\n"); - for (i = 0; i < r; i++) - { - if ((i & 15) == 0) - fprintf (cfile, " \""); - fprintf (cfile, "\\x%02X", p[i] & 255); - - if (i < r - 1 && (i & 15) == 15) - fprintf (cfile, "\"\n"); - - } - fprintf (cfile, "\"\n"); - } - num++; - if (verbose) - printf("\n"); - } - count++; - if (cd) - yaz_iconv_close(cd); - yaz_marc_destroy(mt); - } - if (cfile) - fprintf (cfile, "};\n"); - fclose(inf); + dump(arg, from, to, read_xml, xml, + print_offset, split_fname, verbose, cfile); break; case 'v': verbose++;