Possible compatibility problems with earlier versions marked with '*'.
+MARC conversion can now generate ISO2709 output in another
+character set. yaz-marcdump uses this facility if you invoke it
+with option -I.
+
Added missing C decl macros for include/yaz/{soap.h,srw.h}, so that
functions from there can be used from C++.
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN">
-<!-- $Id: yaz-marcdump-man.xml,v 1.2 2003-11-18 20:34:34 adam Exp $ -->
+<!-- $Id: yaz-marcdump-man.xml,v 1.3 2003-12-11 00:37:21 adam Exp $ -->
<refentry id="yaz-marcdump">
<refmeta>
<refentrytitle>yaz-marcdump</refentrytitle>
<command>yaz-marcdump</command>
<arg choice="opt"><option>-x</option></arg>
<arg choice="opt"><option>-X</option></arg>
+ <arg choice="opt"><option>-I</option></arg>
<arg choice="opt"><option>-O</option></arg>
<arg choice="opt"><option>-f <replaceable>from</replaceable></option></arg>
<arg choice="opt"><option>-t <replaceable>to</replaceable></option></arg>
</varlistentry>
<varlistentry>
+ <term>-I</term>
+ <listitem><para>
+ Print MARC records in ISO2709 format.
+ This format is equivalent to YAZ_MARC_ISO2709 in
+ <filename>yaz/marcdisp.h</filename>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term>-O</term>
<listitem><para>
Print MARC records in OAIMARC. Another XML variant.
<refsect1><title>EXAMPLES</title>
<para>
- The following command converts MARC21/USMARC using MARC-8 encoding to
- MARCXML in UTF-8 encoding.
+ The following command converts MARC21/USMARC in MARC-8 encoding to
+ MARC21/USMARC in UTF-8 encoding. (Both input and output are in ISO2709.)
+ <screen>
+ yaz-marcdump -f MARC-8 -t UTF-8 -I marc21.raw >marc21.utf8.raw
+ </screen>
+ </para>
+ <para>
+ The same records may be converted to MARCXML instead in UTF-8:
<screen>
yaz-marcdump -f MARC-8 -t UTF-8 -X marc21.raw >marcxml.xml
</screen>
* LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THIS SOFTWARE.
*
- * $Id: marcdisp.h,v 1.8 2003-01-06 08:20:27 adam Exp $
+ * $Id: marcdisp.h,v 1.9 2003-12-11 00:37:21 adam Exp $
*/
#ifndef MARCDISP_H
#define YAZ_MARC_SIMPLEXML 1
#define YAZ_MARC_OAIMARC 2
#define YAZ_MARC_MARCXML 3
+#define YAZ_MARC_ISO2709 4
+
+/* supply iconv handle for character set conversion .. */
+YAZ_EXPORT void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd);
/* set debug level, 0=none, 1=more, 2=even more, .. */
YAZ_EXPORT void yaz_marc_debug(yaz_marc_t mt, int level);
* LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THIS SOFTWARE.
*
- * $Id: wrbuf.h,v 1.9 2003-07-14 12:58:18 adam Exp $
+ * $Id: wrbuf.h,v 1.10 2003-12-11 00:37:21 adam Exp $
*
*/
#define WRBUF_H
#include <yaz/xmalloc.h>
+#include <yaz/yaz-iconv.h>
YAZ_BEGIN_CDECL
YAZ_EXPORT int wrbuf_puts(WRBUF b, const char *buf);
YAZ_EXPORT int wrbuf_xmlputs(WRBUF b, const char *cp);
YAZ_EXPORT void wrbuf_printf(WRBUF b, const char *fmt, ...);
+YAZ_EXPORT int wrbuf_iconv_write(WRBUF b, yaz_iconv_t cd, const char *buf,
+ int size);
#define wrbuf_len(b) ((b)->pos)
#define wrbuf_buf(b) ((b)->buf)
* Copyright (c) 1995-2003, Index Data
* See the file LICENSE for details.
*
- * $Id: marcdisp.c,v 1.1 2003-10-27 12:21:30 adam Exp $
+ * $Id: marcdisp.c,v 1.2 2003-12-11 00:37:22 adam Exp $
*/
#if HAVE_CONFIG_H
WRBUF m_wr;
int xml;
int debug;
+ yaz_iconv_t iconv_cd;
};
yaz_marc_t yaz_marc_create(void)
mt->xml = YAZ_MARC_LINE;
mt->debug = 0;
mt->m_wr = wrbuf_alloc();
+ mt->iconv_cd = 0;
return mt;
}
static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr)
{
size_t i;
- for (i = 0; i<len; i++)
+ if (mt->xml == YAZ_MARC_ISO2709)
{
- if (mt->xml)
- {
- switch (buf[i]) {
- case '<':
- wrbuf_puts(wr, "<");
- break;
- case '>':
- wrbuf_puts(wr, ">");
- break;
- case '&':
- wrbuf_puts(wr, "&");
+ wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
+ }
+ else if (mt->xml == YAZ_MARC_LINE)
+ {
+ wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
+ }
+ else
+ {
+ int j = 0;
+ for (i = 0; i<len; i++)
+ {
+ switch (buf[i]) {
+ case '<':
+ if (i > j)
+ wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j);
+ wrbuf_puts(wr, "<");
+ j=i+1;
+ break;
+ case '>':
+ if (i > j)
+ wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j);
+ wrbuf_puts(wr, ">");
+ j=i+1;
+ break;
+ case '&':
+ if (i > j)
+ wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j);
+ wrbuf_puts(wr, "&");
+ j=i+1;
break;
case '"':
+ if (i > j)
+ wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j);
wrbuf_puts(wr, """);
+ j=i+1;
break;
case '\'':
+ if (i > j)
+ wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j);
wrbuf_puts(wr, "'");
+ j=i+1;
break;
- default:
- wrbuf_putc(wr, buf[i]);
- }
- }
- else
- wrbuf_putc(wr, buf[i]);
- }
-}
-
-#if 0
-static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len)
-{
- if (!mt->cd)
- marc_cdata2 (mt, buf, len);
- else
- {
- char outbuf[12];
- size_t inbytesleft = len;
- const char *inp = buf;
-
- while (inbytesleft)
- {
- size_t outbytesleft = sizeof(outbuf);
- char *outp = outbuf;
- size_t r = yaz_iconv (mt->cd, (char**) &inp, &inbytesleft,
- &outp, &outbytesleft);
- if (r == (size_t) (-1))
- {
- int e = yaz_iconv_error(mt->cd);
- if (e != YAZ_ICONV_E2BIG)
- break;
}
- marc_cdata2 (mt, outbuf, outp - outbuf);
- }
+ }
+ if (i > j)
+ wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j);
}
}
-#endif
int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
{
}
return -1;
}
- /* ballout if bsize is known and record_length is than that */
+ /* bail out if bsize is known and record_length is greater than that */
if (bsize != -1 && record_length > bsize)
return -1;
if (isdigit(buf[10]))
length_starting = atoi_n (buf+21, 1);
length_implementation = atoi_n (buf+22, 1);
- if (mt->xml)
+ if (mt->xml != YAZ_MARC_LINE)
{
char str[80];
int i;
switch(mt->xml)
{
+ case YAZ_MARC_ISO2709:
+ break;
case YAZ_MARC_SIMPLEXML:
wrbuf_puts (wr, "<iso2709\n");
sprintf (str, " RecordStatus=\"%c\"\n", buf[5]);
wrbuf_puts (wr, "-->\n");
}
+ /* first pass. determine length of directory & base of data */
for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
{
entry_p += 3+length_data_entry+length_starting;
return -1;
}
base_address = entry_p+1;
+
+ if (mt->xml == YAZ_MARC_ISO2709)
+ {
+ WRBUF wr_head = wrbuf_alloc();
+ WRBUF wr_dir = wrbuf_alloc();
+ WRBUF wr_tmp = wrbuf_alloc();
+
+ int data_p = 0;
+ /* second pass. create directory for ISO2709 output */
+ for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
+ {
+ int data_length, data_offset, end_offset;
+ int i, sz1, sz2;
+
+ wrbuf_write(wr_dir, buf+entry_p, 3);
+ entry_p += 3;
+
+ data_length = atoi_n (buf+entry_p, length_data_entry);
+ entry_p += length_data_entry;
+ data_offset = atoi_n (buf+entry_p, length_starting);
+ entry_p += length_starting;
+ i = data_offset + base_address;
+ end_offset = i+data_length-1;
+
+ while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS &&
+ i < end_offset)
+ i++;
+ sz1 = 1+i - (data_offset + base_address);
+ if (mt->iconv_cd)
+ {
+ sz2 = wrbuf_iconv_write(wr_tmp, mt->iconv_cd,
+ buf + data_offset+base_address, sz1);
+ wrbuf_rewind(wr_tmp);
+ }
+ else
+ sz2 = sz1;
+ wrbuf_printf(wr_dir, "%0*d", length_data_entry, sz2);
+ wrbuf_printf(wr_dir, "%0*d", length_starting, data_p);
+ data_p += sz2;
+ }
+ wrbuf_putc(wr_dir, ISO2709_FS);
+ wrbuf_printf(wr_head, "%05d", data_p+1 + base_address);
+ wrbuf_write(wr_head, buf+5, 7);
+ wrbuf_printf(wr_head, "%05d", base_address);
+ wrbuf_write(wr_head, buf+17, 7);
+
+ wrbuf_write(wr, wrbuf_buf(wr_head), 24);
+ wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
+ wrbuf_free(wr_head, 1);
+ wrbuf_free(wr_dir, 1);
+ wrbuf_free(wr_tmp, 1);
+ }
+ /* third pass. create data output */
for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
{
int data_length;
{
switch(mt->xml)
{
+ case YAZ_MARC_ISO2709:
+ wrbuf_putc(wr, buf[i]);
+ break;
case YAZ_MARC_LINE:
if (mt->debug)
wrbuf_puts (wr, " Ind: ");
- wrbuf_putc (wr, buf[i]);
+ wrbuf_putc(wr, buf[i]);
break;
case YAZ_MARC_SIMPLEXML:
- wrbuf_printf (wr, " Indicator%d=\"%c\"", j+1, buf[i]);
+ wrbuf_printf(wr, " Indicator%d=\"%c\"", j+1, buf[i]);
break;
case YAZ_MARC_OAIMARC:
- wrbuf_printf (wr, " i%d=\"%c\"", j+1, buf[i]);
+ wrbuf_printf(wr, " i%d=\"%c\"", j+1, buf[i]);
break;
case YAZ_MARC_MARCXML:
- wrbuf_printf (wr, " ind%d=\"%c\"", j+1, buf[i]);
+ wrbuf_printf(wr, " ind%d=\"%c\"", j+1, buf[i]);
}
}
}
- if (mt->xml)
+ if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_MARCXML
+ || mt->xml == YAZ_MARC_OAIMARC)
{
wrbuf_puts (wr, ">");
if (identifier_flag)
wrbuf_puts (wr, "\n");
}
- else
+ if (mt->xml == YAZ_MARC_LINE)
{
- if (mt->debug && !mt->xml)
+ if (mt->debug)
wrbuf_puts (wr, " Fields: ");
}
if (identifier_flag)
i++;
switch(mt->xml)
{
+ case YAZ_MARC_ISO2709:
+ --i;
+ wrbuf_iconv_write(wr, mt->iconv_cd,
+ buf+i, identifier_length);
+ i += identifier_length;
+ break;
case YAZ_MARC_LINE:
wrbuf_puts (wr, " $");
for (j = 1; j<identifier_length; j++, i++)
buf[i] != ISO2709_FS && i < end_offset)
i++;
marc_cdata(mt, buf + i0, i - i0, wr);
-
- if (mt->xml)
+
+ if (mt->xml == YAZ_MARC_ISO2709 && buf[i] != ISO2709_IDFS)
+ marc_cdata(mt, buf + i, 1, wr);
+
+ if (mt->xml == YAZ_MARC_SIMPLEXML ||
+ mt->xml == YAZ_MARC_MARCXML ||
+ mt->xml == YAZ_MARC_OAIMARC)
wrbuf_puts (wr, "</subfield>\n");
}
}
int i0 = i;
while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
i++;
- marc_cdata(mt, buf + i0, i - i0, wr);
+ marc_cdata(mt, buf + i0, i - i0, wr);
+ if (mt->xml == YAZ_MARC_ISO2709)
+ marc_cdata(mt, buf + i, 1, wr);
}
- if (!mt->xml)
+ if (mt->xml == YAZ_MARC_LINE)
wrbuf_putc (wr, '\n');
if (i < end_offset)
wrbuf_puts (wr, " <!-- separator but not at end of field -->\n");
case YAZ_MARC_MARCXML:
wrbuf_puts (wr, "</record>\n");
break;
+ case YAZ_MARC_ISO2709:
+ wrbuf_putc (wr, ISO2709_RS);
+ break;
}
return record_length;
}
mt->debug = level;
}
+void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
+{ /* Select cd as the iconv handle mt uses when it writes record data. */
+ mt->iconv_cd = cd; /* only the handle is stored; 0 means data is written unconverted */
+}
+
/* deprecated */
int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
{
* Copyright (c) 1995-2003, Index Data.
* See the file LICENSE for details.
*
- * $Id: wrbuf.c,v 1.1 2003-10-27 12:21:36 adam Exp $
+ * $Id: wrbuf.c,v 1.2 2003-12-11 00:37:22 adam Exp $
*/
/*
#include <stdarg.h>
#include <yaz/wrbuf.h>
+#include <yaz/yaz-iconv.h>
WRBUF wrbuf_alloc(void)
{
va_end(ap);
}
+/*
+ * wrbuf_iconv_write: append size bytes from buf to b, converted with cd.
+ *
+ * If cd is 0 the bytes are appended unchanged. Otherwise the input is
+ * converted through a small stack buffer, one chunk at a time, until all
+ * input is consumed or yaz_iconv reports an error other than
+ * YAZ_ICONV_E2BIG. YAZ_ICONV_E2BIG only means the chunk buffer is full,
+ * so conversion continues with the remaining input.
+ *
+ * Returns wrbuf_len(b), i.e. the total length of b after the write, not
+ * the number of bytes added by this call.
+ */
+int wrbuf_iconv_write(WRBUF b, yaz_iconv_t cd, const char *buf, int size)
+{
+    if (cd)
+    {
+        char outbuf[12];
+        size_t inbytesleft = size;
+        const char *inp = buf;
+        while (inbytesleft)
+        {
+            size_t outbytesleft = sizeof(outbuf);
+            char *outp = outbuf;
+            int stop = 0;
+            size_t r = yaz_iconv(cd, (char**) &inp, &inbytesleft,
+                                 &outp, &outbytesleft);
+            if (r == (size_t) (-1))
+                stop = (yaz_iconv_error(cd) != YAZ_ICONV_E2BIG);
+            /* Flush what this call produced even when it failed, so that
+               bytes converted before the failing sequence are kept. */
+            wrbuf_write(b, outbuf, outp - outbuf);
+            if (stop)
+                break;
+        }
+    }
+    else
+        wrbuf_write(b, buf, size);
+    return wrbuf_len(b);
+}
* Copyright (c) 2000-2003, Index Data
* See the file LICENSE for details.
*
- * $Id: zoom-c.c,v 1.10 2003-12-04 12:57:30 adam Exp $
+ * $Id: zoom-c.c,v 1.11 2003-12-11 00:37:22 adam Exp $
*
* ZOOM layer for C, connections, result sets, queries.
*/
ZOOM_options_get(c->options, "implementationName"),
odr_prepend(c->odr_out, "ZOOM-C", ireq->implementationName));
- version = odr_strdup(c->odr_out, "$Revision: 1.10 $");
+ version = odr_strdup(c->odr_out, "$Revision: 1.11 $");
if (strlen(version) > 10) /* check for unexpanded CVS strings */
version[strlen(version)-2] = '\0';
ireq->implementationVersion = odr_prepend(c->odr_out,
xfree (rec);
}
+/*
+ * marc_iconv_return: decode the MARC record in buf (sz bytes) into
+ * rec->wrbuf_marc using output format marc_type, with character set
+ * conversion done by the MARC decoder.
+ *
+ * record_charset is either "from" or "from,to"; "to" defaults to UTF-8.
+ * No conversion handle is opened when record_charset is NULL or empty,
+ * or when its "from" part is empty.
+ *
+ * Returns the buffer of rec->wrbuf_marc when yaz_marc_decode_wrbuf
+ * returns a positive value, and stores its length in *len if len is
+ * non-NULL. Returns 0 otherwise.
+ */
+static const char *marc_iconv_return(ZOOM_record rec, int marc_type,
+                                     int *len,
+                                     const char *buf, int sz,
+                                     const char *record_charset)
+{
+    char to[40];
+    char from[40];
+    const char *result = 0;
+    yaz_iconv_t cd = 0;
+    yaz_marc_t mt = yaz_marc_create();
+
+    *from = '\0';
+    strcpy(to, "UTF-8");
+    if (record_charset && *record_charset)
+    {
+        /* Use "from,to" or just "from" */
+        const char *cp = strchr(record_charset, ',');
+        size_t clen = strlen(record_charset);
+        if (cp)
+        {
+            /* "from" always ends at the comma, also for a bare "from," */
+            clen = (size_t) (cp - record_charset);
+            if (cp[1])
+            {
+                strncpy(to, cp+1, sizeof(to)-1);
+                to[sizeof(to)-1] = '\0';
+            }
+        }
+        if (clen > sizeof(from)-1)
+            clen = sizeof(from)-1;
+
+        if (clen)
+            strncpy(from, record_charset, clen);
+        from[clen] = '\0';
+    }
+
+    if (*from && *to)
+    {
+        cd = yaz_iconv_open(to, from);
+        yaz_marc_iconv(mt, cd);
+    }
+
+    yaz_marc_xml(mt, marc_type);
+    if (!rec->wrbuf_marc)
+        rec->wrbuf_marc = wrbuf_alloc();
+    wrbuf_rewind (rec->wrbuf_marc);
+    if (yaz_marc_decode_wrbuf (mt, buf, sz, rec->wrbuf_marc) > 0)
+    {
+        if (len)
+            *len = wrbuf_len(rec->wrbuf_marc);
+        result = wrbuf_buf(rec->wrbuf_marc);
+    }
+    /* single cleanup path for both the success and the failure case */
+    yaz_marc_destroy(mt);
+    if (cd)
+        yaz_iconv_close(cd);
+    return result;
+}
+
static const char *record_iconv_return(ZOOM_record rec, int *len,
const char *buf, int sz,
const char *record_charset)
else if (r->which == Z_External_octet)
{
yaz_marc_t mt;
+ const char *ret_buf;
switch (ent->value)
{
case VAL_SOIF:
case VAL_APPLICATION_XML:
break;
default:
- if (!rec->wrbuf_marc)
- rec->wrbuf_marc = wrbuf_alloc();
-
- mt = yaz_marc_create();
- wrbuf_rewind (rec->wrbuf_marc);
- if (yaz_marc_decode_wrbuf (
- mt, (const char *) r->u.octet_aligned->buf,
- r->u.octet_aligned->len,
- rec->wrbuf_marc) > 0)
- {
- yaz_marc_destroy(mt);
- return record_iconv_return(rec, len,
- wrbuf_buf(rec->wrbuf_marc),
- wrbuf_len(rec->wrbuf_marc),
- charset);
- }
- yaz_marc_destroy(mt);
+ ret_buf = marc_iconv_return(
+ rec, YAZ_MARC_LINE, len,
+ (const char *) r->u.octet_aligned->buf,
+ r->u.octet_aligned->len,
+ charset);
+ if (ret_buf)
+ return ret_buf;
}
return record_iconv_return(rec, len,
(const char *) r->u.octet_aligned->buf,
charset);
else if (r->which == Z_External_octet)
{
+ const char *ret_buf;
yaz_marc_t mt;
int marc_decode_type = YAZ_MARC_MARCXML;
case VAL_APPLICATION_XML:
break;
default:
- if (!rec->wrbuf_marc)
- rec->wrbuf_marc = wrbuf_alloc();
- wrbuf_rewind (rec->wrbuf_marc);
- mt = yaz_marc_create();
-
- yaz_marc_xml(mt, YAZ_MARC_MARCXML);
- if (yaz_marc_decode_wrbuf (
- mt, (const char *) r->u.octet_aligned->buf,
- r->u.octet_aligned->len,
- rec->wrbuf_marc) > 0)
- {
- yaz_marc_destroy(mt);
- return record_iconv_return(rec, len,
- wrbuf_buf(rec->wrbuf_marc),
- wrbuf_len(rec->wrbuf_marc),
- charset);
- }
- yaz_marc_destroy(mt);
+ ret_buf = marc_iconv_return(
+ rec, marc_decode_type, len,
+ (const char *) r->u.octet_aligned->buf,
+ r->u.octet_aligned->len,
+ charset);
+ if (ret_buf)
+ return ret_buf;
}
return record_iconv_return(rec, len,
(const char *) r->u.octet_aligned->buf,
* See the file LICENSE for details.
* Sebastian Hammer, Adam Dickmeiss
*
- * $Id: marcdump.c,v 1.22 2003-02-25 18:35:49 adam Exp $
+ * $Id: marcdump.c,v 1.23 2003-12-11 00:37:23 adam Exp $
*/
#if HAVE_CONFIG_H
static void usage(const char *prog)
{ /* Write the one-line command synopsis for prog to stderr. */
- fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-O] [-X] [-v] file...\n",
+ fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-O] [-X] [-I] [-v] file...\n",
prog);
}
#endif
#endif
- while ((r = options("vc:xOXf:t:", argv, argc, &arg)) != -2)
+ while ((r = options("vc:xOXIf:t:", argv, argc, &arg)) != -2)
{
int count;
no++;
case 'X':
xml = YAZ_MARC_MARCXML;
break;
+ case 'I':
+ xml = YAZ_MARC_ISO2709;
+ break;
case 0:
- inf = fopen (arg, "r");
+ inf = fopen (arg, "rb");
count = 0;
if (!inf)
{
"unsupported\n", from, to);
exit(2);
}
+ yaz_marc_iconv(mt, cd);
}
yaz_marc_xml(mt, xml);
yaz_marc_debug(mt, verbose);
r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
if (r <= 0)
break;
+#if 1
+ fwrite (result, rlen, 1, stdout);
+#else
if (!cd)
fwrite (result, rlen, 1, stdout);
else
fwrite (outbuf, outp - outbuf, 1, stdout);
}
}
-
+#endif
if (cfile)
{
char *p = buf;
count++;
if (cd)
yaz_iconv_close(cd);
+ yaz_marc_destroy(mt);
}
if (cfile)
fprintf (cfile, "};\n");
+ fclose(inf);
break;
case 'v':
verbose++;