From 6d01d7cb60123e1e00db766198edc3ba8488d0ba Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 22 May 2003 22:44:50 +0000 Subject: [PATCH] Fix MARC8 conversion --- CHANGELOG | 2 ++ util/Makefile.am | 8 ++++---- util/charconv.tcl | 6 +++--- util/tsticonv.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 63 insertions(+), 12 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index d7331c2..dcf97c0 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,7 @@ Possible compatibility problems with earlier versions marked with '*'. +Bug fix: some MARC8 sequences were not converted. + New ZOOM option "step" which specifies number of records to be retrieved in one chunk. Used in conjunction with "start" and "count". diff --git a/util/Makefile.am b/util/Makefile.am index 8fb85d8..3d72fc8 100644 --- a/util/Makefile.am +++ b/util/Makefile.am @@ -1,10 +1,12 @@ ## Copyright (C) 1994-2003, Index Data ## All rights reserved. -## $Id: Makefile.am,v 1.18 2003-05-06 10:07:33 adam Exp $ +## $Id: Makefile.am,v 1.19 2003-05-22 22:44:50 adam Exp $ noinst_LTLIBRARIES = libutil.la -TESTS = tsticonv tstnmem tstmatchstr tstwrbuf +check_PROGRAMS = tsticonv tstnmem tstmatchstr tstwrbuf + +TESTS = $(check_PROGRAMS) bin_SCRIPTS = yaz-comp @@ -14,8 +16,6 @@ AM_CPPFLAGS=-I$(top_srcdir)/include noinst_PROGRAMS = marcdump yaziconv -EXTRA_PROGRAMS = tsticonv tstnmem tstmatchstr tstwrbuf - # MARC dumper utility marcdump_LDADD = libutil.la marcdump_SOURCES = marcdump.c diff --git a/util/charconv.tcl b/util/charconv.tcl index befad0f..ae4e15e 100755 --- a/util/charconv.tcl +++ b/util/charconv.tcl @@ -2,7 +2,7 @@ # the next line restats using tclsh \ exec tclsh "$0" "$@" # -# $Id: charconv.tcl,v 1.1 2002-12-16 13:13:53 adam Exp $ +# $Id: charconv.tcl,v 1.2 2003-05-22 22:44:50 adam Exp $ proc usage {} { puts {charconv.tcl: [-p prefix] [-s split] input output} @@ -103,7 +103,7 @@ proc dump_trie {ofile} { foreach m $trie($this,content) { puts -nonewline $f " \{\"" foreach d [lindex $m 0] { - puts -nonewline $f "\\0x$d" + puts -nonewline $f "\\x$d" } puts -nonewline $f "\", 0x[lindex $m 1]" puts $f "\}," @@ -175,7 +175,7 @@ proc dump_trie {ofile} { while (flat->from) { size_t len = strlen(flat->from); - if (len >= inbytesleft) + if (len <= inbytesleft) { if (memcmp(flat->from, inp, len) == 0) { diff --git a/util/tsticonv.c b/util/tsticonv.c index 8f4653f..f5b71dc 100644 --- a/util/tsticonv.c +++ b/util/tsticonv.c @@ -2,7 +2,7 @@ * Copyright (c) 2002-2003, Index Data * See the file LICENSE for details. * - * $Id: tsticonv.c,v 1.2 2003-05-06 10:07:33 adam Exp $ + * $Id: tsticonv.c,v 1.3 2003-05-22 22:44:50 adam Exp $ */ #if HAVE_CONFIG_H @@ -17,10 +17,58 @@ /* some test strings in ISO-8859-1 format */ const char *buf[] = { - "ax" , - "\330", - "eneb\346r", - 0 }; + "ax" , + "\330", + "eneb\346r", + "\xfc", + "\xfb", + "\xfbr", + 0 }; + +/* some test strings in MARC-8 format */ +const char *marc8_strings[] = { + "ax", + "\xa2", /* latin capital letter o with stroke */ + "eneb\xb5r", /* latin small letter ae */ + "\xe8\x75", /* latin small letter u with umlaut */ + "\xe3\x75", /* latin small letter u with circumflex */ + "\xe3\x75r", /* latin small letter u with circumflex */ + 0 +}; + +static marc8_tst() +{ + int i; + yaz_iconv_t cd; + + cd = yaz_iconv_open("ISO-8859-1", "MARC8"); + for (i = 0; buf[i]; i++) + { + size_t r; + char *inbuf= (char*) marc8_strings[i]; + size_t inbytesleft = strlen(inbuf); + char outbuf0[24]; + char *outbuf = outbuf0; + size_t outbytesleft = sizeof(outbuf0); + + r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + if (r == (size_t) (-1)) + { + int e = yaz_iconv_error(cd); + + printf ("tsticonv 6 i=%d e=%d\n", i, e); + exit(6); + } + if ((outbuf - outbuf0) != strlen(buf[i]) + || memcmp(outbuf0, buf[i], strlen(buf[i]))) + { + printf ("tsticonv 7 i=%d\n", i); + printf ("buf=%s out=%s\n", buf[i], outbuf0); + exit(7); + } + } + yaz_iconv_close(cd); +} static dconvert(int mandatory, const char *tmpcode) { @@ -91,5 +139,6 @@ int main (int argc, char **argv) dconvert(1, "ISO-8859-1"); dconvert(1, "UCS4"); dconvert(0, "CP865"); + marc8_tst(); exit (0); } -- 1.7.10.4