to UTF-8/UCS conversion is now only based on codetables.xml.
Thanks to Larry Dixson for reporting this error.
Possible compatibility problems with earlier versions marked with '*'.
+Fixes for MARC-8 in yaz_iconv character set utilities. The MARC-8
+to UTF-8/UCS conversion is now only based on codetables.xml.
+
+yaz_marc_decode_buf sets leader pos 9 to "a" for MARCXML output.
+
--- 2.0.22 2004/08/06
Add support for more "commit changes" in ZOOM (uses Extended Services).
## Copyright (C) 1994-2004, Index Data
## All rights reserved.
-## $Id: Makefile.am,v 1.14 2004-08-07 08:06:57 adam Exp $
+## $Id: Makefile.am,v 1.15 2004-08-07 08:18:19 adam Exp $
if ISTHR
thrlib=libyazthread.la
illdata_DATA=ill9702.asn item-req.asn ill.tcl
EXTRA_DIST=$(tabdata_DATA) $(illdata_DATA) \
- charconv.tcl codetables.xml charconv.sgm
+ charconv.tcl codetables.xml
YAZCOMP = $(top_srcdir)/util/yaz-asncomp
YAZCOMPLINE = $(YAZCOMP) -d z.tcl -i yaz -I../include $(YCFLAGS)
AM_YFLAGS=-p cql_
THREADED_FLAGS = @CFLAGSTHREADS@
-# MARC8 conversion is generated from charconv.sgm + codetables.xml
-marc8.c: charconv.tcl charconv.sgm codetables.xml
+# MARC8 conversion is generated from codetables.xml
+marc8.c: charconv.tcl codetables.xml
cd $(srcdir); ./charconv.tcl -p marc8 codetables.xml -o marc8.c
libyaz_la_SOURCES=version.c options.c log.c marcdisp.c oid.c wrbuf.c \
* Copyright (c) 1997-2004, Index Data
* See the file LICENSE for details.
*
- * $Id: siconv.c,v 1.5 2004-03-16 13:12:43 adam Exp $
+ * $Id: siconv.c,v 1.6 2004-08-07 08:18:19 adam Exp $
*/
/* mini iconv and wrapper for system iconv library (if present) */
#include <yaz/yaz-util.h>
-unsigned long yaz_marc8_conv (unsigned char *inp, size_t inbytesleft,
- size_t *no_read);
+unsigned long yaz_marc8_1_conv (unsigned char *inp, size_t inbytesleft,
+ size_t *no_read, int *combining);
unsigned long yaz_marc8_2_conv (unsigned char *inp, size_t inbytesleft,
- size_t *no_read);
+ size_t *no_read, int *combining);
unsigned long yaz_marc8_3_conv (unsigned char *inp, size_t inbytesleft,
- size_t *no_read);
+ size_t *no_read, int *combining);
unsigned long yaz_marc8_4_conv (unsigned char *inp, size_t inbytesleft,
- size_t *no_read);
+ size_t *no_read, int *combining);
unsigned long yaz_marc8_5_conv (unsigned char *inp, size_t inbytesleft,
- size_t *no_read);
+ size_t *no_read, int *combining);
unsigned long yaz_marc8_6_conv (unsigned char *inp, size_t inbytesleft,
- size_t *no_read);
+ size_t *no_read, int *combining);
unsigned long yaz_marc8_7_conv (unsigned char *inp, size_t inbytesleft,
- size_t *no_read);
+ size_t *no_read, int *combining);
unsigned long yaz_marc8_8_conv (unsigned char *inp, size_t inbytesleft,
- size_t *no_read);
+ size_t *no_read, int *combining);
unsigned long yaz_marc8_9_conv (unsigned char *inp, size_t inbytesleft,
- size_t *no_read);
+ size_t *no_read, int *combining);
struct yaz_iconv_struct {
int my_errno;
size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
char **outbuf, size_t *outbytesleft);
int marc8_esc_mode;
+ int marc8_comb_x;
+ int marc8_comb_no_read;
#if HAVE_ICONV_H
iconv_t iconv_cd;
#endif
static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
size_t inbytesleft, size_t *no_read)
{
+ if (cd->marc8_comb_x)
+ {
+ unsigned long x = cd->marc8_comb_x;
+ *no_read = cd->marc8_comb_no_read;
+ cd->marc8_comb_x = 0;
+ return x;
+ }
*no_read = 0;
while(inbytesleft >= 1 && inp[0] == 27)
{
else
{
unsigned long x;
+ int comb = 0;
size_t no_read_sub = 0;
switch(cd->marc8_esc_mode)
case 'B': /* Basic ASCII */
case 'E': /* ANSEL */
case 's': /* ASCII */
- x = yaz_marc8_conv(inp, inbytesleft, &no_read_sub);
+ x = yaz_marc8_1_conv(inp, inbytesleft, &no_read_sub, &comb);
break;
case 'g': /* Greek */
- x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub);
+ x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub, &comb);
break;
case 'b': /* Subscripts */
- x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub);
+ x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub, &comb);
break;
case 'p': /* Superscripts */
- x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub);
+ x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub, &comb);
break;
case '2': /* Basic Hebrew */
- x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub);
+ x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub, &comb);
break;
case 'N': /* Basic Cyrillic */
case 'Q': /* Extended Cyrillic */
- x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub);
+ x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub, &comb);
break;
case '3': /* Basic Arabic */
case '4': /* Extended Arabic */
- x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub);
+ x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub, &comb);
break;
case 'S': /* Greek */
- x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub);
+ x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub, &comb);
break;
case '1': /* Chinese, Japanese, Korean (EACC) */
- x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub);
+ x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub, &comb);
break;
default:
*no_read = 0;
cd->my_errno = YAZ_ICONV_EILSEQ;
return 0;
}
+#if 0
+ printf ("esc mode=%c x=%04lX comb=%d\n", cd->marc8_esc_mode, x, comb);
+#endif
*no_read += no_read_sub;
+
+ if (comb && cd->marc8_comb_x == 0)
+ {
+ size_t tmp_read = 0;
+ unsigned long next_x;
+
+ /* read next char .. */
+ next_x = yaz_read_marc8(cd, inp + *no_read,
+ inbytesleft - *no_read, &tmp_read);
+ /* save this x for later .. */
+ cd->marc8_comb_x = x;
+ /* save next read for later .. */
+ cd->marc8_comb_no_read = tmp_read;
+ /* return next x - thereby swap */
+ x = next_x;
+ }
return x;
}
}
cd->init_handle = 0;
cd->my_errno = YAZ_ICONV_UNKNOWN;
cd->marc8_esc_mode = 'B';
+ cd->marc8_comb_x = 0;
/* a useful hack: if fromcode has leading @,
the library not use YAZ's own conversions .. */
* Copyright (c) 2002-2004, Index Data
* See the file LICENSE for details.
*
- * $Id: tsticonv.c,v 1.2 2004-03-15 21:39:06 adam Exp $
+ * $Id: tsticonv.c,v 1.3 2004-08-07 08:18:19 adam Exp $
*/
#if HAVE_CONFIG_H
"ax" ,
"\330",
"eneb\346r",
- "\xfc",
- "\xfb",
- "\xfbr",
0 };
/* same test strings in MARC-8 format */
"ax",
"\xa2", /* latin capital letter o with stroke */
"eneb\xb5r", /* latin small letter ae */
- "\xe8\x75", /* latin small letter u with umlaut */
- "\xe3\x75", /* latin small letter u with circumflex */
- "\xe3\x75r", /* latin small letter u with circumflex */
0
};
static void marc8_tst_b()
{
static const char *marc8_b[] = {
+ /* 0 */
"\033$1" "\x21\x2B\x3B" /* FF1F */ "\033(B" "o",
+ /* 1 */
"\033$1" "\x6F\x77\x29" /* AE0E */ "\x6F\x52\x7C" /* c0F4 */ "\033(B",
+ /* 2 */
"\033$1"
- "\x21\x50\x6E" /* 7CFB */
- "\x21\x51\x31" /* 7D71 */
- "\x21\x3A\x67" /* 5B89 */
- "\x21\x33\x22" /* 5168 */
- "\x21\x33\x53" /* 5206 */
- "\x21\x44\x2B" /* 6790 */
+ "\x21\x50\x6E" /* UCS 7CFB */
+ "\x21\x51\x31" /* UCS 7D71 */
+ "\x21\x3A\x67" /* UCS 5B89 */
+ "\x21\x33\x22" /* UCS 5168 */
+ "\x21\x33\x53" /* UCS 5206 */
+ "\x21\x44\x2B" /* UCS 6790 */
"\033(B",
+ /* 3 */
+ "\xB0\xB2", /* AYN and oSLASH */
+ /* 4 */
+ "\xF6\x61", /* a underscore */
+ /* 5 */
+ "\x61\xC2", /* a, phonorecord mark */
0
};
static const char *ucs4_b[] = {
"\x00\x00\x51\x68"
"\x00\x00\x52\x06"
"\x00\x00\x67\x90",
+ "\x00\x00\x02\xBB" "\x00\x00\x00\xF8",
+ "\x00\x00\x00\x61" "\x00\x00\x03\x32",
+ "\x00\x00\x00\x61" "\x00\x00\x21\x17",
0
};
int i;
{
size_t r;
size_t len;
- size_t expect_len = (i == 2 ? 24 : 8);
+ size_t expect_len = i == 2 ? 24 : 8;
char *inbuf= (char*) marc8_b[i];
size_t inbytesleft = strlen(inbuf);
char outbuf0[24];
int main (int argc, char **argv)
{
+ yaz_log_init_file("tsticonv.log");
dconvert(1, "UTF-8");
dconvert(1, "ISO-8859-1");
dconvert(1, "UCS4");
## Copyright (C) 1994-2004, Index Data
## All rights reserved.
-## $Id: Makefile.am,v 1.27 2004-05-01 23:32:20 adam Exp $
-
-TESTS = $(check_PROGRAMS)
+## $Id: Makefile.am,v 1.28 2004-08-07 08:18:20 adam Exp $
bin_SCRIPTS = yaz-asncomp yaz-config
DISTCLEANFILES = yaz-config
-AM_CPPFLAGS=-I$(top_srcdir)/include
+AM_CPPFLAGS=-I$(top_srcdir)/include $(XML2_CFLAGS)
bin_PROGRAMS = yaz-marcdump yaz-iconv
noinst_PROGRAMS = cclsh cql2pqf cql2xcql srwtst yaz-benchmark
* Copyright (c) 1995-2004, Index Data
* See the file LICENSE for details.
*
- * $Id: marcdump.c,v 1.24 2004-08-04 09:30:30 adam Exp $
+ * $Id: marcdump.c,v 1.25 2004-08-07 08:18:20 adam Exp $
*/
#if HAVE_CONFIG_H
#include <config.h>
#endif
+#if HAVE_XML2
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+
+#endif
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
+#include <assert.h>
#if HAVE_LOCALE_H
#include <locale.h>
prog);
}
+#if HAVE_XML2
+/* Map a possibly-NULL libxml2 string to something safe for "%s".
+ * Passing NULL to "%s" is undefined behaviour, and fields such as the
+ * prefix of a default namespace declaration are NULL. */
+static const char *xpath_str(const xmlChar *s)
+{
+    return s ? (const char *) s : "";
+}
+
+/*
+ * Write a human-readable summary of an XPath node set to output.
+ *
+ * nodes:  node set to describe; NULL is treated as an empty set.
+ * output: destination stream; must not be NULL (asserted).
+ *
+ * A header line with the node count is printed first, followed by one
+ * line per node. Namespace declarations, element nodes and all other
+ * node types each use their own line format.
+ */
+void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output) {
+    xmlNodePtr cur;
+    int size;
+    int i;
+
+    assert(output);
+    size = (nodes) ? nodes->nodeNr : 0;
+
+    fprintf(output, "Result (%d nodes):\n", size);
+    for(i = 0; i < size; ++i) {
+        assert(nodes->nodeTab[i]);
+
+        if(nodes->nodeTab[i]->type == XML_NAMESPACE_DECL)
+        {
+            xmlNsPtr ns;
+
+            ns = (xmlNsPtr)nodes->nodeTab[i];
+            /* NOTE(review): this relies on the XPath node set storing the
+             * owning element in ns->next for namespace nodes - confirm
+             * against the libxml2 XPath documentation. */
+            cur = (xmlNodePtr)ns->next;
+            if(cur && cur->ns) {
+                fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n",
+                        xpath_str(ns->prefix), xpath_str(ns->href),
+                        xpath_str(cur->ns->href), xpath_str(cur->name));
+            } else if(cur) {
+                fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n",
+                        xpath_str(ns->prefix), xpath_str(ns->href),
+                        xpath_str(cur->name));
+            } else {
+                /* No owning node recorded: describe the namespace alone
+                 * rather than dereferencing a NULL pointer. */
+                fprintf(output, "= namespace \"%s\"=\"%s\"\n",
+                        xpath_str(ns->prefix), xpath_str(ns->href));
+            }
+        }
+        else if(nodes->nodeTab[i]->type == XML_ELEMENT_NODE)
+        {
+            cur = nodes->nodeTab[i];
+            if(cur->ns) {
+                fprintf(output, "= element node \"%s:%s\"\n",
+                        xpath_str(cur->ns->href), xpath_str(cur->name));
+            }
+            else
+            {
+                fprintf(output, "= element node \"%s\"\n",
+                        xpath_str(cur->name));
+            }
+        }
+        else
+        {
+            cur = nodes->nodeTab[i];
+            fprintf(output, "= node \"%s\": type %d\n",
+                    xpath_str(cur->name), cur->type);
+        }
+    }
+}
+#endif
+
int main (int argc, char **argv)
{
int r;
+ int libxml_dom_test = 0;
+ int print_offset = 0;
char *arg;
int verbose = 0;
FILE *inf;
int xml = 0;
FILE *cfile = 0;
char *from = 0, *to = 0;
-
+ int num = 1;
#if HAVE_LOCALE_H
setlocale(LC_CTYPE, "");
#endif
#endif
- while ((r = options("vc:xOXIf:t:", argv, argc, &arg)) != -2)
+ while ((r = options("pvc:xOXIf:t:2", argv, argc, &arg)) != -2)
{
int count;
no++;
case 'I':
xml = YAZ_MARC_ISO2709;
break;
+ case 'p':
+ print_offset = 1;
+ break;
+ case '2':
+ libxml_dom_test = 1;
+ break;
case 0:
inf = fopen (arg, "rb");
count = 0;
r = fread (buf, 1, 5, inf);
if (r < 5)
+ {
+ if (r && print_offset)
+ printf ("Extra %d bytes", r);
break;
+ }
+ if (print_offset)
+ {
+ long off = ftell(inf);
+ printf ("Record %d offset %ld\n", num, (long) off);
+ }
len = atoi_n(buf, 5);
if (len < 25 || len > 100000)
break;
if (r <= 0)
break;
fwrite (result, rlen, 1, stdout);
+#if HAVE_XML2
+            /* Option -2: parse the output that was just written with
+             * libxml2 and run a fixed list of XPath queries against it,
+             * printing the matching nodes to stdout. */
+            if (libxml_dom_test)
+            {
+                xmlDocPtr doc = xmlParseMemory(result, rlen);
+                if (!doc)
+                    fprintf(stderr, "xmlParseMemory failed\n");
+                else
+                {
+                    int i;
+                    xmlXPathContextPtr xpathCtx;
+                    xmlXPathObjectPtr xpathObj;
+                    /* NULL-terminated list of queries to evaluate */
+                    static const char *xpathExpr[] = {
+                        "/record/datafield[@tag='245']/subfield[@code='a']",
+                        "/record/datafield[@tag='100']/subfield",
+                        "/record/datafield[@tag='245']/subfield[@code='a']",
+                        "/record/datafield[@tag='650']/subfield",
+                        "/record/datafield[@tag='650']",
+                        0};
+
+                    xpathCtx = xmlXPathNewContext(doc);
+                    if (!xpathCtx)
+                        fprintf(stderr,
+                                "Error: unable to create new XPath context\n");
+
+                    /* skip all queries if no context could be created */
+                    for (i = 0; xpathCtx && xpathExpr[i]; i++) {
+                        xpathObj = xmlXPathEvalExpression(
+                            (const xmlChar *) xpathExpr[i], xpathCtx);
+                        if(xpathObj == NULL) {
+                            fprintf(stderr,"Error: unable to evaluate xpath expression \"%s\"\n", xpathExpr[i]);
+                        }
+                        else
+                        {
+                            print_xpath_nodes(xpathObj->nodesetval, stdout);
+                            xmlXPathFreeObject(xpathObj);
+                        }
+                    }
+                    if (xpathCtx)
+                        xmlXPathFreeContext(xpathCtx);
+                    xmlFreeDoc(doc);
+                }
+            }
+#endif
if (cfile)
{
char *p = buf;
}
fprintf (cfile, "\"\n");
}
+ num++;
}
count++;
if (cd)
# Copyright (C) 1994-2004, Index Data
# All rights reserved.
-# $Id: makefile,v 1.78 2004-05-10 11:56:33 adam Exp $
+# $Id: makefile,v 1.79 2004-08-07 08:18:20 adam Exp $
#
# Programmed by
# HL: Heikki Levanto, Index Data
$(TCL) $(TCLOPT) -d ill.tcl item-req.asn
@cd $(WINDIR)
-$(SRCDIR)\marc8.c: $(SRCDIR)\charconv.sgm $(SRCDIR)\codetables.xml $(SRCDIR)\charconv.tcl
+$(SRCDIR)\marc8.c: $(SRCDIR)\codetables.xml $(SRCDIR)\charconv.tcl
@cd $(SRCDIR)
- $(TCL) charconv.tcl -O 1 -p marc8 charconv.sgm codetables.xml -o marc8.c
+ $(TCL) charconv.tcl -p marc8 codetables.xml -o marc8.c
!endif
-; $Id: yaz.nsi,v 1.49 2004-08-06 08:31:03 adam Exp $
+; $Id: yaz.nsi,v 1.50 2004-08-07 08:18:20 adam Exp $
!define VERSION "2.0.22"
File ..\src\*.y
File ..\src\*.tcl
File ..\src\*.asn
- File ..\src\charconv.sgm
File ..\src\codetables.xml
SetOutPath $INSTDIR\zoom
File ..\zoom\*.c