mime.c mime.h oid_util.c tokenizer.c \
record_conv.c retrieval.c elementset.c snprintf.c query-charset.c \
copy_types.c match_glob.c poll.c daemon.c \
- iconv_encode_marc8.c iconv_encode_iso_8859_1.c iconv_encode_wchar.c
+ iconv_encode_marc8.c iconv_encode_iso_8859_1.c iconv_encode_wchar.c \
+ iconv_decode_marc8.c
libyaz_la_LDFLAGS=-version-info $(YAZ_VERSION_INFO)
#include "iconv-p.h"
-unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read)
+static unsigned long read_advancegreek(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+ unsigned char *inp,
+ size_t inbytesleft, size_t *no_read)
{
unsigned long x = 0;
int shift = 0;
return 0;
}
+/** \brief Decoder factory for the "advancegreek" character set
+    \param name character set name to test
+    \param d decoder structure to fill in
+    \returns d when yaz_matchstr accepts name (returns 0); 0 otherwise
+
+    Only read_handle is set; d is left untouched when the name is rejected.
+*/
+yaz_iconv_decoder_t yaz_advancegreek_decoder(const char *name,
+                                             yaz_iconv_decoder_t d)
+{
+    if (yaz_matchstr(name, "advancegreek") != 0)
+        return 0;
+    d->read_handle = read_advancegreek;
+    return d;
+}
+
/*
* Local variables:
* c-basic-offset: 4
*/
/**
* \file
- * \brief Internal header for conv
+ * \brief Internal header for iconv
*/
#ifndef ICONV_P_H
void yaz_iconv_set_errno(yaz_iconv_t cd, int no);
-unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read);
-
-size_t yaz_init_UTF8(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read);
-unsigned long yaz_read_UTF8(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read);
-
-
-unsigned long yaz_read_UCS4(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read);
-unsigned long yaz_read_UCS4LE(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read);
-unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read);
-
typedef struct yaz_iconv_encoder_s *yaz_iconv_encoder_t;
struct yaz_iconv_encoder_s {
void *data;
int yaz_iso_8859_1_lookup_x12(unsigned long x1, unsigned long x2,
unsigned long *y);
+/** \brief Decoder (input side) of a conversion: private state plus the
+ callbacks that the factories below install */
+typedef struct yaz_iconv_decoder_s *yaz_iconv_decoder_t;
+struct yaz_iconv_decoder_s {
+ /* decoder-private state; yaz_iconv_open sets it to 0 and the MARC-8
+ decoder stores its struct decoder_data here */
+ void *data;
+ /* optional; invoked while the conversion's init_flag is set. inbuf is 0
+ and inbytesleft is 0 when the caller supplied no input buffer.
+ A non-zero return value is handed back to the caller; otherwise
+ *no_read input bytes are skipped. */
+ size_t (*init_handle)(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+ unsigned char *inbuf,
+ size_t inbytesleft, size_t *no_read);
+ /* returns one decoded value and stores the number of input bytes it
+ consumed in *no_read; the caller treats *no_read == 0 as failure */
+ unsigned long (*read_handle)(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+ unsigned char *inbuf,
+ size_t inbytesleft, size_t *no_read);
+ /* optional; invoked from yaz_iconv_close before the conversion handle
+ itself is freed */
+ void (*destroy_handle)(yaz_iconv_decoder_t d);
+};
+
+/* Decoder factories. Each one tests the given character set name with
+ yaz_matchstr; when the name is accepted it installs its handlers in d
+ and returns d, otherwise it returns 0. */
+yaz_iconv_decoder_t yaz_marc8_decoder(const char *fromcode,
+ yaz_iconv_decoder_t d);
+yaz_iconv_decoder_t yaz_utf8_decoder(const char *fromcode,
+ yaz_iconv_decoder_t d);
+yaz_iconv_decoder_t yaz_ucs4_decoder(const char *tocode,
+ yaz_iconv_decoder_t d);
+yaz_iconv_decoder_t yaz_iso_8859_1_decoder(const char *fromcode,
+ yaz_iconv_decoder_t d);
+yaz_iconv_decoder_t yaz_iso_5428_decoder(const char *name,
+ yaz_iconv_decoder_t d);
+yaz_iconv_decoder_t yaz_advancegreek_decoder(const char *name,
+ yaz_iconv_decoder_t d);
+yaz_iconv_decoder_t yaz_wchar_decoder(const char *fromcode,
+ yaz_iconv_decoder_t d);
+
#endif
/*
* Local variables:
--- /dev/null
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2008 Index Data
+ * See the file LICENSE for details.
+ */
+/**
+ * \file
+ * \brief MARC-8 decoding
+ *
+ * MARC-8 reference:
+ * http://www.loc.gov/marc/specifications/speccharmarc8.html
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+
+#include <yaz/xmalloc.h>
+#include "iconv-p.h"
+
+/* State of one MARC-8 decoder; allocated by yaz_marc8_decoder and reset
+ by init_marc8 */
+struct decoder_data {
+ /* final character of the most recent escape sequence for G0 and G1;
+ g0_mode selects the conversion table for bytes < 128, g1_mode for
+ all other bytes */
+ int g0_mode;
+ int g1_mode;
+
+ /* values read ahead by read_marc8 (those reported with the comb flag
+ set) that have not been returned to the caller yet */
+ int comb_offset; /* index of the next buffered entry to return */
+ int comb_size; /* number of buffered entries */
+ unsigned long comb_x[8]; /* buffered values */
+ size_t comb_no_read[8]; /* input bytes consumed by each buffered value */
+};
+
+/* Table conversion functions, one per MARC-8 character set. The two hex
+ digits in each name are the ASCII code of the escape sequence final
+ character that selects the function in yaz_read_marc8_comb
+ (e.g. 42 = 'B', 45 = 'E'). */
+yaz_conv_func_t yaz_marc8_42_conv;
+yaz_conv_func_t yaz_marc8_45_conv;
+yaz_conv_func_t yaz_marc8_67_conv;
+yaz_conv_func_t yaz_marc8_62_conv;
+yaz_conv_func_t yaz_marc8_70_conv;
+yaz_conv_func_t yaz_marc8_32_conv;
+yaz_conv_func_t yaz_marc8_4E_conv;
+yaz_conv_func_t yaz_marc8_51_conv;
+yaz_conv_func_t yaz_marc8_33_conv;
+yaz_conv_func_t yaz_marc8_34_conv;
+yaz_conv_func_t yaz_marc8_53_conv;
+yaz_conv_func_t yaz_marc8_31_conv;
+
+
+static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
+ struct decoder_data *data,
+ unsigned char *inp,
+ size_t inbytesleft, size_t *no_read,
+ int *comb);
+
+/** \brief read handler for MARC8
+ \param cd conversion handle (used for error reporting)
+ \param d decoder whose data member points to struct decoder_data
+ \param inp input bytes
+ \param inbytesleft number of bytes available at inp
+ \param no_read receives the number of input bytes consumed
+ \returns one value per call; 0 on error or when nothing could be read
+
+ Values buffered by an earlier call are handed out first, one per call.
+ Otherwise values are read with yaz_read_marc8_comb: those reported with
+ the comb flag set are buffered (at most 8) and the first value without
+ the flag is returned right away.
+*/
+static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+ unsigned char *inp,
+ size_t inbytesleft, size_t *no_read)
+{
+ struct decoder_data *data = d->data;
+ unsigned long x;
+ if (data->comb_offset < data->comb_size)
+ {
+ *no_read = data->comb_no_read[data->comb_offset];
+ x = data->comb_x[data->comb_offset];
+
+ /* special case for double-diacritic combining characters,
+ INVERTED BREVE and DOUBLE TILDE.
+ We'll increment the no_read counter by 1, since we want to skip over
+ the processing of the closing ligature character
+ */
+ /* this code is no longer necessary.. our handlers code in
+ yaz_marc8_?_conv (generated by charconv.tcl) now returns
+ 0 and no_read=1 when a sequence does not match the input.
+ The SECOND HALFs in codetables.xml produces a non-existent
+ entry in the conversion trie.. Hence when met, the input byte is
+ skipped as it should (in yaz_iconv)
+ */
+#if 0
+ if (x == 0x0361 || x == 0x0360)
+ *no_read += 1;
+#endif
+ data->comb_offset++;
+ return x;
+ }
+
+ /* buffer exhausted: start collecting a new run */
+ data->comb_offset = 0;
+ for (data->comb_size = 0; data->comb_size < 8; data->comb_size++)
+ {
+ int comb = 0;
+
+ /* input ended after at least one buffered value: report
+ incomplete input instead of returning a partial run */
+ if (inbytesleft == 0 && data->comb_size)
+ {
+ yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
+ x = 0;
+ *no_read = 0;
+ break;
+ }
+ x = yaz_read_marc8_comb(cd, data, inp, inbytesleft, no_read, &comb);
+ /* stop at the first value without the comb flag, or when the
+ read produced nothing (x == 0) */
+ if (!comb || !x)
+ break;
+ data->comb_x[data->comb_size] = x;
+ data->comb_no_read[data->comb_size] = *no_read;
+ inp += *no_read;
+ inbytesleft = inbytesleft - *no_read;
+ }
+ return x;
+}
+
+/** \brief read handler for MARC8s
+    \param cd conversion handle
+    \param d decoder whose data member points to struct decoder_data
+    \param inp input bytes
+    \param inbytesleft number of bytes available at inp
+    \param no_read receives the number of input bytes consumed
+    \returns value from read_marc8, possibly replaced by a merged value
+
+    Reads through read_marc8. When that call left exactly one buffered
+    value, yaz_iso_8859_1_lookup_x12 is asked to merge the returned value
+    with the buffered one; on success the buffered entry is dropped and
+    its byte count is added to *no_read.
+*/
+static unsigned long read_marc8s(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+                                 unsigned char *inp,
+                                 size_t inbytesleft, size_t *no_read)
+{
+    struct decoder_data *state = d->data;
+    unsigned long ch = read_marc8(cd, d, inp, inbytesleft, no_read);
+
+    if (ch == 0 || state->comb_size != 1)
+        return ch;
+    if (yaz_iso_8859_1_lookup_x12(ch, state->comb_x[0], &ch))
+    {
+        state->comb_size = 0;
+        *no_read += state->comb_no_read[0];
+    }
+    return ch;
+}
+
+/** \brief reads one MARC-8 value, consuming leading escape sequences
+ \param cd conversion handle (used for error reporting)
+ \param data decoder state; g0_mode / g1_mode are updated by escapes
+ \param inp input bytes
+ \param inbytesleft number of bytes available at inp
+ \param no_read receives bytes consumed (escape sequences plus the
+ character itself); 0 on error
+ \param comb passed to the table conversion function, which reports
+ through it whether the value is a combining one; not written when
+ a space is returned or when the input ends after the escapes
+ \returns the converted value, or 0
+
+ Errors: YAZ_ICONV_EINVAL for an escape sequence cut short by the end
+ of input, YAZ_ICONV_EILSEQ when the active mode has no table. When the
+ input holds nothing but complete escape sequences, 0 is returned
+ without an error and *no_read counts the escape bytes.
+*/
+static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
+ struct decoder_data *data,
+ unsigned char *inp,
+ size_t inbytesleft, size_t *no_read,
+ int *comb)
+{
+ *no_read = 0;
+ /* 27 is ESC: each iteration consumes one escape sequence */
+ while (inbytesleft > 0 && *inp == 27)
+ {
+ int *modep = &data->g0_mode;
+ size_t inbytesleft0 = inbytesleft;
+
+ inbytesleft--;
+ inp++;
+ if (inbytesleft == 0)
+ goto incomplete;
+ if (*inp == '$') /* set with multiple bytes */
+ {
+ inbytesleft--;
+ inp++;
+ }
+ if (inbytesleft == 0)
+ goto incomplete;
+ if (*inp == '(' || *inp == ',') /* G0 */
+ {
+ inbytesleft--;
+ inp++;
+ }
+ else if (*inp == ')' || *inp == '-') /* G1 */
+ {
+ inbytesleft--;
+ inp++;
+ modep = &data->g1_mode;
+ }
+ if (inbytesleft == 0)
+ goto incomplete;
+ if (*inp == '!') /* ANSEL is a special case */
+ {
+ inbytesleft--;
+ inp++;
+ }
+ if (inbytesleft == 0)
+ goto incomplete;
+ *modep = *inp++; /* Final character */
+ inbytesleft--;
+
+ /* account for every byte of this escape sequence */
+ (*no_read) += inbytesleft0 - inbytesleft;
+ }
+ if (inbytesleft == 0)
+ return 0;
+ else if (*inp == ' ')
+ {
+ /* space is returned as is, without a table lookup */
+ *no_read += 1;
+ return ' ';
+ }
+ else
+ {
+ unsigned long x;
+ size_t no_read_sub = 0;
+ /* bytes below 128 use the G0 set, all others the G1 set */
+ int mode = *inp < 128 ? data->g0_mode : data->g1_mode;
+ *comb = 0;
+
+ switch(mode)
+ {
+ case 'B': /* Basic ASCII */
+ case 's': /* ASCII */
+ x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+ break;
+ case 'E': /* ANSEL */
+ x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128);
+ break;
+ case 'g': /* Greek */
+ x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+ break;
+ case 'b': /* Subscripts */
+ x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+ break;
+ case 'p': /* Superscripts */
+ x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+ break;
+ case '2': /* Basic Hebrew */
+ x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+ break;
+ case 'N': /* Basic Cyrillic */
+ x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+ break;
+ case 'Q': /* Extended Cyrillic */
+ x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+ break;
+ case '3': /* Basic Arabic */
+ x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+ break;
+ case '4': /* Extended Arabic */
+ x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+ break;
+ case 'S': /* Greek */
+ x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+ break;
+ case '1': /* Chinese, Japanese, Korean (EACC) */
+ x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+ break;
+ default:
+ /* no table for this mode: discard the escape byte count too */
+ *no_read = 0;
+ yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
+ return 0;
+ }
+ *no_read += no_read_sub;
+ return x;
+ }
+incomplete:
+ /* escape sequence cut short by the end of the input */
+ *no_read = 0;
+ yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
+ return 0;
+}
+
+
+/** \brief init handler for MARC8 / MARC8s
+    \param cd conversion handle (unused)
+    \param d decoder whose data member points to struct decoder_data
+    \param inp input bytes (unused)
+    \param inbytesleft number of input bytes (unused)
+    \param no_read left untouched; no input is consumed
+    \returns 0 always
+
+    Resets the character set modes to 'B' for G0 and 'E' for G1 and
+    empties the buffer of read-ahead values.
+*/
+static size_t init_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+                         unsigned char *inp,
+                         size_t inbytesleft, size_t *no_read)
+{
+    struct decoder_data *state = d->data;
+
+    state->comb_size = 0;
+    state->comb_offset = 0;
+    state->g1_mode = 'E';
+    state->g0_mode = 'B';
+    return 0;
+}
+
+/** \brief destroy handler for MARC8 / MARC8s
+    \param d decoder whose data member was allocated by yaz_marc8_decoder
+
+    Frees the decoder state. The function is static like the other
+    handlers in this file: it is only reached through d->destroy_handle.
+*/
+static void destroy_marc8(yaz_iconv_decoder_t d)
+{
+    struct decoder_data *data = d->data;
+    xfree(data);
+}
+
+/** \brief Decoder factory for MARC8 and MARC8s
+    \param fromcode character set name to test
+    \param d decoder structure to fill in
+    \returns d when fromcode is accepted as "MARC8" or "MARC8s"; 0 otherwise
+
+    On success a struct decoder_data is allocated and stored in d->data.
+    Its contents are not initialised here; init_marc8 does that.
+    destroy_marc8 is installed to release it.
+*/
+yaz_iconv_decoder_t yaz_marc8_decoder(const char *fromcode,
+                                      yaz_iconv_decoder_t d)
+{
+    struct decoder_data *state;
+
+    if (yaz_matchstr(fromcode, "MARC8") == 0)
+        d->read_handle = read_marc8;
+    else if (yaz_matchstr(fromcode, "MARC8s") == 0)
+        d->read_handle = read_marc8s;
+    else
+        return 0;
+
+    state = xmalloc(sizeof(*state));
+    d->data = state;
+    d->init_handle = init_marc8;
+    d->destroy_handle = destroy_marc8;
+    return d;
+}
+
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
#include <string.h>
#include <ctype.h>
-#if HAVE_ICONV_H
-#include <iconv.h>
-#endif
-
#include <yaz/xmalloc.h>
-#include <yaz/nmem.h>
#include "iconv-p.h"
struct encoder_data
return 0;
}
+/** \brief read handler for ISO-8859-1
+    \param cd conversion handle (unused)
+    \param d decoder (unused)
+    \param inp input bytes; the first one is read unconditionally
+    \param inbytesleft number of input bytes (not checked here)
+    \param no_read always set to 1
+    \returns the value of the first input byte
+*/
+static unsigned long read_ISO8859_1(yaz_iconv_t cd,
+                                    yaz_iconv_decoder_t d,
+                                    unsigned char *inp,
+                                    size_t inbytesleft, size_t *no_read)
+{
+    const unsigned long code = *inp;
+
+    *no_read = 1;
+    return code;
+}
+
+/** \brief Decoder factory for ISO-8859-1
+    \param fromcode character set name to test
+    \param d decoder structure to fill in
+    \returns d when yaz_matchstr accepts fromcode as "iso88591"; 0 otherwise
+*/
+yaz_iconv_decoder_t yaz_iso_8859_1_decoder(const char *fromcode,
+                                           yaz_iconv_decoder_t d)
+{
+    if (yaz_matchstr(fromcode, "iso88591") != 0)
+        return 0;
+    d->read_handle = read_ISO8859_1;
+    return d;
+}
+
/*
* Local variables:
#include <ctype.h>
#include <yaz/xmalloc.h>
-#include <yaz/nmem.h>
#include <yaz/snprintf.h>
#include "iconv-p.h"
#endif
#include <yaz/xmalloc.h>
-#include <yaz/nmem.h>
-#include <yaz/snprintf.h>
#include "iconv-p.h"
struct encoder_data
return 0;
}
+#if HAVE_WCHAR_H
+/** \brief read handler for wchar_t input
+    \param cd conversion handle (used for error reporting)
+    \param d decoder (unused)
+    \param inp input bytes
+    \param inbytesleft number of bytes available at inp
+    \param no_read receives sizeof(wchar_t), or 0 on incomplete input
+    \returns the wide character value; 0 with YAZ_ICONV_EINVAL set when
+    fewer than sizeof(wchar_t) bytes are available
+*/
+static unsigned long read_wchar_t(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+                                  unsigned char *inp,
+                                  size_t inbytesleft, size_t *no_read)
+{
+    wchar_t wide;
+
+    if (inbytesleft < sizeof(wide))
+    {
+        yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL); /* incomplete input */
+        *no_read = 0;
+        return 0;
+    }
+    /* copy byte-wise: inp need not be aligned for wchar_t */
+    memcpy(&wide, inp, sizeof(wide));
+    *no_read = sizeof(wide);
+    return wide;
+}
+#endif
+
+/** \brief Decoder factory for wchar_t input
+    \param fromcode character set name to test
+    \param d decoder structure to fill in
+    \returns d when built with HAVE_WCHAR_H and yaz_matchstr accepts
+    fromcode as "wchar_t"; 0 otherwise
+*/
+yaz_iconv_decoder_t yaz_wchar_decoder(const char *fromcode,
+                                      yaz_iconv_decoder_t d)
+{
+    yaz_iconv_decoder_t result = 0;
+#if HAVE_WCHAR_H
+    if (yaz_matchstr(fromcode, "wchar_t") == 0)
+    {
+        d->read_handle = read_wchar_t;
+        result = d;
+    }
+#endif
+    return result;
+}
+
/*
* Local variables:
#include "iconv-p.h"
-unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read)
+static unsigned long read_iso_5428_1984(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+ unsigned char *inp,
+ size_t inbytesleft, size_t *no_read)
{
unsigned long x = 0;
int tonos = 0;
return 0;
}
+/** \brief Decoder factory for ISO 5428:1984
+    \param name character set name to test
+    \param d decoder structure to fill in
+    \returns d when yaz_matchstr accepts name as "iso54281984" or
+    "iso5428:1984"; 0 otherwise
+*/
+yaz_iconv_decoder_t yaz_iso_5428_decoder(const char *name,
+                                         yaz_iconv_decoder_t d)
+{
+    if (yaz_matchstr(name, "iso54281984") != 0
+        && yaz_matchstr(name, "iso5428:1984") != 0)
+        return 0;
+    d->read_handle = read_iso_5428_1984;
+    return d;
+}
+
+
/*
* Local variables:
#include <errno.h>
#include <string.h>
#include <ctype.h>
-#if HAVE_WCHAR_H
-#include <wchar.h>
-#endif
#if HAVE_ICONV_H
#include <iconv.h>
#include <yaz/nmem.h>
#include "iconv-p.h"
-yaz_conv_func_t yaz_marc8_42_conv;
-yaz_conv_func_t yaz_marc8_45_conv;
-yaz_conv_func_t yaz_marc8_67_conv;
-yaz_conv_func_t yaz_marc8_62_conv;
-yaz_conv_func_t yaz_marc8_70_conv;
-yaz_conv_func_t yaz_marc8_32_conv;
-yaz_conv_func_t yaz_marc8_4E_conv;
-yaz_conv_func_t yaz_marc8_51_conv;
-yaz_conv_func_t yaz_marc8_33_conv;
-yaz_conv_func_t yaz_marc8_34_conv;
-yaz_conv_func_t yaz_marc8_53_conv;
-yaz_conv_func_t yaz_marc8_31_conv;
-
struct yaz_iconv_struct {
int my_errno;
int init_flag;
+#if 0
size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
size_t inbytesleft, size_t *no_read);
unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
size_t inbytesleft, size_t *no_read);
- int g0_mode;
- int g1_mode;
-
- int comb_offset;
- int comb_size;
- unsigned long comb_x[8];
- size_t comb_no_read[8];
+#endif
size_t no_read_x;
unsigned long unget_x;
#if HAVE_ICONV_H
iconv_t iconv_cd;
#endif
struct yaz_iconv_encoder_s encoder;
+ struct yaz_iconv_decoder_s decoder;
};
-static unsigned long yaz_read_ISO8859_1(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read)
-{
- unsigned long x = inp[0];
- *no_read = 1;
- return x;
-}
-
-#if HAVE_WCHAR_H
-static unsigned long yaz_read_wchar_t(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read)
-{
- unsigned long x = 0;
-
- if (inbytesleft < sizeof(wchar_t))
- {
- cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
- *no_read = 0;
- }
- else
- {
- wchar_t wch;
- memcpy(&wch, inp, sizeof(wch));
- x = wch;
- *no_read = sizeof(wch);
- }
- return x;
-}
-#endif
-
-
-static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read,
- int *comb);
-
-static unsigned long yaz_read_marc8(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read)
-{
- unsigned long x;
- if (cd->comb_offset < cd->comb_size)
- {
- *no_read = cd->comb_no_read[cd->comb_offset];
- x = cd->comb_x[cd->comb_offset];
-
- /* special case for double-diacritic combining characters,
- INVERTED BREVE and DOUBLE TILDE.
- We'll increment the no_read counter by 1, since we want to skip over
- the processing of the closing ligature character
- */
- /* this code is no longer necessary.. our handlers code in
- yaz_marc8_?_conv (generated by charconv.tcl) now returns
- 0 and no_read=1 when a sequence does not match the input.
- The SECOND HALFs in codetables.xml produces a non-existant
- entry in the conversion trie.. Hence when met, the input byte is
- skipped as it should (in yaz_iconv)
- */
-#if 0
- if (x == 0x0361 || x == 0x0360)
- *no_read += 1;
-#endif
- cd->comb_offset++;
- return x;
- }
-
- cd->comb_offset = 0;
- for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
- {
- int comb = 0;
-
- if (inbytesleft == 0 && cd->comb_size)
- {
- cd->my_errno = YAZ_ICONV_EINVAL;
- x = 0;
- *no_read = 0;
- break;
- }
- x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
- if (!comb || !x)
- break;
- cd->comb_x[cd->comb_size] = x;
- cd->comb_no_read[cd->comb_size] = *no_read;
- inp += *no_read;
- inbytesleft = inbytesleft - *no_read;
- }
- return x;
-}
-
-static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read)
-{
- unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
- if (x && cd->comb_size == 1)
- {
- if (yaz_iso_8859_1_lookup_x12(x, cd->comb_x[0], &x))
- {
- *no_read += cd->comb_no_read[0];
- cd->comb_size = 0;
- }
- }
- return x;
-}
-
-static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read,
- int *comb)
-{
- *no_read = 0;
- while (inbytesleft > 0 && *inp == 27)
- {
- int *modep = &cd->g0_mode;
- size_t inbytesleft0 = inbytesleft;
-
- inbytesleft--;
- inp++;
- if (inbytesleft == 0)
- goto incomplete;
- if (*inp == '$') /* set with multiple bytes */
- {
- inbytesleft--;
- inp++;
- }
- if (inbytesleft == 0)
- goto incomplete;
- if (*inp == '(' || *inp == ',') /* G0 */
- {
- inbytesleft--;
- inp++;
- }
- else if (*inp == ')' || *inp == '-') /* G1 */
- {
- inbytesleft--;
- inp++;
- modep = &cd->g1_mode;
- }
- if (inbytesleft == 0)
- goto incomplete;
- if (*inp == '!') /* ANSEL is a special case */
- {
- inbytesleft--;
- inp++;
- }
- if (inbytesleft == 0)
- goto incomplete;
- *modep = *inp++; /* Final character */
- inbytesleft--;
-
- (*no_read) += inbytesleft0 - inbytesleft;
- }
- if (inbytesleft == 0)
- return 0;
- else if (*inp == ' ')
- {
- *no_read += 1;
- return ' ';
- }
- else
- {
- unsigned long x;
- size_t no_read_sub = 0;
- int mode = *inp < 128 ? cd->g0_mode : cd->g1_mode;
- *comb = 0;
-
- switch(mode)
- {
- case 'B': /* Basic ASCII */
- case 's': /* ASCII */
- x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case 'E': /* ANSEL */
- x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128);
- break;
- case 'g': /* Greek */
- x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case 'b': /* Subscripts */
- x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case 'p': /* Superscripts */
- x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case '2': /* Basic Hebrew */
- x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case 'N': /* Basic Cyrillic */
- x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case 'Q': /* Extended Cyrillic */
- x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case '3': /* Basic Arabic */
- x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case '4': /* Extended Arabic */
- x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case 'S': /* Greek */
- x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case '1': /* Chinese, Japanese, Korean (EACC) */
- x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- default:
- *no_read = 0;
- cd->my_errno = YAZ_ICONV_EILSEQ;
- return 0;
- }
- *no_read += no_read_sub;
- return x;
- }
-incomplete:
- *no_read = 0;
- cd->my_errno = YAZ_ICONV_EINVAL;
- return 0;
-}
-
-
-
int yaz_iconv_isbuiltin(yaz_iconv_t cd)
{
- return cd->read_handle && cd->encoder.write_handle;
+ return cd->decoder.read_handle && cd->encoder.write_handle;
}
return 0;
}
+/** \brief selects the built-in decoder for a source character set
+    \param cd conversion handle whose decoder member is filled in
+    \param fromcode name of the character set to convert from
+    \retval 1 a built-in decoder accepted fromcode
+    \retval 0 no built-in decoder handles fromcode
+
+    The factories are tried in a fixed order and the first one that
+    accepts the name wins.
+*/
+static int prepare_decoders(yaz_iconv_t cd, const char *fromcode)
+{
+    yaz_iconv_decoder_t d = &cd->decoder;
+
+    return yaz_marc8_decoder(fromcode, d)
+        || yaz_utf8_decoder(fromcode, d)
+        || yaz_ucs4_decoder(fromcode, d)
+        || yaz_iso_8859_1_decoder(fromcode, d)
+        || yaz_iso_5428_decoder(fromcode, d)
+        || yaz_advancegreek_decoder(fromcode, d)
+        || yaz_wchar_decoder(fromcode, d);
+}
+
yaz_iconv_t yaz_iconv_open(const char *tocode, const char *fromcode)
{
yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
cd->encoder.init_handle = 0;
cd->encoder.destroy_handle = 0;
- cd->read_handle = 0;
- cd->init_handle = 0;
+ cd->decoder.data = 0;
+ cd->decoder.read_handle = 0;
+ cd->decoder.init_handle = 0;
+ cd->decoder.destroy_handle = 0;
+
cd->my_errno = YAZ_ICONV_UNKNOWN;
/* a useful hack: if fromcode has leading @,
fromcode++;
else
{
- if (!yaz_matchstr(fromcode, "UTF8"))
- {
- cd->read_handle = yaz_read_UTF8;
- cd->init_handle = yaz_init_UTF8;
- }
- else if (!yaz_matchstr(fromcode, "ISO88591"))
- cd->read_handle = yaz_read_ISO8859_1;
- else if (!yaz_matchstr(fromcode, "UCS4"))
- cd->read_handle = yaz_read_UCS4;
- else if (!yaz_matchstr(fromcode, "UCS4LE"))
- cd->read_handle = yaz_read_UCS4LE;
- else if (!yaz_matchstr(fromcode, "MARC8"))
- cd->read_handle = yaz_read_marc8;
- else if (!yaz_matchstr(fromcode, "MARC8s"))
- cd->read_handle = yaz_read_marc8s;
- else if (!yaz_matchstr(fromcode, "advancegreek"))
- cd->read_handle = yaz_read_advancegreek;
- else if (!yaz_matchstr(fromcode, "iso54281984"))
- cd->read_handle = yaz_read_iso5428_1984;
- else if (!yaz_matchstr(fromcode, "iso5428:1984"))
- cd->read_handle = yaz_read_iso5428_1984;
-#if HAVE_WCHAR_H
- else if (!yaz_matchstr(fromcode, "WCHAR_T"))
- cd->read_handle = yaz_read_wchar_t;
-#endif
prepare_encoders(cd, tocode);
+ prepare_decoders(cd, fromcode);
}
- if (cd->read_handle && cd->encoder.write_handle)
+ if (cd->decoder.read_handle && cd->encoder.write_handle)
{
#if HAVE_ICONV_H
- cd->iconv_cd = 0;
+ cd->iconv_cd = (iconv_t) (-1);
#endif
;
}
size_t r = 0;
#if HAVE_ICONV_H
- if (cd->iconv_cd)
+ if (cd->iconv_cd != (iconv_t) (-1))
{
size_t r =
iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
if (cd->init_flag)
{
cd->my_errno = YAZ_ICONV_UNKNOWN;
- cd->g0_mode = 'B';
- cd->g1_mode = 'E';
- cd->comb_offset = cd->comb_size = 0;
-
if (cd->encoder.init_handle)
(*cd->encoder.init_handle)(&cd->encoder);
cd->unget_x = 0;
cd->no_read_x = 0;
- if (cd->init_handle && inbuf && *inbuf)
+ if (cd->decoder.init_handle)
{
size_t no_read = 0;
- size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
- *inbytesleft, &no_read);
+ size_t r = (cd->decoder.init_handle)(
+ cd, &cd->decoder,
+ inbuf ? (unsigned char *) *inbuf : 0,
+ inbytesleft ? *inbytesleft : 0,
+ &no_read);
if (r)
{
if (cd->my_errno == YAZ_ICONV_EINVAL)
cd->init_flag = 0;
return r;
}
- *inbytesleft -= no_read;
- *inbuf += no_read;
+ if (inbytesleft)
+ *inbytesleft -= no_read;
+ if (inbuf)
+ *inbuf += no_read;
}
}
cd->init_flag = 0;
r = *inbuf - inbuf0;
break;
}
- x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
- &no_read);
+ x = (*cd->decoder.read_handle)(
+ cd, &cd->decoder,
+ (unsigned char *) *inbuf, *inbytesleft, &no_read);
if (no_read == 0)
{
r = (size_t)(-1);
int yaz_iconv_close(yaz_iconv_t cd)
{
#if HAVE_ICONV_H
- if (cd->iconv_cd)
+ if (cd->iconv_cd != (iconv_t) (-1))
iconv_close(cd->iconv_cd);
#endif
if (cd->encoder.destroy_handle)
(*cd->encoder.destroy_handle)(&cd->encoder);
+ if (cd->decoder.destroy_handle)
+ (*cd->decoder.destroy_handle)(&cd->decoder);
xfree(cd);
return 0;
}
#include "iconv-p.h"
-unsigned long yaz_read_UCS4(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read)
+static unsigned long read_UCS4(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+ unsigned char *inp,
+ size_t inbytesleft, size_t *no_read)
{
unsigned long x = 0;
return x;
}
-unsigned long yaz_read_UCS4LE(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read)
+static unsigned long read_UCS4LE(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+ unsigned char *inp,
+ size_t inbytesleft, size_t *no_read)
{
unsigned long x = 0;
return e;
}
+/** \brief Decoder factory for UCS4 and UCS4LE
+    \param tocode character set name to test
+    \param d decoder structure to fill in
+    \returns d when yaz_matchstr accepts tocode as "UCS4" or "UCS4LE";
+    0 otherwise
+
+    NOTE(review): the parameter is called tocode, but prepare_decoders
+    passes the name of the character set being converted from; consider
+    renaming it to fromcode here and in iconv-p.h.
+*/
+yaz_iconv_decoder_t yaz_ucs4_decoder(const char *tocode,
+                                     yaz_iconv_decoder_t d)
+{
+    if (yaz_matchstr(tocode, "UCS4") == 0)
+    {
+        d->read_handle = read_UCS4;
+        return d;
+    }
+    if (yaz_matchstr(tocode, "UCS4LE") == 0)
+    {
+        d->read_handle = read_UCS4LE;
+        return d;
+    }
+    return 0;
+}
+
/*
#include "iconv-p.h"
-size_t yaz_init_UTF8(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read)
+static size_t init_utf8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+ unsigned char *inp,
+ size_t inbytesleft, size_t *no_read)
{
if (inp[0] != 0xef)
{
return x;
}
-unsigned long yaz_read_UTF8(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read)
+static unsigned long read_utf8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+ unsigned char *inp,
+ size_t inbytesleft, size_t *no_read)
{
int err = 0;
int r = yaz_read_UTF8_char(inp, inbytesleft, no_read, &err);
return 0;
}
+/** \brief Decoder factory for UTF-8
+    \param fromcode character set name to test
+    \param d decoder structure to fill in
+    \returns d when yaz_matchstr accepts fromcode as "UTF8"; 0 otherwise
+
+    Installs both an init handler and a read handler.
+
+    NOTE(review): the caller may now invoke init_handle with inp == 0
+    (no input buffer), while init_utf8 starts by reading inp[0]; confirm
+    that init_utf8 guards against a null inp.
+*/
+yaz_iconv_decoder_t yaz_utf8_decoder(const char *fromcode,
+                                     yaz_iconv_decoder_t d)
+{
+    if (yaz_matchstr(fromcode, "UTF8") != 0)
+        return 0;
+    d->read_handle = read_utf8;
+    d->init_handle = init_utf8;
+    return d;
+}
+
/*
* Local variables:
$(OBJDIR)\daemon.obj \
$(OBJDIR)\iconv_encode_iso_8859_1.obj \
$(OBJDIR)\iconv_encode_marc8.obj \
+ $(OBJDIR)\iconv_decode_marc8.obj \
$(OBJDIR)\iconv_encode_wchar.obj
Z3950_OBJS= \