YAZ_EXPORT int yaz_marc_read_json_node(yaz_marc_t mt, struct json_node *n);
+/** \brief checks whether a MARC21 record should be treated as UTF-8
+ \param charset character set given by user
+ \param marc_buf ISO2709 record buffer
+ \param sz size of ISO2709 record in bytes
+ \retval 1 record is probably UTF-8
+ \retval 0 no indication that the given charset should be overridden
+
+ 1 is returned only when charset matches "MARC8" (or the pattern "MARC8?"),
+ marc_buf is non-NULL, sz is greater than 25 and the byte at offset 9 of
+ the record is 'a'. In all other cases 0 is returned, which does not by
+ itself prove that the record is not UTF-8.
+*/
+YAZ_EXPORT
+int yaz_marc_check_marc21_coding(const char *charset,
+ const char *marc_buf, int sz);
+
+/** \brief checks whether the bibliographic part of an OPAC record
+ should be treated as UTF-8
+ \param charset character set given by user
+ \param r OPAC record
+ \retval 1 bibliographic record is probably UTF-8
+ \retval 0 otherwise
+
+ Applies yaz_marc_check_marc21_coding to the octet aligned bibliographic
+ record of r. Returns 0 if r has no bibliographic record or if that
+ record is not octet aligned.
+*/
+YAZ_EXPORT
+int yaz_opac_check_marc21_coding(const char *charset, Z_OPACRecord *r);
+
YAZ_END_CDECL
#endif
mt->write_using_libxml2 = enable;
}
+/*
+ * Decides whether a record that the caller believes to be MARC-8 should
+ * instead be decoded as UTF-8.
+ *
+ * charset:  source character set given by the caller; may be NULL
+ * marc_buf: ISO2709 record; may be NULL
+ * sz:       size of marc_buf in bytes
+ *
+ * Returns 1 if charset matches "MARC8?" or "MARC8" and the byte at
+ * offset 9 of the record is 'a'; returns 0 otherwise.
+ */
+int yaz_marc_check_marc21_coding(const char *charset,
+                                 const char *marc_buf, int sz)
+{
+    /* Callers may have no source charset at all; never hand NULL on to
+       yaz_matchstr. The size test keeps the read of marc_buf[9] inside
+       the buffer. */
+    if (!charset || !marc_buf || sz <= 25)
+        return 0;
+    /* yaz_matchstr yields 0 on a match, so a non-zero result from both
+       calls means the charset is not a MARC8 variant. */
+    if (yaz_matchstr(charset, "MARC8?") && yaz_matchstr(charset, "MARC8"))
+        return 0;
+    return marc_buf[9] == 'a' ? 1 : 0;
+}
+
/*
* Local variables:
* c-basic-offset: 4
yaz_opac_decode_wrbuf2(mt, r, wrbuf, 0);
}
+/*
+ * OPAC variant of yaz_marc_check_marc21_coding: inspects the
+ * bibliographic record carried by r. Returns 0 when r has no
+ * bibliographic record or when that record is not octet aligned.
+ */
+int yaz_opac_check_marc21_coding(const char *charset, Z_OPACRecord *r)
+{
+    Z_External *bib = r->bibliographicRecord;
+
+    /* only an octet aligned bibliographic record can be inspected */
+    if (!bib || bib->which != Z_External_octet)
+        return 0;
+
+    return yaz_marc_check_marc21_coding(
+        charset,
+        (const char *) bib->u.octet_aligned->buf,
+        bib->u.octet_aligned->len);
+}
+
/*
* Local variables:
* c-basic-offset: 4
+/*
+ * Converts the MARC record held in 'record' and writes the result back
+ * into the same WRBUF. 'info' is the struct marc_info describing input
+ * and output format and character sets. Returns 0 on success and
+ * non-zero on failure; a failing yaz_marc_write_mode is reported in
+ * 'wr_error'.
+ */
static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
{
struct marc_info *mi = info;
+ const char *input_charset = mi->input_charset;
int ret = 0;
-
- yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, mi->input_charset);
yaz_marc_t mt = yaz_marc_create();
yaz_marc_xml(mt, mi->output_format_mode);
if (mi->leader_spec)
yaz_marc_leader_spec(mt, mi->leader_spec);
- if (cd)
- yaz_marc_iconv(mt, cd);
if (mi->input_format_mode == YAZ_MARC_ISO2709)
{
int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
wrbuf_len(record));
if (sz > 0)
+ {
+ /* the record itself may indicate UTF-8 even though a MARC8
+ input charset was configured; if so, override it */
+ if (yaz_marc_check_marc21_coding(input_charset, wrbuf_buf(record),
+ wrbuf_len(record)))
+ input_charset = "utf-8";
ret = 0;
+ }
else
ret = -1;
}
}
if (ret == 0)
{
+ /* the converter is opened only now, because input_charset may have
+ been overridden while reading the record above */
+ yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, input_charset);
+
+ if (cd)
+ yaz_marc_iconv(mt, cd);
+
wrbuf_rewind(record);
ret = yaz_marc_write_mode(mt, record);
if (ret)
wrbuf_printf(wr_error, "yaz_marc_write_mode failed");
+ if (cd)
+ yaz_iconv_close(cd);
}
- if (cd)
- yaz_iconv_close(cd);
yaz_marc_destroy(mt);
return ret;
}
else
{
struct marc_info *mi = r->info;
+ const char *input_charset = mi->input_charset;
+ yaz_iconv_t cd;
WRBUF res = wrbuf_alloc();
yaz_marc_t mt = yaz_marc_create();
- yaz_iconv_t cd = yaz_iconv_open(mi->output_charset,
- mi->input_charset);
+
+ if (yaz_opac_check_marc21_coding(input_charset, input_record))
+ input_charset = "utf-8";
+ cd = yaz_iconv_open(mi->output_charset, input_charset);
wrbuf_rewind(p->wr_error);
yaz_marc_xml(mt, mi->output_format_mode);
#endif
static yaz_iconv_t iconv_create_charset(const char *record_charset,
- yaz_iconv_t *cd2)
+ yaz_iconv_t *cd2,
+ const char *marc_buf,
+ int sz)
{
char charset_buf[40];
yaz_iconv_t cd = 0;
}
if (from_set1)
+ {
+ if (yaz_marc_check_marc21_coding(from_set1, marc_buf, sz))
+ from_set1 = "utf-8";
cd = yaz_iconv_open(to_set, from_set1);
+ }
if (cd2)
{
if (from_set2)
const char *buf, int sz,
const char *record_charset)
{
- yaz_iconv_t cd = iconv_create_charset(record_charset, 0);
+ yaz_iconv_t cd = iconv_create_charset(record_charset, 0, buf, sz);
yaz_marc_t mt = yaz_marc_create();
const char *ret_string = 0;
Z_OPACRecord *opac_rec,
const char *record_charset)
{
- yaz_iconv_t cd2;
- yaz_iconv_t cd = iconv_create_charset(record_charset, &cd2);
+ yaz_iconv_t cd, cd2;
+ const char *marc_buf = 0;
+ int marc_sz = 0;
yaz_marc_t mt = yaz_marc_create();
+ if (opac_rec->bibliographicRecord)
+ {
+ Z_External *ext = opac_rec->bibliographicRecord;
+ if (ext->which == Z_External_octet)
+ {
+ marc_buf = (const char *) ext->u.octet_aligned->buf;
+ marc_sz = ext->u.octet_aligned->len;
+ }
+ }
+ cd = iconv_create_charset(record_charset, &cd2, marc_buf, marc_sz);
+
if (cd)
yaz_marc_iconv(mt, cd);
yaz_marc_xml(mt, marc_type);
const char *buf, int sz,
const char *record_charset)
{
- yaz_iconv_t cd = iconv_create_charset(record_charset, 0);
+ yaz_iconv_t cd = iconv_create_charset(record_charset, 0, 0, 0);
if (cd)
{
yaz_record_conv_t p = 0;
const char *iso2709_rec =
- "\x30\x30\x30\x37\x37\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
+ "\x30\x30\x30\x37\x37\x6E\x61\x6D\x20\x20\x32\x32\x30\x30\x30\x34"
"\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
"\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x34\x30\x30\x30\x31\x33"
"\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20"
size_t len_result;
size_t r;
char buf[100001];
+ yaz_iconv_t cd1 = 0;
r = fread(buf, 1, 5, inf);
if (r < 5)
}
}
len_result = rlen;
+
+    /* The record indicates UTF-8 although a MARC8 source charset was
+       given: decode this one record through a temporary UTF-8
+       converter. The temporary converter (cd1), not the regular one
+       (cd), must be installed here, otherwise the override has no
+       effect. cd is reinstalled after the record has been decoded. */
+    if (yaz_marc_check_marc21_coding(from, buf, 26))
+    {
+        cd1 = yaz_iconv_open(to, "utf-8");
+        if (cd1)
+            yaz_marc_iconv(mt, cd1);
+    }
r = yaz_marc_decode_buf(mt, buf, -1, &result, &len_result);
+
+ if (cd1)
+ {
+ yaz_iconv_close(cd1);
+ yaz_marc_iconv(mt, cd);
+ }
+
if (r == -1)
no_errors++;
if (r > 0 && result && len_result)