{
unsigned char *outp = (unsigned char *) *outbuf;
- if (x == '@')
+ if (x == '@' || x == '*' || x == 0xa4)
{
if (*outbytesleft < 2)
{
yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
return (size_t)(-1);
}
- *outp++ = x;
+ *outp++ = '@';
(*outbytesleft)--;
*outp++ = x;
(*outbytesleft)--;
(*outbytesleft)--;
}
else
- { /* full unicode, emit @XXXX */
+ {
if (*outbytesleft < 6)
{
yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
return (size_t)(-1);
}
- sprintf(*outbuf, "@%04lX", x);
- outp += 5;
- (*outbytesleft) -= 5;
+ switch (x)
+ {
+ case 0xa733:
+ *outp++ = '@';
+ *outp++ = 0xe5;
+ (*outbytesleft) -= 2;
+ break;
+ case 0xa732:
+ *outp++ = '@';
+ *outp++ = 0xc5;
+ (*outbytesleft) -= 2;
+ break;
+ default:
+ /* full unicode, emit @XXXX */
+ sprintf(*outbuf, "@%04lX", x);
+ outp += 5;
+ (*outbytesleft) -= 5;
+ break;
+ }
}
*outbuf = (char *) outp;
return 0;
YAZ_CHECK(utf8_check(100000000));
}
-static void tst_danmarc_to_latin1(void)
+static void tst_danmarc_to_utf8(void)
{
- yaz_iconv_t cd = yaz_iconv_open("iso-8859-1", "danmarc");
+ yaz_iconv_t cd = yaz_iconv_open("utf-8", "danmarc");
YAZ_CHECK(cd);
if (!cd)
YAZ_CHECK(tst_convert(cd, "a@@b", "a@b"));
YAZ_CHECK(tst_convert(cd, "a@@@@b", "a@@b"));
- YAZ_CHECK(tst_convert(cd, "@000ab", "\nb"));
- YAZ_CHECK(tst_convert(cd, "@\xe5", "aa"));
- YAZ_CHECK(tst_convert(cd, "@\xc5.", "Aa."));
+ YAZ_CHECK(tst_convert(cd, "@*", "*"));
+ YAZ_CHECK(tst_convert(cd, "@@", "@"));
+ YAZ_CHECK(tst_convert(cd, "@\xa4", "\xC2\xA4"));
+ YAZ_CHECK(tst_convert(cd, "@\xe5", "\xEA\x9C\xB3"));
+ YAZ_CHECK(tst_convert(cd, "@\xc5.", "\xEA\x9C\xB2" "."));
+
+ YAZ_CHECK(tst_convert(cd, "@a733", "\xEA\x9C\xB3"));
+ YAZ_CHECK(tst_convert(cd, "@a732.", "\xEA\x9C\xB2" "."));
+
+ YAZ_CHECK(tst_convert(cd, "a@03BBb", "a\xce\xbb" "b")); /* lambda */
yaz_iconv_close(cd);
}
return;
YAZ_CHECK(tst_convert(cd, "ax", "ax"));
+
+ YAZ_CHECK(tst_convert(cd, "a@b", "a@@b"));
+ YAZ_CHECK(tst_convert(cd, "a@@b", "a@@@@b"));
+
+ YAZ_CHECK(tst_convert(cd, "*", "@*"));
YAZ_CHECK(tst_convert(cd, "@", "@@"));
+ YAZ_CHECK(tst_convert(cd, "\xC2\xA4", "@\xa4"));
+
YAZ_CHECK(tst_convert(cd, "a\xc3\xa5" "b", "a\xe5" "b")); /* aring */
YAZ_CHECK(tst_convert(cd, "a\xce\xbb" "b", "a@03BBb")); /* lambda */
+ YAZ_CHECK(tst_convert(cd, "\xEA\x9C\xB2" ".", "@\xc5."));
+ YAZ_CHECK(tst_convert(cd, "\xEA\x9C\xB3", "@\xe5"));
+
yaz_iconv_close(cd);
}
tst_utf8_to_marc8("marc8lossy");
tst_utf8_to_marc8("marc8lossless");
- tst_danmarc_to_latin1();
+ tst_danmarc_to_utf8();
tst_utf8_to_danmarc();
tst_latin1_to_marc8();