From 7412ef1489d4d607641834d1009adc51d422608b Mon Sep 17 00:00:00 2001 From: Marc Cromme Date: Mon, 7 May 2007 12:18:34 +0000 Subject: [PATCH] updated ICU casemap wrappers to use dynamic buffers, all ICU tests succeed --- src/icu_I18N.c | 56 ++++++++++++++-- src/icu_I18N.h | 8 ++- src/test_icu_I18N.c | 181 ++++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 217 insertions(+), 28 deletions(-) diff --git a/src/icu_I18N.c b/src/icu_I18N.c index 6dd150e..846ad4b 100644 --- a/src/icu_I18N.c +++ b/src/icu_I18N.c @@ -1,4 +1,4 @@ -/* $Id: icu_I18N.c,v 1.5 2007-05-07 09:31:36 marc Exp $ +/* $Id: icu_I18N.c,v 1.6 2007-05-07 12:18:34 marc Exp $ Copyright (c) 2006-2007, Index Data. This file is part of Pazpar2. @@ -202,7 +202,8 @@ UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16, (const char *) src8->utf8, src8->utf8_len, status); } - if (*status != U_BUFFER_OVERFLOW_ERROR + //if (*status != U_BUFFER_OVERFLOW_ERROR + if (U_SUCCESS(*status) && utf16_len < dest16->utf16_cap) dest16->utf16_len = utf16_len; else { @@ -239,7 +240,8 @@ UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16, src8cstr, src8cstr_len, status); } - if (*status != U_BUFFER_OVERFLOW_ERROR + // if (*status != U_BUFFER_OVERFLOW_ERROR + if (U_SUCCESS(*status) && utf16_len < dest16->utf16_cap) dest16->utf16_len = utf16_len; else { @@ -251,6 +253,45 @@ UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16, }; + + +UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8, + struct icu_buf_utf16 * src16, + UErrorCode * status) +{ + int32_t utf8_len = 0; + + u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap, + &utf8_len, + src16->utf16, src16->utf16_len, status); + + // check for buffer overflow, resize and retry + if (*status == U_BUFFER_OVERFLOW_ERROR + //|| dest8->utf8_len > dest8->utf8_cap + ){ + icu_buf_utf8_resize(dest8, utf8_len * 2); + *status = U_ZERO_ERROR; + u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap, + &utf8_len, + src16->utf16, src16->utf16_len, status); + + } + + //if (*status != U_BUFFER_OVERFLOW_ERROR + if (U_SUCCESS(*status) + && utf8_len < dest8->utf8_cap) + dest8->utf8_len = utf8_len; + else { + dest8->utf8[0] = (uint8_t) 0; + dest8->utf8_len = 0; + } + + return *status; +}; + + + + UErrorCode icu_sortkey8_from_utf16(UCollator *coll, struct icu_buf_utf8 * dest8, struct icu_buf_utf16 * src16, @@ -269,9 +310,14 @@ UErrorCode icu_sortkey8_from_utf16(UCollator *coll, dest8->utf8, dest8->utf8_cap); } - if (sortkey_len > 0) + if (U_SUCCESS(*status) + && sortkey_len > 0) dest8->utf8_len = sortkey_len; - + else { + dest8->utf8[0] = (UChar) 0; + dest8->utf8_len = 0; + } + return *status; }; diff --git a/src/icu_I18N.h b/src/icu_I18N.h index eb44204..b0a91d9 100644 --- a/src/icu_I18N.h +++ b/src/icu_I18N.h @@ -1,4 +1,4 @@ -/* $Id: icu_I18N.h,v 1.5 2007-05-07 09:31:36 marc Exp $ +/* $Id: icu_I18N.h,v 1.6 2007-05-07 12:18:34 marc Exp $ Copyright (c) 2006-2007, Index Data. This file is part of Pazpar2. @@ -76,6 +76,12 @@ UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16, const char * src8cstr, UErrorCode * status); + +UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8, + struct icu_buf_utf16 * src16, + UErrorCode * status); + + UErrorCode icu_sortkey8_from_utf16(UCollator *coll, struct icu_buf_utf8 * dest8, struct icu_buf_utf16 * src16, diff --git a/src/test_icu_I18N.c b/src/test_icu_I18N.c index 065a8f0..93428ad 100644 --- a/src/test_icu_I18N.c +++ b/src/test_icu_I18N.c @@ -1,4 +1,4 @@ -/* $Id: test_icu_I18N.c,v 1.8 2007-05-07 09:31:36 marc Exp $ +/* $Id: test_icu_I18N.c,v 1.9 2007-05-07 12:18:34 marc Exp $ Copyright (c) 2006-2007, Index Data. This file is part of Pazpar2. @@ -37,18 +37,11 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #ifdef HAVE_ICU #include "icu_I18N.h" + #include #include -#include - - -#include /* some more string fcns*/ -#include /* char names */ -//#include -//#include /* Basic ICU data types */ -#include - +#include // DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 @@ -56,22 +49,160 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA struct icu_termmap { - uint8_t sort_key[MAX_KEY_SIZE]; // standard C string '\0' terminated - char disp_term[MAX_KEY_SIZE]; // standard C utf-8 string + uint8_t sort_key[MAX_KEY_SIZE]; // standard C string '\0' terminated + char disp_term[MAX_KEY_SIZE]; // standard C utf-8 string }; int icu_termmap_cmp(const void *vp1, const void *vp2) { - struct icu_termmap *itmp1 = *(struct icu_termmap **) vp1; - struct icu_termmap *itmp2 = *(struct icu_termmap **) vp2; + struct icu_termmap *itmp1 = *(struct icu_termmap **) vp1; + struct icu_termmap *itmp2 = *(struct icu_termmap **) vp2; + + int cmp = 0; + + cmp = strcmp((const char *)itmp1->sort_key, + (const char *)itmp2->sort_key); + return cmp; +}; + + + +int icu_utf16_casemap(struct icu_buf_utf16 * dest16, + struct icu_buf_utf16 * src16, + const char *locale, char action, + UErrorCode *status) +{ + int32_t dest16_len = 0; + + switch(action) { + case 'l': + dest16_len = u_strToLower(dest16->utf16, dest16->utf16_cap, + src16->utf16, src16->utf16_len, + locale, status); + break; + case 'u': + dest16_len = u_strToUpper(dest16->utf16, dest16->utf16_cap, + src16->utf16, src16->utf16_len, + locale, status); + break; + case 't': + dest16_len = u_strToTitle(dest16->utf16, dest16->utf16_cap, + src16->utf16, src16->utf16_len, + 0, locale, status); + break; + case 'f': + dest16_len = u_strFoldCase(dest16->utf16, dest16->utf16_cap, + src16->utf16, src16->utf16_len, + U_FOLD_CASE_DEFAULT, status); + break; + + default: + return U_UNSUPPORTED_ERROR; + break; + } + + // check for buffer overflow, resize and retry + if (*status == U_BUFFER_OVERFLOW_ERROR + //|| dest16_len > dest16->utf16_cap + ){ + icu_buf_utf16_resize(dest16, dest16_len * 2); + *status = U_ZERO_ERROR; - int cmp = 0; - cmp = strcmp((const char *)itmp1->sort_key, - (const char *)itmp2->sort_key); - return cmp; + switch(action) { + case 'l': + dest16_len = u_strToLower(dest16->utf16, dest16->utf16_cap, + src16->utf16, src16->utf16_len, + locale, status); + break; + case 'u': + dest16_len = u_strToUpper(dest16->utf16, dest16->utf16_cap, + src16->utf16, src16->utf16_len, + locale, status); + break; + case 't': + dest16_len = u_strToTitle(dest16->utf16, dest16->utf16_cap, + src16->utf16, src16->utf16_len, + 0, locale, status); + break; + case 'f': + dest16_len = u_strFoldCase(dest16->utf16, dest16->utf16_cap, + src16->utf16, src16->utf16_len, + U_FOLD_CASE_DEFAULT, status); + break; + + default: + return U_UNSUPPORTED_ERROR; + break; + } + } + + if (U_SUCCESS(*status) + && dest16_len < dest16->utf16_cap) + dest16->utf16_len = dest16_len; + else { + dest16->utf16[0] = (UChar) 0; + dest16->utf16_len = 0; + } + + return *status; +}; + + + +int test_icu_casemap(const char * locale, char action, + const char * src8cstr, const char * chk8cstr) +{ + int success = 0; + UErrorCode status = U_ZERO_ERROR; + + struct icu_buf_utf8 * src8 = icu_buf_utf8_create(0); + struct icu_buf_utf8 * dest8 = icu_buf_utf8_create(0); + struct icu_buf_utf16 * src16 = icu_buf_utf16_create(0); + struct icu_buf_utf16 * dest16 = icu_buf_utf16_create(0); + + + int src8cstr_len = strlen(src8cstr); + int chk8cstr_len = strlen(chk8cstr); + + // converting to UTF16 + icu_utf16_from_utf8_cstr(src16, src8cstr, &status); + + // perform case mapping + icu_utf16_casemap(dest16, src16, locale, action, &status); + + // converting to UTF8 + icu_utf16_to_utf8(dest8, dest16, &status); + + + + // determine success + if (dest8->utf8 + && (dest8->utf8_len == strlen(chk8cstr)) + && !strcmp(chk8cstr, (const char *) dest8->utf8)) + success = 1; + else + success = 0; + + // report failures + if (!success){ + printf("\nERROR\n"); + printf("original string: '%s' (%d)\n", src8cstr, src8cstr_len); + printf("icu_casemap '%s:%c' '%s' (%d)\n", + locale, action, dest8->utf8, dest8->utf8_len); + printf("expected string: '%s' (%d)\n", chk8cstr, chk8cstr_len); + } + + // clean the buffers + icu_buf_utf8_destroy(src8); + icu_buf_utf8_destroy(dest8); + icu_buf_utf16_destroy(src16); + icu_buf_utf16_destroy(dest16); + + + return success; } @@ -109,6 +240,9 @@ int test_icu_casemap(const char * locale, char action, return sucess; } +#endif + + // DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 void test_icu_I18N_casemap(int argc, char **argv) @@ -129,11 +263,10 @@ void test_icu_I18N_casemap(int argc, char **argv) "A ReD fOx hunTS sQUirriLs", "a red fox hunts squirrils")); - // this one fails and needs more investigation .. - YAZ_CHECK(0 == test_icu_casemap("en", 't', + YAZ_CHECK(test_icu_casemap("en", 't', "A ReD fOx hunTS sQUirriLs", "A Red Fox Hunts Squirrils")); - + // Locale 'da' @@ -176,6 +309,8 @@ void test_icu_I18N_casemap(int argc, char **argv) } +#if 0 + // DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 void test_icu_I18N_casemap_failures(int argc, char **argv) @@ -228,6 +363,8 @@ void test_icu_I18N_casemap_failures(int argc, char **argv) nmem_destroy(nmem); } + + #endif // DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 @@ -436,7 +573,7 @@ int main(int argc, char **argv) #ifdef HAVE_ICU //test_icu_I18N_casemap_failures(argc, argv); - //test_icu_I18N_casemap(argc, argv); + test_icu_I18N_casemap(argc, argv); test_icu_I18N_sortmap(argc, argv); #else -- 1.7.10.4