-/* $Id: icu_I18N.c,v 1.5 2007-05-07 09:31:36 marc Exp $
+/* $Id: icu_I18N.c,v 1.6 2007-05-07 12:18:34 marc Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
(const char *) src8->utf8, src8->utf8_len, status);
}
- if (*status != U_BUFFER_OVERFLOW_ERROR
+ //if (*status != U_BUFFER_OVERFLOW_ERROR
+ if (U_SUCCESS(*status)
&& utf16_len < dest16->utf16_cap)
dest16->utf16_len = utf16_len;
else {
src8cstr, src8cstr_len, status);
}
- if (*status != U_BUFFER_OVERFLOW_ERROR
+ // if (*status != U_BUFFER_OVERFLOW_ERROR
+ if (U_SUCCESS(*status)
&& utf16_len < dest16->utf16_cap)
dest16->utf16_len = utf16_len;
else {
};
+
+/* Convert the UTF-16 text held in src16 (src16->utf16_len code units) to
+ * UTF-8 in dest8 by calling u_strToUTF8().
+ *
+ * If the first call leaves *status == U_BUFFER_OVERFLOW_ERROR, dest8 is
+ * resized through icu_buf_utf8_resize() to twice the length that call
+ * reported, *status is reset to U_ZERO_ERROR and the conversion is run one
+ * more time.
+ *
+ * Result: when U_SUCCESS(*status) holds and utf8_len < dest8->utf8_cap,
+ * dest8->utf8_len is set to the converted length.  In every other case
+ * dest8->utf8[0] is set to 0 and dest8->utf8_len to 0.
+ *
+ * Returns the final value of *status.
+ *
+ * NOTE(review): the failure branch writes dest8->utf8[0] unconditionally;
+ * confirm the buffer is always allocated when that branch is reached.
+ */
+UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
+ struct icu_buf_utf16 * src16,
+ UErrorCode * status)
+{
+ int32_t utf8_len = 0;
+
+ u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
+ &utf8_len,
+ src16->utf16, src16->utf16_len, status);
+
+ // first call overflowed: grow dest8 to twice the reported length, retry once
+ if (*status == U_BUFFER_OVERFLOW_ERROR
+ // disabled extra trigger: || dest8->utf8_len > dest8->utf8_cap
+ ){
+ icu_buf_utf8_resize(dest8, utf8_len * 2);
+ *status = U_ZERO_ERROR;
+ u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
+ &utf8_len,
+ src16->utf16, src16->utf16_len, status);
+
+ }
+
+ // superseded check was: *status != U_BUFFER_OVERFLOW_ERROR; the result is now accepted only when U_SUCCESS() holds
+ if (U_SUCCESS(*status)
+ && utf8_len < dest8->utf8_cap)
+ dest8->utf8_len = utf8_len;
+ else {
+ dest8->utf8[0] = (uint8_t) 0;
+ dest8->utf8_len = 0;
+ }
+
+ return *status;
+}; /* NOTE(review): stray ';' after the function body; pedantic ISO C compilers warn about it */
+
+
+
+
UErrorCode icu_sortkey8_from_utf16(UCollator *coll,
struct icu_buf_utf8 * dest8,
struct icu_buf_utf16 * src16,
dest8->utf8, dest8->utf8_cap);
}
- if (sortkey_len > 0)
+ if (U_SUCCESS(*status)
+ && sortkey_len > 0)
dest8->utf8_len = sortkey_len;
-
+ else {
+ dest8->utf8[0] = (UChar) 0;
+ dest8->utf8_len = 0;
+ }
+
return *status;
};
-/* $Id: test_icu_I18N.c,v 1.8 2007-05-07 09:31:36 marc Exp $
+/* $Id: test_icu_I18N.c,v 1.9 2007-05-07 12:18:34 marc Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
#ifdef HAVE_ICU
#include "icu_I18N.h"
+
#include <string.h>
#include <stdlib.h>
-#include <stdio.h>
-
-
-#include <unicode/ustring.h> /* some more string fcns*/
-#include <unicode/uchar.h> /* char names */
-//#include <unicode/ustdio.h>
-//#include <unicode/utypes.h> /* Basic ICU data types */
-#include <unicode/ucol.h>
-
+#include <unicode/ustring.h>
// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
struct icu_termmap
{
- uint8_t sort_key[MAX_KEY_SIZE]; // standard C string '\0' terminated
- char disp_term[MAX_KEY_SIZE]; // standard C utf-8 string
+ uint8_t sort_key[MAX_KEY_SIZE]; // standard C string '\0' terminated
+ char disp_term[MAX_KEY_SIZE]; // standard C utf-8 string
};
int icu_termmap_cmp(const void *vp1, const void *vp2)
{
- struct icu_termmap *itmp1 = *(struct icu_termmap **) vp1;
- struct icu_termmap *itmp2 = *(struct icu_termmap **) vp2;
+ struct icu_termmap *itmp1 = *(struct icu_termmap **) vp1;
+ struct icu_termmap *itmp2 = *(struct icu_termmap **) vp2;
+/* vp1 and vp2 each point at a 'struct icu_termmap *' (hence the extra
+ * dereference in the casts above), i.e. the compared elements are pointers
+ * to term maps, not the term maps themselves.  Presumably this is used as a
+ * qsort()-style comparator over an array of such pointers; confirm against
+ * the caller.
+ *
+ * The result is strcmp() of the two sort_key byte arrays, so the ordering
+ * is by raw sort-key bytes and relies on sort_key being '\0'-terminated.
+ */
+ int cmp = 0;
+
+ cmp = strcmp((const char *)itmp1->sort_key,
+ (const char *)itmp2->sort_key);
+ return cmp;
+};
+
+
+/* Case-map the UTF-16 text in src16 (src16->utf16_len code units) into
+ * dest16.
+ *
+ * action selects the ICU call:
+ *   'l'  u_strToLower()   with locale
+ *   'u'  u_strToUpper()   with locale
+ *   't'  u_strToTitle()   with locale, passing 0 as the argument that
+ *                         precedes it (the title break iterator in ICU's
+ *                         signature)
+ *   'f'  u_strFoldCase()  with U_FOLD_CASE_DEFAULT; locale is not passed
+ * Any other action returns U_UNSUPPORTED_ERROR at once; neither *status nor
+ * dest16 has been written at that point.
+ *
+ * If the first call leaves *status == U_BUFFER_OVERFLOW_ERROR, dest16 is
+ * resized through icu_buf_utf16_resize() to twice the length that call
+ * returned, *status is reset to U_ZERO_ERROR and the same mapping is run
+ * one more time.
+ *
+ * Result: when U_SUCCESS(*status) holds and dest16_len < dest16->utf16_cap,
+ * dest16->utf16_len is set to dest16_len.  In every other case
+ * dest16->utf16[0] is set to 0 and dest16->utf16_len to 0.
+ *
+ * Returns the final value of *status (note the declared return type is int,
+ * not UErrorCode).
+ */
+int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
+ struct icu_buf_utf16 * src16,
+ const char *locale, char action,
+ UErrorCode *status)
+{
+ int32_t dest16_len = 0;
+
+ switch(action) {
+ case 'l':
+ dest16_len = u_strToLower(dest16->utf16, dest16->utf16_cap,
+ src16->utf16, src16->utf16_len,
+ locale, status);
+ break;
+ case 'u':
+ dest16_len = u_strToUpper(dest16->utf16, dest16->utf16_cap,
+ src16->utf16, src16->utf16_len,
+ locale, status);
+ break;
+ case 't':
+ dest16_len = u_strToTitle(dest16->utf16, dest16->utf16_cap,
+ src16->utf16, src16->utf16_len,
+ 0, locale, status);
+ break;
+ case 'f':
+ dest16_len = u_strFoldCase(dest16->utf16, dest16->utf16_cap,
+ src16->utf16, src16->utf16_len,
+ U_FOLD_CASE_DEFAULT, status);
+ break;
+
+ default:
+ return U_UNSUPPORTED_ERROR;
+ break;
+ }
+
+ // first call overflowed: grow dest16 to twice the returned length, retry once
+ if (*status == U_BUFFER_OVERFLOW_ERROR
+ // disabled extra trigger: || dest16_len > dest16->utf16_cap
+ ){
+ icu_buf_utf16_resize(dest16, dest16_len * 2);
+ *status = U_ZERO_ERROR;
- int cmp = 0;
- cmp = strcmp((const char *)itmp1->sort_key,
- (const char *)itmp2->sort_key);
- return cmp;
+ switch(action) {
+ case 'l':
+ dest16_len = u_strToLower(dest16->utf16, dest16->utf16_cap,
+ src16->utf16, src16->utf16_len,
+ locale, status);
+ break;
+ case 'u':
+ dest16_len = u_strToUpper(dest16->utf16, dest16->utf16_cap,
+ src16->utf16, src16->utf16_len,
+ locale, status);
+ break;
+ case 't':
+ dest16_len = u_strToTitle(dest16->utf16, dest16->utf16_cap,
+ src16->utf16, src16->utf16_len,
+ 0, locale, status);
+ break;
+ case 'f':
+ dest16_len = u_strFoldCase(dest16->utf16, dest16->utf16_cap,
+ src16->utf16, src16->utf16_len,
+ U_FOLD_CASE_DEFAULT, status);
+ break;
+/* The default branch below cannot be reached: action is unchanged since
+ * the first switch, which already returned for every unsupported value.
+ */
+ default:
+ return U_UNSUPPORTED_ERROR;
+ break;
+ }
+ }
+
+ if (U_SUCCESS(*status)
+ && dest16_len < dest16->utf16_cap)
+ dest16->utf16_len = dest16_len;
+ else {
+ dest16->utf16[0] = (UChar) 0;
+ dest16->utf16_len = 0;
+ }
+
+ return *status;
+};
+
+
+/* Run one case-mapping check.
+ *
+ * src8cstr is converted to UTF-16 with icu_utf16_from_utf8_cstr(), mapped
+ * with icu_utf16_casemap() using locale and action, converted back with
+ * icu_utf16_to_utf8(), and the result is compared with chk8cstr (equal
+ * length and strcmp() == 0).
+ *
+ * Returns 1 when the result matches, 0 otherwise.  On a mismatch the
+ * original, actual and expected strings are printed with their lengths.
+ *
+ * status is passed through all three calls but is never inspected here;
+ * src8 is created and destroyed without being used.
+ *
+ * NOTE(review): this same diff removes '#include <stdio.h>' from the file
+ * while this function calls printf(); confirm another header still
+ * declares it.
+ */
+int test_icu_casemap(const char * locale, char action,
+ const char * src8cstr, const char * chk8cstr)
+{
+ int success = 0;
+ UErrorCode status = U_ZERO_ERROR;
+
+ struct icu_buf_utf8 * src8 = icu_buf_utf8_create(0);
+ struct icu_buf_utf8 * dest8 = icu_buf_utf8_create(0);
+ struct icu_buf_utf16 * src16 = icu_buf_utf16_create(0);
+ struct icu_buf_utf16 * dest16 = icu_buf_utf16_create(0);
+
+
+ int src8cstr_len = strlen(src8cstr);
+ int chk8cstr_len = strlen(chk8cstr);
+
+ // converting to UTF16
+ icu_utf16_from_utf8_cstr(src16, src8cstr, &status);
+
+ // perform case mapping
+ icu_utf16_casemap(dest16, src16, locale, action, &status);
+
+ // converting to UTF8
+ icu_utf16_to_utf8(dest8, dest16, &status);
+
+
+
+ // determine success: buffer present, same length as expected, same bytes
+ if (dest8->utf8
+ && (dest8->utf8_len == strlen(chk8cstr))
+ && !strcmp(chk8cstr, (const char *) dest8->utf8))
+ success = 1;
+ else
+ success = 0;
+
+ // report failures
+ if (!success){
+ printf("\nERROR\n");
+ printf("original string: '%s' (%d)\n", src8cstr, src8cstr_len);
+ printf("icu_casemap '%s:%c' '%s' (%d)\n",
+ locale, action, dest8->utf8, dest8->utf8_len);
+ printf("expected string: '%s' (%d)\n", chk8cstr, chk8cstr_len);
+ }
+
+ // clean the buffers
+ icu_buf_utf8_destroy(src8);
+ icu_buf_utf8_destroy(dest8);
+ icu_buf_utf16_destroy(src16);
+ icu_buf_utf16_destroy(dest16);
+
+
+ return success;
}
return sucess;
}
+#endif
+
+
// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
void test_icu_I18N_casemap(int argc, char **argv)
"A ReD fOx hunTS sQUirriLs",
"a red fox hunts squirrils"));
- // this one fails and needs more investigation ..
- YAZ_CHECK(0 == test_icu_casemap("en", 't',
+ YAZ_CHECK(test_icu_casemap("en", 't',
"A ReD fOx hunTS sQUirriLs",
"A Red Fox Hunts Squirrils"));
-
+
// Locale 'da'
}
+#if 0
+
// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
void test_icu_I18N_casemap_failures(int argc, char **argv)
nmem_destroy(nmem);
}
+
+
#endif
// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
#ifdef HAVE_ICU
//test_icu_I18N_casemap_failures(argc, argv);
- //test_icu_I18N_casemap(argc, argv);
+ test_icu_I18N_casemap(argc, argv);
test_icu_I18N_sortmap(argc, argv);
#else