-/* $Id: test_icu_I18N.c,v 1.13 2007-05-10 11:53:47 marc Exp $
+/* $Id: test_icu_I18N.c,v 1.14 2007-05-10 12:11:42 marc Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
-void test_icu_I18N_normmap(int argc, char **argv)
+void test_icu_I18N_transliterator(int argc, char **argv)
{
+ /* setting up transliterator */
+
+#if 0
+
+ UErrorCode status = U_ZERO_ERROR;
+ UParseError parse_error[256];
+
+ int32_t id_cap = 256;
+ UChar id[256];
+ id[0] = 0;
+
+ trans = utrans_openU(id, id_len, UTRANS_FORWARD,
+ 0, 0, parse_error, &status);
+
+
+ if(U_FAILURE(status)) {
+ printf("Parse Error: line %d offset %d \n",
+ parse_error->line, parse_error->offset);
+ }
+ icu_check_status(status);
+
+
+ int32_t ustr16_lim = *ustr16_len;
+ /* Transliterate a segment of a UChar* string */
+
+ utrans_transUChars (trans, ustr16, &*ustr16_len,
+ ustr16_cap,
+ 0, &ustr16_lim, &status);
+
+ utrans_close (trans);
+
+ printf("\n\nUnicode Set Patterns:\n"
+ " Pattern Description\n"
+ " Ranges [a-z] The lower case letters a through z\n"
+ " Named Chars [abc123] The six characters a,b,c,1,2 and 3\n"
+ " String [abc{def}] chars a, b and c, and string 'def'\n"
+ " Categories [\\p{Letter}] Perl General Category 'Letter'.\n"
+ " Categories [:Letter:] Posix General Category 'Letter'.\n"
+ "\n"
+ " Combination Example\n"
+ " Union [[:Greek:] [:letter:]]\n"
+ " Intersection [[:Greek:] & [:letter:]]\n"
+ " Set Complement [[:Greek:] - [:letter:]]\n"
+ " Complement [^[:Greek:] [:letter:]]\n"
+ "\n"
+ "see: http://icu.sourceforge.net/userguide/unicodeSet.html\n"
+ "\n"
+ "Examples:\n"
+ " [:Punctuation:] Any-Remove\n"
+ " [:Cased-Letter:] Any-Upper\n"
+ " [:Control:] Any-Remove\n"
+ " [:Decimal_Number:] Any-Remove\n"
+ " [:Final_Punctuation:] Any-Remove\n"
+ " [:Georgian:] Any-Upper\n"
+ " [:Katakana:] Any-Remove\n"
+ " [:Arabic:] Any-Remove\n"
+ " [:Punctuation:] Remove\n"
+ " [[:Punctuation:]-[.,]] Remove\n"
+ " [:Line_Separator:] Any-Remove\n"
+ " [:Math_Symbol:] Any-Remove\n"
+ " Lower; [:^Letter:] Remove (word tokenization)\n"
+ " [:^Number:] Remove (numeric tokenization)\n"
+ " [:^Katagana:] Remove (remove everything except Katagana)\n"
+ " Lower;[[:WhiteSpace:][:Punctuation:]] Remove (word tokenization)\n"
+ " NFD; [:Nonspacing Mark:] Remove; NFC (removes accents from characters)\n"
+ " [A-Za-z]; Lower(); Latin-Katakana; Katakana-Hiragana (transforms latin and katagana to hiragana)\n"
+ " [[:separator:][:start punctuation:][:initial punctuation:]] Remove \n"
+ "\n"
+ "see http://icu.sourceforge.net/userguide/Transform.html\n"
+ " http://www.unicode.org/Public/UNIDATA/UCD.html\n"
+ " http://icu.sourceforge.net/userguide/Transform.html\n"
+ " http://icu.sourceforge.net/userguide/TransformRule.html\n"
+ );
+#endif
}
//test_icu_I18N_casemap_failures(argc, argv);
test_icu_I18N_casemap(argc, argv);
test_icu_I18N_sortmap(argc, argv);
- test_icu_I18N_normmap(argc, argv);
+ test_icu_I18N_transliterator(argc, argv);
test_icu_I18N_tokenizer(argc, argv);
#else // HAVE_ICU