-struct icu_normalizer
+struct icu_transform
{
char action;
struct icu_buf_utf16 * rules16;
UTransliterator * trans;
};
-struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
+struct icu_transform * icu_transform_create(const char *rules, char action,
UErrorCode *status);
-void icu_normalizer_destroy(struct icu_normalizer * normalizer);
+void icu_transform_destroy(struct icu_transform * transform);
-int icu_normalizer_normalize(struct icu_normalizer * normalizer,
- struct icu_buf_utf16 * dest16,
- struct icu_buf_utf16 * src16,
- UErrorCode *status);
+int icu_transform_trans(struct icu_transform * transform,
+ struct icu_buf_utf16 * dest16,
+ struct icu_buf_utf16 * src16,
+ UErrorCode *status);
enum icu_chain_step_type {
ICU_chain_step_type_none,
ICU_chain_step_type_display, /* convert to utf8 display format */
ICU_chain_step_type_casemap, /* apply utf16 charmap */
- ICU_chain_step_type_normalize, /* apply utf16 normalization */
+ ICU_chain_step_type_transform, /* apply utf16 transform */
ICU_chain_step_type_tokenize /* apply utf16 tokenization */
};
enum icu_chain_step_type type;
union {
struct icu_casemap * casemap;
- struct icu_normalizer * normalizer;
+ struct icu_transform * transform;
struct icu_tokenizer * tokenizer;
} u;
/* temprary post-action utf16 buffer */
-struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
- UErrorCode *status)
+struct icu_transform * icu_transform_create(const char *rules, char action,
+ UErrorCode *status)
{
- struct icu_normalizer * normalizer
- = (struct icu_normalizer *) xmalloc(sizeof(struct icu_normalizer));
+ struct icu_transform * transform
+ = (struct icu_transform *) xmalloc(sizeof(struct icu_transform));
- normalizer->action = action;
- normalizer->trans = 0;
- normalizer->rules16 = icu_buf_utf16_create(0);
- icu_utf16_from_utf8_cstr(normalizer->rules16, rules, status);
-
- switch(normalizer->action) {
+ transform->action = action;
+ transform->trans = 0;
+ transform->rules16 = icu_buf_utf16_create(0);
+ icu_utf16_from_utf8_cstr(transform->rules16, rules, status);
+
+ switch(transform->action)
+ {
case 'f':
case 'F':
- normalizer->trans
- = utrans_openU(normalizer->rules16->utf16,
- normalizer->rules16->utf16_len,
+ transform->trans
+ = utrans_openU(transform->rules16->utf16,
+ transform->rules16->utf16_len,
UTRANS_FORWARD,
0, 0,
- &normalizer->parse_error, status);
+ &transform->parse_error, status);
break;
case 'r':
case 'R':
- normalizer->trans
- = utrans_openU(normalizer->rules16->utf16,
- normalizer->rules16->utf16_len,
+ transform->trans
+ = utrans_openU(transform->rules16->utf16,
+ transform->rules16->utf16_len,
UTRANS_REVERSE ,
0, 0,
- &normalizer->parse_error, status);
+ &transform->parse_error, status);
break;
default:
*status = U_UNSUPPORTED_ERROR;
}
if (U_SUCCESS(*status))
- return normalizer;
+ return transform;
/* freeing if failed */
- icu_normalizer_destroy(normalizer);
+ icu_transform_destroy(transform);
return 0;
}
-void icu_normalizer_destroy(struct icu_normalizer * normalizer){
- if (normalizer) {
- if (normalizer->rules16)
- icu_buf_utf16_destroy(normalizer->rules16);
- if (normalizer->trans)
- utrans_close(normalizer->trans);
- xfree(normalizer);
+void icu_transform_destroy(struct icu_transform * transform){
+ if (transform) {
+ if (transform->rules16)
+ icu_buf_utf16_destroy(transform->rules16);
+ if (transform->trans)
+ utrans_close(transform->trans);
+ xfree(transform);
}
}
-int icu_normalizer_normalize(struct icu_normalizer * normalizer,
- struct icu_buf_utf16 * dest16,
- struct icu_buf_utf16 * src16,
- UErrorCode *status)
+int icu_transform_trans(struct icu_transform * transform,
+ struct icu_buf_utf16 * dest16,
+ struct icu_buf_utf16 * src16,
+ UErrorCode *status)
{
- if (!normalizer || !normalizer->trans
+ if (!transform || !transform->trans
|| !src16
|| !dest16)
return 0;
return 0;
- utrans_transUChars (normalizer->trans,
+ utrans_transUChars (transform->trans,
dest16->utf16, &(dest16->utf16_len),
dest16->utf16_cap,
0, &(src16->utf16_len), status);
case ICU_chain_step_type_casemap:
step->u.casemap = icu_casemap_create(rule[0], status);
break;
- case ICU_chain_step_type_normalize:
- step->u.normalizer = icu_normalizer_create((char *) rule, 'f', status);
+ case ICU_chain_step_type_transform:
+ step->u.transform = icu_transform_create((char *) rule, 'f', status);
break;
case ICU_chain_step_type_tokenize:
step->u.tokenizer = icu_tokenizer_create((char *) chain->locale,
icu_casemap_destroy(step->u.casemap);
icu_buf_utf16_destroy(step->buf16);
break;
- case ICU_chain_step_type_normalize:
- icu_normalizer_destroy(step->u.normalizer);
+ case ICU_chain_step_type_transform:
+ icu_transform_destroy(step->u.transform);
icu_buf_utf16_destroy(step->buf16);
break;
case ICU_chain_step_type_tokenize:
step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap,
(const uint8_t *) xml_rule, status);
else if (!strcmp((const char *) node->name, "transform"))
- step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize,
+ step = icu_chain_insert_step(chain, ICU_chain_step_type_transform,
(const uint8_t *) xml_rule, status);
else if (!strcmp((const char *) node->name, "tokenize"))
step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize,
{
yaz_log(YLOG_WARN, "Element %s is deprecated. "
"Use transform instead", node->name);
- step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize,
+ step = icu_chain_insert_step(chain, ICU_chain_step_type_transform,
(const uint8_t *) xml_rule, status);
}
else if (!strcmp((const char *) node->name, "index")
case ICU_chain_step_type_casemap:
buf16 = icu_buf_utf16_create(0);
break;
- case ICU_chain_step_type_normalize:
+ case ICU_chain_step_type_transform:
buf16 = icu_buf_utf16_create(0);
break;
case ICU_chain_step_type_tokenize:
step->buf16, src16, status,
chain->locale);
break;
- case ICU_chain_step_type_normalize:
- icu_normalizer_normalize(step->u.normalizer,
- step->buf16, src16, status);
+ case ICU_chain_step_type_transform:
+ icu_transform_trans(step->u.transform,
+ step->buf16, src16, status);
break;
case ICU_chain_step_type_tokenize:
/* attach to new src16 token only first time during splitting */
struct icu_buf_utf16 * src16 = icu_buf_utf16_create(0);
struct icu_buf_utf16 * dest16 = icu_buf_utf16_create(0);
struct icu_buf_utf8 * dest8 = icu_buf_utf8_create(0);
- struct icu_normalizer * normalizer
- = icu_normalizer_create(rules8cstr, 'f', &status);
+ struct icu_transform * transform
+ = icu_transform_create(rules8cstr, 'f', &status);
icu_check_status(status);
icu_utf16_from_utf8_cstr(src16, src8cstr, &status);
icu_check_status(status);
- icu_normalizer_normalize(normalizer, dest16, src16, &status);
+ icu_transform_trans(transform, dest16, src16, &status);
icu_check_status(status);
icu_utf16_to_utf8(dest8, dest16, &status);
}
- icu_normalizer_destroy(normalizer);
+ icu_transform_destroy(transform);
icu_buf_utf16_destroy(src16);
icu_buf_utf16_destroy(dest16);
icu_buf_utf8_destroy(dest8);