From 527a008f2f3b1a19a968a7a1275132a04aa4d945 Mon Sep 17 00:00:00 2001
From: Adam Dickmeiss
Date: Mon, 1 Feb 2010 16:32:38 +0100
Subject: [PATCH] Working on cloning / thread safety for ICU stuff

Add icu_casemap_clone(), icu_tokenizer_clone() and icu_transform_clone(),
plus icu_chain_step_clone() which copies a whole list of chain steps by
cloning the casemap / transform / tokenizer object held by each step.

struct icu_iter gets its own 'steps' pointer, released in
icu_iter_destroy(). The clone call in icu_iter_create() is still compiled
out (#if 0), so iter->steps is 0 and icu_iter_next() falls back to the
list owned by the chain. That list is renamed from 'steps' to 'csteps'.
---
Review notes (not part of the commit message):
 - icu_transform_clone(): 'status' is now initialised to U_ZERO_ERROR
   before it is handed to utrans_clone().
 - icu_tokenizer_clone(): 'bufferSize' is int32_t (the type that
   ubrk_safeClone() takes a pointer to), 'status' starts at U_ZERO_ERROR,
   and the two identical return branches are replaced by an assert in the
   style of icu_transform_clone().
 - This text was re-flowed from a rendering in which line breaks,
   indentation and the <...> targets of #include lines had been lost.
   Blank context lines, indentation and the #include targets are
   reconstructed; verify them against the tree before applying.

 include/yaz/icu_I18N.h |    5 ++++-
 src/icu_casemap.c      |    8 +++++++
 src/icu_chain.c        |   58 +++++++++++++++++++++++++++++++++++++++++-------
 src/icu_tokenizer.c    |   33 ++++++++++++++++++++++-----
 src/icu_transform.c    |   14 ++++++++++++
 test/tst_icu_I18N.c    |   18 +++++++--------
 6 files changed, 112 insertions(+), 24 deletions(-)

diff --git a/include/yaz/icu_I18N.h b/include/yaz/icu_I18N.h
index b330446..c6b8672 100644
--- a/include/yaz/icu_I18N.h
+++ b/include/yaz/icu_I18N.h
@@ -102,6 +102,8 @@ struct icu_casemap;
 
 struct icu_casemap * icu_casemap_create(char action, UErrorCode *status);
 
+struct icu_casemap *icu_casemap_clone(struct icu_casemap *old);
+
 void icu_casemap_destroy(struct icu_casemap * casemap);
 
 int icu_casemap_casemap(struct icu_casemap * casemap,
@@ -124,6 +126,7 @@ struct icu_tokenizer;
 
 struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
                                             UErrorCode *status);
+struct icu_tokenizer *icu_tokenizer_clone(struct icu_tokenizer *old);
 void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer);
 
 int icu_tokenizer_attach(struct icu_tokenizer * tokenizer,
@@ -140,7 +143,7 @@ struct icu_transform;
 struct icu_transform * icu_transform_create(const char *id, char action,
                                             const char *rules,
                                             UErrorCode *status);
-
+struct icu_transform *icu_transform_clone(struct icu_transform *old);
 void icu_transform_destroy(struct icu_transform * transform);
 
 int icu_transform_trans(struct icu_transform * transform,
diff --git a/src/icu_casemap.c b/src/icu_casemap.c
index d0a7051..26aa737 100644
--- a/src/icu_casemap.c
+++ b/src/icu_casemap.c
@@ -54,6 +54,14 @@ struct icu_casemap * icu_casemap_create(char action, UErrorCode *status)
     return casemap;
 }
 
+struct icu_casemap *icu_casemap_clone(struct icu_casemap *old)
+{
+    struct icu_casemap * casemap
+        = (struct icu_casemap *) xmalloc(sizeof(struct icu_casemap));
+    casemap->action = old->action;
+    return casemap;
+}
+
 void icu_casemap_destroy(struct icu_casemap * casemap)
 {
     xfree(casemap);
diff --git a/src/icu_chain.c b/src/icu_chain.c
index d72e9cd..512c2b1 100644
--- a/src/icu_chain.c
+++ b/src/icu_chain.c
@@ -60,7 +60,7 @@ struct icu_chain
     struct icu_buf_utf8 * norm8;
 
     /* linked list of chain steps */
-    struct icu_chain_step * steps;
+    struct icu_chain_step * csteps;
 };
 
 int icu_check_status(UErrorCode status)
@@ -80,7 +80,7 @@ static struct icu_chain_step *icu_chain_step_create(
 {
     struct icu_chain_step * step = 0;
 
-    if(!chain || !type || !rule)
+    if (!chain || !type || !rule)
         return 0;
 
     step = (struct icu_chain_step *) xmalloc(sizeof(struct icu_chain_step));
@@ -142,6 +142,39 @@ static void icu_chain_step_destroy(struct icu_chain_step * step)
     xfree(step);
 }
 
+struct icu_chain_step *icu_chain_step_clone(struct icu_chain_step *old)
+{
+    struct icu_chain_step *step = 0;
+    struct icu_chain_step **sp = &step;
+    while (old)
+    {
+        *sp = (struct icu_chain_step *) xmalloc(sizeof(**sp));
+        (*sp)->type = old->type;
+
+        switch ((*sp)->type)
+        {
+        case ICU_chain_step_type_display:
+            break;
+        case ICU_chain_step_type_casemap:
+            (*sp)->u.casemap = icu_casemap_clone(old->u.casemap);
+            break;
+        case ICU_chain_step_type_transform:
+        case ICU_chain_step_type_transliterate:
+            (*sp)->u.transform = icu_transform_clone(old->u.transform);
+            break;
+        case ICU_chain_step_type_tokenize:
+            (*sp)->u.tokenizer = icu_tokenizer_clone(old->u.tokenizer);
+            break;
+        case ICU_chain_step_type_none:
+            break;
+        }
+        old = old->previous;
+        sp = &(*sp)->previous;
+    }
+    *sp = 0;
+    return step;
+}
+
 struct icu_chain *icu_chain_create(const char *locale, int sort,
                                    UErrorCode * status)
 {
@@ -161,7 +194,7 @@ struct icu_chain *icu_chain_create(const char *locale, int sort,
         return 0;
 
     chain->norm8 = icu_buf_utf8_create(0);
-    chain->steps = 0;
+    chain->csteps = 0;
 
     return chain;
 }
@@ -176,7 +209,7 @@ void icu_chain_destroy(struct icu_chain * chain)
             icu_buf_utf8_destroy(chain->norm8);
         if (chain->iter)
             icu_iter_destroy(chain->iter);
-        icu_chain_step_destroy(chain->steps);
+        icu_chain_step_destroy(chain->csteps);
         xfree(chain->locale);
         xfree(chain);
     }
@@ -266,6 +299,7 @@ struct icu_chain * icu_chain_xml_config(const xmlNode *xml_node,
     return chain;
 }
 
+
 static struct icu_chain_step *icu_chain_insert_step(
     struct icu_chain * chain, enum icu_chain_step_type type,
     const uint8_t * rule, UErrorCode *status)
@@ -278,8 +312,8 @@ static struct icu_chain_step *icu_chain_insert_step(
 
     step = icu_chain_step_create(chain, type, rule, status);
 
-    step->previous = chain->steps;
-    chain->steps = step;
+    step->previous = chain->csteps;
+    chain->csteps = step;
 
     return step;
 }
@@ -292,6 +326,7 @@ struct icu_iter {
     struct icu_buf_utf8 *sort8;
     struct icu_buf_utf16 *input;
     int token_count;
+    struct icu_chain_step *steps;
 };
 
 void icu_utf16_print(struct icu_buf_utf16 *src16)
@@ -388,13 +423,17 @@ struct icu_iter *icu_iter_create(struct icu_chain *chain,
         iter->sort8 = icu_buf_utf8_create(0);
         iter->token_count = 0;
         iter->last = 0; /* no last returned string (yet) */
+#if 0
+        iter->steps = icu_chain_step_clone(chain->csteps);
+#else
+        iter->steps = 0;
+#endif
 
         /* fill and assign input string.. It will be 0 after
            first iteration */
         iter->input = icu_buf_utf16_create(0);
         icu_utf16_from_utf8_cstr(iter->input, src8cstr, &iter->status);
         return iter;
-
     }
 }
 
@@ -406,6 +445,7 @@ void icu_iter_destroy(struct icu_iter *iter)
         icu_buf_utf8_destroy(iter->sort8);
         if (iter->input)
             icu_buf_utf16_destroy(iter->input);
+        icu_chain_step_destroy(iter->steps);
         xfree(iter);
     }
 }
@@ -417,7 +457,9 @@ int icu_iter_next(struct icu_iter *iter, struct icu_buf_utf8 *result)
     else
     {
         /* on first call, iter->input is the input string. Thereafter: 0. */
-        iter->last = icu_iter_invoke(iter, iter->chain->steps, iter->input);
+        iter->last = icu_iter_invoke(iter, iter->steps ?
+                                     iter->steps : iter->chain->csteps,
+                                     iter->input);
         iter->input = 0;
 
         if (!iter->last)
diff --git a/src/icu_tokenizer.c b/src/icu_tokenizer.c
index f9b4926..e8d3123 100644
--- a/src/icu_tokenizer.c
+++ b/src/icu_tokenizer.c
@@ -19,6 +19,7 @@
 
 #include <yaz/log.h>
 
+#include <assert.h>
 #include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -45,12 +46,9 @@ struct icu_tokenizer
 */
 };
 
-struct icu_tokenizer *icu_tokenizer_create(const char *locale, char action,
-                                           UErrorCode *status)
+static void icu_tokenizer_reset(struct icu_tokenizer *tokenizer,
+                                char action)
 {
-    struct icu_tokenizer * tokenizer
-        = (struct icu_tokenizer *) xmalloc(sizeof(struct icu_tokenizer));
-
     tokenizer->action = action;
     tokenizer->bi = 0;
     tokenizer->buf16 = icu_buf_utf16_create(0);
@@ -58,7 +56,32 @@ struct icu_tokenizer *icu_tokenizer_create(const char *locale, char action,
     tokenizer->token_id = 0;
     tokenizer->token_start = 0;
     tokenizer->token_end = 0;
+    tokenizer->bi = 0;
+}
+
+struct icu_tokenizer *icu_tokenizer_clone(struct icu_tokenizer *old)
+{
+    int32_t bufferSize = 10000;
+    UErrorCode status = U_ZERO_ERROR;
+    struct icu_tokenizer * tokenizer
+        = (struct icu_tokenizer *) xmalloc(sizeof(struct icu_tokenizer));
+
+    assert(old);
+    icu_tokenizer_reset(tokenizer, old->action);
+    assert(old->bi);
+    tokenizer->bi = ubrk_safeClone(old->bi, NULL, &bufferSize, &status);
+    /* same failure policy as icu_transform_clone: the clone must exist */
+    assert(U_SUCCESS(status) && tokenizer->bi);
+    return tokenizer;
+}
+
+struct icu_tokenizer *icu_tokenizer_create(const char *locale, char action,
+                                           UErrorCode *status)
+{
+    struct icu_tokenizer * tokenizer
+        = (struct icu_tokenizer *) xmalloc(sizeof(struct icu_tokenizer));
 
+    icu_tokenizer_reset(tokenizer, action);
     switch (tokenizer->action)
     {
     case 'l':
diff --git a/src/icu_transform.c b/src/icu_transform.c
index a3d9e16..b7ae9c3 100644
--- a/src/icu_transform.c
+++ b/src/icu_transform.c
@@ -19,6 +19,7 @@
 
 #include <yaz/log.h>
 
+#include <assert.h>
 #include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -32,6 +33,19 @@ struct icu_transform
     UTransliterator * trans;
 };
 
+struct icu_transform *icu_transform_clone(struct icu_transform *old)
+{
+    struct icu_transform *transform
+        = (struct icu_transform *) xmalloc(sizeof(struct icu_transform));
+    UErrorCode status = U_ZERO_ERROR;
+    assert(old);
+    transform->action = old->action;
+    assert(old->trans);
+    transform->trans = utrans_clone(old->trans, &status);
+    assert(transform->trans);
+    return transform;
+}
+
 struct icu_transform * icu_transform_create(const char *id, char action,
                                             const char *rules,
                                             UErrorCode *status)
diff --git a/test/tst_icu_I18N.c b/test/tst_icu_I18N.c
index 484e9cc..d4dc7c1 100644
--- a/test/tst_icu_I18N.c
+++ b/test/tst_icu_I18N.c
@@ -455,11 +455,10 @@ static void check_icu_chain(void)
 
     while (icu_chain_next_token(chain, &status))
     {
-        ;
-        /* printf("%d '%s' '%s'\n",
-           icu_chain_token_number(chain),
-           icu_chain_token_norm(chain),
-           icu_chain_token_display(chain)); */
+        yaz_log(YLOG_LOG, "%d '%s' '%s'",
+                icu_chain_token_number(chain),
+                icu_chain_token_norm(chain),
+                icu_chain_token_display(chain));
     }
 
     YAZ_CHECK_EQ(icu_chain_token_number(chain), 7);
@@ -469,11 +468,10 @@ static void check_icu_chain(void)
 
     while (icu_chain_next_token(chain, &status))
     {
-        ;
-        /* printf("%d '%s' '%s'\n",
-           icu_chain_token_number(chain),
-           icu_chain_token_norm(chain),
-           icu_chain_token_display(chain)); */
+        yaz_log(YLOG_LOG, "%d '%s' '%s'",
+                icu_chain_token_number(chain),
+                icu_chain_token_norm(chain),
+                icu_chain_token_display(chain));
     }
 
 
-- 
1.7.10.4