changed ICU chain to return all tokens, also those which became empty under normalization
author Marc Cromme <marc@indexdata.dk>
Mon, 29 Oct 2007 13:50:57 +0000 (13:50 +0000)
committer Marc Cromme <marc@indexdata.dk>
Mon, 29 Oct 2007 13:50:57 +0000 (13:50 +0000)
include/yaz/icu_I18N.h
src/icu_I18N.c
test/tst_icu_I18N.c

index 61dbf70..c2c5042 100644 (file)
@@ -273,10 +273,6 @@ int icu_chain_next_token(struct icu_chain * chain,
 
 int icu_chain_token_number(struct icu_chain * chain);
 
-/*int icu_chain_token_start(struct icu_chain * chain); */
-
-/*int icu_chain_token_end(struct icu_chain * chain); */
-
 const char * icu_chain_token_display(struct icu_chain * chain);
 
 const char * icu_chain_token_norm(struct icu_chain * chain);
index a4aa244..932c1b2 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: icu_I18N.c,v 1.9 2007-10-29 10:22:23 marc Exp $
+ * $Id: icu_I18N.c,v 1.10 2007-10-29 13:50:57 marc Exp $
  */
 
 #if HAVE_CONFIG_H
@@ -1035,6 +1035,7 @@ int icu_chain_step_next_token(struct icu_chain * chain,
     if (step->previous){
         src16 = step->previous->buf16;
         /* tokens might be killed in previous steps, therefore looping */
+
         while (step->need_new_token 
                && step->previous->more_tokens
                && !got_new_token)
@@ -1088,8 +1089,10 @@ int icu_chain_step_next_token(struct icu_chain * chain,
         /* make sure to get new previous token if this one had been used up
            by recursive call to _same_ step */
 
-        if (!step->more_tokens)
+        if (!step->more_tokens){
             step->more_tokens = icu_chain_step_next_token(chain, step, status);
+            return step->more_tokens;  // avoid one token count too much!
+        }
 
         break;
     default:
@@ -1101,11 +1104,8 @@ int icu_chain_step_next_token(struct icu_chain * chain,
         return 0;
 
     /* if token disappered into thin air, tell caller */
-    if (!step->buf16->utf16_len)
-        return 0;
-    
-    if (U_FAILURE(*status))
-        return 0;
+    /* if (!step->buf16->utf16_len && !step->more_tokens) */ 
+    /*    return 0; */ 
 
     return 1;
 }
@@ -1173,7 +1173,7 @@ int icu_chain_next_token(struct icu_chain * chain,
 
         while(!got_token && chain->steps && chain->steps->more_tokens)
             got_token = icu_chain_step_next_token(chain, chain->steps, status);
-    
+
         if (got_token){
             chain->token_count++;
 
index 07aab52..1d6e205 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: tst_icu_I18N.c,v 1.10 2007-10-29 10:22:23 marc Exp $
+/* $Id: tst_icu_I18N.c,v 1.11 2007-10-29 13:50:57 marc Exp $
    Copyright (c) 2006-2007, Index Data.
 
    This file is part of Pazpar2.
@@ -506,8 +506,6 @@ void test_icu_I18N_chain(int argc, char **argv)
     xmlNode *xml_node = xmlDocGetRootElement(doc);
     YAZ_CHECK(xml_node);
 
-    // printf("ICU chain:\ninput: '%s'\n", en_str);
-
 
     chain = icu_chain_xml_config(xml_node, (uint8_t *) "en", 0, &status);
 
@@ -516,12 +514,13 @@ void test_icu_I18N_chain(int argc, char **argv)
 
     YAZ_CHECK(icu_chain_assign_cstr(chain, en_str, &status));
 
+    //printf("ICU chain:\ninput: '%s'\n", en_str);
     while (icu_chain_next_token(chain, &status)){
         ;
-        // printf("%d '%s' '%s'\n",
-        //       icu_chain_token_number(chain),
-        //       icu_chain_token_norm(chain),
-        //       icu_chain_token_display(chain));
+        /* printf("%d '%s' '%s'\n",
+               icu_chain_token_number(chain),
+               icu_chain_token_norm(chain),
+               icu_chain_token_display(chain)); */
     }
 
     YAZ_CHECK_EQ(icu_chain_token_number(chain), 7);
@@ -531,10 +530,10 @@ void test_icu_I18N_chain(int argc, char **argv)
 
     while (icu_chain_next_token(chain, &status)){
         ;
-        //printf("%d '%s' '%s'\n",
-        //       icu_chain_token_number(chain),
-        //       icu_chain_token_norm(chain),
-        //       icu_chain_token_display(chain));
+        /* printf("%d '%s' '%s'\n",
+               icu_chain_token_number(chain),
+               icu_chain_token_norm(chain),
+               icu_chain_token_display(chain)); */
     }
 
 
@@ -577,10 +576,10 @@ void test_bug_1140(void)
 
     while (icu_chain_next_token(chain, &status)){    
         ;
-        //printf("%d '%s' '%s'\n",
-        //       icu_chain_token_number(chain),
-        //       icu_chain_token_norm(chain),
-        //       icu_chain_token_display(chain));
+        /* printf("%d '%s' '%s'\n",
+               icu_chain_token_number(chain),
+               icu_chain_token_norm(chain),
+               icu_chain_token_display(chain)); */
 
     }
     
@@ -591,10 +590,10 @@ void test_bug_1140(void)
 
     while (icu_chain_next_token(chain, &status)){
        ;
-       //printf("%d '%s' '%s'\n",
-       //        icu_chain_token_number(chain),
-       //        icu_chain_token_norm(chain),
-       //        icu_chain_token_display(chain));
+       /* printf("%d '%s' '%s'\n",
+               icu_chain_token_number(chain),
+               icu_chain_token_norm(chain),
+               icu_chain_token_display(chain)); */
     }
 
     /* we expect 'what' 'is' 'this', i.e. 3 tokens */
@@ -625,18 +624,18 @@ void test_chain_empty_token(void)
     YAZ_CHECK(chain);
     
     YAZ_CHECK(icu_chain_assign_cstr(
-                  chain,  "a string with 15 wordbreaks and 8 tokens",
+                  chain,  "a string with 15 tokenss and 8 displays",
                   &status));
 
     while (icu_chain_next_token(chain, &status)){
         ;
-        //printf("%d '%s' '%s'\n",
-        //       icu_chain_token_number(chain),
-        //       icu_chain_token_norm(chain),
-        //       icu_chain_token_display(chain));
+        /* printf("%d '%s' '%s'\n",
+               icu_chain_token_number(chain),
+               icu_chain_token_norm(chain),
+               icu_chain_token_display(chain)); */
     }
 
-    YAZ_CHECK_EQ(icu_chain_token_number(chain), 8);
+    YAZ_CHECK_EQ(icu_chain_token_number(chain), 15);
 
     icu_chain_destroy(chain);
 }
@@ -667,15 +666,15 @@ void test_chain_empty_chain(void)
 
     while (icu_chain_next_token(chain, &status)){
         ;
-        //printf("%d '%s' '%s'\n",
-        //       icu_chain_token_number(chain),
-        //       icu_chain_token_norm(chain),
-        //       icu_chain_token_display(chain));
+        /* printf("%d '%s' '%s'\n",
+               icu_chain_token_number(chain),
+               icu_chain_token_norm(chain),
+               icu_chain_token_display(chain)); */
     }
 
     YAZ_CHECK_EQ(icu_chain_token_number(chain), 1);
 
-    dest8 = icu_chain_token_norm(chain);
+    dest8 = (char *) icu_chain_token_norm(chain);
     YAZ_CHECK_EQ(strcmp(src8, dest8), 0);