Make ICU code compatible with older C compilers (C89-style comments and declarations). Switch to YAZ license.

author Adam Dickmeiss <adam@indexdata.dk>

Mon, 22 Oct 2007 17:32:07 +0000 (17:32 +0000)

committer Adam Dickmeiss <adam@indexdata.dk>

Mon, 22 Oct 2007 17:32:07 +0000 (17:32 +0000)
author Adam Dickmeiss <adam@indexdata.dk>
Mon, 22 Oct 2007 17:32:07 +0000 (17:32 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Mon, 22 Oct 2007 17:32:07 +0000 (17:32 +0000)
diff --git a/include/yaz/icu_I18N.h b/include/yaz/icu_I18N.h

index efcd033..8e73a0b 100644 (file)
--- a/include/yaz/icu_I18N.h
+++ b/include/yaz/icu_I18N.h
@@ -1,23 +1,29 @@
-/* $Id: icu_I18N.h,v 1.1 2007-10-22 12:21:39 adam Exp $
-   Copyright (c) 2006-2007, Index Data.
-
-   This file is part of Pazpar2.
-
-   Pazpar2 is free software; you can redistribute it and/or modify it under
-   the terms of the GNU General Public License as published by the Free
-   Software Foundation; either version 2, or (at your option) any later
-   version.
-
-   Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or
-   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-   for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with Pazpar2; see the file LICENSE.  If not, write to the
-   Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-   02111-1307, USA.
-*/
+/*
+ * Copyright (c) 1995-2007, Index Data
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Index Data nor the names of its contributors
+ *       may be used to endorse or promote products derived from this
+ *       software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
  
  #ifndef ICU_I18NL_H
  #define ICU_I18NL_H
@@ -30,18 +36,13 @@
  #include <unicode/utypes.h>   /* Basic ICU data types */
  #include <unicode/uchar.h>    /* char names           */
  
-//#include <unicode/ustdio.h>
  #include <unicode/ucol.h> 
-//#include <unicode/ucnv.h>     /* C   Converter API    */
-//#include <unicode/ustring.h>  /* some more string fcns*/
-//#include <unicode/uloc.h>
  #include <unicode/ubrk.h>
-//#include <unicode/unistr.h>
  #include <unicode/utrans.h>
  
  
  
-// declared structs and functions
+/* declared structs and functions */
  
  int icu_check_status (UErrorCode status);
  
@@ -63,9 +64,9 @@ void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16);
  
  struct icu_buf_utf8
  {
-  uint8_t * utf8;
-  int32_t utf8_len;
-  int32_t utf8_cap;
+    uint8_t * utf8;
+    int32_t utf8_len;
+    int32_t utf8_cap;
  };
  
  struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity);
@@ -89,8 +90,8 @@ UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
  
  struct icu_casemap
  {
-  char locale[16];
-  char action;
+    char locale[16];
+    char action;
  };
  
  struct icu_casemap * icu_casemap_create(const char *locale, char action,
@@ -115,20 +116,22 @@ UErrorCode icu_sortkey8_from_utf16(UCollator *coll,
  
  struct icu_tokenizer
  {
-  char locale[16];
-  char action;
-  UBreakIterator* bi;
-  struct icu_buf_utf16 * buf16;
-  int32_t token_count;
-  int32_t token_id;
-  int32_t token_start;
-  int32_t token_end;
-  // keep always invariant
-  // 0 <= token_start 
-  //   <= token_end 
-  //   <= buf16->utf16_len
-  // and invariant
-  // 0 <= token_id <= token_count
+    char locale[16];
+    char action;
+    UBreakIterator* bi;
+    struct icu_buf_utf16 * buf16;
+    int32_t token_count;
+    int32_t token_id;
+    int32_t token_start;
+    int32_t token_end;
+/*
+  keep always invariant
+  0 <= token_start 
+  <= token_end 
+  <= buf16->utf16_len
+  and invariant
+  0 <= token_id <= token_count
+*/
  };
  
  struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
@@ -153,10 +156,10 @@ int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer);
  
  struct icu_normalizer
  {
-  char action;
-  struct icu_buf_utf16 * rules16;
-  UParseError parse_error[256];
-  UTransliterator * trans;
+    char action;
+    struct icu_buf_utf16 * rules16;
+    UParseError parse_error[256];
+    UTransliterator * trans;
  };
  
  struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
@@ -170,44 +173,32 @@ int icu_normalizer_normalize(struct icu_normalizer * normalizer,
                               struct icu_buf_utf16 * src16,
                               UErrorCode *status);
  
-
-#if 0
-struct icu_token
-{
-  int32_t token_id;
-  uint8_t * display8;
-  uint8_t * norm8;
-  uint8_t * sort8;
-}
-#endif
-
-
  enum icu_chain_step_type {
-    ICU_chain_step_type_none,      // 
-    ICU_chain_step_type_display,   // convert to utf8 display format 
-    ICU_chain_step_type_index,     // convert to utf8 index format 
-    ICU_chain_step_type_sortkey,   // convert to utf8 sortkey format 
-    ICU_chain_step_type_casemap,   // apply utf16 charmap
-    ICU_chain_step_type_normalize, // apply utf16 normalization
-    ICU_chain_step_type_tokenize   // apply utf16 tokenization 
+    ICU_chain_step_type_none,
+    ICU_chain_step_type_display,   /* convert to utf8 display format */
+    ICU_chain_step_type_index,     /* convert to utf8 index format  */
+    ICU_chain_step_type_sortkey,   /* convert to utf8 sortkey format */
+    ICU_chain_step_type_casemap,   /* apply utf16 charmap */
+    ICU_chain_step_type_normalize, /* apply utf16 normalization */
+    ICU_chain_step_type_tokenize   /* apply utf16 tokenization */
  };
  
  
  
  struct icu_chain_step
  {
-  // type and action object
-  enum icu_chain_step_type type;
-  union {
-    struct icu_casemap * casemap;
-    struct icu_normalizer * normalizer;
-    struct icu_tokenizer * tokenizer;  
-  } u;
-  // temprary post-action utf16 buffer
-  struct icu_buf_utf16 * buf16;  
-  struct icu_chain_step * previous;
-  int more_tokens;
-  int need_new_token;
+    /* type and action object */
+    enum icu_chain_step_type type;
+    union {
+       struct icu_casemap * casemap;
+       struct icu_normalizer * normalizer;
+       struct icu_tokenizer * tokenizer;  
+    } u;
+    /* temporary post-action utf16 buffer */
+    struct icu_buf_utf16 * buf16;  
+    struct icu_chain_step * previous;
+    int more_tokens;
+    int need_new_token;
  };
  
  
@@ -225,22 +216,22 @@ void icu_chain_step_destroy(struct icu_chain_step * step);
  
  struct icu_chain
  {
-  uint8_t identifier[128];
-  uint8_t locale[16];
-
-  // number of tokens returned so far
-  int32_t token_count;
-
-  // utf8 output buffers
-  struct icu_buf_utf8 * display8;
-  struct icu_buf_utf8 * norm8;
-  struct icu_buf_utf8 * sort8;
-
-  // utf16 source buffer
-  struct icu_buf_utf16 * src16;
-
-  // linked list of chain steps
-  struct icu_chain_step * steps;
+    uint8_t identifier[128];
+    uint8_t locale[16];
+    
+    /* number of tokens returned so far */
+    int32_t token_count;
+    
+    /* utf8 output buffers */
+    struct icu_buf_utf8 * display8;
+    struct icu_buf_utf8 * norm8;
+    struct icu_buf_utf8 * sort8;
+    
+    /* utf16 source buffer */
+    struct icu_buf_utf16 * src16;
+    
+    /* linked list of chain steps */
+    struct icu_chain_step * steps;
  };
  
  struct icu_chain * icu_chain_create(const uint8_t * identifier, 
@@ -277,8 +268,12 @@ const char * icu_chain_get_norm(struct icu_chain * chain);
  
  const char * icu_chain_get_sort(struct icu_chain * chain);
  
+#endif /* ICU_I18NL_H */
  
-
-
-
-#endif // ICU_I18NL_H
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
diff --git a/src/icu_I18N.c b/src/icu_I18N.c

index a085caa..9e6cdd9 100644 (file)
--- a/src/icu_I18N.c
+++ b/src/icu_I18N.c
@@ -1,26 +1,12 @@
-/* $Id: icu_I18N.c,v 1.1 2007-10-22 12:21:39 adam Exp $
-   Copyright (c) 2006-2007, Index Data.
-
-   This file is part of Pazpar2.
-
-   Pazpar2 is free software; you can redistribute it and/or modify it under
-   the terms of the GNU General Public License as published by the Free
-   Software Foundation; either version 2, or (at your option) any later
-   version.
-
-   Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or
-   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-   for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with Pazpar2; see the file LICENSE.  If not, write to the
-   Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-   02111-1307, USA.
-*/
+/*
+ * Copyright (C) 1995-2007, Index Data ApS
+ * See the file LICENSE for details.
+ *
+ * $Id: icu_I18N.c,v 1.2 2007-10-22 17:32:07 adam Exp $
+ */
  
  #if HAVE_CONFIG_H
-#include "cconfig.h"
+#include "config.h"
  #endif
  
  #define USE_TIMING 0
@@ -29,7 +15,7 @@
  #endif
  
  
-#ifdef HAVE_ICU
+#if HAVE_ICU
  #include <yaz/icu_I18N.h>
  
  #include <yaz/log.h>
@@ -42,15 +28,7 @@
  #include <unicode/uchar.h>    /* char names           */
  
  
-//#include <unicode/ustdio.h>
-//#include <unicode/utypes.h>   /* Basic ICU data types */
  #include <unicode/ucol.h> 
-//#include <unicode/ucnv.h>     /* C   Converter API    */
-//#include <unicode/uloc.h>
-//#include <unicode/ubrk.h>
-/* #include <unicode/unistr.h> */
-
-
  
  
  int icu_check_status (UErrorCode status)
@@ -81,7 +59,7 @@ struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity)
          buf16->utf16_cap = capacity;
      }
      return buf16;
-};
+}
  
  struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
                                              size_t capacity)
@@ -107,7 +85,7 @@ struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
      }
  
      return buf16;
-};
+}
  
  
  struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
@@ -124,7 +102,7 @@ struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
      dest16->utf16_len = src16->utf16_len;
  
      return dest16;
-};
+}
  
  
  void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16)
@@ -134,7 +112,7 @@ void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16)
              free(buf16->utf16);
          free(buf16);
      }
-};
+}
  
  
  
@@ -156,7 +134,7 @@ struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity)
          buf8->utf8_cap = capacity;
      }
      return buf8;
-};
+}
  
  
  
@@ -183,7 +161,7 @@ struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
      }
  
      return buf8;
-};
+}
  
  
  struct icu_buf_utf8 * icu_buf_utf8_copy(struct icu_buf_utf8 * dest8,
@@ -200,7 +178,7 @@ struct icu_buf_utf8 * icu_buf_utf8_copy(struct icu_buf_utf8 * dest8,
      strncpy((char*) dest8->utf8, (char*) src8->utf8, src8->utf8_len);
  
      return dest8;
-};
+}
  
  
  const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8)
@@ -221,7 +199,7 @@ void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8)
              free(buf8->utf8);
          free(buf8);
      }
-};
+}
  
  
  
@@ -235,10 +213,9 @@ UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16,
                    &utf16_len,
                    (const char *) src8->utf8, src8->utf8_len, status);
    
-    // check for buffer overflow, resize and retry
-    if (*status == U_BUFFER_OVERFLOW_ERROR
-        //|| dest16->utf16_len > dest16->utf16_cap
-        ){
+    /* check for buffer overflow, resize and retry */
+    if (*status == U_BUFFER_OVERFLOW_ERROR)
+    {
          icu_buf_utf16_resize(dest16, utf16_len * 2);
          *status = U_ZERO_ERROR;
          u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
@@ -246,7 +223,6 @@ UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16,
                        (const char *) src8->utf8, src8->utf8_len, status);
      }
  
-    //if (*status != U_BUFFER_OVERFLOW_ERROR
      if (U_SUCCESS(*status)  
          && utf16_len <= dest16->utf16_cap)
          dest16->utf16_len = utf16_len;
@@ -256,7 +232,7 @@ UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16,
      }
    
      return *status;
-};
+}
  
   
  
@@ -273,10 +249,9 @@ UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
                    &utf16_len,
                    src8cstr, src8cstr_len, status);
    
-    // check for buffer overflow, resize and retry
-    if (*status == U_BUFFER_OVERFLOW_ERROR
-        //|| dest16->utf16_len > dest16->utf16_cap
-        ){
+    /* check for buffer overflow, resize and retry */
+    if (*status == U_BUFFER_OVERFLOW_ERROR)
+    {
          icu_buf_utf16_resize(dest16, utf16_len * 2);
          *status = U_ZERO_ERROR;
          u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
@@ -284,7 +259,6 @@ UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
                        src8cstr, src8cstr_len, status);
      }
  
-    //  if (*status != U_BUFFER_OVERFLOW_ERROR
      if (U_SUCCESS(*status)  
          && utf16_len <= dest16->utf16_cap)
          dest16->utf16_len = utf16_len;
@@ -294,7 +268,7 @@ UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
      }
    
      return *status;
-};
+}
  
  
  
@@ -309,10 +283,9 @@ UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
                  &utf8_len,
                  src16->utf16, src16->utf16_len, status);
    
-    // check for buffer overflow, resize and retry
-    if (*status == U_BUFFER_OVERFLOW_ERROR
-        //|| dest8->utf8_len > dest8->utf8_cap
-        ){
+    /* check for buffer overflow, resize and retry */
+    if (*status == U_BUFFER_OVERFLOW_ERROR)
+    {
          icu_buf_utf8_resize(dest8, utf8_len * 2);
          *status = U_ZERO_ERROR;
          u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
@@ -321,7 +294,6 @@ UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
  
      }
  
-    //if (*status != U_BUFFER_OVERFLOW_ERROR
      if (U_SUCCESS(*status)  
          && utf8_len <= dest8->utf8_cap)
          dest8->utf8_len = utf8_len;
@@ -331,7 +303,7 @@ UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
      }
    
      return *status;
-};
+}
  
  
  
@@ -358,13 +330,13 @@ struct icu_casemap * icu_casemap_create(const char *locale, char action,
      }
  
      return casemap;
-};
+}
  
  void icu_casemap_destroy(struct icu_casemap * casemap)
  {
      if (casemap) 
          free(casemap);
-};
+}
  
  
  int icu_casemap_casemap(struct icu_casemap * casemap,
@@ -377,7 +349,7 @@ int icu_casemap_casemap(struct icu_casemap * casemap,
      
      return icu_utf16_casemap(dest16, src16,
                               casemap->locale, casemap->action, status);
-};
+}
  
  
  int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
@@ -414,10 +386,9 @@ int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
          break;
      }
  
-    // check for buffer overflow, resize and retry
+    /* check for buffer overflow, resize and retry */
      if (*status == U_BUFFER_OVERFLOW_ERROR
-        && dest16 != src16        // do not resize if in-place conversion 
-        //|| dest16_len > dest16->utf16_cap
+        && dest16 != src16        /* do not resize if in-place conversion */
          ){
          icu_buf_utf16_resize(dest16, dest16_len * 2);
          *status = U_ZERO_ERROR;
@@ -460,7 +431,7 @@ int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
      }
    
      return *status;
-};
+}
  
  
  
@@ -475,7 +446,7 @@ UErrorCode icu_sortkey8_from_utf16(UCollator *coll,
      sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
                                    dest8->utf8, dest8->utf8_cap);
  
-    // check for buffer overflow, resize and retry
+    /* check for buffer overflow, resize and retry */
      if (sortkey_len > dest8->utf8_cap) {
          icu_buf_utf8_resize(dest8, sortkey_len * 2);
          sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
@@ -491,7 +462,7 @@ UErrorCode icu_sortkey8_from_utf16(UCollator *coll,
      }
  
      return sortkey_len;
-};
+}
  
  
  
@@ -543,14 +514,14 @@ struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
          break;
      }
      
-    // ICU error stuff is a very  funny business
+    /* ICU error stuff is a very  funny business */
      if (U_SUCCESS(*status))
          return tokenizer;
  
-    // freeing if failed
+    /* freeing if failed */
      icu_tokenizer_destroy(tokenizer);
      return 0;
-};
+}
  
  void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer)
  {
@@ -559,7 +530,7 @@ void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer)
              ubrk_close(tokenizer->bi);
          free(tokenizer);
      }
-};
+}
  
  int icu_tokenizer_attach(struct icu_tokenizer * tokenizer, 
                           struct icu_buf_utf16 * src16, 
@@ -597,29 +568,31 @@ int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer,
          || !tokenizer->buf16 || !tokenizer->buf16->utf16_len)
          return 0;
  
-    // never change tokenizer->buf16 and keep always invariant
-    // 0 <= tokenizer->token_start 
-    //   <= tokenizer->token_end 
-    //   <= tokenizer->buf16->utf16_len
-    // returns length of token
+    /*
+    never change tokenizer->buf16 and keep always invariant
+    0 <= tokenizer->token_start 
+       <= tokenizer->token_end 
+       <= tokenizer->buf16->utf16_len
+    returns length of token
+    */
  
-    if (0 == tokenizer->token_end) // first call
+    if (0 == tokenizer->token_end) /* first call */
          tkn_start = ubrk_first(tokenizer->bi);
-    else //successive calls
+    else /* successive calls */
          tkn_start = tokenizer->token_end;
  
-    // get next position
+    /* get next position */
      tkn_end = ubrk_next(tokenizer->bi);
  
-    // repairing invariant at end of ubrk, which is UBRK_DONE = -1 
+    /* repairing invariant at end of ubrk, which is UBRK_DONE = -1 */
      if (UBRK_DONE == tkn_end)
          tkn_end = tokenizer->buf16->utf16_len;
  
-    // copy out if everything is well
+    /* copy out if everything is well */
      if(U_FAILURE(*status))
          return 0;        
      
-    // everything OK, now update internal state
+    /* everything OK, now update internal state */
      tkn_len = tkn_end - tkn_start;
  
      if (0 < tkn_len){
@@ -632,7 +605,7 @@ int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer,
      tokenizer->token_end = tkn_end;
      
  
-    // copying into token buffer if it exists 
+    /* copying into token buffer if it exists */
      if (tkn16){
          if (tkn16->utf16_cap < tkn_len)
              icu_buf_utf16_resize(tkn16, (size_t) tkn_len * 2);
@@ -650,27 +623,27 @@ int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer,
  int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer)
  {
      return tokenizer->token_id;
-};
+}
  
  int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer)
  {
      return tokenizer->token_start;
-};
+}
  
  int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer)
  {
      return tokenizer->token_end;
-};
+}
  
  int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer)
  {
      return (tokenizer->token_end - tokenizer->token_start);
-};
+}
  
  int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer)
  {
      return tokenizer->token_count;
-};
+}
  
  
  
@@ -694,7 +667,6 @@ struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
                             UTRANS_FORWARD,
                             0, 0, 
                             normalizer->parse_error, status);
-        // yaz_log(YLOG_LOG, "utrans_open %p", normalizer->trans);
          break;
      case 'r':
          normalizer->trans
@@ -703,7 +675,6 @@ struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
                             UTRANS_REVERSE ,
                             0, 0,
                             normalizer->parse_error, status);
-        // yaz_log(YLOG_LOG, "utrans_open %p", normalizer->trans);
          break;
      default:
          *status = U_UNSUPPORTED_ERROR;
@@ -714,10 +685,10 @@ struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
      if (U_SUCCESS(*status))
          return normalizer;
  
-    // freeing if failed
+    /* freeing if failed */
      icu_normalizer_destroy(normalizer);
      return 0;
-};
+}
  
  
  void icu_normalizer_destroy(struct icu_normalizer * normalizer){
@@ -726,12 +697,11 @@ void icu_normalizer_destroy(struct icu_normalizer * normalizer){
              icu_buf_utf16_destroy(normalizer->rules16);
          if (normalizer->trans)
          {
-            // yaz_log(YLOG_LOG, "utrans_close %p", normalizer->trans);
              utrans_close(normalizer->trans);
          }
          free(normalizer);
      }
-};
+}
  
  
  
@@ -779,7 +749,7 @@ struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain,
  
      step->buf16 = buf16;
  
-    // create auxilary objects
+    /* create auxiliary objects */
      switch(step->type) {
      case ICU_chain_step_type_display:
          break;
@@ -803,7 +773,7 @@ struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain,
      }
  
      return step;
-};
+}
  
  
  void icu_chain_step_destroy(struct icu_chain_step * step){
@@ -836,7 +806,7 @@ void icu_chain_step_destroy(struct icu_chain_step * step){
          break;
      }
      free(step);
-};
+}
  
  
  
@@ -863,7 +833,7 @@ struct icu_chain * icu_chain_create(const uint8_t * identifier,
      chain->steps = 0;
  
      return chain;
-};
+}
  
  
  void icu_chain_destroy(struct icu_chain * chain)
@@ -878,7 +848,7 @@ void icu_chain_destroy(struct icu_chain * chain)
          icu_chain_step_destroy(chain->steps);
          free(chain);
      }
-};
+}
  
  
  
@@ -893,29 +863,33 @@ struct icu_chain * icu_chain_xml_config(xmlNode *xml_node,
          || strcmp((const char *) xml_node->name, "icu_chain"))
  
          return 0;
-    
-    xmlChar *xml_id = xmlGetProp(xml_node, (xmlChar *) "id");
-    xmlChar *xml_locale = xmlGetProp(xml_node, (xmlChar *) "locale");
-
-    if (!xml_id || !strlen((const char *) xml_id) 
-        || !xml_locale || !strlen((const char *) xml_locale))
-        return 0;
  
-    chain = icu_chain_create((const uint8_t *) xml_id, 
-                             (const uint8_t *) xml_locale);
-    
-    xmlFree(xml_id);
-    xmlFree(xml_locale);
+    {    
+        xmlChar *xml_id = xmlGetProp(xml_node, (xmlChar *) "id");
+        xmlChar *xml_locale = xmlGetProp(xml_node, (xmlChar *) "locale");
+        
+        if (!xml_id || !strlen((const char *) xml_id) 
+            || !xml_locale || !strlen((const char *) xml_locale))
+            return 0;
+        
+        chain = icu_chain_create((const uint8_t *) xml_id, 
+                                 (const uint8_t *) xml_locale);
+        
+        xmlFree(xml_id);
+        xmlFree(xml_locale);
+    }
      if (!chain)
          return 0;
          
      for (node = xml_node->children; node; node = node->next)
      {
+        xmlChar *xml_rule;
+        struct icu_chain_step * step = 0;
+
          if (node->type != XML_ELEMENT_NODE)
              continue;
  
-        xmlChar *xml_rule = xmlGetProp(node, (xmlChar *) "rule");
-        struct icu_chain_step * step = 0;
+        xml_rule = xmlGetProp(node, (xmlChar *) "rule");
  
          if (!strcmp((const char *) node->name, 
                      (const char *) "casemap")){
@@ -958,7 +932,7 @@ struct icu_chain * icu_chain_xml_config(xmlNode *xml_node,
      }
  
      return chain;
-};
+}
  
  
  
@@ -974,7 +948,7 @@ struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain,
      if (!chain || !type || !rule)
          return 0;
  
-    // assign utf16 src buffers as needed 
+    /* assign utf16 src buffers as needed */
      if (chain->steps && chain->steps->buf16)
          src16 = chain->steps->buf16;
      else if (chain->src16)
@@ -983,7 +957,7 @@ struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain,
          return 0;
  
      
-    // create utf16 destination buffers as needed, or
+    /* create utf16 destination buffers as needed, or reuse the source buffer */
      switch(type) {
      case ICU_chain_step_type_display:
          buf16 = src16;
@@ -1007,14 +981,14 @@ struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain,
          break;
      }
  
-    // create actual chain step with this buffer
+    /* create actual chain step with this buffer */
      step = icu_chain_step_create(chain, type, rule, buf16, status);
  
      step->previous = chain->steps;
      chain->steps = step;
  
      return step;
-};
+}
  
  
  int icu_chain_step_next_token(struct icu_chain * chain,
@@ -1023,35 +997,31 @@ int icu_chain_step_next_token(struct icu_chain * chain,
  {
      struct icu_buf_utf16 * src16 = 0;
      
-    //printf("icu_chain_step_next_token %d\n", (int) step);
-
      if (!chain || !chain->src16 || !step || !step->more_tokens)
          return 0;
  
-    // assign utf16 src buffers as neeed, advance in previous steps
-    // tokens until non-zero token met, and setting stop condition
+    /* assign utf16 src buffers as needed, advance the previous step's
+       tokens until a non-zero token is met, and set the stop condition
+    */
      if (step->previous){
          src16 = step->previous->buf16;
          if (step->need_new_token)
-            //while (step->more_tokens &&  !src16->utf16_len)
-                step->more_tokens 
-                    = icu_chain_step_next_token(chain, step->previous, status);
+            step->more_tokens 
+                = icu_chain_step_next_token(chain, step->previous, status);
      }
-    else { // first step can only work once on chain->src16 input buffer
+    else { /* first step can only work once on chain->src16 input buffer */
          src16 = chain->src16;
          step->more_tokens = 1;
      }
  
-    // stop if nothing to process 
-    // i.e new token source was not properly assigned
-    if (!step->more_tokens || !src16) // || !src16->utf16_len 
+    /* stop if nothing to process 
+       i.e new token source was not properly assigned
+    */
+    if (!step->more_tokens || !src16)
          return 0;
  
-    //printf("icu_chain_step_next_token %d working\n", (int) step);
-
-
-    // perform the work, eventually put this steps output in 
-    // step->buf16 or the chains UTF8 output buffers 
+    /* perform the work, eventually put this step's output in 
+       step->buf16 or the chain's UTF8 output buffers  */
      switch(step->type) {
      case ICU_chain_step_type_display:
          icu_utf16_to_utf8(chain->display8, src16, status);
@@ -1071,16 +1041,16 @@ int icu_chain_step_next_token(struct icu_chain * chain,
                                   step->buf16, src16, status);
          break;
      case ICU_chain_step_type_tokenize:
-        // attach to new src16 token only first time during splitting
+        /* attach to new src16 token only first time during splitting */
          if (step->need_new_token){
              icu_tokenizer_attach(step->u.tokenizer, src16, status);
              step->need_new_token = 0;
          }
-        // splitting one src16 token into multiple buf16 tokens
+        /* splitting one src16 token into multiple buf16 tokens */
          step->more_tokens
              = icu_tokenizer_next_token(step->u.tokenizer,
                                         step->buf16, status);
-        // make sure to get new previous token if this one had been used up
+        /* make sure to get new previous token if this one had been used up */
          if (step->previous && !step->more_tokens){
              if (icu_chain_step_next_token(chain, step->previous, status)){
                  icu_tokenizer_attach(step->u.tokenizer, src16, status);
@@ -1100,20 +1070,17 @@ int icu_chain_step_next_token(struct icu_chain * chain,
  
  
  
-    // stop further token processing if last step and 
-    // new tokens are needed from previous (non-existing) step
+    /* stop further token processing if last step and 
+       new tokens are needed from previous (non-existing) step 
+    */
      if (!step->previous && step->need_new_token)
          step->more_tokens = 0;
  
-    //printf("%d %d %d\n", 
-    //       step->more_tokens, src16->utf16_len, step->buf16->utf16_len);
-
-
      if (U_FAILURE(*status))
          return 0;
  
      return 1;
-};
+}
  
  
  
@@ -1128,25 +1095,24 @@ int icu_chain_assign_cstr(struct icu_chain * chain,
  
      stp = chain->steps;
      
-    // clear token count
+    /* clear token count */
      chain->token_count = 0;
  
-    // clear all steps stop states
-
+    /* clear all steps stop states */
      while (stp){
          stp->more_tokens = 1;
          stp->need_new_token = 1;
          stp = stp->previous;
      }
      
-    // finally convert UTF8 to UTF16 string
+    /* finally convert UTF8 to UTF16 string */
      icu_utf16_from_utf8_cstr(chain->src16, src8cstr, status);
              
      if (U_FAILURE(*status))
          return 0;
  
      return 1;
-};
+}
  
  
  
@@ -1166,7 +1132,7 @@ int icu_chain_next_token(struct icu_chain * chain,
      }
  
      return 0;
-};
+}
  
  int icu_chain_get_token_count(struct icu_chain * chain)
  {
@@ -1174,7 +1140,7 @@ int icu_chain_get_token_count(struct icu_chain * chain)
          return 0;
      
      return chain->token_count;
-};
+}
  
  
  
@@ -1184,7 +1150,7 @@ const char * icu_chain_get_display(struct icu_chain * chain)
          return icu_buf_utf8_to_cstr(chain->display8);
      
      return 0;
-};
+}
  
  const char * icu_chain_get_norm(struct icu_chain * chain)
  {
@@ -1192,7 +1158,7 @@ const char * icu_chain_get_norm(struct icu_chain * chain)
          return icu_buf_utf8_to_cstr(chain->norm8);
      
      return 0;
-};
+}
  
  const char * icu_chain_get_sort(struct icu_chain * chain)
  {
@@ -1200,12 +1166,10 @@ const char * icu_chain_get_sort(struct icu_chain * chain)
          return icu_buf_utf8_to_cstr(chain->sort8);
      
      return 0;
-};
-
-
+}
  
  
-#endif // HAVE_ICU    
+#endif /* HAVE_ICU */
  
  
  
diff --git a/test/tst_icu_I18N.c b/test/tst_icu_I18N.c

index 768e387..655c459 100644 (file)
--- a/test/tst_icu_I18N.c
+++ b/test/tst_icu_I18N.c
@@ -1,4 +1,4 @@
-/* $Id: tst_icu_I18N.c,v 1.1 2007-10-22 12:21:39 adam Exp $
+/* $Id: tst_icu_I18N.c,v 1.2 2007-10-22 17:32:07 adam Exp $
     Copyright (c) 2006-2007, Index Data.
  
     This file is part of Pazpar2.
@@ -19,11 +19,11 @@
     02111-1307, USA.
  */
  
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
   
  
  #if HAVE_CONFIG_H
-#include "cconfig.h"
+#include "config.h"
  #endif
  
  #define USE_TIMING 0
@@ -33,23 +33,20 @@
  
  #include <yaz/test.h>
  
-
-
-#ifdef HAVE_ICU
+#if HAVE_ICU
  #include <yaz/icu_I18N.h>
  
  #include <string.h>
  #include <stdlib.h>
  
-//#include <unicode/ustring.h>  
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
  
  
  #define MAX_KEY_SIZE 256
  struct icu_termmap
  {
-    uint8_t sort_key[MAX_KEY_SIZE]; // standard C string '\0' terminated
-    char disp_term[MAX_KEY_SIZE];  // standard C utf-8 string
+    uint8_t sort_key[MAX_KEY_SIZE]; /* standard C string '\0' terminated */
+    char disp_term[MAX_KEY_SIZE];  /* standard C utf-8 string */
  };
  
  
@@ -84,18 +81,18 @@ int test_icu_casemap(const char * locale, char action,
      int src8cstr_len = strlen(src8cstr);
      int chk8cstr_len = strlen(chk8cstr);
  
-    // converting to UTF16
+    /* converting to UTF16 */
      icu_utf16_from_utf8_cstr(src16, src8cstr, &status);
  
-    // perform case mapping
+    /* perform case mapping */
      icu_utf16_casemap(dest16, src16, locale, action, &status);
    
-    // converting to UTF8
+    /* converting to UTF8 */
      icu_utf16_to_utf8(dest8, dest16, &status);
        
  
    
-    // determine success
+    /* determine success */
      if (dest8->utf8 
          && (dest8->utf8_len == strlen(chk8cstr))
          && !strcmp(chk8cstr, (const char *) dest8->utf8))
@@ -103,7 +100,7 @@ int test_icu_casemap(const char * locale, char action,
      else
          success = 0;
  
-    // report failures
+    /* report failures */
      if (!success){
          printf("\nERROR\n");
          printf("original string:   '%s' (%d)\n", src8cstr, src8cstr_len);
@@ -112,7 +109,7 @@ int test_icu_casemap(const char * locale, char action,
          printf("expected string:   '%s' (%d)\n", chk8cstr, chk8cstr_len);
      }
    
-    // clean the buffers  
+    /* clean the buffers */
      icu_buf_utf8_destroy(src8);
      icu_buf_utf8_destroy(dest8);
      icu_buf_utf16_destroy(src16);
@@ -124,14 +121,14 @@ int test_icu_casemap(const char * locale, char action,
  
  
  
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
  
  void test_icu_I18N_casemap(int argc, char **argv)
  {
  
-    // Locale 'en'
+    /* Locale 'en' */
  
-    // sucessful tests
+    /* successful tests */
      YAZ_CHECK(test_icu_casemap("en", 'l',
                                 "A ReD fOx hunTS sQUirriLs", 
                                 "a red fox hunts squirrils"));
@@ -149,9 +146,9 @@ void test_icu_I18N_casemap(int argc, char **argv)
                                 "A Red Fox Hunts Squirrils"));
      
  
-    // Locale 'da'
+    /* Locale 'da' */
  
-    // sucess expected    
+    /* success expected */
      YAZ_CHECK(test_icu_casemap("da", 'l',
                                 "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN", 
                                 "åh æble, øs fløde i åen efter blåbærgrøden"));
@@ -168,9 +165,9 @@ void test_icu_I18N_casemap(int argc, char **argv)
                                 "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN", 
                                 "Åh Æble, Øs Fløde I Åen Efter Blåbærgrøden"));
  
-    // Locale 'de'
+    /* Locale 'de' */
  
-    // sucess expected    
+    /* success expected */
      YAZ_CHECK(test_icu_casemap("de", 'l',
                                 "zWÖlf ärgerliche Würste rollen ÜBer die StRAße",
                                 "zwölf ärgerliche würste rollen über die straße"));
@@ -190,7 +187,7 @@ void test_icu_I18N_casemap(int argc, char **argv)
  }
  
  
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
  
  int test_icu_sortmap(const char * locale, int src_list_len,
                       const char ** src_list, const char ** chk_list)
@@ -212,35 +209,33 @@ int test_icu_sortmap(const char * locale, int src_list_len,
      if(U_FAILURE(status))
          return 0;
  
-    // assigning display terms and sort keys using buf 8 and buf16
+    /* assigning display terms and sort keys using buf 8 and buf16 */
      for( i = 0; i < src_list_len; i++) 
          {
  
              list[i] = (struct icu_termmap *) malloc(sizeof(struct icu_termmap));
  
-            // copy display term
+            /* copy display term */
              strcpy(list[i]->disp_term, src_list[i]);    
  
-            // transforming to UTF16
+            /* transforming to UTF16 */
              icu_utf16_from_utf8_cstr(buf16, list[i]->disp_term, &status);
              icu_check_status(status);
  
-            // computing sortkeys
+            /* computing sortkeys */
              icu_sortkey8_from_utf16(coll, buf8, buf16, &status);
              icu_check_status(status);
      
-            // assigning sortkeys
+            /* assigning sortkeys */
              memcpy(list[i]->sort_key, buf8->utf8, buf8->utf8_len);    
-            //strncpy(list[i]->sort_key, buf8->utf8, buf8->utf8_len);    
-            //strcpy((char *) list[i]->sort_key, (const char *) buf8->utf8);
          } 
  
  
-    // do the sorting
+    /* do the sorting */
      qsort(list, src_list_len, 
            sizeof(struct icu_termmap *), icu_termmap_cmp);
  
-    // checking correct sorting
+    /* checking correct sorting */
      for (i = 0; i < src_list_len; i++){
          if (0 != strcmp(list[i]->disp_term, chk_list[i])){
              success = 0;
@@ -257,7 +252,6 @@ int test_icu_sortmap(const char * locale, int src_list_len,
          printf("ICU sort:  '%s' : ", locale); 
          for (i = 0; i < src_list_len; i++) {
              printf(" '%s'", list[i]->disp_term); 
-            //printf("(%d|%d)", list[i]->sort_key[0],list[i]->sort_key[1]); 
          }
          printf("\n"); 
          printf("Expected:  '%s' : ", locale); 
@@ -282,12 +276,12 @@ int test_icu_sortmap(const char * locale, int src_list_len,
  }
  
  
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
  
  void test_icu_I18N_sortmap(int argc, char **argv)
  {
  
-    // sucessful tests
+    /* successful tests */
      size_t en_1_len = 6;
      const char * en_1_src[6] = {"z", "K", "a", "A", "Z", "k"};
      const char * en_1_cck[6] = {"a", "A", "k", "K", "z", "Z"};
@@ -297,25 +291,28 @@ void test_icu_I18N_sortmap(int argc, char **argv)
      YAZ_CHECK(test_icu_sortmap("en_GB", en_1_len, en_1_src, en_1_cck));
      YAZ_CHECK(test_icu_sortmap("en_US", en_1_len, en_1_src, en_1_cck));
      
-    // sucessful tests 
-    size_t da_1_len = 6;
-    const char * da_1_src[6] = {"z", "å", "o", "æ", "a", "ø"};
-    const char * da_1_cck[6] = {"a", "o", "z", "æ", "ø", "å"};
-    YAZ_CHECK(test_icu_sortmap("da", da_1_len, da_1_src, da_1_cck));
-    YAZ_CHECK(test_icu_sortmap("da_DK", da_1_len, da_1_src, da_1_cck));
-    
-    // sucessful tests
-    size_t de_1_len = 9;
-    const char * de_1_src[9] = {"u", "ä", "o", "t", "s", "ß", "ü", "ö", "a"};
-    const char * de_1_cck[9] = {"a","ä", "o", "ö", "s", "ß", "t", "u", "ü"};
-    YAZ_CHECK(test_icu_sortmap("de", de_1_len, de_1_src, de_1_cck));
-    YAZ_CHECK(test_icu_sortmap("de_AT", de_1_len, de_1_src, de_1_cck));
-    YAZ_CHECK(test_icu_sortmap("de_DE", de_1_len, de_1_src, de_1_cck));
+    /* successful tests */
+    {
+        size_t da_1_len = 6;
+        const char * da_1_src[6] = {"z", "å", "o", "æ", "a", "ø"};
+        const char * da_1_cck[6] = {"a", "o", "z", "æ", "ø", "å"};
+        YAZ_CHECK(test_icu_sortmap("da", da_1_len, da_1_src, da_1_cck));
+        YAZ_CHECK(test_icu_sortmap("da_DK", da_1_len, da_1_src, da_1_cck));
+    }
+    /* successful tests */
+    {
+        size_t de_1_len = 9;
+        const char * de_1_src[9] = {"u", "ä", "o", "t", "s", "ß", "ü", "ö", "a"};
+        const char * de_1_cck[9] = {"a","ä", "o", "ö", "s", "ß", "t", "u", "ü"};
+        YAZ_CHECK(test_icu_sortmap("de", de_1_len, de_1_src, de_1_cck));
+        YAZ_CHECK(test_icu_sortmap("de_AT", de_1_len, de_1_src, de_1_cck));
+        YAZ_CHECK(test_icu_sortmap("de_DE", de_1_len, de_1_src, de_1_cck));
+    }
      
  }
  
  
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
  
  
  
@@ -367,7 +364,7 @@ int test_icu_normalizer(const char * rules8cstr,
  };
  
  
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
  
  void test_icu_I18N_normalizer(int argc, char **argv)
  {
@@ -405,7 +402,7 @@ void test_icu_I18N_normalizer(int argc, char **argv)
  }
  
  
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
  
  int test_icu_tokenizer(const char * locale, char action,
                       const char * src8cstr, int count)
@@ -416,38 +413,28 @@ int test_icu_tokenizer(const char * locale, char action,
      struct icu_buf_utf16 * src16 = icu_buf_utf16_create(0);
      struct icu_buf_utf16 * tkn16 = icu_buf_utf16_create(0);
      struct icu_buf_utf8 * tkn8 = icu_buf_utf8_create(0);
+    struct icu_tokenizer * tokenizer = 0;
  
-    //printf("Input:  '%s'\n", src8cstr);
-
-    // transforming to UTF16
+    /* transforming to UTF16 */
      icu_utf16_from_utf8_cstr(src16, src8cstr, &status);
      icu_check_status(status);
  
-    // set up tokenizer
-    struct icu_tokenizer * tokenizer 
-        = icu_tokenizer_create(locale, action, &status);
+    /* set up tokenizer */
+    tokenizer = icu_tokenizer_create(locale, action, &status);
      icu_check_status(status);
      YAZ_CHECK(tokenizer);
  
-    // attach text buffer to tokenizer
+    /* attach text buffer to tokenizer */
      icu_tokenizer_attach(tokenizer, src16, &status);    
      icu_check_status(status);
      YAZ_CHECK(tokenizer->bi);
  
-    // perform work on tokens
-    //printf("Tokens: ");
+    /* perform work on tokens */
      while(icu_tokenizer_next_token(tokenizer, tkn16, &status)){
          icu_check_status(status);
  
-        // converting to UTF8
+        /* converting to UTF8 */
          icu_utf16_to_utf8(tkn8, tkn16, &status);
-
-        //printf("token %d %d %d %d '%s'\n",
-        //       
-        //       icu_tokenizer_token_start(tokenizer),
-        //       icu_tokenizer_token_end(tokenizer),
-        //       icu_tokenizer_token_length(tokenizer),
-        //       tkn8->utf8);
      }
  
      if (count != icu_tokenizer_token_count(tokenizer)){
@@ -467,7 +454,7 @@ int test_icu_tokenizer(const char * locale, char action,
  }
  
  
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
  
  void test_icu_I18N_tokenizer(int argc, char **argv)
  {
@@ -483,14 +470,16 @@ void test_icu_I18N_tokenizer(int argc, char **argv)
  
  
  
-    const char * da_str 
-        = "Blåbærtærte. Denne kage stammer fra Finland. "
-        "Den er med blåbær, men alle sommerens forskellige bær kan bruges.";
-    
-    YAZ_CHECK(test_icu_tokenizer("da", 's', da_str, 3));
-    YAZ_CHECK(test_icu_tokenizer("dar", 'l', da_str, 17));
-    YAZ_CHECK(test_icu_tokenizer("da", 'w', da_str, 37));
-    YAZ_CHECK(test_icu_tokenizer("da", 'c', da_str, 110));
+    {
+        const char * da_str 
+            = "Blåbærtærte. Denne kage stammer fra Finland. "
+            "Den er med blåbær, men alle sommerens forskellige bær kan bruges.";
+        
+        YAZ_CHECK(test_icu_tokenizer("da", 's', da_str, 3));
+        YAZ_CHECK(test_icu_tokenizer("dar", 'l', da_str, 17));
+        YAZ_CHECK(test_icu_tokenizer("da", 'w', da_str, 37));
+        YAZ_CHECK(test_icu_tokenizer("da", 'c', da_str, 110));
+    }
  
  }
  
@@ -500,10 +489,7 @@ void test_icu_I18N_chain(int argc, char **argv)
      const char * en_str 
          = "O Romeo, Romeo! wherefore art thou\t Romeo?";
  
-    printf("ICU chain:\ninput: '%s'\n", en_str);
-
      UErrorCode status = U_ZERO_ERROR;
-    //struct icu_chain_step * step = 0;
      struct icu_chain * chain = 0;
      
  
@@ -522,6 +508,8 @@ void test_icu_I18N_chain(int argc, char **argv)
      xmlNode *xml_node = xmlDocGetRootElement(doc);
      YAZ_CHECK(xml_node);
  
+    printf("ICU chain:\ninput: '%s'\n", en_str);
+
  
      chain = icu_chain_xml_config(xml_node, &status);
  
@@ -591,13 +579,7 @@ void test_icu_I18N_chain(int argc, char **argv)
  
  void test_bug_1140(void)
  {
-    const char * en_str 
-        = "O Romeo, Romeo! wherefore art thou\t Romeo?";
-
-    printf("ICU chain:\ninput: '%s'\n", en_str);
-
      UErrorCode status = U_ZERO_ERROR;
-    //struct icu_chain_step * step = 0;
      struct icu_chain * chain = 0;
      
      const char * xml_str = "<icu_chain id=\"en:word\" locale=\"en\">"
@@ -650,7 +632,7 @@ void test_bug_1140(void)
  
  #endif // HAVE_ICU
  
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
  
  int main(int argc, char **argv)
  {
@@ -658,9 +640,8 @@ int main(int argc, char **argv)
      YAZ_CHECK_INIT(argc, argv); 
      YAZ_CHECK_LOG();
  
-#ifdef HAVE_ICU
+#if HAVE_ICU
  
-    //test_icu_I18N_casemap_failures(argc, argv);
      test_icu_I18N_casemap(argc, argv);
      test_icu_I18N_sortmap(argc, argv);
      test_icu_I18N_normalizer(argc, argv);
@@ -668,19 +649,19 @@ int main(int argc, char **argv)
      test_icu_I18N_chain(argc, argv);
      test_bug_1140();
  
-#else // HAVE_ICU
+#else /* HAVE_ICU */
  
      printf("ICU unit tests omitted.\n"
             "Please install libicu36-dev and icu-doc or similar\n");
      YAZ_CHECK(0 == 0);
  
-#endif // HAVE_ICU
+#endif /* HAVE_ICU */
     
      YAZ_CHECK_TERM;
  }
  
  
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
  
  
  
diff --git a/util/yaz-icu.c b/util/yaz-icu.c

index 4247f80..d929229 100644 (file)
--- a/util/yaz-icu.c
+++ b/util/yaz-icu.c
@@ -1,26 +1,12 @@
-/* $Id: yaz-icu.c,v 1.1 2007-10-22 12:21:40 adam Exp $
-   Copyright (c) 2006-2007, Index Data.
-
-This file is part of Pazpar2.
-
-Pazpar2 is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with Pazpar2; see the file LICENSE.  If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
+/*
+ * Copyright (C) 1995-2007, Index Data ApS
+ * See the file LICENSE for details.
+ *
+ * $Id: yaz-icu.c,v 1.2 2007-10-22 17:32:08 adam Exp $
   */
  
  #if HAVE_CONFIG_H
-#include "cconfig.h"
+#include "config.h"
  #endif
  
  #include <string.h>
@@ -28,11 +14,10 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  #include <stdio.h>
  #include <stdlib.h>
  
-//#include <yaz/xmalloc.h>
  #include <yaz/options.h>
  
  
-#ifdef HAVE_ICU
+#if HAVE_ICU
  
  #include <unicode/ucnv.h>
  #include <unicode/ustring.h>
@@ -118,7 +103,7 @@ void read_params(int argc, char **argv, struct config_t *p_config)
          || !config.outfile)
          
          print_option_error(p_config);
-};
+}
  
  
  /*     UConverter *conv; */
@@ -170,13 +155,10 @@ static void print_icu_converters(const struct config_t *p_config)
  
  static void print_icu_transliterators(const struct config_t *p_config)
  {
-    int32_t count;
-    int32_t i;
-    
-    count = utrans_countAvailableIDs();
-    
      int32_t buf_cap = 128;
      char buf[buf_cap];
+    int32_t i;
+    int32_t count = utrans_countAvailableIDs();
      
      if (p_config->xmloutput)
          fprintf(config.outfile, "<transliterators count=\"%d\">\n",  count);
@@ -435,7 +417,7 @@ static void print_info(const struct config_t *p_config)
          fprintf(config.outfile, "</icu>\n");
  
      exit(0);
-};
+}
  
  
  
@@ -476,7 +458,7 @@ static void process_text_file(const struct config_t *p_config)
                  "<icu>\n"
                  "<tokens>\n");
      
-    // read input lines for processing
+    /* read input lines for processing */
      while ((line=fgets(linebuf, sizeof(linebuf)-1, config.infile)))
      {
          success = icu_chain_assign_cstr(config.chain, line, &status);
@@ -515,15 +497,15 @@ static void process_text_file(const struct config_t *p_config)
      xmlFreeDoc(doc);
      if (line)
          free(line);
-};
+}
  
-#endif // HAVE_ICU
+#endif /* HAVE_ICU */
  
  
  int main(int argc, char **argv) 
  {
  
-#ifdef HAVE_ICU
+#if HAVE_ICU
  
      read_params(argc, argv, &config);
  
@@ -533,17 +515,17 @@ int main(int argc, char **argv)
      if (config.print && strlen(config.print))
          print_info(&config);
  
-#else // HAVE_ICU
+#else /* HAVE_ICU */
  
      printf("ICU not available on your system.\n"
             "Please install libicu36-dev and icu-doc or similar, "
             "re-configure and re-compile\n");
  
  
-#endif // HAVE_ICU
+#endif /* HAVE_ICU */
  
      return(0);
-};
+}
  
  
  /*
author	Adam Dickmeiss <adam@indexdata.dk>
	Mon, 22 Oct 2007 17:32:07 +0000 (17:32 +0000)
committer	Adam Dickmeiss <adam@indexdata.dk>
	Mon, 22 Oct 2007 17:32:07 +0000 (17:32 +0000)
include/yaz/icu_I18N.h		patch | blob | history
src/icu_I18N.c		patch | blob | history
test/tst_icu_I18N.c		patch | blob | history
util/yaz-icu.c		patch | blob | history