A function that checks whether a string appears to be UTF-8 encoded.
/* Adds the distance between 'A' and 'a' to x. The offset is applied
   unconditionally (no range check), so the result is only meaningful
   when x is already known to be an uppercase letter. */
#define yaz_tolower(x) ((x) + ('a' - 'A'))
/* Nonzero when x lies in the range 'a'..'z'. The argument is expanded
   twice, so avoid passing expressions with side effects. */
#define yaz_islower(x) ((x) >= 'a' && (x) <= 'z')
+/** \brief check whether string appears to be UTF-8 encoded
+ \param cstr string to check (must be NUL-terminated; its length is
+ measured with strlen)
+ \retval 1 OK (appears to be UTF-8)
+ \retval 0 definitely not UTF-8
+
+ The string is decoded from start to end; the first decoding error
+ yields 0. An empty string yields 1.
+*/
+YAZ_EXPORT int yaz_utf8_check(const char *cstr);
+
YAZ_END_CDECL
#endif
}
return 0;
}
-
+
+/* Decode the NUL-terminated string one UTF-8 sequence at a time.
+   Returns 0 as soon as the decoder flags an error, and 1 once the whole
+   string has been consumed (an empty string therefore yields 1). */
+int yaz_utf8_check(const char *str)
+{
+    /* dropping const is fine: yaz_read_UTF8_char only reads its input */
+    unsigned char *cursor = (unsigned char *) str;
+    size_t remaining;
+
+    for (remaining = strlen(str); remaining != 0; )
+    {
+        size_t consumed;
+        int decode_error = 0;
+
+        /* only the consumed byte count and the error flag matter here;
+           the decoded character value itself is discarded */
+        yaz_read_UTF8_char(cursor, remaining, &consumed, &decode_error);
+        if (decode_error != 0)
+            return 0;
+        cursor += consumed;
+        remaining -= consumed;
+    }
+    return 1;
+}
/*
* Local variables: