+/*
+ * scan_to_utf8: convert the inlen characters at from[] into a
+ * NUL-terminated byte string in outbuf.
+ *
+ *   t             converter handed to yaz_iconv(). When it is 0 no
+ *                 converter is used and every character is stored as a
+ *                 single byte (ISO-8859-1 fallback).
+ *   from, inlen   input characters and their count (characters, not bytes).
+ *   outbuf        destination buffer.
+ *   outbytesleft  room available for converted data. The terminating NUL
+ *                 is written after the data and is NOT counted here, so
+ *                 the buffer must hold at least outbytesleft + 1 bytes
+ *                 (callers presumably pass size - 1 -- confirm).
+ *
+ * Returns 0 on success and -1 when the conversion fails or, in the
+ * fallback branch, when the data does not fit in outbytesleft bytes.
+ */
+static int scan_to_utf8 (yaz_iconv_t t, ucs4_t *from, size_t inlen,
+                         char *outbuf, size_t outbytesleft)
+{
+    if (t == 0)
+    {
+        size_t i;
+
+        /* Copy all inlen characters, as the converter branch does,
+           instead of only the first one, and never write past the
+           space the caller granted. */
+        if (inlen > outbytesleft)
+        {
+            yaz_log (LOG_WARN, "scan_to_utf8: output buffer too small");
+            return -1;
+        }
+        for (i = 0; i < inlen; i++)
+            *outbuf++ = (char) from[i];  /* ISO-8859-1 is OK here */
+    }
+    else
+    {
+        char *inbuf = (char*) from;
+        size_t inbytesleft = inlen * sizeof(ucs4_t);
+        size_t ret = yaz_iconv (t, &inbuf, &inbytesleft,
+                                &outbuf, &outbytesleft);
+
+        if (ret == (size_t) (-1))
+        {
+            /* Positions beyond inlen stay 0 so the dump never reads
+               outside the caller's input. */
+            unsigned dump[4] = { 0, 0, 0, 0 };
+            size_t i;
+
+            /* Emit the errno-bearing warning first so that no other
+               log call can overwrite errno before it is reported. */
+            yaz_log (LOG_WARN|LOG_ERRNO, "bad unicode sequence");
+            for (i = 0; i < inlen && i < 4; i++)
+                dump[i] = (unsigned) from[i];
+            yaz_log (LOG_LOG, "from: %2X %2X %2X %2X",
+                     dump[0], dump[1], dump[2], dump[3]);
+            return -1;
+        }
+    }
+    *outbuf = '\0';
+    return 0;
+}
+
+static int scan_string(char *s_native,
+ yaz_iconv_t t_unicode, yaz_iconv_t t_utf8,
+ void (*fun)(const char *c, void *data, int num),
+ void *data, int *num)
+{
+ char str[1024];
+
+ ucs4_t arg[512];
+ ucs4_t *s0, *s = arg;
+ ucs4_t c, begin, end;
+ size_t i;
+
+ if (t_unicode != 0)
+ {
+ char *outbuf = (char *) arg;
+ char *inbuf = s_native;
+ size_t outbytesleft = sizeof(arg)-4;
+ size_t inbytesleft = strlen(s_native);
+ size_t ret;
+ ret = yaz_iconv(t_unicode, &inbuf, &inbytesleft,
+ &outbuf, &outbytesleft);
+ if (ret == (size_t)(-1))
+ return -1;
+ i = (outbuf - (char*) arg)/sizeof(ucs4_t);
+ }
+ else
+ {
+ for (i = 0; s_native[i]; i++)
+ arg[i] = s_native[i] & 255; /* ISO-8859-1 conversion */
+ }
+ arg[i] = 0; /* terminate */
+ if (s[0] == 0xfeff || s[0] == 0xfeff) /* skip byte Order Mark */
+ s++;