+#define REGEX_CHARS " ^[]()|.*+?!\"$" /* bytes that get a backslash prefix when appended to term_dict */
+
+/* Append the text [start, end) to display_term verbatim and add the
+ * corresponding term to term_dict.
+ *
+ * q_map_match == 0: every byte of [start, end) is copied to term_dict,
+ *                   preceded by a backslash when it occurs in REGEX_CHARS.
+ * q_map_match != 0: map[0] is appended to term_dict as-is instead.
+ */
+static void add_non_space(const char *start, const char *end,
+                          WRBUF term_dict,
+                          WRBUF display_term,
+                          const char **map, int q_map_match)
+{
+    const char *cp;
+
+    wrbuf_write(display_term, start, (size_t) (end - start));
+    if (q_map_match)
+    {
+        /* NOTE(review): scratch is filled by esc_str() but never read
+           in this function - confirm whether the call is still needed. */
+        char scratch[80];
+
+        esc_str(scratch, sizeof(scratch), map[0], strlen(map[0]));
+        wrbuf_puts(term_dict, map[0]);
+        return;
+    }
+    for (cp = start; cp < end; cp++)
+    {
+        if (strchr(REGEX_CHARS, *cp))
+            wrbuf_putc(term_dict, '\\');
+        wrbuf_putc(term_dict, *cp);
+    }
+}
+
+
+/* Fetch the next token from zm via zebra_map_tokenize_next() and append
+ * it, escaped, to term_dict; the token's display form is appended to
+ * display_term.
+ *
+ * With right_trunc set, only the leading part of the key (everything
+ * before the second-to-last \x01 separator) is used and ".*" is appended
+ * to term_dict afterwards.
+ *
+ * space_split is accepted but not used by this function.
+ *
+ * Returns 1 when a term was added, 0 when the tokenizer has no more
+ * tokens (*src is then advanced to its terminating NUL), and -1 when
+ * right_trunc is set but the key does not contain the expected
+ * separators. In the -1 case display_term has already been appended to.
+ */
+static int term_100_icu(zebra_map_t zm,
+                        const char **src, WRBUF term_dict, int space_split,
+                        WRBUF display_term,
+                        int right_trunc)
+{
+    int i;
+    size_t k;
+    const char *res_buf = 0;
+    size_t res_len = 0;
+    const char *display_buf = 0;
+    size_t display_len = 0;
+
+    if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
+                                 &display_buf, &display_len))
+    {
+        *src += strlen(*src);
+        return 0;
+    }
+    wrbuf_write(display_term, display_buf, display_len);
+    if (right_trunc)
+    {
+        /* ICU sort keys seem to be of the form
+           basechars \x01 accents \x01 length
+           For now we'll just right truncate from basechars . This
+           may give false hits due to accents not being used.
+        */
+        i = res_len;
+        /* locate the last separator ... */
+        while (--i >= 0 && res_buf[i] != '\x01')
+            ;
+        if (i > 0)
+        {
+            /* ... then the one before it, which ends the base chars */
+            while (--i >= 0 && res_buf[i] != '\x01')
+                ;
+        }
+        if (i <= 0)
+        {   /* Did not find base chars at all. Throw error.
+               i is -1 when a scan ran off the front of the key; it must
+               be rejected too, otherwise res_len would wrap to a huge
+               unsigned value and the copy loop would overrun res_buf. */
+            return -1;
+        }
+        res_len = i; /* reduce res_len */
+    }
+    /* size_t index: avoids the signed/unsigned comparison with res_len */
+    for (k = 0; k < res_len; k++)
+    {
+        /* NOTE(review): strchr() also matches a 0 byte (the terminator of
+           the set), so a NUL inside the key gets a backslash as well -
+           confirm keys never contain one. */
+        if (strchr(REGEX_CHARS "\\", res_buf[k]))
+            wrbuf_putc(term_dict, '\\');
+        /* NOTE(review): res_buf is plain char, so where char is signed
+           bytes >= 0x80 also satisfy this test - confirm that is
+           intended. */
+        if (res_buf[k] < 32)
+            wrbuf_putc(term_dict, 1);
+
+        wrbuf_putc(term_dict, res_buf[k]);
+    }
+    if (right_trunc)
+        wrbuf_puts(term_dict, ".*");
+    return 1;
+}