* Sebastian Hammer, Adam Dickmeiss
*
* $Log: charmap.c,v $
- * Revision 1.1 1996-05-31 09:07:18 quinn
+ * Revision 1.10 1997-07-01 13:01:08 adam
+ * Bug fix in routine find_entry: didn't take into account the len arg.
+ *
+ * Revision 1.9 1996/10/29 13:48:14 adam
+ * Updated to use zebrautl.h instead of alexutil.h.
+ *
+ * Revision 1.8 1996/10/18 12:39:23 adam
+ * Uses LOG_DEBUG instead of LOG_WARN for "Character map overlap".
+ *
+ * Revision 1.7 1996/06/06 12:08:56 quinn
+ * Fixed bug.
+ *
+ * Revision 1.6 1996/06/04 13:28:00 quinn
+ * More work on charmapping
+ *
+ * Revision 1.5 1996/06/04 08:32:15 quinn
+ * Moved default keymap to keychars.c
+ *
+ * Revision 1.4 1996/06/03 16:32:13 quinn
+ * Temporary bug-fix
+ *
+ * Revision 1.3 1996/06/03 15:17:46 quinn
+ * Fixed bug.
+ *
+ * Revision 1.2 1996/06/03 10:15:09 quinn
+ * Fixed bug in mapping function.
+ *
+ * Revision 1.1 1996/05/31 09:07:18 quinn
* Work on character-set handling
*
*
*/
#include <ctype.h>
+#include <string.h>
+#include <assert.h>
-#include <alexutil.h>
+#include <zebrautl.h>
#include <yaz-util.h>
#include <charmap.h>
#include <tpath.h>
+#define CHR_MAXSTR 1024
+#define CHR_MAXEQUIV 32
+
const char *CHR_UNKNOWN = "\001";
const char *CHR_SPACE = "\002";
const char *CHR_BASE = "\003";
chr_t_entry **children; /* array of children */
unsigned char *target; /* target for this node, if any */
unsigned char *equiv; /* equivalent to, or sumthin */
-} t_entry;
+};
+
+/*
+ * General argument structure for callback functions (internal use only)
+ *
+ * An instance is handed to scan_string() as the opaque `data' pointer and
+ * is cast back to chrwork inside the fun_mkstring() / fun_addmap()
+ * callbacks.
+ */
+typedef struct chrwork
+{
+ chrmaptab *map;              /* table whose ->input tree is consulted/extended */
+ char string[CHR_MAXSTR+1];   /* NUL-terminated text, at most CHR_MAXSTR chars:
+                                 appended to by fun_mkstring(), used as the map
+                                 target by fun_addmap() */
+} chrwork;
/*
* Add an entry to the character map.
root->target = 0;
}
if (!len)
- root->target = (unsigned char *) xstrdup(to);
+ {
+ if (!root->target || (char*) root->target == CHR_SPACE ||
+ (char*) root->target == CHR_UNKNOWN)
+ root->target = (unsigned char *) xstrdup(to);
+ else if ((char*) to != CHR_SPACE)
+ logf(LOG_DEBUG, "Character map overlap");
+ }
else
{
if (!root->children)
for (i = 0; i < 256; i++)
root->children[i] = 0;
}
- root->children[(unsigned char) *from] =
+ if (!(root->children[(unsigned char) *from] =
set_map_string(root->children[(unsigned char) *from], from + 1,
- len - 1, to);
+ len - 1, to)))
+ return 0;
}
return root;
}
return i;
}
+#if 1
+
+/*
+ * Look up the longest prefix of *from (at most len bytes) in the tree
+ * rooted at t.
+ *
+ * t    - node to start from.
+ * from - in/out cursor; on a successful match below this node it is left
+ *        just past the bytes consumed, otherwise it is restored to its
+ *        value on entry.
+ * len  - number of bytes that may still be consumed.
+ *
+ * Returns the deepest node on the path that has a target, or 0 when
+ * neither a descendant nor t itself has one.
+ */
+static chr_t_entry *find_entry(chr_t_entry *t, char **from, int len)
+{
+    /* Only look at the next input byte when there is input left and this
+       node has a child table at all. */
+    if (len != 0 && t->children != 0)
+    {
+        char *start = *from;
+        chr_t_entry *child = t->children[(unsigned char) *start];
+
+        if (child != 0)
+        {
+            chr_t_entry *hit;
+
+            /* Tentatively consume one byte and try to match deeper. */
+            *from = start + 1;
+            hit = find_entry(child, from, len - 1);
+            if (hit != 0)
+                return hit;
+            /* Nothing usable below: give the byte back. */
+            *from = start;
+        }
+    }
+    /* No child matched; this node is the answer only if it has a target. */
+    if (t->target != 0)
+        return t;
+    return 0;
+}
+
+/*
+ * Map the leading bytes of *from (at most len) through the tree t.
+ *
+ * The cursor *from is advanced by find_entry() past whatever it matched.
+ * The return value points at a function-static two-element array: slot 0
+ * holds the matched node's target, slot 1 is always a null pointer.  The
+ * array is overwritten by the next call.
+ *
+ * If find_entry() yields no node at all the process is aborted.
+ */
+char **chr_map_input(chr_t_entry *t, char **from, int len)
+{
+    static char *result[2] = {0, 0};
+    chr_t_entry *match = find_entry(t, from, len);
+
+    if (match == 0)
+        abort();
+    result[0] = (char *) match->target;
+    return result;
+}
+
+#else
+
char **chr_map_input(chr_t_entry *t, char **from, int len)
{
static char *buf[2] = {0, 0}, str[2] = {0, 0};
/* return (char*) t->target; */
}
+#endif
+
static unsigned char prim(char **s)
{
unsigned char c;
tab->input = set_map_string(tab->input, s, strlen(s), (char*) CHR_SPACE);
}
+/*
+ * scan_string() callback: append the mapping of s to the work string.
+ *
+ * s    - NUL-terminated source text to look up in the input map.
+ * data - really a chrwork *; its ->string buffer is appended to.
+ * num  - not used here.
+ *
+ * A warning is logged when the lookup resolves to CHR_UNKNOWN.  The
+ * result is truncated so that the buffer never holds more than
+ * CHR_MAXSTR characters.
+ */
+static void fun_mkstring(char *s, void *data, int num)
+{
+    chrwork *work = data;
+    char *original = s;     /* chr_map_input() moves s; keep it for the log */
+    char **mapped;
+    size_t used;
+
+    mapped = chr_map_input(work->map->input, &s, strlen(s));
+    if (*mapped == (char*) CHR_UNKNOWN)
+        logf(LOG_WARN, "Map: '%s' has no mapping", original);
+
+    used = strlen(work->string);
+    strncat(work->string, *mapped, CHR_MAXSTR - used);
+    work->string[CHR_MAXSTR] = '\0';
+}
+
+/*
+ * scan_string() callback: register s as a source sequence in the input
+ * map, with the string held in the work argument as its target.
+ *
+ * s    - NUL-terminated source sequence.
+ * data - really a chrwork *; supplies both the map and the target string.
+ * num  - not used here.
+ *
+ * The input tree must already exist (asserted).  The value returned by
+ * set_map_string() is not examined.
+ */
+static void fun_addmap(char *s, void *data, int num)
+{
+    chrwork *arg = data;
+    chr_t_entry *root;
+    size_t srclen;
+
+    assert(arg->map->input);
+    root = arg->map->input;
+    srclen = strlen(s);
+    set_map_string(root, s, srclen, arg->string);
+}
+
static int scan_string(char *s, void (*fun)(char *c, void *data, int num),
void *data, int *num)
{
- unsigned char c, str[1024], begin, end;
+ unsigned char c, str[1024], begin, end, *p;
while (*s)
{
}
break;
case '[': s++; abort(); break;
- case '(': s++; abort(); break;
+ case '(':
+ p = (unsigned char*) ++s;
+ /* Find the end-marker, ignoring escapes */
+ do
+ {
+ if (!(p = (unsigned char*) strchr((char*) p, ')')))
+ {
+ logf(LOG_FATAL, "Missing ')' in string");
+ return -1;
+ }
+ }
+ while (*(p - 1) == '\\');
+ *p = 0;
+ (*fun)(s, data, num ? (*num)++ : 0);
+ s = (char*) p + 1;
+ break;
default:
c = prim(&s);
str[0] = c; str[1] = '\0';
res->input = xmalloc(sizeof(*res->input));
res->input->target = (unsigned char*) CHR_UNKNOWN;
res->input->equiv = 0;
-#if 0
+#if 1
res->input->children = xmalloc(sizeof(res->input) * 256);
for (i = 0; i < 256; i++)
{
res->input->children[i] = xmalloc(sizeof(*res->input));
res->input->children[i]->children = 0;
- res->input->children[i]->target = CHR_UNKNOWN;
+ res->input->children[i]->target = (unsigned char*) CHR_UNKNOWN;
res->input->children[i]->equiv = 0;
}
#else
res->input->children = 0;
#endif
res->query_equiv = 0;
- for (i = 0; i < 256; i++)
- {
- char *t = xmalloc(2);
-
- t[0] = i;
- t[1] = '\0';
- res->output[i] = (unsigned char*)t;
- }
+ for (i = *CHR_BASE; i < 256; i++)
+ res->output[i] = 0;
res->output[(int) *CHR_SPACE] = (unsigned char *) " ";
res->output[(int) *CHR_UNKNOWN] = (unsigned char*) "@";
res->base_uppercase = 0;
fclose(f);
return 0;
}
- if (scan_string(argv[1], fun_addspace, res, 0))
+ if (scan_string(argv[1], fun_addspace, res, 0) < 0)
{
logf(LOG_FATAL, "Bad space specification");
fclose(f);
return 0;
}
}
+ else if (!yaz_matchstr(argv[0], "map"))
+ {
+ chrwork buf;
+
+ if (argc != 3)
+ {
+ logf(LOG_FATAL, "charmap MAP directive requires 2 args");
+ fclose(f);
+ return 0;
+ }
+ buf.map = res;
+ buf.string[0] = '\0';
+ if (scan_string(argv[2], fun_mkstring, &buf, 0) < 0)
+ {
+ logf(LOG_FATAL, "Bad map target");
+ fclose(f);
+ return 0;
+ }
+ if (scan_string(argv[1], fun_addmap, &buf, 0) < 0)
+ {
+ logf(LOG_FATAL, "Bad map source");
+ fclose(f);
+ return 0;
+ }
+ }
else
{
#if 0