From a3722408557fb93c2ec20233c041b20c7d7fb035 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 11 Mar 2005 21:10:12 +0000 Subject: [PATCH] Implemented the 'equivalent' directive for .chr-files. --- NEWS | 2 + include/charmap.h | 11 +-- include/zebramap.h | 24 ++++- index/zrpn.c | 221 ++++++++++++++++++++++++++++-------------- test/charmap/Makefile.am | 4 +- test/charmap/string.utf8.chr | 10 +- test/charmap/test1.sh | 3 +- test/charmap/test3.sh | 21 ++++ util/charmap.c | 156 +++++++++++++++++++---------- util/zebramap.c | 53 +++++++--- 10 files changed, 348 insertions(+), 157 deletions(-) create mode 100755 test/charmap/test3.sh diff --git a/NEWS b/NEWS index 5cba622..1475b80 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,5 @@ +Implemented the 'equivalent' directive for .chr-files. + --- 1.3.24 2005/02/09 For configure, support threading again. It was removed by mistake diff --git a/include/charmap.h b/include/charmap.h index facf776..f3e150a 100644 --- a/include/charmap.h +++ b/include/charmap.h @@ -1,6 +1,6 @@ -/* $Id: charmap.h,v 1.9.2.1 2004-09-16 14:07:49 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 - Index Data Aps +/* $Id: charmap.h,v 1.9.2.2 2005-03-11 21:10:12 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS This file is part of the Zebra server. @@ -46,9 +46,8 @@ YAZ_EXPORT void chrmaptab_destroy (chrmaptab tab); YAZ_EXPORT const char **chr_map_input(chrmaptab t, const char **from, int len, int first); YAZ_EXPORT const char **chr_map_input_x(chrmaptab t, const char **from, int *len, int first); -YAZ_EXPORT const char **chr_map_input_q(chrmaptab maptab, - const char **from, int len, - const char **qmap); +YAZ_EXPORT const char **chr_map_q_input(chrmaptab maptab, + const char **from, int len, int first); YAZ_EXPORT const char *chr_map_output(chrmaptab t, const char **from, int len); diff --git a/include/zebramap.h b/include/zebramap.h index 60e8ebc..5c27841 100644 --- a/include/zebramap.h +++ b/include/zebramap.h @@ -1,6 +1,6 @@ -/* $Id: zebramap.h,v 1.15.2.1 2004-09-16 14:07:49 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 - Index Data Aps +/* $Id: zebramap.h,v 1.15.2.2 2005-03-11 21:10:12 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS This file is part of the Zebra server. @@ -29,25 +29,43 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA YAZ_BEGIN_CDECL typedef struct zebra_maps *ZebraMaps; + +YAZ_EXPORT ZebraMaps zebra_maps_open (Res res, const char *base); +YAZ_EXPORT void zebra_maps_close (ZebraMaps zm); +YAZ_EXPORT const char **zebra_maps_input (ZebraMaps zms, unsigned reg_id, const char **from, int len, int first); + +YAZ_EXPORT +const char **zebra_maps_search (ZebraMaps zms, unsigned reg_id, + const char **from, int len, int *q_map_match); + +YAZ_EXPORT const char *zebra_maps_output(ZebraMaps, unsigned reg_id, const char **from); +YAZ_EXPORT int zebra_maps_attr (ZebraMaps zms, Z_AttributesPlusTerm *zapt, unsigned *reg_id, char **search_type, char *rank_type, int *complete_flag, int *sort_flag); +YAZ_EXPORT int zebra_maps_sort (ZebraMaps zms, Z_SortAttributes *sortAttributes, int *numerical); +YAZ_EXPORT int zebra_maps_is_complete (ZebraMaps zms, unsigned reg_id); + +YAZ_EXPORT int zebra_maps_is_sort (ZebraMaps zms, unsigned reg_id); + +YAZ_EXPORT int zebra_maps_is_positioned (ZebraMaps zms, unsigned reg_id); +YAZ_EXPORT WRBUF zebra_replace(ZebraMaps zms, unsigned reg_id, const char *ex_list, const char *input_str, int input_len); diff --git a/index/zrpn.c b/index/zrpn.c index b3fddab..cea199f 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,5 +1,5 @@ -/* $Id: zrpn.c,v 1.141.2.7 2005-02-25 10:09:15 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 +/* $Id: zrpn.c,v 1.141.2.8 2005-03-11 21:10:12 adam Exp $ + Copyright (C) 1995-2005 Index Data Aps This file is part of the Zebra server. @@ -275,14 +275,41 @@ static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src, return *s0; } + +static void esc_str(char *out_buf, int out_size, + const char *in_buf, int in_size) +{ + int k; + + assert(out_buf); + assert(in_buf); + assert(out_size > 20); + *out_buf = '\0'; + for (k = 0; k 126) + pc = '?'; + else + pc = c; + sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc); + if (strlen(out_buf) > out_size-20) + { + strcat(out_buf, ".."); + break; + } + } +} + #define REGEX_CHARS " []()|.*+?!" -/* term_100: handle term, where trunc=none(no operators at all) */ +/* term_100: handle term, where trunc = none(no operators at all) */ static int term_100(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, - char *dst_term) + const char **src, char *dst, int space_split, + char *dst_term) { - const char *s0, *s1; + const char *s0; const char **map; int i = 0; int j = 0; @@ -295,42 +322,56 @@ static int term_100(ZebraMaps zebra_maps, int reg_type, s0 = *src; while (*s0) { - s1 = s0; - map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0); - if (space_split) + const char *s1 = s0; + int q_map_match = 0; + map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + &q_map_match); + if (space_split) + { + if (**map == *CHR_SPACE) + break; + } + else /* complete subfield only. */ + { + if (**map == *CHR_SPACE) + { /* save space mapping for later .. */ + space_start = s1; + space_end = s0; + continue; + } + else if (space_start) + { /* reload last space */ + while (space_start < space_end) + { + if (strchr(REGEX_CHARS, *space_start)) + dst[i++] = '\\'; + dst_term[j++] = *space_start; + dst[i++] = *space_start++; + } + /* and reset */ + space_start = space_end = 0; + } + } + /* add non-space char */ + memcpy(dst_term+j, s1, s0 - s1); + j += (s0 - s1); + if (!q_map_match) { - if (**map == *CHR_SPACE) - break; + while (s1 < s0) + { + if (strchr(REGEX_CHARS, *s1)) + dst[i++] = '\\'; + dst[i++] = *s1++; + } } - else /* complete subfield only. */ + else { - if (**map == *CHR_SPACE) - { /* save space mapping for later .. */ - space_start = s1; - space_end = s0; - continue; - } - else if (space_start) - { /* reload last space */ - while (space_start < space_end) - { - if (strchr(REGEX_CHARS, *space_start)) - dst[i++] = '\\'; - dst_term[j++] = *space_start; - dst[i++] = *space_start++; - } - /* and reset */ - space_start = space_end = 0; - } + char tmpbuf[80]; + esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); + + strcpy(dst + i, map[0]); + i += strlen(map[0]); } - /* add non-space char */ - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst_term[j++] = *s1; - dst[i++] = *s1++; - } } dst[i] = '\0'; dst_term[j] = '\0'; @@ -338,12 +379,12 @@ static int term_100(ZebraMaps zebra_maps, int reg_type, return i; } -/* term_101: handle term, where trunc=Process # */ +/* term_101: handle term, where trunc = Process # */ static int term_101(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, - char *dst_term) + const char **src, char *dst, int space_split, + char *dst_term) { - const char *s0, *s1; + const char *s0; const char **map; int i = 0; int j = 0; @@ -357,21 +398,37 @@ static int term_101(ZebraMaps zebra_maps, int reg_type, { dst[i++] = '.'; dst[i++] = '*'; - dst_term[j++] = *s0++; + dst_term[j++] = *s0++; } else { - s1 = s0; - map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0); + const char *s1 = s0; + int q_map_match = 0; + map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + &q_map_match); if (space_split && **map == *CHR_SPACE) break; - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst_term[j++] = *s1; - dst[i++] = *s1++; - } + + /* add non-space char */ + memcpy(dst_term+j, s1, s0 - s1); + j += (s0 - s1); + if (!q_map_match) + { + while (s1 < s0) + { + if (strchr(REGEX_CHARS, *s1)) + dst[i++] = '\\'; + dst[i++] = *s1++; + } + } + else + { + char tmpbuf[80]; + esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); + + strcpy(dst + i, map[0]); + i += strlen(map[0]); + } } } dst[i] = '\0'; @@ -380,21 +437,21 @@ static int term_101(ZebraMaps zebra_maps, int reg_type, return i; } -/* term_103: handle term, where trunc=re-2 (regular expressions) */ +/* term_103: handle term, where trunc = re-2 (regular expressions) */ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, - char *dst, int *errors, int space_split, - char *dst_term) + char *dst, int *errors, int space_split, + char *dst_term) { int i = 0; int j = 0; - const char *s0, *s1; + const char *s0; const char **map; if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split)) return 0; s0 = *src; if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] && - isdigit(s0[1])) + isdigit(((const unsigned char *)s0)[1])) { *errors = s0[1] - '0'; s0 += 3; @@ -404,40 +461,56 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, while (*s0) { if (strchr("^\\()[].*+?|-", *s0)) - { - dst_term[j++] = *s0; + { + dst_term[j++] = *s0; dst[i++] = *s0++; - } + } else { - s1 = s0; - map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0); - if (**map == *CHR_SPACE) + const char *s1 = s0; + int q_map_match = 0; + map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + &q_map_match); + if (space_split && **map == *CHR_SPACE) break; - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst_term[j++] = *s1; - dst[i++] = *s1++; - } + + /* add non-space char */ + memcpy(dst_term+j, s1, s0 - s1); + j += (s0 - s1); + if (!q_map_match) + { + while (s1 < s0) + { + if (strchr(REGEX_CHARS, *s1)) + dst[i++] = '\\'; + dst[i++] = *s1++; + } + } + else + { + char tmpbuf[80]; + esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); + + strcpy(dst + i, map[0]); + i += strlen(map[0]); + } } } dst[i] = '\0'; dst_term[j] = '\0'; *src = s0; + return i; } -/* term_103: handle term, where trunc=re-1 (regular expressions) */ +/* term_103: handle term, where trunc = re-1 (regular expressions) */ static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src, - char *dst, int space_split, char *dst_term) + char *dst, int space_split, char *dst_term) { return term_103(zebra_maps, reg_type, src, dst, NULL, space_split, - dst_term); + dst_term); } - /* term_104: handle term, where trunc=Process # and ! */ static int term_104(ZebraMaps zebra_maps, int reg_type, const char **src, char *dst, int space_split, diff --git a/test/charmap/Makefile.am b/test/charmap/Makefile.am index 37b79de..c0ad0f0 100644 --- a/test/charmap/Makefile.am +++ b/test/charmap/Makefile.am @@ -1,6 +1,6 @@ -# $Id: Makefile.am,v 1.1 2004-03-09 15:12:15 adam Exp $ +# $Id: Makefile.am,v 1.1.2.1 2005-03-11 21:10:12 adam Exp $ -check_SCRIPTS = test1.sh test2.sh +check_SCRIPTS = test1.sh test2.sh test3.sh TESTS = $(check_SCRIPTS) diff --git a/test/charmap/string.utf8.chr b/test/charmap/string.utf8.chr index d67402f..dbe280b 100644 --- a/test/charmap/string.utf8.chr +++ b/test/charmap/string.utf8.chr @@ -1,4 +1,4 @@ -# $Id: string.utf8.chr,v 1.1 2004-03-09 15:12:15 adam Exp $ +# $Id: string.utf8.chr,v 1.1.2.1 2005-03-11 21:10:12 adam Exp $ # Define the basic value-set. *Beware* of changing this without re-indexing # your databases. @@ -15,10 +15,10 @@ space {\001-\040}!"#$%&'\()*+,-./:;<=>?@\[\\]^_`\{|}~ # Characters to be considered equivalent for searching purposes. -# equivalent æä(ae) -# equivalent øö(oe) -# equivalent å(aa) -# equivalent uü +equivalent æä(ae) +equivalent øö(oe) +equivalent å(aa) +equivalent uü # Supplemental mappings diff --git a/test/charmap/test1.sh b/test/charmap/test1.sh index 966ff5e..2820b1a 100755 --- a/test/charmap/test1.sh +++ b/test/charmap/test1.sh @@ -1,5 +1,5 @@ #!/bin/sh -# $Id: test1.sh,v 1.3 2004-06-15 09:43:27 adam Exp $ +# $Id: test1.sh,v 1.3.2.1 2005-03-11 21:10:12 adam Exp $ pp=${srcdir:-"."} @@ -13,6 +13,7 @@ fi ../../index/zebraidx -c $pp/zebra.cfg -l$LOG update $pp/*.xml ../../index/zebrasrv -c $pp/zebra.cfg -l$LOG unix:socket & sleep 1 +# ae ../api/testclient unix:socket '@term string æ' >tmp1 echo 'Result count: 1' >tmp2 kill `cat zebrasrv.pid` || exit 1 diff --git a/test/charmap/test3.sh b/test/charmap/test3.sh new file mode 100755 index 0000000..1a640b9 --- /dev/null +++ b/test/charmap/test3.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# $id$ + +pp=${srcdir:-"."} + +LOG=test2.log +rm -f $LOG +if ../../index/zebraidx -c $pp/zebra.cfg -l $LOG -V|grep Expat >/dev/null; then + ../../index/zebraidx -c $pp/zebra.cfg -l$LOG init +else + exit 0 +fi +../../index/zebraidx -c $pp/zebra.cfg -l$LOG update $pp/*.xml +../../index/zebrasrv -c $pp/zebra.cfg -l$LOG unix:socket & +sleep 1 +# search for ae (equivalent test) +../api/testclient unix:socket '@term string ae' >tmp1 +echo 'Result count: 1' >tmp2 +kill `cat zebrasrv.pid` || exit 1 +diff tmp1 tmp2 || exit 2 +rm -f tmp1 tmp2 diff --git a/util/charmap.c b/util/charmap.c index 6510159..b1042bc 100644 --- a/util/charmap.c +++ b/util/charmap.c @@ -1,6 +1,6 @@ -/* $Id: charmap.c,v 1.29.2.4 2005-01-16 23:13:31 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 - Index Data Aps +/* $Id: charmap.c,v 1.29.2.5 2005-03-11 21:10:13 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS This file is part of the Zebra server. @@ -28,15 +28,16 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA */ #include -#include #include +#include #include typedef unsigned ucs4_t; -#include #include +#include + #define CHR_MAXSTR 1024 #define CHR_MAXEQUIV 32 @@ -75,6 +76,15 @@ typedef struct chrwork } chrwork; /* + * Callback for equivalent stuff + */ +typedef struct +{ + NMEM nmem; + int no_eq; + char *eq[CHR_MAXEQUIV]; +} chr_equiv_work; +/* * Add an entry to the character map. */ static chr_t_entry *set_map_string(chr_t_entry *root, NMEM nmem, @@ -210,6 +220,20 @@ const char **chr_map_input(chrmaptab maptab, const char **from, int len, int fir return (const char **) (res->target); } +const char **chr_map_q_input(chrmaptab maptab, + const char **from, int len, int first) +{ + chr_t_entry *t = maptab->q_input; + chr_t_entry *res; + int len_tmp[2]; + + len_tmp[0] = len; + len_tmp[1] = -1; + if (!(res = find_entry_x(t, from, len_tmp, first))) + return 0; + return (const char **) (res->target); +} + const char *chr_map_output(chrmaptab maptab, const char **from, int len) { unsigned char c = ** (unsigned char **) from; @@ -392,39 +416,36 @@ static void fun_mkstring(const char *s, void *data, int num) res = chr_map_input(arg->map, &s, strlen(s), 0); if (*res == (char*) CHR_UNKNOWN) - logf(LOG_WARN, "Map: '%s' has no mapping", p); + yaz_log(LOG_WARN, "Map: '%s' has no mapping", p); strncat(arg->string, *res, CHR_MAXSTR - strlen(arg->string)); arg->string[CHR_MAXSTR] = '\0'; } /* - * Add a map to the string contained in the argument. + * Create an unmodified string (scan_string handler). */ -static void fun_add_map(const char *s, void *data, int num) +static void fun_add_equivalent_string(const char *s, void *data, int num) { - chrwork *arg = (chrwork *) data; - - assert(arg->map->input); - logf (LOG_DEBUG, "set map %.*s", (int) strlen(s), s); - set_map_string(arg->map->input, arg->map->nmem, s, strlen(s), arg->string, - 0); - for (s = arg->string; *s; s++) - logf (LOG_DEBUG, " %3d", (unsigned char) *s); + chr_equiv_work *arg = (chr_equiv_work *) data; + + if (arg->no_eq == CHR_MAXEQUIV) + return; + arg->eq[arg->no_eq++] = nmem_strdup(arg->nmem, s); } /* - * Add a query map to the string contained in the argument. + * Add a map to the string contained in the argument. */ -static void fun_add_qmap(const char *s, void *data, int num) +static void fun_add_map(const char *s, void *data, int num) { chrwork *arg = (chrwork *) data; - assert(arg->map->q_input); - logf (LOG_DEBUG, "set qmap %.*s", (int) strlen(s), s); - set_map_string(arg->map->q_input, arg->map->nmem, s, - strlen(s), arg->string, 0); + assert(arg->map->input); + yaz_log (LOG_DEBUG, "set map %.*s", (int) strlen(s), s); + set_map_string(arg->map->input, arg->map->nmem, s, strlen(s), arg->string, + 0); for (s = arg->string; *s; s++) - logf (LOG_DEBUG, " %3d", (unsigned char) *s); + yaz_log (LOG_DEBUG, " %3d", (unsigned char) *s); } static int scan_to_utf8 (yaz_iconv_t t, ucs4_t *from, size_t inlen, @@ -494,14 +515,14 @@ static int scan_string(char *s_native, begin = zebra_prim_w(&s); if (*s != '-') { - logf(LOG_FATAL, "Bad range in char-map"); + yaz_log(LOG_FATAL, "Bad range in char-map"); return -1; } s++; end = zebra_prim_w(&s); if (end <= begin) { - logf(LOG_FATAL, "Bad range in char-map"); + yaz_log(LOG_FATAL, "Bad range in char-map"); return -1; } s++; @@ -554,10 +575,10 @@ chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, t_utf8 = yaz_iconv_open ("UTF-8", ucs4_native); - logf (LOG_DEBUG, "maptab %s open", name); + yaz_log (LOG_DEBUG, "maptab %s open", name); if (!(f = yaz_fopen(tabpath, name, "r", tabroot))) { - logf(LOG_WARN|LOG_ERRNO, "%s", name); + yaz_log(LOG_WARN|LOG_ERRNO, "%s", name); return 0; } nmem = nmem_create (); @@ -604,13 +625,13 @@ chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, { if (argc != 2) { - logf(LOG_FATAL, "Syntax error in charmap"); + yaz_log(LOG_FATAL, "Syntax error in charmap"); ++errors; } if (scan_string(argv[1], t_unicode, t_utf8, fun_addentry, res, &num) < 0) { - logf(LOG_FATAL, "Bad value-set specification"); + yaz_log(LOG_FATAL, "Bad value-set specification"); ++errors; } res->base_uppercase = num; @@ -622,18 +643,18 @@ chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, { if (!res->base_uppercase) { - logf(LOG_FATAL, "Uppercase directive with no lowercase set"); + yaz_log(LOG_FATAL, "Uppercase directive with no lowercase set"); ++errors; } if (argc != 2) { - logf(LOG_FATAL, "Missing arg for uppercase directive"); + yaz_log(LOG_FATAL, "Missing arg for uppercase directive"); ++errors; } if (scan_string(argv[1], t_unicode, t_utf8, fun_addentry, res, &num) < 0) { - logf(LOG_FATAL, "Bad value-set specification"); + yaz_log(LOG_FATAL, "Bad value-set specification"); ++errors; } } @@ -641,13 +662,13 @@ chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, { if (argc != 2) { - logf(LOG_FATAL, "Syntax error in charmap for space"); + yaz_log(LOG_FATAL, "Syntax error in charmap for space"); ++errors; } if (scan_string(argv[1], t_unicode, t_utf8, fun_addspace, res, 0) < 0) { - logf(LOG_FATAL, "Bad space specification"); + yaz_log(LOG_FATAL, "Bad space specification"); ++errors; } } @@ -655,13 +676,13 @@ chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, { if (argc != 2) { - logf(LOG_FATAL, "Syntax error in charmap for cut"); + yaz_log(LOG_FATAL, "Syntax error in charmap for cut"); ++errors; } if (scan_string(argv[1], t_unicode, t_utf8, fun_addcut, res, 0) < 0) { - logf(LOG_FATAL, "Bad cut specification"); + yaz_log(LOG_FATAL, "Bad cut specification"); ++errors; } } @@ -671,7 +692,7 @@ chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, if (argc != 3) { - logf(LOG_FATAL, "charmap directive map requires 2 args"); + yaz_log(LOG_FATAL, "charmap directive map requires 2 args"); ++errors; } buf.map = res; @@ -679,39 +700,68 @@ chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, if (scan_string(argv[2], t_unicode, t_utf8, fun_mkstring, &buf, 0) < 0) { - logf(LOG_FATAL, "Bad map target"); + yaz_log(LOG_FATAL, "Bad map target"); ++errors; } if (scan_string(argv[1], t_unicode, t_utf8, fun_add_map, &buf, 0) < 0) { - logf(LOG_FATAL, "Bad map source"); + yaz_log(LOG_FATAL, "Bad map source"); ++errors; } } - else if (!yaz_matchstr(argv[0], "qmap")) + else if (!yaz_matchstr(argv[0], "equivalent")) { - chrwork buf; + chr_equiv_work w; - if (argc != 3) + if (argc != 2) { - logf(LOG_FATAL, "charmap directive qmap requires 2 args"); + yaz_log(LOG_FATAL, "equivalent requires 1 argument"); ++errors; } - buf.map = res; - buf.string[0] = '\0'; - if (scan_string(argv[2], t_unicode, t_utf8, - fun_mkstring, &buf, 0) < 0) + w.nmem = res->nmem; + w.no_eq = 0; + if (scan_string(argv[1], t_unicode, t_utf8, + fun_add_equivalent_string, &w, 0) < 0) { - logf(LOG_FATAL, "Bad qmap target"); + yaz_log(LOG_FATAL, "equivalent: invalid string"); ++errors; } - if (scan_string(argv[1], t_unicode, t_utf8, - fun_add_qmap, &buf, 0) < 0) + else if (w.no_eq == 0) { - logf(LOG_FATAL, "Bad qmap source"); + yaz_log(LOG_FATAL, "equivalent: no strings"); ++errors; } + else + { + char *result_str; + int i, slen = 5; + + /* determine length of regular expression */ + for (i = 0; inmem, slen + 5); + + /* build the regular expression */ + *result_str = '\0'; + slen = 0; + for (i = 0; iq_input, res->nmem, + w.eq[i], strlen(w.eq[i]), + result_str, 0); + } + } } else if (!yaz_matchstr(argv[0], "encoding")) { @@ -740,7 +790,7 @@ chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, } else { - logf(LOG_WARN, "Syntax error at '%s' in %s", line, name); + yaz_log(LOG_WARN, "Syntax error at '%s' in %s", line, name); } yaz_fclose(f); @@ -749,7 +799,7 @@ chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, chrmaptab_destroy(res); res = 0; } - logf (LOG_DEBUG, "maptab %s close %d errors", name, errors); + yaz_log (LOG_DEBUG, "maptab %s close %d errors", name, errors); if (t_utf8 != 0) yaz_iconv_close(t_utf8); if (t_unicode != 0) diff --git a/util/zebramap.c b/util/zebramap.c index 8fad5e1..ffe13c6 100644 --- a/util/zebramap.c +++ b/util/zebramap.c @@ -1,6 +1,6 @@ -/* $Id: zebramap.c,v 1.32.2.3 2005-01-16 23:13:32 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 - Index Data Aps +/* $Id: zebramap.c,v 1.32.2.4 2005-03-11 21:10:13 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS This file is part of the Zebra server. @@ -20,12 +20,13 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include #include +#include #include -#include #include +#include + #include #define ZEBRA_MAP_TYPE_SORT 1 @@ -96,7 +97,7 @@ static void zebra_map_read (ZebraMaps zms, const char *name) if (!(f = yaz_fopen(zms->tabpath, name, "r", zms->tabroot))) { - logf(LOG_WARN|LOG_ERRNO, "%s", name); + yaz_log(LOG_WARN|LOG_ERRNO, "%s", name); return ; } while ((argc = readconf_line(f, &lineno, line, 512, argv, 10))) @@ -155,7 +156,7 @@ static void zebra_map_read (ZebraMaps zms, const char *name) token->next = (*zm)->replace_tokens; (*zm)->replace_tokens = token; #if 0 - logf (LOG_LOG, "replace %s", argv[1]); + yaz_log (LOG_LOG, "replace %s", argv[1]); #endif token->token_from = 0; if (argc >= 2) @@ -178,7 +179,7 @@ static void zebra_map_read (ZebraMaps zms, const char *name) { *dp++ = zebra_prim(&cp); #if 0 - logf (LOG_LOG, " char %2X %c", dp[-1], dp[-1]); + yaz_log (LOG_LOG, " char %2X %c", dp[-1], dp[-1]); #endif } *dp = '\0'; @@ -261,7 +262,7 @@ chrmaptab zebra_charmap_get (ZebraMaps zms, unsigned reg_id) if (!zm) { zm = (struct zebra_map *) nmem_malloc (zms->nmem, sizeof(*zm)); - logf (LOG_WARN, "Unknown register type: %c", reg_id); + yaz_log (LOG_WARN, "Unknown register type: %c", reg_id); zm->reg_id = reg_id; zm->maptab_name = nmem_strdup (zms->nmem, "@"); @@ -281,10 +282,10 @@ chrmaptab zebra_charmap_get (ZebraMaps zms, unsigned reg_id) if (!(zm->maptab = chrmaptab_create (zms->tabpath, zm->maptab_name, 0, zms->tabroot))) - logf(LOG_WARN, "Failed to read character table %s", + yaz_log(LOG_WARN, "Failed to read character table %s", zm->maptab_name); else - logf(LOG_DEBUG, "Read character table %s", zm->maptab_name); + yaz_log(LOG_DEBUG, "Read character table %s", zm->maptab_name); } return zm->maptab; } @@ -304,6 +305,32 @@ const char **zebra_maps_input (ZebraMaps zms, unsigned reg_id, return zms->temp_map_ptr; } +const char **zebra_maps_search(ZebraMaps zms, unsigned reg_id, + const char **from, int len, int *q_map_match) +{ + chrmaptab maptab; + + *q_map_match = 0; + maptab = zebra_charmap_get (zms, reg_id); + if (maptab) + { + const char **map; + map = chr_map_q_input(maptab, from, len, 0); + if (map && map[0]) + { + *q_map_match = 1; + return map; + } + map = chr_map_input(maptab, from, len, 0); + if (map) + return map; + } + zms->temp_map_str[0] = **from; + + (*from)++; + return zms->temp_map_ptr; +} + const char *zebra_maps_output(ZebraMaps zms, unsigned reg_id, const char **from) { @@ -545,7 +572,7 @@ WRBUF zebra_replace(ZebraMaps zms, unsigned reg_id, const char *ex_list, return zms->wrbuf_1; #if 0 - logf (LOG_LOG, "in:%.*s:", wrbuf_len(zms->wrbuf_1), + yaz_log (LOG_LOG, "in:%.*s:", wrbuf_len(zms->wrbuf_1), wrbuf_buf(zms->wrbuf_1)); #endif for (;;) @@ -632,7 +659,7 @@ int zebra_replace_sub(ZebraMaps zms, unsigned reg_id, const char *ex_list, } } #if 0 - logf (LOG_LOG, "out:%.*s:", wrbuf_len(wrbuf), wrbuf_buf(wrbuf)); + yaz_log (LOG_LOG, "out:%.*s:", wrbuf_len(wrbuf), wrbuf_buf(wrbuf)); #endif return no_replaces; } -- 1.7.10.4