only characters after the cutting char should be indexed.
+Added a new 'cut' directive to charmaps (.chr files) which specifies that
+only characters after the cutting char should be indexed.
+
Update Perl internals so that it matches the current Zebra API.
The recordGroup structure is no longer available. A group of resources
can still be referenced by setting groupName=>.. in various methods.
-/* $Id: charmap.h,v 1.8 2002-08-02 19:26:55 adam Exp $
- Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+/* $Id: charmap.h,v 1.9 2004-07-28 09:47:41 adam Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
This file is part of the Zebra server.
#include <yaz/yconfig.h>
-#ifdef __cplusplus
-extern "C" {
-#endif
+YAZ_BEGIN_CDECL
YAZ_EXPORT extern const char *CHR_UNKNOWN;
YAZ_EXPORT extern const char *CHR_SPACE;
+YAZ_EXPORT extern const char *CHR_CUT;
YAZ_EXPORT extern const char *CHR_BASE;
struct chr_t_entry;
YAZ_EXPORT unsigned char zebra_prim(char **s);
-#ifdef __cplusplus
-}
-#endif
+YAZ_END_CDECL
#endif
-/* $Id: zebramap.h,v 1.14 2002-08-02 19:26:55 adam Exp $
- Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+/* $Id: zebramap.h,v 1.15 2004-07-28 09:47:41 adam Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
This file is part of the Zebra server.
02111-1307, USA.
*/
-
-
#ifndef ZEBRAMAP_H
#define ZEBRAMAP_H
#include <yaz/proto.h>
#include <res.h>
-#ifdef __cplusplus
-extern "C" {
-#endif
+YAZ_BEGIN_CDECL
typedef struct zebra_maps *ZebraMaps;
ZebraMaps zebra_maps_open (Res res, const char *base);
WRBUF zebra_replace(ZebraMaps zms, unsigned reg_id, const char *ex_list,
const char *input_str, int input_len);
-#ifdef __cplusplus
-}
-#endif
+YAZ_END_CDECL
#endif
-/* $Id: extract.c,v 1.156 2004-07-28 08:15:45 adam Exp $
+/* $Id: extract.c,v 1.157 2004-07-28 09:47:41 adam Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
while (map && *map && **map == *CHR_SPACE)
{
remain = p->length - (b - p->string);
+
if (remain > 0)
map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
else
{
const char *cp = *map;
- if (i >= IT_MAX_WORD)
- break;
- while (i < IT_MAX_WORD && *cp)
- buf[i++] = *(cp++);
+ if (**map == *CHR_CUT)
+ {
+ i = 0;
+ }
+ else
+ {
+ if (i >= IT_MAX_WORD)
+ break;
+ while (i < IT_MAX_WORD && *cp)
+ buf[i++] = *(cp++);
+ }
remain = p->length - (b - p->string);
if (remain > 0)
map = zebra_maps_input (p->zebra_maps, p->reg_type, &b,
-# $Id: Makefile.am,v 1.2 2003-05-21 14:39:22 adam Exp $
+# $Id: Makefile.am,v 1.3 2004-07-28 09:47:41 adam Exp $
check_SCRIPTS = test1.sh
TESTS = $(check_SCRIPTS)
-EXTRA_DIST = zebra.cfg rec1.xml rec2.xml rec3.xml zebra.cfg my.abs \
+EXTRA_DIST = zebra.cfg default.idx string-hat.chr \
+ rec1.xml rec2.xml rec3.xml rec4.xml zebra.cfg my.abs \
$(check_SCRIPTS)
--- /dev/null
+# Zebra indexes as referred to from the *.abs-files.
+# $Id: default.idx,v 1.1 2004-07-28 09:47:41 adam Exp $
+#
+
+# Traditional word index
+# Used if completeness is 'incomplete field' (@attr 6=1) and
+# structure is word/phrase/word-list/free-form-text/document-text
+index w
+completeness 0
+position 1
+charmap string.chr
+
+# Phrase index
+# Used if completeness is 'complete {sub}field' (@attr 6=2, @attr 6=3)
+# and structure is word/phrase/word-list/free-form-text/document-text
+index p
+completeness 1
+charmap string.chr
+
+# URX (URL) index
+# Used if structure=urx (@attr 4=104)
+index u
+completeness 0
+charmap urx.chr
+
+# Numeric index
+# Used if structure=numeric (@attr 4=109)
+index n
+completeness 0
+charmap numeric.chr
+
+# Null map index (no mapping at all)
+# Used if structure=key (@attr 4=3)
+index 0
+completeness 0
+position 1
+charmap @
+
+# Year
+# Used if structure=year (@attr 4=4)
+index y
+completeness 0
+charmap @
+
+# Date
+# Used if structure=date (@attr 4=5)
+index d
+completeness 0
+charmap @
+
+# Sort register as usual but specify another map : string-cut.
+sort s
+completeness 1
+charmap string-cut.chr
+
<my>
- <title>third computer</title>
+ <title>3rd computer</title>
</my>
--- /dev/null
+<my>
+ <title>third ^computer</title>
+</my>
--- /dev/null
+# Generic character map but with ^ as cut char
+#
+# $Id: string-hat.chr,v 1.1 2004-07-28 09:47:41 adam Exp $
+
+# Define the basic value-set. *Beware* of changing this without re-indexing
+# your databases.
+
+lowercase {0-9}{a-y}üzæäøöå
+uppercase {0-9}{A-Y}ÜZÆÄØÖÅ
+
+cut ^
+
+# Breaking characters
+
+space {\001-\040}!"#$%&'\()*+,-./:;<=>?@\[\\]_`\{|}~
+
+# Characters to be considered equivalent for searching purposes.
+
+# equivalent æä(ae)
+# equivalent øö(oe)
+# equivalent å(aa)
+# equivalent uü
+
+# Supplemental mappings
+
+#map (&auml;) ä
+#map (&aelig;) æ
+#map (&oslash;) ø
+#map (&aring;) å
+#map (&ouml;) ö
+#map (&Auml;) Ä
+#map (&Aelig;) Æ
+#map (&Oslash;) Ø
+#map (&Aring;) Å
+#map (Ö) Ö
+
+#map éÉ e
+#map á a
+#map ó o
+#map í i
+
+#map (Aa) (AA)
+
+#map (aa) a
#!/bin/sh
-# $Id: test1.sh,v 1.7 2004-06-15 09:43:34 adam Exp $
+# $Id: test1.sh,v 1.8 2004-07-28 09:47:41 adam Exp $
pp=${srcdir:-"."}
../../index/zebrasrv -c $pp/zebra.cfg -l $LOG unix:socket &
sleep 1
test -f lock/zebrasrv.pid || exit 2
-../api/testclient -n3 unix:socket '@or computer @attr 7=1 @attr 1=30 0' >tmp1
-echo 'Result count: 3
+../api/testclient -n4 unix:socket '@or computer @attr 7=1 @attr 1=30 0' >tmp1
+echo 'Result count: 4
my:
- title: third computer
+ title: 3rd computer
+my:
+ title: third ^computer
my:
title: second computer
dateTime: 1
# Simple Zebra configuration file
-# $Id: zebra.cfg,v 1.2 2004-06-15 09:43:34 adam Exp $
+# $Id: zebra.cfg,v 1.3 2004-07-28 09:47:41 adam Exp $
#
# Where the schema files, attribute files, etc are located.
profilePath: ${srcdir:-.}:${srcdir:-.}/../../tab
attset: explain.att
recordtype.xml: grs.sgml
-recordId: file
lockdir: lock
register: reg:20M
isam: b
-/* $Id: charmap.c,v 1.28 2004-03-09 15:12:15 adam Exp $
+/* $Id: charmap.c,v 1.29 2004-07-28 09:47:42 adam Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
const char *CHR_UNKNOWN = "\001";
const char *CHR_SPACE = "\002";
-const char *CHR_BASE = "\003";
+const char *CHR_CUT = "\003";
+const char *CHR_BASE = "\005";
struct chrmaptab_info
{
(char*) CHR_SPACE, 0);
}
+/*
+ * Callback function.
+ * Add a cut-entry to the value space.
+ *
+ * s:    NUL-terminated string to register; its length is taken with strlen.
+ * data: the chrmaptab being built, passed as an opaque pointer.
+ * num:  not used by this callback.
+ *
+ * Calls set_map_string on tab->input with s and CHR_CUT as the target
+ * value, and stores the returned map back into tab->input.
+ */
+static void fun_addcut(const char *s, void *data, int num)
+{
+ chrmaptab tab = (chrmaptab) data;
+ tab->input = set_map_string(tab->input, tab->nmem, s, strlen(s),
+ (char*) CHR_CUT, 0);
+}
+
/*
* Create a string containing the mapped characters provided.
*/
{
if (argc != 2)
{
- logf(LOG_FATAL, "Syntax error in charmap");
+ logf(LOG_FATAL, "Syntax error in charmap for space");
++errors;
}
if (scan_string(argv[1], t_unicode, t_utf8,
++errors;
}
}
+ else if (!map_only && !yaz_matchstr(argv[0], "cut"))
+ {
+ if (argc != 2)
+ {
+ logf(LOG_FATAL, "Syntax error in charmap for cut");
+ ++errors;
+ }
+ if (scan_string(argv[1], t_unicode, t_utf8,
+ fun_addcut, res, 0) < 0)
+ {
+ logf(LOG_FATAL, "Bad cut specification");
+ ++errors;
+ }
+ }
else if (!yaz_matchstr(argv[0], "map"))
{
chrwork buf;