-/* $Id: extract.c,v 1.263 2007-10-29 09:25:40 adam Exp $
+/* $Id: extract.c,v 1.264 2007-10-29 13:43:57 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
*/
+/** \file
+ \brief indexes records and extracts tokens for indexing and sorting
+*/
+
#include <stdio.h>
#include <assert.h>
#include <ctype.h>
#endif
#include <fcntl.h>
+
#include "index.h"
#include "orddict.h"
#include <direntz.h>
#include <charmap.h>
+#include <yaz/snprintf.h>
static int log_level_extract = 0;
static int log_level_details = 0;
int cmd, zebra_rec_keys_t skp);
static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid);
static void extract_token_add(RecWord *p);
+static void extract_token_add2(RecWord *p);
static void check_log_limit(ZebraHandle zh)
{
stream->endf(stream, &null_offset);;
extractCtrl.init = extract_init;
- extractCtrl.tokenAdd = extract_token_add;
+ if (zh->reg->index_types)
+ {
+ extractCtrl.tokenAdd = extract_token_add2;
+ }
+ else
+ {
+ extractCtrl.tokenAdd = extract_token_add;
+ }
extractCtrl.schemaAdd = extract_schema_add;
extractCtrl.dh = zh->reg->dh;
extractCtrl.handle = zh;
extract_add_string(p, buf, i);
}
+static void extract_token_add2_index(ZebraHandle zh, zebra_index_type_t type,
+ RecWord *p)
+{ /* Tokenizes the term in p (p->term_buf, p->term_len) with the given
+     index type and writes one key per resulting token to zh->reg->keys.
+     p->seqno is incremented once for every token written. */
+ struct it_key key;
+ const char *res_buf = 0; /* current token, filled in by the tokenizer */
+ size_t res_len = 0; /* length of the current token */
+ int r = zebra_index_type_tokenize(type, p->term_buf, p->term_len, /* first call passes the input term */
+ &res_buf, &res_len);
+ int cat = zinfo_index_category_index;
+ int ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, p->index_type, p->index_name); /* becomes the first key component */
+ if (ch < 0) /* negative result: presumably not registered yet, so add it */
+ ch = zebraExplain_add_attr_str(zh->reg->zei, cat, p->index_type, p->index_name);
+ while (r) /* presumably r is non-zero as long as a token was produced -- TODO confirm */
+ {
+ int i = 0;
+ key.mem[i++] = ch;
+ key.mem[i++] = p->record_id;
+ key.mem[i++] = p->section_id;
+/* the segment component is only stored when segment indexing is enabled */
+ if (zh->m_segment_indexing)
+ key.mem[i++] = p->segment;
+ key.mem[i++] = p->seqno;
+ key.len = i; /* 4 components, or 5 when the segment is included */
+/* NOTE(review): every token is logged at YLOG_LOG level; looks like debug output -- confirm */
+ yaz_log(YLOG_LOG, "keys_write %.*s", (int) res_len, res_buf);
+ zebra_rec_keys_write(zh->reg->keys, res_buf, res_len, &key);
+/* each token written gets its own sequence number */
+ p->seqno++;
+ r = zebra_index_type_tokenize(type, 0, 0, &res_buf, &res_len); /* no input buffer: presumably continues with the next token of the same term */
+ }
+}
+/** \brief token handler installed as tokenAdd when zh->reg->index_types is set
+    \param p token data to be indexed
+
+    Looks up the index type named by the single character p->index_type
+    in zh->reg->index_types. Tokens of an index type are passed on to
+    extract_token_add2_index. Nothing is done for sort types, and nothing
+    is done when no matching type is found.
+*/
+static void extract_token_add2(RecWord *p)
+{
+ zebra_index_type_t type;
+ ZebraHandle zh = p->extractCtrl->handle;
+ char type_tmp[2]; /* p->index_type as a NUL-terminated one-character string */
+ type_tmp[0] = p->index_type;
+ type_tmp[1] = '\0';
+ type = zebra_index_type_get(zh->reg->index_types, type_tmp);
+ if (type)
+ {
+ if (zebra_index_type_is_index(type))
+ {
+ extract_token_add2_index(zh, type, p);
+ }
+ else if (zebra_index_type_is_sort(type))
+ {
+ ; /* empty statement: sort types are not handled here */
+
+ }
+ }
+}
+
+/** \brief top-level indexing handler for recctrl system
+ \param p token data to be indexed
+
+ Call sequence:
+ extract_token
+ zebra_add_{in}_complete
+ extract_add_string
+
+ extract_add_index_string
+ or
+ extract_add_sort_string
+ or
+ extract_add_staticrank_string
+
+*/
static void extract_token_add(RecWord *p)
{
ZebraHandle zh = p->extractCtrl->handle;
-# $Id: Makefile.am,v 1.40 2006-11-16 12:48:28 adam Exp $
+# $Id: Makefile.am,v 1.41 2007-10-29 13:43:58 adam Exp $
noinst_PROGRAMS = testclient
testclient_SOURCES = testclient.c
-simpletests = t0 t1 t2 t3 t4 t5 t6 t7 t8 t9 t10 t11 t12 t13 t14 t15 t16
+simpletests = t0 t1 t2 t3 t4 t5 t6 t7 t8 t9 t10 t11 t12 t13 t14 t15 t16 t17
safaritests = safari1
check_PROGRAMS = $(simpletests) $(safaritests)
TESTS = $(check_PROGRAMS)
EXTRA_DIST=zebra.cfg zebra6.cfg zebra8.cfg zebra10.cfg zebra15.cfg safari.cfg \
- t10.att t10.abs
+ t10.att t10.abs zebra17.cfg indextypes17.xml
noinst_LIBRARIES = libtestlib.a
t14_SOURCES = t14.c
t15_SOURCES = t15.c
t16_SOURCES = t16.c
+t17_SOURCES = t17.c
safari1_SOURCES = safari1.c testlib.c
--- /dev/null
+/* $Id: t17.c,v 1.1 2007-10-29 13:43:58 adam Exp $
+ Copyright (C) 1995-2007
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+/** \file
+ \brief tests Unicode-enabled searching (index_types)
+*/
+#include <yaz/test.h>
+#include "testlib.h"
+/** test records: three gils records whose title element contains the
+    word "title"; the list is terminated by 0 */
+const char *myrec[] = {
+ "<gils>\n<title>My title</title>\n</gils>\n",
+ "<gils>\n<title>My x title</title>\n</gils>\n",
+ "<gils>\n<title>My title x</title>\n</gils>\n" ,
+ 0} ;
+/** \brief starts Zebra with zebra17.cfg, loads myrec and runs two title
+    searches before closing down
+*/
+static void tst(int argc, char **argv)
+{
+ ZebraService zs = tl_start_up("zebra17.cfg", argc, argv);
+ ZebraHandle zh = zebra_open(zs, 0);
+
+ YAZ_CHECK(tl_init_data(zh, myrec));
+
+ /* a term that occurs in none of the records: 0 hits */
+ YAZ_CHECK(tl_query(zh, "@attr 1=title notfound", 0));
+
+ /* All three records contain "title", so 3 hits is the intended
+    result (disabled branch). For now the search is expected to
+    return 0 hits. */
+#if 0
+
+ YAZ_CHECK(tl_query(zh, "@attr 1=title title", 3));
+#else
+ YAZ_CHECK(tl_query(zh, "@attr 1=title title", 0));
+#endif
+
+
+ YAZ_CHECK(tl_close_down(zh, zs));
+}
+
+TL_MAIN
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+