-/* $Id: rpnsearch.c,v 1.29 2007-12-17 12:24:50 adam Exp $
- Copyright (C) 1995-2007
- Index Data ApS
-
-This file is part of the Zebra server.
+/* This file is part of the Zebra server.
+ Copyright (C) 1994-2011 Index Data
Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
*/
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
#include <stdio.h>
#include <assert.h>
#ifdef WIN32
}
static int term_pre(zebra_map_t zm, const char **src,
- const char *ct1, const char *ct2, int first)
+ const char *ct1, int first)
{
const char *s1, *s0 = *src;
const char **map;
{
if (ct1 && strchr(ct1, *s0))
break;
- if (ct2 && strchr(ct2, *s0))
- break;
s1 = s0;
map = zebra_maps_input(zm, &s1, strlen(s1), first);
if (**map != *CHR_SPACE)
}
}
-#define REGEX_CHARS " []()|.*+?!"
+#define REGEX_CHARS " ^[]()|.*+?!\"$"
static void add_non_space(const char *start, const char *end,
WRBUF term_dict,
static int term_100_icu(zebra_map_t zm,
const char **src, WRBUF term_dict, int space_split,
- WRBUF display_term)
+ WRBUF display_term,
+ int right_trunc)
{
int i;
const char *res_buf = 0;
return 0;
}
wrbuf_write(display_term, display_buf, display_len);
+ if (right_trunc)
+ {
+ /* ICU sort keys seem to be of the form
+ basechars \x01 accents \x01 length
+ For now we just right-truncate after basechars. This
+ may give false hits because the accents part is not used.
+ */
+ i = res_len;
+ while (--i >= 0 && res_buf[i] != '\x01')
+ ;
+ if (i > 0)
+ {
+ while (--i >= 0 && res_buf[i] != '\x01')
+ ;
+ }
+ if (i == 0)
+ { /* did not find base chars at all. Throw error */
+ return -1;
+ }
+ res_len = i; /* reduce res_len */
+ }
for (i = 0; i < res_len; i++)
{
if (strchr(REGEX_CHARS "\\", res_buf[i]))
wrbuf_putc(term_dict, '\\');
if (res_buf[i] < 32)
wrbuf_putc(term_dict, 1);
+
wrbuf_putc(term_dict, res_buf[i]);
}
+ if (right_trunc)
+ wrbuf_puts(term_dict, ".*");
return 1;
}
const char *space_start = 0;
const char *space_end = 0;
- if (zebra_maps_is_icu(zm))
- return term_100_icu(zm, src, term_dict, space_split, display_term);
-
- if (!term_pre(zm, src, NULL, NULL, !space_split))
+ if (!term_pre(zm, src, 0, !space_split))
return 0;
s0 = *src;
while (*s0)
const char **map;
int i = 0;
- if (!term_pre(zm, src, "#", "#", !space_split))
+ if (!term_pre(zm, src, "#", !space_split))
return 0;
s0 = *src;
while (*s0)
const char *s0;
const char **map;
- if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
+ if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
return 0;
s0 = *src;
if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
}
-/* term_104: handle term, process # and ! */
+/* term_104: handle term, process ?n * # */
static int term_104(zebra_map_t zm, const char **src,
WRBUF term_dict, int space_split, WRBUF display_term)
{
const char **map;
int i = 0;
- if (!term_pre(zm, src, "?*#", "?*#", !space_split))
+ if (!term_pre(zm, src, "?*#", !space_split))
return 0;
s0 = *src;
while (*s0)
return i;
}
-/* term_105/106: handle term, where trunc = Process * and ! and right trunc */
+/* term_105/106: handle term, process * ! and possibly right_truncate */
static int term_105(zebra_map_t zm, const char **src,
WRBUF term_dict, int space_split,
WRBUF display_term, int right_truncate)
const char **map;
int i = 0;
- if (!term_pre(zm, src, "*!", "*!", !space_split))
+ if (!term_pre(zm, src, "\\*!", !space_split))
return 0;
s0 = *src;
while (*s0)
wrbuf_putc(display_term, *s0);
s0++;
}
+ else if (*s0 == '\\')
+ {
+ i++;
+ wrbuf_puts(term_dict, "\\\\");
+ wrbuf_putc(display_term, *s0);
+ s0++;
+ }
else
{
const char *s1 = s0;
AttrType term_ref_id_attr;
AttrType hits_limit_attr;
int term_ref_id_int;
+ zint hits_limit_from_attr;
attr_init_APT(&hits_limit_attr, zapt, 11);
- *hits_limit_value = attr_find(&hits_limit_attr, NULL);
+ hits_limit_from_attr = attr_find(&hits_limit_attr, NULL);
attr_init_APT(&term_ref_id_attr, zapt, 10);
term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
sprintf(res, "%d", term_ref_id_int);
*term_ref_id_str = res;
}
+ if (hits_limit_from_attr != -1)
+ *hits_limit_value = hits_limit_from_attr;
- /* no limit given ? */
- if (*hits_limit_value == -1)
- {
- if (*term_ref_id_str)
- {
- /* use global if term_ref is present */
- *hits_limit_value = zh->approx_limit;
- }
- else
- {
- /* no counting if term_ref is not present */
- *hits_limit_value = 0;
- }
- }
- else if (*hits_limit_value == 0)
- {
- /* 0 is the same as global limit */
- *hits_limit_value = zh->approx_limit;
- }
yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
*term_ref_id_str ? *term_ref_id_str : "none",
*hits_limit_value);
static ZEBRA_RES search_term(ZebraHandle zh,
Z_AttributesPlusTerm *zapt,
const char **term_sub,
- const Odr_oid *attributeSet, NMEM stream,
+ const Odr_oid *attributeSet,
+ zint hits_limit, NMEM stream,
struct grep_info *grep_info,
const char *index_type, int complete_flag,
const char *rank_type,
{
ZEBRA_RES res;
struct ord_list *ol;
- zint hits_limit_value;
+ zint hits_limit_value = hits_limit;
const char *term_ref_id_str = 0;
WRBUF term_dict = wrbuf_alloc();
WRBUF display_term = wrbuf_alloc();
wrbuf_putc(term_dict, ')');
prefix_len = wrbuf_len(term_dict);
-
- switch (truncation_value)
- {
- case -1: /* not specified */
- case 100: /* do not truncate */
- if (!string_relation(zh, zapt, &termp, term_dict,
- attributeSet,
- zm, space_split, display_term,
- &relation_error))
+
+ if (zebra_maps_is_icu(zm))
+ {
+ int relation_value;
+ AttrType relation;
+
+ attr_init_APT(&relation, zapt, 2);
+ relation_value = attr_find(&relation, NULL);
+ if (relation_value == 103) /* always matches */
+ termp += strlen(termp); /* move to end of term */
+ else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
{
- if (relation_error)
+ /* ICU case */
+ switch (truncation_value)
{
- zebra_setError(zh, relation_error, 0);
+ case -1: /* not specified */
+ case 100: /* do not truncate */
+ if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ break;
+ case 1: /* right truncation */
+ if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ break;
+ default:
+ zebra_setError_zint(zh,
+ YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
+ truncation_value);
return ZEBRA_FAIL;
}
- *term_sub = 0;
- return ZEBRA_OK;
}
- break;
- case 1: /* right truncation */
- wrbuf_putc(term_dict, '(');
- if (!term_100(zm, &termp, term_dict, space_split, display_term))
- {
- *term_sub = 0;
- return ZEBRA_OK;
- }
- wrbuf_puts(term_dict, ".*)");
- break;
- case 2: /* keft truncation */
- wrbuf_puts(term_dict, "(.*");
- if (!term_100(zm, &termp, term_dict, space_split, display_term))
- {
- *term_sub = 0;
- return ZEBRA_OK;
- }
- wrbuf_putc(term_dict, ')');
- break;
- case 3: /* left&right truncation */
- wrbuf_puts(term_dict, "(.*");
- if (!term_100(zm, &termp, term_dict, space_split, display_term))
- {
- *term_sub = 0;
- return ZEBRA_OK;
- }
- wrbuf_puts(term_dict, ".*)");
- break;
- case 101: /* process # in term */
- wrbuf_putc(term_dict, '(');
- if (!term_101(zm, &termp, term_dict, space_split, display_term))
- {
- *term_sub = 0;
- return ZEBRA_OK;
- }
- wrbuf_puts(term_dict, ")");
- break;
- case 102: /* Regexp-1 */
- wrbuf_putc(term_dict, '(');
- if (!term_102(zm, &termp, term_dict, space_split, display_term))
- {
- *term_sub = 0;
- return ZEBRA_OK;
- }
- wrbuf_putc(term_dict, ')');
- break;
- case 103: /* Regexp-2 */
- regex_range = 1;
- wrbuf_putc(term_dict, '(');
- if (!term_103(zm, &termp, term_dict, ®ex_range,
- space_split, display_term))
- {
- *term_sub = 0;
- return ZEBRA_OK;
- }
- wrbuf_putc(term_dict, ')');
- break;
- case 104: /* process # and ! in term */
- wrbuf_putc(term_dict, '(');
- if (!term_104(zm, &termp, term_dict, space_split, display_term))
- {
- *term_sub = 0;
- return ZEBRA_OK;
- }
- wrbuf_putc(term_dict, ')');
- break;
- case 105: /* process * and ! in term */
- wrbuf_putc(term_dict, '(');
- if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
+ else
{
- *term_sub = 0;
- return ZEBRA_OK;
+ zebra_setError_zint(zh,
+ YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
+ relation_value);
+ return ZEBRA_FAIL;
}
- wrbuf_putc(term_dict, ')');
- break;
- case 106: /* process * and ! in term */
- wrbuf_putc(term_dict, '(');
- if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
+ }
+ else
+ {
+ /* non-ICU case. using string.chr and friends */
+ switch (truncation_value)
{
- *term_sub = 0;
- return ZEBRA_OK;
+ case -1: /* not specified */
+ case 100: /* do not truncate */
+ if (!string_relation(zh, zapt, &termp, term_dict,
+ attributeSet,
+ zm, space_split, display_term,
+ &relation_error))
+ {
+ if (relation_error)
+ {
+ zebra_setError(zh, relation_error, 0);
+ return ZEBRA_FAIL;
+ }
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ break;
+ case 1: /* right truncation */
+ wrbuf_putc(term_dict, '(');
+ if (!term_100(zm, &termp, term_dict, space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_puts(term_dict, ".*)");
+ break;
+ case 2: /* left truncation */
+ wrbuf_puts(term_dict, "(.*");
+ if (!term_100(zm, &termp, term_dict, space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_putc(term_dict, ')');
+ break;
+ case 3: /* left&right truncation */
+ wrbuf_puts(term_dict, "(.*");
+ if (!term_100(zm, &termp, term_dict, space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_puts(term_dict, ".*)");
+ break;
+ case 101: /* process # in term */
+ wrbuf_putc(term_dict, '(');
+ if (!term_101(zm, &termp, term_dict, space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_puts(term_dict, ")");
+ break;
+ case 102: /* Regexp-1 */
+ wrbuf_putc(term_dict, '(');
+ if (!term_102(zm, &termp, term_dict, space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_putc(term_dict, ')');
+ break;
+ case 103: /* Regexp-2 */
+ regex_range = 1;
+ wrbuf_putc(term_dict, '(');
+ if (!term_103(zm, &termp, term_dict, ®ex_range,
+ space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_putc(term_dict, ')');
+ break;
+ case 104: /* process ?n * # term */
+ wrbuf_putc(term_dict, '(');
+ if (!term_104(zm, &termp, term_dict, space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_putc(term_dict, ')');
+ break;
+ case 105: /* process * ! in term and right truncate */
+ wrbuf_putc(term_dict, '(');
+ if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_putc(term_dict, ')');
+ break;
+ case 106: /* process * ! in term */
+ wrbuf_putc(term_dict, '(');
+ if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_putc(term_dict, ')');
+ break;
+ default:
+ zebra_setError_zint(zh,
+ YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
+ truncation_value);
+ return ZEBRA_FAIL;
}
- wrbuf_putc(term_dict, ')');
- break;
- default:
- zebra_setError_zint(zh,
- YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
- truncation_value);
- return ZEBRA_FAIL;
}
if (1)
{
Z_AttributesPlusTerm *zapt,
const char *termz,
const Odr_oid *attributeSet,
+ zint hits_limit,
NMEM stream,
const char *index_type, int complete_flag,
const char *rank_type,
alloc_sets = alloc_sets + add;
*result_sets = rnew;
}
- res = search_term(zh, zapt, &termp, attributeSet,
+ res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
stream, &grep_info,
index_type, complete_flag,
rank_type,
Z_AttributesPlusTerm *zapt,
const char *termz,
const Odr_oid *attributeSet,
+ zint hits_limit,
NMEM stream,
const char *index_type, int complete_flag,
const char *rank_type,
zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
if (zebra_maps_is_icu(zm))
zebra_map_tokenize_start(zm, termz, strlen(termz));
- return search_terms_chrmap(zh, zapt, termz, attributeSet,
+ return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
stream, index_type, complete_flag,
rank_type, xpath_use,
rset_nmem, result_sets, num_result_sets,
Z_AttributesPlusTerm *zapt,
const char *termz_org,
const Odr_oid *attributeSet,
+ zint hits_limit,
NMEM stream,
const char *index_type,
int complete_flag,
RSET *result_sets = 0;
int num_result_sets = 0;
ZEBRA_RES res =
- search_terms_list(zh, zapt, termz_org, attributeSet,
+ search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
stream, index_type, complete_flag,
rank_type, xpath_use,
rset_nmem,
Z_AttributesPlusTerm *zapt,
const char *termz_org,
const Odr_oid *attributeSet,
+ zint hits_limit,
NMEM stream,
const char *index_type,
int complete_flag,
int num_result_sets = 0;
int i;
ZEBRA_RES res =
- search_terms_list(zh, zapt, termz_org, attributeSet,
+ search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
stream, index_type, complete_flag,
rank_type, xpath_use,
rset_nmem,
Z_AttributesPlusTerm *zapt,
const char *termz_org,
const Odr_oid *attributeSet,
+ zint hits_limit,
NMEM stream,
const char *index_type,
int complete_flag,
int num_result_sets = 0;
int i;
ZEBRA_RES res =
- search_terms_list(zh, zapt, termz_org, attributeSet,
+ search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
stream, index_type, complete_flag,
rank_type, xpath_use,
rset_nmem,
Z_AttributesPlusTerm *zapt,
const char *termz,
const Odr_oid *attributeSet,
+ zint hits_limit,
NMEM stream,
const char *index_type,
int complete_flag,
ZEBRA_RES res;
struct grep_info grep_info;
int alloc_sets = 0;
- zint hits_limit_value;
+ zint hits_limit_value = hits_limit;
const char *term_ref_id_str = 0;
zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
sk->u.sortAttributes->list = zapt->attributes;
- sks->sortRelation = (int *)
+ sks->sortRelation = (Odr_int *)
nmem_malloc(stream, sizeof(*sks->sortRelation));
if (sort_relation_value == 1)
*sks->sortRelation = Z_SortKeySpec_ascending;
else
*sks->sortRelation = Z_SortKeySpec_ascending;
- sks->caseSensitivity = (int *)
+ sks->caseSensitivity = (Odr_int *)
nmem_malloc(stream, sizeof(*sks->caseSensitivity));
*sks->caseSensitivity = 0;
static ZEBRA_RES rpn_search_database(ZebraHandle zh,
Z_AttributesPlusTerm *zapt,
- const Odr_oid *attributeSet, NMEM stream,
+ const Odr_oid *attributeSet,
+ zint hits_limit, NMEM stream,
Z_SortKeySpecList *sort_sequence,
NMEM rset_nmem,
RSET *rset,
struct rset_key_control *kc);
static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
- const Odr_oid *attributeSet, NMEM stream,
+ const Odr_oid *attributeSet,
+ zint hits_limit, NMEM stream,
Z_SortKeySpecList *sort_sequence,
int num_bases, const char **basenames,
NMEM rset_nmem,
res = ZEBRA_FAIL;
break;
}
- res = rpn_search_database(zh, zapt, attributeSet, stream,
+ res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
sort_sequence,
rset_nmem, rsets+i, kc);
if (res != ZEBRA_OK)
static ZEBRA_RES rpn_search_database(ZebraHandle zh,
Z_AttributesPlusTerm *zapt,
- const Odr_oid *attributeSet, NMEM stream,
+ const Odr_oid *attributeSet,
+ zint hits_limit, NMEM stream,
Z_SortKeySpecList *sort_sequence,
NMEM rset_nmem,
RSET *rset,
*/
if (!strcmp(search_type, "phrase"))
{
- res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
+ res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
+ stream,
index_type, complete_flag, rank_type,
xpath_use,
rset_nmem,
}
else if (!strcmp(search_type, "and-list"))
{
- res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
+ res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
+ stream,
index_type, complete_flag, rank_type,
xpath_use,
rset_nmem,
}
else if (!strcmp(search_type, "or-list"))
{
- res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
+ res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
+ stream,
index_type, complete_flag, rank_type,
xpath_use,
rset_nmem,
}
else if (!strcmp(search_type, "numeric"))
{
- res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
+ res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
+ stream,
index_type, complete_flag, rank_type,
xpath_use,
rset_nmem,
}
static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
- const Odr_oid *attributeSet,
+ const Odr_oid *attributeSet,
+ zint hits_limit,
NMEM stream, NMEM rset_nmem,
Z_SortKeySpecList *sort_sequence,
int num_bases, const char **basenames,
ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
const Odr_oid *attributeSet,
+ zint hits_limit,
NMEM stream, NMEM rset_nmem,
Z_SortKeySpecList *sort_sequence,
int num_bases, const char **basenames,
ZEBRA_RES res;
struct rset_key_control *kc = zebra_key_control_create(zh);
- res = rpn_search_structure(zh, zs, attributeSet,
+ res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
stream, rset_nmem,
sort_sequence,
num_bases, basenames,
}
ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
- const Odr_oid *attributeSet,
+ const Odr_oid *attributeSet, zint hits_limit,
NMEM stream, NMEM rset_nmem,
Z_SortKeySpecList *sort_sequence,
int num_bases, const char **basenames,
int num_result_sets_r = 0;
res = rpn_search_structure(zh, zs->u.complex->s1,
- attributeSet, stream, rset_nmem,
+ attributeSet, hits_limit, stream, rset_nmem,
sort_sequence,
num_bases, basenames,
&result_sets_l, &num_result_sets_l,
return res;
}
res = rpn_search_structure(zh, zs->u.complex->s2,
- attributeSet, stream, rset_nmem,
+ attributeSet, hits_limit, stream, rset_nmem,
sort_sequence,
num_bases, basenames,
&result_sets_r, &num_result_sets_r,
{
yaz_log(YLOG_DEBUG, "rpn_search_APT");
res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
- attributeSet, stream, sort_sequence,
+ attributeSet, hits_limit,
+ stream, sort_sequence,
num_bases, basenames, rset_nmem, &rset,
kc);
if (res != ZEBRA_OK)
/*
* Local variables:
* c-basic-offset: 4
+ * c-file-style: "Stroustrup"
* indent-tabs-mode: nil
* End:
* vim: shiftwidth=4 tabstop=8 expandtab