-/* $Id: zrpn.c,v 1.216 2006-06-06 21:01:30 adam Exp $
+/* $Id: zrpn.c,v 1.221 2006-06-23 11:21:38 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
}
/* Fill in *map_info from a register: map_info->zm is set to the
   register's zebra_maps and map_info->reg_type to the given reg_type.
   Nothing else is touched and nothing is returned. */
static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
- struct rpn_char_map_info *map_info)
+ struct rpn_char_map_info *map_info)
{
map_info->zm = reg->zebra_maps;
map_info->reg_type = reg_type;
}
-static void esc_str(char *out_buf, int out_size,
+static void esc_str(char *out_buf, size_t out_size,
const char *in_buf, int in_size)
{
int k;
break;
case 3:
case 102:
- case 103:
case -1:
if (!**term_sub)
return 1;
strcat(term_tmp, term_component);
strcat(term_tmp, ")");
break;
+ case 103:
+ yaz_log(log_level_rpn, "Relation always matches");
+ /* skip to end of term (we don't care what it is) */
+ while (**term_sub != '\0')
+ (*term_sub)++;
+ break;
default:
*error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
return 0;
const char *term_ref_id_str = 0;
*rset = 0;
- term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
- stream);
+ term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
grep_info->isam_p_indx = 0;
res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
reg_type, complete_flag, num_bases, basenames,
int j, r, base_no;
AttrType truncation;
int truncation_value;
- oid_value curAttributeSet = attributeSet;
const char *termp;
struct rpn_char_map_info rcmi;
int space_split = complete_flag ? 0 : 1;
for (base_no = 0; base_no < num_bases; base_no++)
{
int ord = -1;
- int attr_ok = 0;
int regex_range = 0;
- int init_pos = 0;
-#if 0
- attent attp;
- data1_local_attribute id_xpath_attr;
- data1_local_attribute *local_attr;
-#endif
int max_pos, prefix_len = 0;
int relation_error;
char ord_buf[32];
int ord_len, i;
- termp = *term_sub;
+ termp = *term_sub; /* start of term for each database */
if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
{
return ZEBRA_FAIL;
}
- if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
- curAttributeSet, &ord)
- != ZEBRA_OK)
- {
- break;
- }
+ if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
+ attributeSet, &ord) != ZEBRA_OK)
+ continue;
+
+ bases_ok++;
+
*ol = ord_list_append(stream, *ol, ord);
-
- if (prefix_len)
- term_dict[prefix_len++] = '|';
- else
- term_dict[prefix_len++] = '(';
-
ord_len = key_SU_encode (ord, ord_buf);
+
+ term_dict[prefix_len++] = '(';
for (i = 0; i<ord_len; i++)
{
- term_dict[prefix_len++] = 1;
- term_dict[prefix_len++] = ord_buf[i];
+ term_dict[prefix_len++] = 1; /* our internal regexp escape char */
+ term_dict[prefix_len++] = ord_buf[i];
}
- if (ord_len > init_pos)
- init_pos = ord_len;
-
- bases_ok++;
- if (prefix_len)
- attr_ok = 1;
-
term_dict[prefix_len++] = ')';
term_dict[prefix_len] = '\0';
j = prefix_len;
{
case -1: /* not specified */
case 100: /* do not truncate */
- if (!string_relation (zh, zapt, &termp, term_dict,
- attributeSet,
- reg_type, space_split, term_dst,
- &relation_error))
+ if (!string_relation(zh, zapt, &termp, term_dict,
+ attributeSet,
+ reg_type, space_split, term_dst,
+ &relation_error))
{
if (relation_error)
{
truncation_value);
return ZEBRA_FAIL;
}
- if (attr_ok)
+ if (1)
{
char buf[80];
const char *input = term_dict + prefix_len;
esc_str(buf, sizeof(buf), input, strlen(input));
}
- if (attr_ok)
- {
- yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
- r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
- grep_info, &max_pos, init_pos,
- grep_handle);
- if (r)
- yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
- }
+ yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
+ r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
+ grep_info, &max_pos,
+ ord_len /* number of "exact" chars */,
+ grep_handle);
+ if (r)
+ yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
}
if (!bases_ok)
return ZEBRA_FAIL;
struct grep_info grep_info;
const char *termp = termz;
int alloc_sets = 0;
- int empty_term = *termz ? 0 : 1;
- empty_term = 0;
*num_result_sets = 0;
*term_dst = 0;
if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
break;
(*num_result_sets)++;
- if (empty_term)
- break;
if (!*termp)
break;
}
yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
- if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
- term_dst))
- return 0;
- term_value = atoi (term_tmp);
switch (relation_value)
{
case 1:
yaz_log(log_level_rpn, "Relation <");
+ if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
+ term_dst))
+ return 0;
+ term_value = atoi (term_tmp);
gen_regular_rel(term_tmp, term_value-1, 1);
break;
case 2:
yaz_log(log_level_rpn, "Relation <=");
+ if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
+ term_dst))
+ return 0;
+ term_value = atoi (term_tmp);
gen_regular_rel(term_tmp, term_value, 1);
break;
case 4:
yaz_log(log_level_rpn, "Relation >=");
+ if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
+ term_dst))
+ return 0;
+ term_value = atoi (term_tmp);
gen_regular_rel(term_tmp, term_value, 0);
break;
case 5:
yaz_log(log_level_rpn, "Relation >");
+ if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
+ term_dst))
+ return 0;
+ term_value = atoi (term_tmp);
gen_regular_rel(term_tmp, term_value+1, 0);
break;
case -1:
case 3:
yaz_log(log_level_rpn, "Relation =");
+ if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
+ term_dst))
+ return 0;
+ term_value = atoi (term_tmp);
sprintf(term_tmp, "(0*%d)", term_value);
break;
+ case 103:
+ /* term_tmp untouched.. */
+ while (**term_sub != '\0')
+ (*term_sub)++;
+ break;
default:
*error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
return 0;
static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
const char **term_sub,
- oid_value attributeSet,
+ oid_value attributeSet, NMEM stream,
struct grep_info *grep_info,
int reg_type, int complete_flag,
int num_bases, char **basenames,
char *term_dst,
const char *xpath_use,
- NMEM stream)
+ struct ord_list **ol)
{
char term_dict[2*IT_MAX_WORD+2];
int base_no;
- oid_value curAttributeSet = attributeSet;
const char *termp;
struct rpn_char_map_info rcmi;
int bases_ok = 0; /* no of databases with OK attribute */
+ *ol = ord_list_create(stream);
+
rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
for (base_no = 0; base_no < num_bases; base_no++)
}
if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
- curAttributeSet, &ord)
- != ZEBRA_OK)
- {
- break;
- }
+ attributeSet, &ord) != ZEBRA_OK)
+ continue;
+ bases_ok++;
+
+ *ol = ord_list_append(stream, *ol, ord);
- if (prefix_len)
- term_dict[prefix_len++] = '|';
- else
- term_dict[prefix_len++] = '(';
-
ord_len = key_SU_encode (ord, ord_buf);
+
+ term_dict[prefix_len++] = '(';
for (i = 0; i < ord_len; i++)
{
term_dict[prefix_len++] = 1;
- term_dict[prefix_len++] = ord_buf[i];
+ term_dict[prefix_len++] = ord_buf[i];
}
- bases_ok++;
term_dict[prefix_len++] = ')';
term_dict[prefix_len] = '\0';
+
if (!numeric_relation(zh, zapt, &termp, term_dict,
attributeSet, grep_info, &max_pos, reg_type,
term_dst, &relation_error))
return ZEBRA_FAIL;
while (1)
{
+ struct ord_list *ol;
if (alloc_sets == num_result_sets)
{
int add = 10;
}
yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
grep_info.isam_p_indx = 0;
- res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
+ res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
reg_type, complete_flag, num_bases, basenames,
- term_dst, xpath_use,
- stream);
+ term_dst, xpath_use, &ol);
if (res == ZEBRA_FAIL || termp == 0)
break;
yaz_log(YLOG_DEBUG, "term: %s", term_dst);
strlen(term_dst), rank_type,
0 /* preserve position */,
zapt->term->which, rset_nmem,
- kc, kc->scope, 0, reg_type,
+ kc, kc->scope, ol, reg_type,
hits_limit_value,
term_ref_id_str);
if (!result_sets[num_result_sets])
break;
num_result_sets++;
+ if (!*termp)
+ break;
}
grep_info_delete(&grep_info);
- if (termp)
- {
- int i;
- for (i = 0; i<num_result_sets; i++)
- rset_delete(result_sets[i]);
- return ZEBRA_FAIL;
- }
+
+ if (res != ZEBRA_OK)
+ return res;
if (num_result_sets == 0)
*rset = rset_create_null(rset_nmem, kc, 0);
- if (num_result_sets == 1)
+ else if (num_result_sets == 1)
*rset = result_sets[0];
else
- *rset = rset_create_and(rset_nmem, kc, kc->scope,
+ *rset = rset_create_and(rset_nmem, kc, kc->scope,
num_result_sets, result_sets);
if (!*rset)
- return ZEBRA_FAIL;
+ return ZEBRA_FAIL;
return ZEBRA_OK;
}
}
-static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
- oid_value attributeSet,
- struct xpath_location_step *xpath, int max, NMEM mem)
+static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+ oid_value attributeSet,
+ struct xpath_location_step *xpath, int max,
+ NMEM mem)
{
oid_value curAttributeSet = attributeSet;
AttrType use;
static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
int reg_type, const char *term,
const char *xpath_use,
- oid_value curAttributeSet, NMEM rset_nmem,
+ NMEM rset_nmem,
struct rset_key_control *kc)
{
RSET rset;
char term_dict[2048];
char ord_buf[32];
int prefix_len = 0;
- int ord = zebraExplain_lookup_attr_str(zh->reg->zei, reg_type,
+ int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
+ zinfo_index_category_index,
+ reg_type,
xpath_use);
int ord_len, i, r, max_pos;
int term_type = Z_Term_characterString;
static
ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
- oid_value attributeSet,
int num_bases, char **basenames,
NMEM stream, const char *rank_type, RSET rset,
int xpath_len, struct xpath_location_step *xpath,
RSET *rset_out,
struct rset_key_control *kc)
{
- oid_value curAttributeSet = attributeSet;
int base_no;
int i;
+ int always_matches = rset ? 0 : 1;
if (xpath_len < 0)
{
}
- curAttributeSet = VAL_IDXPATH;
-
/*
//a -> a/.*
//a/b -> b/a/.*
}
while (--level >= 0)
{
- char xpath_rev[128];
- int i, len;
+ WRBUF xpath_rev = wrbuf_alloc();
+ int i;
RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
- *xpath_rev = 0;
- len = 0;
for (i = level; i >= 1; --i)
{
const char *cp = xpath[i].part;
if (*cp)
{
- for (;*cp; cp++)
+ for (; *cp; cp++)
+ {
if (*cp == '*')
- {
- memcpy (xpath_rev + len, "[^/]*", 5);
- len += 5;
- }
+ wrbuf_puts(xpath_rev, "[^/]*");
else if (*cp == ' ')
- {
-
- xpath_rev[len++] = 1;
- xpath_rev[len++] = ' ';
- }
-
+ wrbuf_puts(xpath_rev, "\001 ");
else
- xpath_rev[len++] = *cp;
- xpath_rev[len++] = '/';
+ wrbuf_putc(xpath_rev, *cp);
+
+ /* wrbuf_putc does not null-terminate, but
+ wrbuf_puts below ensures it does, so xpath_rev
+ is OK iff length is > 0 */
+ }
+ wrbuf_puts(xpath_rev, "/");
}
else if (i == 1) /* // case */
- {
- xpath_rev[len++] = '.';
- xpath_rev[len++] = '*';
- }
+ wrbuf_puts(xpath_rev, ".*");
}
- xpath_rev[len] = 0;
-
if (xpath[level].predicate &&
xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
xpath[level].predicate->u.relation.name[0])
wrbuf_puts(wbuf, "");
rset_attr = xpath_trunc(
zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
- curAttributeSet, rset_nmem, kc);
+ rset_nmem, kc);
wrbuf_free(wbuf, 1);
}
else
{
if (!first_path)
+ {
+ wrbuf_free(xpath_rev, 1);
continue;
+ }
}
- yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
- if (strlen(xpath_rev))
+ yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
+ wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
+ if (wrbuf_len(xpath_rev))
{
rset_start_tag = xpath_trunc(zh, stream, '0',
- xpath_rev,
+ wrbuf_buf(xpath_rev),
ZEBRA_XPATH_ELM_BEGIN,
- curAttributeSet,
rset_nmem, kc);
-
- rset_end_tag = xpath_trunc(zh, stream, '0',
- xpath_rev,
- ZEBRA_XPATH_ELM_END,
- curAttributeSet,
- rset_nmem, kc);
-
- rset = rset_create_between(rset_nmem, kc, kc->scope,
- rset_start_tag, rset,
- rset_end_tag, rset_attr);
+ if (always_matches)
+ rset = rset_start_tag;
+ else
+ {
+ rset_end_tag = xpath_trunc(zh, stream, '0',
+ wrbuf_buf(xpath_rev),
+ ZEBRA_XPATH_ELM_END,
+ rset_nmem, kc);
+
+ rset = rset_create_between(rset_nmem, kc, kc->scope,
+ rset_start_tag, rset,
+ rset_end_tag, rset_attr);
+ }
}
+ wrbuf_free(xpath_rev, 1);
first_path = 0;
}
}
return ZEBRA_OK;
}
+#define MAX_XPATH_STEPS 10
+
static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
oid_value attributeSet, NMEM stream,
Z_SortKeySpecList *sort_sequence,
char termz[IT_MAX_WORD+1];
int xpath_len;
const char *xpath_use = 0;
- struct xpath_location_step xpath[10];
+ struct xpath_location_step xpath[MAX_XPATH_STEPS];
if (!log_level_set)
{
return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
rank_type, rset_nmem, rset, kc);
/* consider if an X-Path query is used */
- xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
+ xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
+ xpath, MAX_XPATH_STEPS, stream);
if (xpath_len >= 0)
{
if (xpath[xpath_len-1].part[0] == '@')
xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
else
xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
+
+ if (1)
+ {
+ AttrType relation;
+ int relation_value;
+
+ attr_init_APT(&relation, zapt, 2);
+ relation_value = attr_find(&relation, NULL);
+
+ if (relation_value == 103) /* alwaysmatches */
+ {
+ *rset = 0; /* signal no "term" set */
+ return rpn_search_xpath(zh, num_bases, basenames,
+ stream, rank_type, *rset,
+ xpath_len, xpath, rset_nmem, rset, kc);
+ }
+ }
}
/* search using one of the various search type strategies
num_bases, basenames, rset_nmem,
rset, kc);
}
- else if (!strcmp(search_type, "always"))
- {
- *termz = '\0';
- res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
- reg_id, complete_flag, rank_type,
- xpath_use,
- num_bases, basenames, rset_nmem,
- rset, kc);
- }
else
{
- zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
+ zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
res = ZEBRA_FAIL;
}
if (res != ZEBRA_OK)
return res;
if (!*rset)
return ZEBRA_FAIL;
- return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
+ return rpn_search_xpath(zh, num_bases, basenames,
stream, rank_type, *rset,
xpath_len, xpath, rset_nmem, rset, kc);
}
*count = rset->hits_count;
}
+#define RPN_MAX_ORDS 32
+
ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
oid_value attributeset,
int num_bases, char **basenames,
char termz[IT_MAX_WORD+20];
struct scan_info *scan_info_array;
ZebraScanEntry *glist;
- int ords[32], ord_no = 0;
- int ptr[32];
-
- int bases_ok = 0; /* no of databases with OK attribute */
- int errCode = 0; /* err code (if any is not OK) */
- char *errString = 0; /* addinfo */
+ int ords[RPN_MAX_ORDS], ord_no = 0;
+ int ptr[RPN_MAX_ORDS];
unsigned index_type;
char *search_type = NULL;
zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
return ZEBRA_FAIL;
}
- for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
+ for (base_no = 0; base_no < num_bases && ord_no < RPN_MAX_ORDS; base_no++)
{
int ord;
*num_entries = 0;
return ZEBRA_FAIL;
}
-
if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeset, &ord)
!= ZEBRA_OK)
- {
- break;
- }
+ continue;
ords[ord_no++] = ord;
}
- if (!bases_ok && errCode)
- {
- zebra_setError(zh, errCode, errString);
- *num_entries = 0;
- return ZEBRA_FAIL;
- }
if (ord_no == 0)
{
*num_entries = 0;