-/* $Id: zrpn.c,v 1.168 2005-01-15 20:47:15 adam Exp $
+/* $Id: zrpn.c,v 1.171 2005-03-11 17:56:34 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
}
static void add_isam_p(const char *name, const char *info,
- struct grep_info *p)
+ struct grep_info *p)
{
if (!log_level_set)
{
return *s0;
}
+/*
+ * esc_str: build a printable dump of in_buf in out_buf.
+ *
+ * Each of the first in_size bytes of in_buf is appended to out_buf as
+ * "%02X:%c " - the byte value in hex, a colon, the byte itself (or '?'
+ * when its value is outside 32..126) and a space, i.e. five characters
+ * per input byte. out_buf always starts out as the empty string.
+ *
+ * Once the text grows past out_size-20 characters, ".." is appended and
+ * the rest of the input is skipped. The 20-byte margin is what keeps the
+ * unbounded sprintf/strcat calls inside the buffer: an entry is only
+ * written when at most out_size-20 characters are present, so the entry,
+ * the ".." marker and the terminating NUL always fit.
+ *
+ * out_buf and in_buf must be non-NULL and out_size must exceed 20; this
+ * is checked with assert() only.
+ * NOTE(review): when built with NDEBUG the asserts vanish; out_size <= 20
+ * would then make out_size-20 non-positive in a comparison against the
+ * unsigned strlen() result - confirm all callers pass a larger buffer.
+ */
+static void esc_str(char *out_buf, int out_size,
+ const char *in_buf, int in_size)
+{
+ int k;
+
+ assert(out_buf);
+ assert(in_buf);
+ assert(out_size > 20);
+ *out_buf = '\0'; /* start from an empty string so strlen() below is valid */
+ for (k = 0; k<in_size; k++)
+ {
+ int c = in_buf[k] & 0xff; /* byte value as 0..255, whatever char's signedness */
+ int pc;
+ if (c < 32 || c > 126) /* outside 32..126: show '?' instead of the byte */
+ pc = '?';
+ else
+ pc = c;
+ sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc); /* append at current end */
+ if (strlen(out_buf) > out_size-20) /* nearly full: mark truncation and stop */
+ {
+ strcat(out_buf, "..");
+ break;
+ }
+ }
+}
+
#define REGEX_CHARS " []()|.*+?!"
/* term_100: handle term, where trunc = none (no operators at all) */
const char **src, char *dst, int space_split,
char *dst_term)
{
- const char *s0, *s1;
+ const char *s0;
const char **map;
int i = 0;
int j = 0;
s0 = *src;
while (*s0)
{
- s1 = s0;
- map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
+ const char *s1 = s0;
+ int q_map_match = 0;
+ map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
+ &q_map_match);
if (space_split)
{
if (**map == *CHR_SPACE)
space_start = space_end = 0;
}
}
- /* add non-space char */
- while (s1 < s0)
- {
- if (strchr(REGEX_CHARS, *s1))
- dst[i++] = '\\';
- dst_term[j++] = *s1;
- dst[i++] = *s1++;
- }
+ /* add non-space char */
+ memcpy(dst_term+j, s1, s0 - s1);
+ j += (s0 - s1);
+ if (!q_map_match)
+ {
+ while (s1 < s0)
+ {
+ if (strchr(REGEX_CHARS, *s1))
+ dst[i++] = '\\';
+ dst[i++] = *s1++;
+ }
+ }
+ else
+ {
+ char tmpbuf[80];
+ esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
+
+ strcpy(dst + i, map[0]);
+ i += strlen(map[0]);
+ }
}
dst[i] = '\0';
dst_term[j] = '\0';
const char **src, char *dst, int space_split,
char *dst_term)
{
- const char *s0, *s1;
+ const char *s0;
const char **map;
int i = 0;
int j = 0;
}
else
{
- s1 = s0;
- map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
+ const char *s1 = s0;
+ int q_map_match = 0;
+ map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
+ &q_map_match);
if (space_split && **map == *CHR_SPACE)
break;
- while (s1 < s0)
- {
- if (strchr(REGEX_CHARS, *s1))
- dst[i++] = '\\';
- dst_term[j++] = *s1;
- dst[i++] = *s1++;
- }
+
+ /* add non-space char */
+ memcpy(dst_term+j, s1, s0 - s1);
+ j += (s0 - s1);
+ if (!q_map_match)
+ {
+ while (s1 < s0)
+ {
+ if (strchr(REGEX_CHARS, *s1))
+ dst[i++] = '\\';
+ dst[i++] = *s1++;
+ }
+ }
+ else
+ {
+ char tmpbuf[80];
+ esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
+
+ strcpy(dst + i, map[0]);
+ i += strlen(map[0]);
+ }
}
}
dst[i] = '\0';
{
int i = 0;
int j = 0;
- const char *s0, *s1;
+ const char *s0;
const char **map;
if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
}
else
{
- s1 = s0;
- map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
- if (**map == *CHR_SPACE)
+ const char *s1 = s0;
+ int q_map_match = 0;
+ map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
+ &q_map_match);
+ if (space_split && **map == *CHR_SPACE)
break;
- while (s1 < s0)
- {
- if (strchr(REGEX_CHARS, *s1))
- dst[i++] = '\\';
- dst_term[j++] = *s1;
- dst[i++] = *s1++;
- }
+
+ /* add non-space char */
+ memcpy(dst_term+j, s1, s0 - s1);
+ j += (s0 - s1);
+ if (!q_map_match)
+ {
+ while (s1 < s0)
+ {
+ if (strchr(REGEX_CHARS, *s1))
+ dst[i++] = '\\';
+ dst[i++] = *s1++;
+ }
+ }
+ else
+ {
+ char tmpbuf[80];
+ esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
+
+ strcpy(dst + i, map[0]);
+ i += strlen(map[0]);
+ }
}
}
dst[i] = '\0';
dst_term[j] = '\0';
*src = s0;
+
return i;
}
dst[i++] = '.';
dst_term[j++] = *s0++;
}
+ else
{
s1 = s0;
map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
dst[i++] = '.';
dst_term[j++] = *s0++;
}
+ else
{
s1 = s0;
map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
use_value = 1016;
for (base_no = 0; base_no < num_bases; base_no++)
{
+ int ord = -1;
int attr_ok = 0;
int regex_range = 0;
int init_pos = 0;
}
if (xpath_use > 0 && use_value == -2)
{
- use_value = xpath_use;
+ /* xpath mode and we have a string attribute */
attp.local_attributes = &id_xpath_attr;
attp.attset_ordinal = VAL_IDXPATH;
id_xpath_attr.next = 0;
+
+ use_value = xpath_use; /* xpath_use as use-attribute now */
id_xpath_attr.local = use_value;
}
- else if (curAttributeSet == VAL_IDXPATH)
+ else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
{
+ /* X-Path attribute, use numeric value directly */
attp.local_attributes = &id_xpath_attr;
attp.attset_ordinal = VAL_IDXPATH;
id_xpath_attr.next = 0;
id_xpath_attr.local = use_value;
}
- else
+ else if (use_string &&
+ (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
+ use_string)) >= 0)
+ {
+ /* we have a match for a raw string attribute */
+ char ord_buf[32];
+ int i, ord_len;
+
+ if (prefix_len)
+ term_dict[prefix_len++] = '|';
+ else
+ term_dict[prefix_len++] = '(';
+
+ ord_len = key_SU_encode (ord, ord_buf);
+ for (i = 0; i<ord_len; i++)
+ {
+ term_dict[prefix_len++] = 1;
+ term_dict[prefix_len++] = ord_buf[i];
+ }
+ attp.local_attributes = 0; /* no more attributes */
+ }
+ else
{
+ /* lookup in the .att files . Allow string as well */
if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
- use_string)))
+ use_string)))
{
yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
curAttributeSet, use_value, r);
continue;
}
}
- for (local_attr = attp.local_attributes; local_attr;
- local_attr = local_attr->next)
- {
- int ord;
- char ord_buf[32];
- int i, ord_len;
-
- ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
- local_attr->local);
- if (ord < 0)
- continue;
- if (prefix_len)
- term_dict[prefix_len++] = '|';
- else
- term_dict[prefix_len++] = '(';
-
- ord_len = key_SU_encode (ord, ord_buf);
- for (i = 0; i<ord_len; i++)
- {
- term_dict[prefix_len++] = 1;
- term_dict[prefix_len++] = ord_buf[i];
- }
- }
+ for (local_attr = attp.local_attributes; local_attr;
+ local_attr = local_attr->next)
+ {
+ char ord_buf[32];
+ int i, ord_len;
+
+ ord = zebraExplain_lookup_attr_su(zh->reg->zei,
+ attp.attset_ordinal,
+ local_attr->local);
+ if (ord < 0)
+ continue;
+ if (prefix_len)
+ term_dict[prefix_len++] = '|';
+ else
+ term_dict[prefix_len++] = '(';
+
+ ord_len = key_SU_encode (ord, ord_buf);
+ for (i = 0; i<ord_len; i++)
+ {
+ term_dict[prefix_len++] = 1;
+ term_dict[prefix_len++] = ord_buf[i];
+ }
+ }
if (!prefix_len)
{
#if 1
}
if (attr_ok)
{
+ char buf[80];
+ const char *input = term_dict + prefix_len;
+ esc_str(buf, sizeof(buf), input, strlen(input));
+ }
+ if (attr_ok)
+ {
yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
grep_info, &max_pos, init_pos,
char ord_buf[32];
int i, ord_len;
- ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
- local_attr->local);
+ ord = zebraExplain_lookup_attr_su(zh->reg->zei,
+ attp.attset_ordinal,
+ local_attr->local);
if (ord < 0)
continue;
if (prefix_len)
char term_dict[2048];
char ord_buf[32];
int prefix_len = 0;
- int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
+ int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
int ord_len, i, r, max_pos;
int term_type = Z_Term_characterString;
const char *flags = "void";
grep_info.isam_p_indx = 0;
r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
&grep_info, &max_pos, 0, grep_handle);
- yaz_log (YLOG_LOG, "%s %d positions", term,
+ yaz_log (YLOG_DEBUG, "%s %d positions", term,
grep_info.isam_p_indx);
rset = rset_trunc(zh, grep_info.isam_p_buf,
grep_info.isam_p_indx, term, strlen(term),
{
int ord;
- ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
- local_attr->local);
+ ord = zebraExplain_lookup_attr_su(zh->reg->zei,
+ attp.attset_ordinal,
+ local_attr->local);
if (ord > 0)
ords[ord_no++] = ord;
}