+Honor position attribute, i.e. allow first-in-field search. To
+enable this, "firstinfield 1" must be given for an index in
+default.idx. It is enabled in tab/default.idx for index w. At this
+stage first-in-field is only supported for phrase searches (including
+simple words).
+
Common stream reader interface for record filters (struct ZebraRecStream).
Debian package fix: packages idzebra-2.0 + libidzebra-2.0-modules did
-/* $Id: zebramap.h,v 1.19 2006-08-15 14:28:32 adam Exp $
+/* $Id: zebramap.h,v 1.20 2006-09-08 14:40:51 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
int zebra_maps_is_positioned (ZebraMaps zms, unsigned reg_id);
YAZ_EXPORT
+int zebra_maps_is_first_in_field (ZebraMaps zms, unsigned reg_id);
+
+YAZ_EXPORT
WRBUF zebra_replace(ZebraMaps zms, unsigned reg_id, const char *ex_list,
const char *input_str, int input_len);
-/* $Id: extract.c,v 1.228 2006-08-22 13:39:27 adam Exp $
+/* $Id: extract.c,v 1.229 2006-09-08 14:40:52 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
};
static int log_level = 0;
-static int log_level_initialized = 1;
+static int log_level_initialized = 0;
static void zebra_init_log_level()
{
}
}
-static void extract_add_incomplete_field (RecWord *p)
+static void extract_add_incomplete_field(RecWord *p)
{
ZebraHandle zh = p->extractCtrl->handle;
const char *b = p->term_buf;
if (remain > 0)
map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0);
+ if (map)
+ {
+ if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type))
+ {
+ /* first in field marker */
+ extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN);
+ p->seqno++;
+ }
+ }
while (map)
{
char buf[IT_MAX_WORD+1];
{
ZebraHandle zh = p->extractCtrl->handle;
WRBUF wrbuf;
+
if (log_level)
+ {
yaz_log(log_level, "extract_token_add "
"type=%c index=%s seqno=" ZINT_FORMAT " s=%.*s",
p->index_type, p->index_name,
p->seqno, p->term_len, p->term_buf);
+ }
if ((wrbuf = zebra_replace(zh->reg->zebra_maps, p->index_type, 0,
p->term_buf, p->term_len)))
{
-/* $Id: index.h,v 1.174 2006-08-22 13:39:27 adam Exp $
+/* $Id: index.h,v 1.175 2006-09-08 14:40:52 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
ZEBRA_RES zebra_update_from_path(ZebraHandle zh, const char *path);
ZEBRA_RES zebra_delete_from_path(ZebraHandle zh, const char *path);
+#define FIRST_IN_FIELD_STR "\001^"
+#define FIRST_IN_FIELD_LEN 2
+
YAZ_END_CDECL
#endif
-/* $Id: zrpn.c,v 1.227 2006-08-31 08:35:48 adam Exp $
+/* $Id: zrpn.c,v 1.228 2006-09-08 14:40:53 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
return ZEBRA_OK;
}
+/* Build a result set matching the first-in-field marker entries for the
+   index addressed by zapt, when the position attribute (type 3) asks
+   for it.
+
+   Position value handling:
+     3 or -1 : nothing to do; returns ZEBRA_OK and leaves *rset untouched
+               (-1 is presumably "attribute not given" - confirm against
+               attr_find).
+     1 or 2  : the marker term FIRST_IN_FIELD_STR is looked up for each
+               database in basenames.
+     other   : YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE is set; ZEBRA_FAIL.
+
+   The same error is reported when first-in-field is not enabled for
+   reg_type or when the register has no isamb.
+
+   On success *rset is assigned an OR of the per-database marker sets,
+   but only if at least one marker entry was found; otherwise *rset is
+   left as the caller initialised it.
+
+   Returns ZEBRA_OK or ZEBRA_FAIL. */
+static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
+                                         Z_AttributesPlusTerm *zapt,
+                                         oid_value attributeSet,
+                                         int reg_type,
+                                         int num_bases, char **basenames,
+                                         NMEM rset_nmem,
+                                         RSET *rset,
+                                         struct rset_key_control *kc)
+{
+    RSET *f_set;
+    int base_no;
+    int position_value;
+    int num_sets = 0;
+    AttrType position;
+
+    attr_init_APT(&position, zapt, 3);
+    position_value = attr_find(&position, NULL);
+    switch(position_value)
+    {
+    case 3:
+    case -1:
+        return ZEBRA_OK;
+    case 1:
+    case 2:
+        break;
+    default:
+        zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
+                            position_value);
+        return ZEBRA_FAIL;
+    }
+
+    if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
+    {
+        zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
+                            position_value);
+        return ZEBRA_FAIL;
+    }
+
+    if (!zh->reg->isamb)
+    {
+        zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
+                            position_value);
+        return ZEBRA_FAIL;
+    }
+    /* scratch array: at most one marker set per database */
+    f_set = xmalloc(sizeof(RSET) * num_bases);
+    for (base_no = 0; base_no < num_bases; base_no++)
+    {
+        int ord = -1;
+        char ord_buf[32];
+        char term_dict[100];
+        int ord_len;
+        char *val;
+        ISAM_P isam_p;
+
+        if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
+        {
+            zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
+                           basenames[base_no]);
+            /* release the scratch array; it was leaked on this path.
+               NOTE(review): RSETs already created for earlier bases are
+               not released here - confirm who owns them. */
+            xfree(f_set);
+            return ZEBRA_FAIL;
+        }
+
+        /* no ordinal for this attribute in this database: skip it */
+        if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
+                              attributeSet, &ord) != ZEBRA_OK)
+            continue;
+
+        /* dictionary key = encoded ordinal followed by the marker term */
+        ord_len = key_SU_encode (ord, ord_buf);
+        memcpy(term_dict, ord_buf, ord_len);
+        strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
+        val = dict_lookup(zh->reg->dict, term_dict);
+        if (!val)
+            continue;
+        /* first byte of the entry is its length; the ISAM_P follows */
+        assert(*val == sizeof(ISAM_P));
+        memcpy(&isam_p, val+1, sizeof(isam_p));
+
+        f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
+                                           zh->reg->isamb, isam_p, 0);
+
+    }
+    if (num_sets)
+    {
+        *rset = rset_create_or(rset_nmem, kc, kc->scope,
+                               0 /* termid */, num_sets, f_set);
+    }
+    xfree(f_set);
+    return ZEBRA_OK;
+}
+
static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
Z_AttributesPlusTerm *zapt,
const char *termz_org,
num_bases, basenames,
rset_nmem,
&result_sets, &num_result_sets, kc);
+
if (res != ZEBRA_OK)
return res;
+
+ if (num_result_sets > 0)
+ {
+ RSET first_set = 0;
+ res = rpn_search_APT_position(zh, zapt, attributeSet,
+ reg_type,
+ num_bases, basenames,
+ rset_nmem, &first_set,
+ kc);
+ if (res != ZEBRA_OK)
+ return res;
+ if (first_set)
+ {
+ RSET *nsets = nmem_malloc(stream,
+ sizeof(RSET) * (num_result_sets+1));
+ nsets[0] = first_set;
+ memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
+ result_sets = nsets;
+ num_result_sets++;
+ }
+ }
if (num_result_sets == 0)
*rset = rset_create_null(rset_nmem, kc, 0);
else if (num_result_sets == 1)
else
idx = - pos - 1;
+ /* skip special terms.. of no interest */
+ if (name[len_prefix] < 4)
+ return 1;
+
if (idx < 0)
return 0;
scan_info->list[idx].term = (char *)
-/* $Id: rsmultiandor.c,v 1.25 2006-08-16 13:14:55 adam Exp $
+/* $Id: rsmultiandor.c,v 1.26 2006-09-08 14:40:55 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
*term = rset->term;
else
*term = it->term;
- assert(*term);
}
(mrfd->hits)++;
rdres = rset_read(it->fd, it->buf, &it->term);
# Zebra indexes as referred to from the *.abs-files.
-# $Id: default.idx,v 1.13 2006-06-22 09:48:09 adam Exp $
+# $Id: default.idx,v 1.14 2006-09-08 14:40:56 adam Exp $
#
# Traditional word index
completeness 0
position 1
alwaysmatches 1
+firstinfield 1
charmap string.chr
# Phrase index
-/* $Id: t9.c,v 1.11 2006-08-14 10:40:22 adam Exp $
+/* $Id: t9.c,v 1.12 2006-09-08 14:40:57 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
YAZ_CHECK(tl_init_data(zh, recs));
YAZ_CHECK(tl_ranking_query(zh, "@attr 1=4 @attr 2=102 the",
- 3, "first title", 1000 ));
+ 3, "first title", 936 ));
YAZ_CHECK(tl_ranking_query(zh, "@attr 1=62 @attr 2=102 foo",
3, "second title", 850 ));
- /* get the record with the most significant hit, that is the 'bar' */
- /* as that is the rarest of my search words */
- YAZ_CHECK(tl_ranking_query(zh, "@attr 1=1016 @attr 2=102 @or @or the foo bar",
- 3, "third title", 813 ));
-
YAZ_CHECK(tl_close_down(zh, zs));
}
-/* $Id: t1.c,v 1.10 2006-08-22 08:11:32 adam Exp $
+/* $Id: t1.c,v 1.11 2006-09-08 14:40:58 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
YAZ_CHECK(tl_query(zh, "@attr 1=leader 00366", 2));
YAZ_CHECK(tl_query(zh, "@attr 1=leader2 nam", 2));
+ YAZ_CHECK(tl_query(zh, "@attr 1=1003 jack", 2));
+ YAZ_CHECK(tl_query(zh, "@attr 1=1003 jack", 2));
+ YAZ_CHECK(tl_query(zh, "@attr 1=1003 collins", 2));
+ YAZ_CHECK(tl_query(zh, "@attr 1=1003 @attr 3=1 collins", 0));
+ YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 3=1 program", 0));
+ YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 3=1 to", 0));
YAZ_CHECK(tl_close_down(zh, zs));
}
-/* $Id: zebramap.c,v 1.52 2006-08-15 14:28:35 adam Exp $
+/* $Id: zebramap.c,v 1.53 2006-09-08 14:41:00 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
int completeness;
int positioned;
int alwaysmatches;
+ int first_in_field;
int type;
union {
struct {
(*zm)->completeness = 0;
(*zm)->positioned = 1;
(*zm)->alwaysmatches = 0;
+ (*zm)->first_in_field = 0;
zms->no_maps++;
}
else if (!yaz_matchstr(argv[0], "sort"))
(*zm)->completeness = 0;
(*zm)->positioned = 0;
(*zm)->alwaysmatches = 0;
+ (*zm)->first_in_field = 0;
zms->no_maps++;
}
else if (!zm)
{
(*zm)->alwaysmatches = atoi(argv[1]);
}
+ else if (!yaz_matchstr(argv[0], "firstinfield") && argc == 2)
+ {
+ (*zm)->first_in_field = atoi(argv[1]);
+ }
else if (!yaz_matchstr(argv[0], "entrysize") && argc == 2)
{
if ((*zm)->type == ZEBRA_MAP_TYPE_SORT)
return 0;
}
+/* Report the first_in_field setting of the map registered for reg_id.
+   Returns 0 when zebra_map_get finds no map for reg_id. */
+int zebra_maps_is_first_in_field(ZebraMaps zms, unsigned reg_id)
+{
+    struct zebra_map *map = zebra_map_get(zms, reg_id);
+
+    if (!map)
+        return 0;
+    return map->first_in_field;
+}
+
int zebra_maps_sort(ZebraMaps zms, Z_SortAttributes *sortAttributes,
int *numerical)
{