X-Git-Url: http://sru.miketaylor.org.uk/?a=blobdiff_plain;f=index%2Fzrpn.c;h=1b74b9a1be64fdd8462a0fceda83029982bfc149;hb=7a2d0f25682890bde5d8f2883d6020df2ed0b365;hp=f3ef7a8afade7b2e8209bf42883a170efa8a2c7c;hpb=6cb032fd2ece4742e3516e0941507143f677e5cd;p=idzebra-moved-to-github.git diff --git a/index/zrpn.c b/index/zrpn.c index f3ef7a8..1b74b9a 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,4 +1,4 @@ -/* $Id: zrpn.c,v 1.191 2005-05-11 12:39:37 adam Exp $ +/* $Id: zrpn.c,v 1.197 2005-06-07 14:53:39 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -50,7 +50,6 @@ typedef struct Z_AttributesPlusTerm *zapt; } AttrType; - static int log_level_set = 0; static int log_level_rpn = 0; @@ -173,8 +172,8 @@ struct grep_info { ZebraSet termset; }; -static void term_untrans(ZebraHandle zh, int reg_type, - char *dst, const char *src) +void zebra_term_untrans(ZebraHandle zh, int reg_type, + char *dst, const char *src) { int len = 0; while (*src) @@ -238,7 +237,7 @@ static void add_isam_p(const char *name, const char *info, int su_code = 0; int len = key_SU_decode (&su_code, name); - term_untrans (p->zh, p->reg_type, term_tmp, name+len+1); + zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len+1); yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp); zebraExplain_lookup_ord (p->zh->reg->zei, su_code, &db, &set, &use); @@ -987,7 +986,8 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, struct grep_info *grep_info, int reg_type, int complete_flag, int num_bases, char **basenames, - char *term_dst, int xpath_use); + char *term_dst, int xpath_use, + struct ord_list **ol); static ZEBRA_RES term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt, @@ -1003,11 +1003,12 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, struct rset_key_control *kc) { ZEBRA_RES res; + struct ord_list *ol; *rset = 0; grep_info->isam_p_indx = 0; res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info, reg_type, complete_flag, num_bases, basenames, - term_dst, xpath_use); + term_dst, xpath_use, &ol); if (res != ZEBRA_OK) return res; if (!*term_sub) /* no more terms ? */ @@ -1017,7 +1018,7 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, grep_info->isam_p_indx, term_dst, strlen(term_dst), rank_type, 1 /* preserve pos */, zapt->term->which, rset_nmem, - kc, kc->scope); + kc, kc->scope, ol, reg_type); if (!*rset) return ZEBRA_FAIL; return ZEBRA_OK; @@ -1036,7 +1037,8 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, struct grep_info *grep_info, int reg_type, int complete_flag, int num_bases, char **basenames, - char *term_dst, int xpath_use) + char *term_dst, int xpath_use, + struct ord_list **ol) { char term_dict[2*IT_MAX_WORD+4000]; int j, r, base_no; @@ -1054,6 +1056,9 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int errCode = 0; /* err code (if any is not OK) */ char *errString = 0; /* addinfo */ + + *ol = ord_list_create(stream); + rpn_char_map_prepare (zh->reg, reg_type, &rcmi); attr_init(&use, zapt, 1); use_value = attr_find_ex(&use, &curAttributeSet, &use_string); @@ -1122,6 +1127,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, term_dict[prefix_len++] = ord_buf[i]; } attp.local_attributes = 0; /* no more attributes */ + *ol = ord_list_append(stream, *ol, ord); } else { @@ -1167,6 +1173,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, local_attr->local); if (ord < 0) continue; + *ol = ord_list_append(stream, *ol, ord); if (prefix_len) term_dict[prefix_len++] = '|'; else @@ -1523,7 +1530,23 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, return ZEBRA_OK; } - +/** + \brief Create result set(s) for list of terms + \param zh Zebra Handle + \param termz_org term as used in query but converted to UTF-8 + \param attributeSet default attribute set + \param stream memory for result + \param reg_type register type ('w', 'p',..) + \param complete_flag whether it's phrases or not + \param rank_type term flags for ranking + \param xpath_use use attribute for X-Path (-1 for no X-path) + \param num_bases number of databases + \param basenames array of databases + \param rset_mem memory for result sets + \param result_sets output result set for each term in list (output) + \param number number of output result sets + \param kc rset key control to be used for created result sets +*/ static ZEBRA_RES term_list_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, @@ -1608,7 +1631,7 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, if (res != ZEBRA_OK) return res; if (num_result_sets == 0) - *rset = rsnull_create (rset_nmem, kc); + *rset = rsnull_create (rset_nmem, kc, 0); else if (num_result_sets == 1) *rset = result_sets[0]; else @@ -1646,11 +1669,11 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, if (res != ZEBRA_OK) return res; if (num_result_sets == 0) - *rset = rsnull_create (rset_nmem, kc); + *rset = rsnull_create (rset_nmem, kc, 0); else if (num_result_sets == 1) *rset = result_sets[0]; else - *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, + *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */, num_result_sets, result_sets); if (!*rset) return ZEBRA_FAIL; @@ -1683,7 +1706,7 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, if (res != ZEBRA_OK) return res; if (num_result_sets == 0) - *rset = rsnull_create (rset_nmem, kc); + *rset = rsnull_create (rset_nmem, kc, 0); else if (num_result_sets == 1) *rset = result_sets[0]; else @@ -1942,7 +1965,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, strlen(term_dst), rank_type, 0 /* preserve position */, zapt->term->which, rset_nmem, - kc, kc->scope); + kc, kc->scope, 0, reg_type); if (!result_sets[num_result_sets]) break; num_result_sets++; @@ -1956,7 +1979,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, return ZEBRA_FAIL; } if (num_result_sets == 0) - *rset = rsnull_create(rset_nmem, kc); + *rset = rsnull_create(rset_nmem, kc, 0); if (num_result_sets == 1) *rset = result_sets[0]; else @@ -1998,6 +2021,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, oid_value attributeSet, NMEM stream, Z_SortKeySpecList *sort_sequence, const char *rank_type, + NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) { @@ -2066,7 +2090,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, sks->which = Z_SortKeySpec_null; sks->u.null = odr_nullval (); sort_sequence->specs[i] = sks; - *rset = rsnull_create (NULL, kc); + *rset = rsnull_create (rset_nmem, kc, 0); return ZEBRA_OK; } @@ -2106,10 +2130,10 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, const char *flags = "void"; if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL) - return rsnull_create(rset_nmem, kc); + return rsnull_create(rset_nmem, kc, 0); if (ord < 0) - return rsnull_create(rset_nmem, kc); + return rsnull_create(rset_nmem, kc, 0); if (prefix_len) term_dict[prefix_len++] = '|'; else @@ -2135,7 +2159,7 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, rset = rset_trunc(zh, grep_info.isam_p_buf, grep_info.isam_p_indx, term, strlen(term), flags, 1, term_type,rset_nmem, - kc, kc->scope); + kc, kc->scope, 0, reg_type); grep_info_delete(&grep_info); return rset; } @@ -2327,15 +2351,24 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (sort_flag) return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence, - rank_type, rset, kc); + rank_type, rset_nmem, rset, kc); + /* consider if an X-Path query is used */ xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream); if (xpath_len >= 0) { - xpath_use = 1016; - if (xpath[xpath_len-1].part[0] == '@') - xpath_use = 1015; - } - + xpath_use = 1016; /* searching for element by default */ + if (xpath[xpath_len-1].part[0] == '@') + xpath_use = 1015; /* last step an attribute .. */ + } + + /* search using one of the various search type strategies + termz is our UTF-8 search term + attributeSet is top-level default attribute set + stream is ODR for search + reg_id is the register type + complete_flag is 1 for complete subfield, 0 for incomplete + xpath_use is use-attribute to be used for X-Path search, 0 for none + */ if (!strcmp(search_type, "phrase")) { res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream, @@ -2506,7 +2539,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, break; case Z_Operator_or: rset = rsmulti_or_create(rset_nmem, kc, - kc->scope, + kc->scope, 0, /* termid */ *num_result_sets, *result_sets); break; case Z_Operator_and_not: @@ -2633,13 +2666,13 @@ static int scan_handle (char *name, const char *info, int pos, void *client) return 0; } -static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type, - char **dst, const char *src) +void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type, + char **dst, const char *src) { char term_src[IT_MAX_WORD]; char term_dst[IT_MAX_WORD]; - term_untrans (zh, reg_type, term_src, src); + zebra_term_untrans (zh, reg_type, term_src, src); if (zh->iconv_from_utf8 != 0) { @@ -2925,12 +2958,12 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, if (lo >= 0) { /* get result set for first term */ - scan_term_untrans(zh, stream->mem, reg_id, - &glist[lo].term, mterm); + zebra_term_untrans_iconv(zh, stream->mem, reg_id, + &glist[lo].term, mterm); rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which, rset_nmem, - kc, kc->scope); + kc, kc->scope, 0, reg_id); } ptr[j0]++; /* move index for this set .. */ /* get result set for remaining scan terms */ @@ -2951,9 +2984,10 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which,rset_nmem, - kc, kc->scope); + kc, kc->scope, 0, reg_id); rset = rsmulti_or_create(rset_nmem, kc, - 2, kc->scope, rsets); + kc->scope, 0 /* termid */, + 2, rsets); } ptr[j]++; } @@ -2968,7 +3002,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, rsets[1] = rset_dup(limit_set); rset = rsmulti_and_create(rset_nmem, kc, - kc->scope, 2, rsets); + kc->scope, + 2, rsets); } /* count it */ count_set(rset, &glist[lo].occurrences); @@ -3012,14 +3047,14 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, if (j0 == -1) break; - scan_term_untrans (zh, stream->mem, reg_id, - &glist[lo].term, mterm); + zebra_term_untrans_iconv(zh, stream->mem, reg_id, + &glist[lo].term, mterm); rset = rset_trunc (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1, glist[lo].term, strlen(glist[lo].term), - NULL, 0, zapt->term->which,rset_nmem, - kc, kc->scope); + NULL, 0, zapt->term->which, rset_nmem, + kc, kc->scope, 0, reg_id); ptr[j0]++; @@ -3038,9 +3073,9 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which, rset_nmem, - kc, kc->scope); + kc, kc->scope, 0, reg_id); rset = rsmulti_or_create(rset_nmem, kc, - 2, kc->scope, rsets); + kc->scope, 0 /* termid */, 2, rsets); ptr[j]++; }