2 * Copyright (C) 1994-1995, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.6 1995-10-10 14:00:04 adam
8 * Function rset_open changed its wflag parameter to general flags.
10 * Revision 1.5 1995/10/06 14:38:06 adam
11 * New result set method: r_score.
12 * Local no (sysno) and score is transferred to retrieveCtrl.
14 * Revision 1.4 1995/09/14 07:48:56 adam
15 * Other score calculation.
17 * Revision 1.3 1995/09/11 15:23:40 adam
18 * More work on relevance search.
20 * Revision 1.2 1995/09/11 13:09:41 adam
21 * More work on relevance feedback.
23 * Revision 1.1 1995/09/08 14:52:42 adam
24 * Work on relevance feedback.
/*
 * Forward declarations of the file-local functions of this result set type.
 * All of them are static, so outside this file they can only be reached
 * through function pointers (presumably via the `control` record declared
 * below -- its initializer is not visible in this view).
 */
37 static rset_control *r_create(const struct rset_control *sel, void *parms);
38 static RSFD r_open (rset_control *ct, int flag);
39 static void r_close (RSFD rfd);
40 static void r_delete (rset_control *ct);
41 static void r_rewind (RSFD rfd);
42 static int r_count (rset_control *ct);
43 static int r_read (RSFD rfd, void *buf);
44 static int r_write (RSFD rfd, const void *buf);
45 static int r_score (RSFD rfd, int *score);
/*
 * `control` is the file-local control record of this result set type and
 * `rset_kind_relevance` is the exported pointer to it.
 * NOTE(review): the initializer of `control` is not visible in this view.
 */
47 static const rset_control control =
62 const rset_control *rset_kind_relevance = &control;
/*
 * Shared state of one relevance result set. It is allocated in r_create,
 * filled through add_rec and read by the r_* accessors.
 * NOTE(review): members such as key_size, max_rec and no_rec are used via
 * `info->` elsewhere in this file, but their declarations are not visible
 * in this view.
 */
64 struct rset_rel_info {
/* key comparison callback; r_create copies it from the creation parms */
68 int (*cmp)(const void *p1, const void *p2);
69 char *key_buf; /* key buffer: max_rec slots of key_size bytes each */
70 float *score_buf; /* score buffer: one score per key slot */
71 int *sort_idx; /* score sorted index: slot numbers ordered by score */
72 int *sysno_idx; /* sysno sorted index (ring buffer) of slot numbers */
73 int sysno_idx_p; /* next write position in sysno_idx; wraps at max_rec */
/* head of the list of open read descriptors (r_open pushes, r_close unlinks) */
74 struct rset_rel_rfd *rfd_list;
/*
 * Members of the per-open read descriptor (struct rset_rel_rfd).
 * NOTE(review): the struct header and any further members (for example the
 * `position` member used by r_open, r_rewind, r_read and r_score) are not
 * visible in this view.
 */
/* next descriptor in the owning result set's rfd_list */
80 struct rset_rel_rfd *next;
/* shared result set state this descriptor reads from */
81 struct rset_rel_info *info;
/*
 * add_rec: offer one (score, key) pair to the bounded score table.
 *
 * info  - result set state holding the table
 * score - relevance score of the candidate (logged at debug level)
 * key   - key_size bytes; copied into the table if the candidate is kept
 *
 * sort_idx[0 .. no_rec-1] holds slot numbers. The scan stops at the first
 * entry whose stored score is >= the new score. While fewer than max_rec
 * records are stored, the new record takes slot number no_rec and is
 * inserted at that point. Once the table is full, the slot referenced by
 * sort_idx[0] is reused, unless the score is rejected as too low.
 * NOTE(review): several lines are not visible in this view (the statement
 * after the score comparison, the no_rec increment, the exact "too low"
 * test and any adjustment of i before the "make room" loop), so the
 * description above is partly inferred -- confirm against the full file.
 */
84 static void add_rec (struct rset_rel_info *info, double score, void *key)
88 logf (LOG_DEBUG, "add %f", score);
/* find position i in the score-ordered index for the new score */
89 for (i = 0; i<info->no_rec; i++)
91 idx = info->sort_idx[i];
92 if (score <= info->score_buf[idx])
95 if (info->no_rec < info->max_rec)
96 { /* there is room for this entry */
/* shift entries i .. no_rec-1 up by one; the loop ends with j == i */
97 for (j = info->no_rec; j > i; --j)
98 info->sort_idx[j] = info->sort_idx[j-1];
/* the new record occupies the next unused slot */
99 idx = info->sort_idx[j] = info->no_rec;
103 return; /* score too low */
106 idx = info->sort_idx[0]; /* remove this entry */
109 for (j = 0; j < i; ++j) /* make room */
110 info->sort_idx[j] = info->sort_idx[j+1];
111 info->sort_idx[j] = idx; /* allocate sort entry */
/* note the slot in the ring buffer and advance, wrapping at max_rec */
113 info->sysno_idx[info->sysno_idx_p] = idx;
114 if (++(info->sysno_idx_p) == info->max_rec)
115 info->sysno_idx_p = 0;
/* store key bytes and score in slot idx */
117 memcpy (info->key_buf + idx*info->key_size, key, info->key_size);
118 info->score_buf[idx] = score;
/*
 * relevance: scan the ISAM positions named in parms and pass a score for
 * each candidate key to add_rec.
 *
 * info  - result set state; supplies key_size and receives the records
 * parms - supplies no_isam_positions, isam_positions[], the ISAM handle
 *         `is` and the key comparison function cmp
 *
 * For every position i the function keeps a key buffer (isam_buf[i]), a
 * position handle (isam_pt[i]), the result of the latest is_readkey call
 * (isam_r[i]), the value returned by is_numkeys (max_tf[i]) and a weight
 * (wgt[i]).
 * NOTE(review): large parts of the main loop, the score formula and the
 * cleanup are not visible in this view; the comments below describe only
 * the visible lines.
 */
121 static void relevance (struct rset_rel_info *info, rset_relevance_parms *parms)
131 logf (LOG_DEBUG, "relevance");
/* each of these arrays has one entry per ISAM position */
132 isam_buf = xmalloc (parms->no_isam_positions * sizeof(*isam_buf));
133 isam_r = xmalloc (sizeof (*isam_r) * parms->no_isam_positions);
134 isam_pt = xmalloc (sizeof (*isam_pt) * parms->no_isam_positions);
/* scratch copy of the key currently being scored */
135 isam_tmp_buf = xmalloc (info->key_size);
136 max_tf = xmalloc (sizeof (*max_tf) * parms->no_isam_positions);
137 wgt = xmalloc (sizeof (*wgt) * parms->no_isam_positions);
/* open every position and read its first key */
139 for (i = 0; i<parms->no_isam_positions; i++)
141 isam_buf[i] = xmalloc (info->key_size);
142 isam_pt[i] = is_position (parms->is, parms->isam_positions[i]);
143 max_tf [i] = is_numkeys (isam_pt[i]);
144 isam_r[i] = is_readkey (isam_pt[i], isam_buf[i]);
145 logf (LOG_DEBUG, "max tf %d = %d", i, max_tf[i]);
152 /* find min with lowest sysno */
153 for (i = 0; i<parms->no_isam_positions; i++)
155 (min < 0 || (*parms->cmp)(isam_buf[i], isam_buf[min]) < 1))
/* keep a copy of the selected key: isam_buf[] entries are overwritten by later reads */
159 memcpy (isam_tmp_buf, isam_buf[min], info->key_size);
160 /* calculate for all with those sysno */
161 for (i = 0; i<parms->no_isam_positions; i++)
166 r = (*parms->cmp)(isam_buf[i], isam_tmp_buf);
/* keep reading while the position yields keys and cmp against the saved key is <= 1 */
177 isam_r[i] = is_readkey (isam_pt[i], isam_buf[i]);
178 } while (isam_r[i] &&
179 (*parms->cmp)(isam_buf[i], isam_tmp_buf) <= 1);
/* weight = 0.1 + 0.9 * tf / max_tf[i] */
/* NOTE(review): if is_numkeys can return 0 this divides by zero -- confirm */
180 wgt[i] = 0.1+tf*0.9/max_tf[i];
183 /* calculate relevance value */
185 for (i = 0; i<parms->no_isam_positions; i++)
187 /* if value is in the top score, then save it - don't emit yet */
188 add_rec (info, score, isam_tmp_buf);
/* release the per-position handles */
190 for (i = 0; i<parms->no_isam_positions; i++)
192 is_pt_free (isam_pt[i]);
196 xfree (isam_tmp_buf);
/*
 * r_create: build a new relevance result set.
 *
 * sel   - control record that is copied into the new instance
 * parms - treated as a rset_relevance_parms *; supplies key_size, max_rec,
 *         cmp and the input consumed by relevance()
 *
 * Allocates the control copy, the rset_rel_info state (stored in
 * newct->buf) and its four arrays, each sized for max_rec entries, then
 * calls relevance() to populate them. The asserts reject key_size <= 1 and
 * max_rec <= 1.
 * NOTE(review): the assignment of `info`, the initialisation of no_rec and
 * the return statement are not visible in this view.
 */
203 static rset_control *r_create (const struct rset_control *sel, void *parms)
206 rset_relevance_parms *r_parms = parms;
207 struct rset_rel_info *info;
/* start from a byte copy of the template control record */
209 newct = xmalloc(sizeof(*newct));
210 memcpy(newct, sel, sizeof(*sel));
211 newct->buf = xmalloc (sizeof(struct rset_rel_info));
214 info->key_size = r_parms->key_size;
215 assert (info->key_size > 1);
216 info->max_rec = r_parms->max_rec;
217 assert (info->max_rec > 1);
218 info->cmp = r_parms->cmp;
/* storage for at most max_rec records */
220 info->key_buf = xmalloc (info->key_size * info->max_rec);
221 info->score_buf = xmalloc (sizeof(*info->score_buf) * info->max_rec);
222 info->sort_idx = xmalloc (sizeof(*info->sort_idx) * info->max_rec);
223 info->sysno_idx = xmalloc (sizeof(*info->sysno_idx) * info->max_rec);
224 info->sysno_idx_p = 0;
/* no read descriptors are open yet */
226 info->rfd_list = NULL;
/* compute the scores and fill the table */
228 relevance (info, r_parms);
/*
 * r_open: open a read descriptor on the result set.
 *
 * ct   - result set; ct->buf holds the rset_rel_info state
 * flag - open flags; RSETF_WRITE triggers a fatal log message
 *
 * The new descriptor is pushed onto the head of info->rfd_list and its
 * position starts at info->no_rec.
 * NOTE(review): what follows the fatal log, the setting of rfd->info and
 * the return value are not visible in this view.
 */
232 static RSFD r_open (rset_control *ct, int flag)
234 struct rset_rel_rfd *rfd;
235 struct rset_rel_info *info = ct->buf;
237 if (flag & RSETF_WRITE)
239 logf (LOG_FATAL, "relevance set type is read-only");
242 rfd = xmalloc (sizeof(*rfd));
/* link the descriptor in at the head of the list */
244 rfd->next = info->rfd_list;
245 info->rfd_list = rfd;
246 rfd->position = info->no_rec;
/*
 * r_close: remove a read descriptor from its result set's rfd_list.
 *
 * rfd - descriptor to close (a struct rset_rel_rfd *)
 *
 * The list is walked through a pointer to each link, so a node can be
 * unlinked by overwriting the link that points at it. A fatal message is
 * logged for the "no rfd match" case.
 * NOTE(review): the match test, the release of the descriptor and the
 * return after a successful unlink are not visible in this view.
 */
251 static void r_close (RSFD rfd)
253 struct rset_rel_info *info = ((struct rset_rel_rfd*)rfd)->info;
254 struct rset_rel_rfd **rfdp;
256 for (rfdp = &info->rfd_list; *rfdp; rfdp = &(*rfdp)->next)
/* bypass the current node */
259 *rfdp = (*rfdp)->next;
263 logf (LOG_FATAL, "r_close but no rfd match!");
/*
 * r_delete: release the buffers owned by the result set.
 *
 * ct - result set; ct->buf holds the rset_rel_info state
 *
 * Asserts that no read descriptor is still open, then frees the four
 * arrays allocated in r_create.
 * NOTE(review): freeing of `info` and `ct` themselves is not visible in
 * this view.
 */
267 static void r_delete (rset_control *ct)
269 struct rset_rel_info *info = ct->buf;
271 assert (info->rfd_list == NULL);
272 xfree (info->key_buf);
273 xfree (info->score_buf);
274 xfree (info->sort_idx);
275 xfree (info->sysno_idx);
/*
 * r_rewind: reset a read descriptor to its starting position.
 *
 * rfd - descriptor to reset (a struct rset_rel_rfd *)
 *
 * The position is set to info->no_rec, the same value r_open assigns.
 */
280 static void r_rewind (RSFD rfd)
282 struct rset_rel_rfd *p = rfd;
283 struct rset_rel_info *info = p->info;
285 p->position = info->no_rec;
/*
 * r_count: report a count for the result set.
 *
 * ct - result set; ct->buf holds the rset_rel_info state
 *
 * NOTE(review): the return statement is not visible in this view;
 * presumably it returns info->no_rec -- confirm.
 */
288 static int r_count (rset_control *ct)
290 struct rset_rel_info *info = ct->buf;
/*
 * r_read: read a key through a descriptor.
 *
 * rfd - read descriptor (a struct rset_rel_rfd *)
 * buf - destination for the key
 *
 * A position <= 0 is handled as a special case (presumably end of data).
 * Otherwise the key comes from the key_buf slot that
 * sort_idx[p->position] selects.
 * NOTE(review): the statement guarded by the position test, any update of
 * p->position, the start of the copy call and the return values are not
 * visible in this view.
 */
295 static int r_read (RSFD rfd, void *buf)
297 struct rset_rel_rfd *p = rfd;
298 struct rset_rel_info *info = p->info;
300 if (p->position <= 0)
304 info->key_buf + info->key_size * info->sort_idx[p->position],
/*
 * r_score: report the score of the record at the descriptor's position.
 *
 * rfd   - read descriptor (a struct rset_rel_rfd *)
 * score - receives the stored score multiplied by 1000 and converted to
 *         int (the cast discards the fractional part)
 *
 * NOTE(review): any position check before the assignment and the return
 * value are not visible in this view.
 */
309 static int r_score (RSFD rfd, int *score)
311 struct rset_rel_rfd *p = rfd;
312 struct rset_rel_info *info = p->info;
316 *score = (int) (1000*info->score_buf[info->sort_idx[p->position]]);
/*
 * r_write: write entry point of a read-only result set type.
 *
 * rfd - descriptor (unused in the visible lines)
 * buf - data to write (unused in the visible lines)
 *
 * Logs a fatal message saying the type is read-only.
 * NOTE(review): the return value is not visible in this view.
 */
320 static int r_write (RSFD rfd, const void *buf)
322 logf (LOG_FATAL, "relevance set type is read-only");