1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2011 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 #include <yaz/wrbuf.h>
25 #include <idzebra/snippet.h>
/* Container for a list of snippet words.
 * front is where iteration over the words starts (see the loop in
 * zebra_snippets_log, which follows w->next from l->front); tail is
 * presumably the last word of the list -- TODO confirm.
 * NOTE(review): other functions in this file read l->nmem, so a further
 * member is expected here that is not visible in this view. */
27 struct zebra_snippets {
29 zebra_snippet_word *front;
30 zebra_snippet_word *tail;
/* Create a new, empty snippet list.
 * A fresh NMEM handle is created and the list object itself is allocated
 * from that handle with nmem_malloc.
 * NOTE(review): the statement that stores the handle in the object and the
 * return statement are not visible in this view -- confirm in the full file. */
33 zebra_snippets *zebra_snippets_create(void)
35 NMEM nmem = nmem_create();
36 zebra_snippets *l = nmem_malloc(nmem, sizeof(*l));
/* the list starts out with no words */
38 l->front = l->tail = 0;
/* Destroy snippet list l by destroying its NMEM handle.
 * NOTE(review): zebra_snippets_create allocates the list object from an
 * NMEM, so presumably this call also releases l itself and every word
 * allocated from l->nmem -- confirm that l->nmem is that same handle.
 * Any guard against a null l is not visible in this view. */
42 void zebra_snippets_destroy(zebra_snippets *l)
45 nmem_destroy(l->nmem);
/* Append a word given as a NUL-terminated string.
 * Forwards to zebra_snippets_append_match using strlen(term) as the term
 * length and 0 as the last argument (presumably the match flag -- see the
 * call in zebra_snippets_window that passes "match" in that position).
 * term must be a valid C string because strlen is applied to it. */
48 void zebra_snippets_append(zebra_snippets *l,
49 zint seqno, int ws, int ord, const char *term)
51 zebra_snippets_append_match(l, seqno, ws, ord, term, strlen(term), 0);
/* Append a word given as a buffer plus explicit length.
 * Forwards to zebra_snippets_append_match with term_len as the length and
 * 0 as the last argument.
 * NOTE(review): the declaration of the term_len parameter is not visible
 * in this view. */
54 void zebra_snippets_appendn(zebra_snippets *l,
55 zint seqno, int ws, int ord, const char *term,
58 zebra_snippets_append_match(l, seqno, ws, ord, term, term_len, 0);
/* Append a word to list l, copying the term text.
 * The word structure and the term copy are both allocated from l->nmem.
 * NOTE(review): the remaining parameter(s), the initialisation of the
 * word's other fields and the linking into the list are not visible in
 * this view. */
62 void zebra_snippets_append_match(zebra_snippets *l,
63 zint seqno, int ws, int ord,
64 const char *term, size_t term_len,
67 struct zebra_snippet_word *w = nmem_malloc(l->nmem, sizeof(*w));
/* copy exactly term_len bytes and NUL-terminate; the source buffer does
   not have to be NUL-terminated itself */
84 w->term = nmem_malloc(l->nmem, term_len+1);
85 memcpy(w->term, term, term_len);
86 w->term[term_len] = '\0';
/* Return a modifiable pointer into the words of list l. Callers in this
 * file use the result as the starting point of a loop that follows
 * w->next.
 * NOTE(review): the function body is not visible in this view. */
91 zebra_snippet_word *zebra_snippets_list(zebra_snippets *l)
/* Read-only counterpart of zebra_snippets_list: takes a const list and
 * returns a pointer to const words. Callers in this file iterate from the
 * result by following ->next.
 * NOTE(review): the function body is not visible in this view. */
96 const zebra_snippet_word *zebra_snippets_constlist(const zebra_snippets *l)
/* Write the words of list l to the YAZ log at log_level.
 * Walks the list from l->front via w->next.
 * NOTE(review): how the "all" parameter filters the output is not visible
 * in this view. */
101 void zebra_snippets_log(const zebra_snippets *l, int log_level, int all)
103 zebra_snippet_word *w;
104 for (w = l->front; w; w = w->next)
/* a temporary WRBUF holds the escaped form of the term for this entry */
106 WRBUF wr_term = wrbuf_alloc();
107 wrbuf_puts_escaped(wr_term, w->term);
/* "*" is printed after the term when w->match is set and w->ws is zero */
110 yaz_log(log_level, "term='%s'%s mark=%d seqno=" ZINT_FORMAT " ord=%d",
112 (w->match && !w->ws ? "*" : ""), w->mark,
/* release the temporary buffer allocated above */
114 wrbuf_destroy(wr_term);
/* Build a new snippet list holding, per ord, the words of doc that fall
 * inside a window of window_size sequence numbers chosen from the
 * positions of the words in hit.
 * The result is created with zebra_snippets_create and filled through
 * zebra_snippets_append_match.
 * NOTE(review): several lines of this function (the window_size parameter
 * declaration, the outer loop over ord, the counting of number_this, part
 * of the window_start expression, the return) are not visible in this
 * view; the comments below describe only what is visible. */
118 zebra_snippets *zebra_snippets_window(const zebra_snippets *doc,
119 const zebra_snippets *hit,
123 zebra_snippets *result = zebra_snippets_create();
/* a window_size of 0 is replaced by a very large window */
124 if (window_size == 0)
125 window_size = 1000000;
130 zint first_seq_no_best_window = 0;
131 zint last_seq_no_best_window = 0;
132 int number_best_window = 0;
133 const zebra_snippet_word *hit_w, *doc_w;
134 int min_ord = 0; /* not set yet */
/* find the smallest ord among the hit words that is greater than the
   current ord; min_ord == 0 means no candidate has been seen yet */
136 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
137 if (hit_w->ord > ord &&
138 (min_ord == 0 || hit_w->ord < min_ord))
140 min_ord = hit_w->ord;
/* try every hit word with the current ord as the start of a candidate
   window */
146 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
148 if (hit_w->ord == ord)
150 const zebra_snippet_word *look_w = hit_w;
152 zint seq_no_last = 0;
/* scan forward while the following hit words are still less than
   window_size sequence numbers past the start, remembering the seqno of
   the last one that has the same ord */
153 while (look_w && look_w->seqno < hit_w->seqno + window_size)
155 if (look_w->ord == ord)
157 seq_no_last = look_w->seqno;
160 look_w = look_w->next;
/* keep this candidate if its number_this beats the best seen so far
   (how number_this is counted is not visible in this view) */
162 if (number_this > number_best_window)
164 number_best_window = number_this;
165 first_seq_no_best_window = hit_w->seqno;
166 last_seq_no_best_window = seq_no_last;
/* debug output describing the chosen window */
170 yaz_log(YLOG_DEBUG, "ord=%d", ord);
171 yaz_log(YLOG_DEBUG, "first_seq_no_best_window=" ZINT_FORMAT,
172 first_seq_no_best_window);
173 yaz_log(YLOG_DEBUG, "last_seq_no_best_window=" ZINT_FORMAT,
174 last_seq_no_best_window);
175 yaz_log(YLOG_DEBUG, "number_best_window=%d", number_best_window);
/* window_start is derived from the first and last seqno of the best
   window; the remainder of the expression is not visible in this view */
177 window_start = (first_seq_no_best_window + last_seq_no_best_window -
/* select doc words of this ord with seqno in
   [window_start, window_start + window_size) */
179 for (doc_w = zebra_snippets_constlist(doc); doc_w; doc_w = doc_w->next)
180 if (doc_w->ord == ord
181 && doc_w->seqno >= window_start
182 && doc_w->seqno < window_start + window_size)
/* look for a hit word with the same ord and seqno as this doc word;
   presumably this determines "match" -- TODO confirm */
185 for (hit_w = zebra_snippets_constlist(hit); hit_w;
188 if (hit_w->ord == ord && hit_w->seqno == doc_w->seqno)
/* copy the doc word into the result, passing match as the last argument */
195 zebra_snippets_append_match(result, doc_w->seqno,
198 strlen(doc_w->term), match);
/* File-local helper that visits every word of sn, starting from
 * zebra_snippets_list(sn) and following w->next.
 * NOTE(review): the loop body is not visible in this view; presumably it
 * resets per-word state before zebra_snippets_ring marks words -- TODO
 * confirm. */
204 static void zebra_snippets_clear(zebra_snippets *sn)
206 zebra_snippet_word *w;
208 for (w = zebra_snippets_list(sn); w; w = w->next)
/* Search doc for a word that corresponds to a word in hit.
 * For each hit word, every doc word is compared against it (nested scan
 * over both lists).
 * NOTE(review): the rest of the comparison and the return statements are
 * not visible in this view; presumably the first matching doc word is
 * returned -- TODO confirm. */
215 const struct zebra_snippet_word *zebra_snippets_lookup(
216 const zebra_snippets *doc, const zebra_snippets *hit)
218 const zebra_snippet_word *hit_w;
219 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
221 const zebra_snippet_word *doc_w;
222 for (doc_w = zebra_snippets_constlist(doc); doc_w; doc_w = doc_w->next)
/* a candidate must have the same ord and the same seqno as the hit word */
224 if (doc_w->ord == hit_w->ord && doc_w->seqno == hit_w->seqno
/* Process the words of doc that lie around each hit word.
 * doc is first passed to zebra_snippets_clear. Then, for every hit word,
 * the doc word with the same ord and seqno is located and its neighbours
 * (via ->next and ->prev) are visited while their seqno lies strictly
 * between hit seqno - before and hit seqno + after.
 * NOTE(review): what is done to the visited words, the outer loop over ord
 * and the remaining loop conditions are not visible in this view; the
 * existing comments say the neighbouring terms are marked. */
234 void zebra_snippets_ring(zebra_snippets *doc, const zebra_snippets *hit,
235 int before, int after)
239 zebra_snippets_clear(doc);
242 const zebra_snippet_word *hit_w;
243 zebra_snippet_word *doc_w;
244 int min_ord = 0; /* not set yet */
/* find the smallest ord among the hit words that is greater than the
   current ord; min_ord == 0 means no candidate has been seen yet */
246 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
247 if (hit_w->ord > ord &&
248 (min_ord == 0 || hit_w->ord < min_ord))
250 min_ord = hit_w->ord;
/* handle every hit word that has the current ord */
256 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
258 if (hit_w->ord == ord)
/* locate the doc word at the same ord and seqno as the hit word */
260 for (doc_w = zebra_snippets_list(doc); doc_w; doc_w = doc_w->next)
262 if (doc_w->ord == ord && doc_w->seqno == hit_w->seqno
271 /* mark following terms */
274 zebra_snippet_word *w = doc_w->next;
277 && hit_w->seqno - before < w->seqno
278 && hit_w->seqno + after > w->seqno)
286 /* mark preceding terms */
289 zebra_snippet_word *w = doc_w->prev;
292 && hit_w->seqno - before < w->seqno
293 && hit_w->seqno + after > w->seqno)
310 * c-file-style: "Stroustrup"
311 * indent-tabs-mode: nil
313 * vim: shiftwidth=4 tabstop=8 expandtab