1 /* $Id: retrieve.c,v 1.70 2007-05-08 12:50:04 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36 #include <yaz/diagbib1.h>
38 #include <yaz/oid_db.h>
40 #define ZEBRA_XML_HEADER_STR "<record xmlns=\"http://www.indexdata.com/zebra/\""
/* Set up *stream for reading the content of record *rec.
 *
 * Two sources are visible here:
 *  - when (*rec)->size[recInfo_storeData] > 0 the stream is created over
 *    that in-memory buffer with zebra_create_stream_mem();
 *  - the remaining lines assemble a file name in full_rep from the
 *    record's recInfo_filename entry, open it read-only and create the
 *    stream with zebra_create_stream_fd().
 *
 * A failing open() is logged (with errno) and reported to the caller as
 * YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS.
 */
42 static int zebra_create_record_stream(ZebraHandle zh,
44 struct ZebraRecStream *stream)
46 RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, *rec);
48 if ((*rec)->size[recInfo_storeData] > 0)
/* content is stored with the record itself: read it from memory */
49 zebra_create_stream_mem(stream, (*rec)->info[recInfo_storeData],
50 (*rec)->size[recInfo_storeData]);
/* a non-absolute file name is prefixed with zh->path_reg and "/" when
   path_reg is set */
56 if (zh->path_reg && !yaz_is_abspath((*rec)->info[recInfo_filename])){
/* NOTE(review): strcpy/strcat are unbounded and the declaration of
   full_rep is not visible here -- confirm it can hold
   path_reg + "/" + filename */
57 strcpy(full_rep, zh->path_reg);
58 strcat(full_rep, "/");
59 strcat(full_rep, (*rec)->info[recInfo_filename]);
/* the stored file name is used unchanged (presumably the branch for an
   absolute name or an unset path_reg -- confirm) */
62 strcpy(full_rep, (*rec)->info[recInfo_filename]);
64 if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1){
65 yaz_log (YLOG_WARN|YLOG_ERRNO, "Retrieve fail; missing file: %s",
68 return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
/* the record's recordOffset attribute is handed to the fd-based stream */
70 zebra_create_stream_fd(stream, fd, recordAttr->recordOffset);
/* Split the part of an element set name held in elem into an index name
 * and a register type, reported through the index/index_len and
 * type/type_len output pairs.
 *
 * As far as the visible lines show, three shapes are told apart by the
 * first ':' that strchr() finds:
 *   no colon               *index_len = strlen(elem), no type
 *   colon, nothing after   no type
 *   colon and more text    *index_len = cp - elem, *type_len = strlen(cp+1)
 *
 * Callers in this file treat a zero return value as "element set name
 * not valid".
 */
77 static int parse_zebra_elem(const char *elem,
78 const char **index, size_t *index_len,
79 const char **type, size_t *type_len)
90 /* verify that '::' is in the beginning of *elem
91 and something more follows */
/* NOTE(review): (elem +1) and (elem +2) are pointer values, so the
   !(...) tests below cannot fire for a non-null elem; only the character
   comparisons do real work */
93 || !(elem +1) || ':' != *(elem +1)
94 || !(elem +2) || '\0' == *(elem +2))
97 /* pick out info from string after '::' */
99 cp = strchr(elem, ':');
101 if (!cp) /* index, no colon, no type */
104 *index_len = strlen(elem);
106 else if (cp[1] == '\0') /* colon, but no following type */
110 else /* index, colon and type */
/* index is the text before the colon, type is everything after it */
113 *index_len = cp - elem;
115 *type_len = strlen(cp+1);
/* Serve an "index" element set request from the sort register: read the
 * sort entry of record sysno for the named sort index and format it as
 * XML or SUTRS.
 *
 * Return values visible in this function:
 *   YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST
 *       input_format is neither XML nor SUTRS
 *   YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_
 *       parse_zebra_elem() rejected elemsetname, or the index name is
 *       empty or too long for retrieval_index_cstr
 *   -1  no register type was given, or the name is not a sort index
 *
 * On the formatting path *output_format is set to the XML or SUTRS OID,
 * and *rec_bufp / *rec_lenp receive an ODR-allocated copy of the text.
 * The copy is exactly *rec_lenp bytes long and is not NUL-terminated.
 */
122 int zebra_special_sort_fetch(ZebraHandle zh, zint sysno, ODR odr,
123 const char *elemsetname,
124 const Odr_oid *input_format,
125 const Odr_oid **output_format,
126 char **rec_bufp, int *rec_lenp)
128 const char *retrieval_index;
129 size_t retrieval_index_len;
130 const char *retrieval_type;
131 size_t retrieval_type_len;
132 char retrieval_index_cstr[256];
135 /* only accept XML and SUTRS requests */
136 if (oid_oidcmp(input_format, yaz_oid_recsyn_xml)
137 && oid_oidcmp(input_format, yaz_oid_recsyn_sutrs))
139 yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
142 return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
/* split elemsetname into index name and register type */
145 if (!parse_zebra_elem(elemsetname,
146 &retrieval_index, &retrieval_index_len,
147 &retrieval_type, &retrieval_type_len))
149 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
152 if (retrieval_type_len == 0)
153 return -1; /* must have a register type specified */
/* the index name must be non-empty and fit the local buffer */
154 if (!retrieval_index_len ||
155 retrieval_index_len >= sizeof(retrieval_index_cstr)-1)
157 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* retrieval_index is a pointer/length pair: make a NUL-terminated copy
   for the lookup below */
160 memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
161 retrieval_index_cstr[retrieval_index_len] = '\0';
/* look the name up among the sort indexes only */
163 ord = zebraExplain_lookup_attr_str(zh->reg->zei,
164 zinfo_index_category_sort,
166 retrieval_index_cstr);
168 return -1; /* is not a sort index */
171 char dst_buf[IT_MAX_WORD];
172 char str[IT_MAX_WORD];
175 const char *string_index = 0;
176 WRBUF wrbuf = wrbuf_alloc();
/* select record and sort index, then read the stored sort term */
178 zebra_sort_sysno(zh->reg->sort_index, sysno);
179 zebra_sort_type(zh->reg->sort_index, ord);
180 zebra_sort_read(zh->reg->sort_index, str);
182 zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db, &string_index);
/* dst_buf receives the term after zebra_term_untrans(); that is the
   text placed in the output below */
184 zebra_term_untrans(zh, index_type, dst_buf, str);
187 if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml))
189 *output_format = yaz_oid_recsyn_xml;
190 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
191 " sysno=\"" ZINT_FORMAT "\""
192 " set=\"zebra::index%s/\">\n",
195 wrbuf_printf(wrbuf, " <index name=\"%s\"",
197 wrbuf_printf(wrbuf, " type=\"%c\">", index_type);
198 wrbuf_xmlputs(wrbuf, dst_buf);
199 wrbuf_printf(wrbuf, "</index>\n");
200 wrbuf_printf(wrbuf, "</record>\n");
202 else if (!oid_oidcmp(input_format, yaz_oid_recsyn_sutrs))
204 *output_format = yaz_oid_recsyn_sutrs;
206 wrbuf_printf(wrbuf, "%s %c %s\n", string_index, index_type,
/* hand the text to the caller in ODR memory; the buffer holds exactly
   *rec_lenp bytes, without a terminating NUL */
209 *rec_lenp = wrbuf_len(wrbuf);
210 *rec_bufp = odr_malloc(odr, *rec_lenp);
211 memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
212 wrbuf_destroy(wrbuf);
/* Serve an "index" element set request from the keys stored with a
 * record: list the index entries found in rec->info[recInfo_delKeys],
 * optionally restricted to one index name and/or one register type, as
 * XML or SUTRS.
 *
 * Return values visible in this function:
 *   YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST
 *       input_format is neither XML nor SUTRS
 *   YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_
 *       elemsetname could not be parsed, the type is longer than one
 *       character, or the named index is unknown
 *   YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS (via ret_code)
 *       the stored keys could not be rewound
 *
 * On the formatting path *output_format is set to input_format, and
 * *rec_bufp / *rec_lenp receive an ODR-allocated copy of the text. The
 * copy is exactly *rec_lenp bytes long and is not NUL-terminated.
 */
217 int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr,
219 const char *elemsetname,
220 const Odr_oid *input_format,
221 const Odr_oid **output_format,
222 char **rec_bufp, int *rec_lenp)
224 const char *retrieval_index;
225 size_t retrieval_index_len;
226 const char *retrieval_type;
227 size_t retrieval_type_len;
228 zebra_rec_keys_t keys;
231 /* set output variables before processing possible error states */
234 /* only accept XML and SUTRS requests */
235 if (oid_oidcmp(input_format, yaz_oid_recsyn_xml)
236 && oid_oidcmp(input_format, yaz_oid_recsyn_sutrs))
238 yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
241 return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
/* split elemsetname into index name and register type */
244 if (!parse_zebra_elem(elemsetname,
245 &retrieval_index, &retrieval_index_len,
246 &retrieval_type, &retrieval_type_len))
247 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* a register type, when present, is a single character */
249 if (retrieval_type_len != 0 && retrieval_type_len != 1)
251 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* when an index name was given, check that it exists before doing any
   work on the record */
254 if (retrieval_index_len)
256 char retrieval_index_cstr[256];
/* the lookup is only done when the name fits the local buffer */
258 if (retrieval_index_len < sizeof(retrieval_index_cstr) -1)
260 memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
261 retrieval_index_cstr[retrieval_index_len] = '\0';
/* -1 is passed as the type when none was requested */
263 if (zebraExplain_lookup_attr_str(zh->reg->zei,
264 zinfo_index_category_index,
265 (retrieval_type_len == 0 ? -1 :
267 retrieval_index_cstr) == -1)
268 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* iterate over the keys stored with the record */
272 keys = zebra_rec_keys_open();
273 zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys],
274 rec->size[recInfo_delKeys], 0);
276 if (!zebra_rec_keys_rewind(keys))
278 ret_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
284 struct it_key key_in;
285 WRBUF wrbuf = wrbuf_alloc();
287 if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml))
289 *output_format = input_format;
290 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
291 " sysno=\"" ZINT_FORMAT "\""
292 " set=\"zebra::index%s/\">\n",
295 else if (!oid_oidcmp(input_format, yaz_oid_recsyn_sutrs))
296 *output_format = input_format;
/* one output entry per stored key that passes the filters below */
298 while (zebra_rec_keys_read(keys, &str, &slen, &key_in))
/* the first key component is used as the index ordinal */
301 int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
304 const char *string_index = 0;
305 size_t string_index_len;
306 char dst_buf[IT_MAX_WORD];
308 zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db,
/* NOTE(review): string_index starts out as 0; strlen() below relies on
   zebraExplain_lookup_ord() always setting it -- confirm */
310 string_index_len = strlen(string_index);
312 /* process only if index is not defined,
313 or if defined and matching */
314 if (retrieval_index == 0
315 || (string_index_len == retrieval_index_len
316 && !memcmp(string_index, retrieval_index,
319 /* process only if type is not defined, or is matching */
320 if (retrieval_type == 0
321 || (retrieval_type_len == 1
322 && retrieval_type[0] == index_type))
324 zebra_term_untrans(zh, index_type, dst_buf, str);
/* XML: one <index> element per key; the last key component is written
   as the seq attribute */
327 if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml))
329 wrbuf_printf(wrbuf, " <index name=\"%s\"",
332 wrbuf_printf(wrbuf, " type=\"%c\"", index_type);
334 wrbuf_printf(wrbuf, " seq=\"" ZINT_FORMAT "\">",
335 key_in.mem[key_in.len -1]);
337 wrbuf_xmlputs(wrbuf, dst_buf);
338 wrbuf_printf(wrbuf, "</index>\n");
/* SUTRS: index name, type, key components from position 1 on, then the
   term, one line per key */
342 wrbuf_printf(wrbuf, "%s ", string_index);
344 wrbuf_printf(wrbuf, "%c", index_type);
346 for (i = 1; i < key_in.len; i++)
347 wrbuf_printf(wrbuf, " " ZINT_FORMAT,
350 /* zebra_term_untrans(zh, index_type, dst_buf, str); */
351 wrbuf_printf(wrbuf, " %s", dst_buf);
353 wrbuf_printf(wrbuf, "\n");
360 if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml))
361 wrbuf_printf(wrbuf, "</record>\n");
/* hand the text to the caller in ODR memory; the buffer holds exactly
   *rec_lenp bytes, without a terminating NUL */
362 *rec_lenp = wrbuf_len(wrbuf);
363 *rec_bufp = odr_malloc(odr, *rec_lenp);
364 memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
365 wrbuf_destroy(wrbuf);
367 zebra_rec_keys_close(keys);
/* Append an XML attribute of the form  name="value"  (preceded by a
 * space) to wrbuf. The value is written with wrbuf_xmlputs() instead of
 * going through the printf format.
 */
372 static void retrieve_puts_attr(WRBUF wrbuf, const char *name,
377 wrbuf_printf(wrbuf, " %s=\"", name);
378 wrbuf_xmlputs(wrbuf, value);
379 wrbuf_printf(wrbuf, "\"");
/* Append an XML attribute of the form  name="value"  (preceded by a
 * space) to wrbuf, with value formatted by %i.
 */
383 static void retrieve_puts_attr_int(WRBUF wrbuf, const char *name,
386 wrbuf_printf(wrbuf, " %s=\"%i\"", name, value);
/* Append one plain-text line "name value" followed by a newline to
 * wrbuf; value is formatted by %s.
 */
389 static void retrieve_puts_str(WRBUF wrbuf, const char *name,
393 wrbuf_printf(wrbuf, "%s %s\n", name, value);
/* Append one plain-text line "name value" followed by a newline to
 * wrbuf; value is formatted by %i.
 */
396 static void retrieve_puts_int(WRBUF wrbuf, const char *name,
399 wrbuf_printf(wrbuf, "%s %i\n", name, value);
/* Serve the special element sets for record sysno. elemsetname is
 * compared here against "meta::sysno", "index" (prefix match), "data"
 * and "meta", in that order:
 *
 *   "meta::sysno"  the system number only, as SUTRS or XML, without
 *                  reading the record
 *   "index..."     first tried through zebra_special_sort_fetch(); after
 *                  the record has been fetched,
 *                  zebra_special_index_fetch() is used
 *   "data"         the record content, copied through a record stream
 *   "meta"         system number, database, file name, record type,
 *                  score, rank and size, as XML or SUTRS
 *
 * Results are returned through *output_format, *rec_bufp and *rec_lenp;
 * buffers are allocated from odr. Return values visible here are the
 * YAZ_BIB1_* diagnostics on the error paths; the final statement returns
 * YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_.
 */
402 int zebra_special_fetch(ZebraHandle zh, zint sysno, int score, ODR odr,
403 const char *elemsetname,
404 const Odr_oid *input_format,
405 const Odr_oid **output_format,
406 char **rec_bufp, int *rec_lenp)
410 /* set output variables before processing possible error states */
415 /* processing zebra::meta::sysno elemset without fetching binary data */
416 if (elemsetname && 0 == strcmp(elemsetname, "meta::sysno"))
419 WRBUF wrbuf = wrbuf_alloc();
/* SUTRS: the bare number; XML: an empty <record> element carrying it as
   the sysno attribute */
420 if (!oid_oidcmp(input_format, yaz_oid_recsyn_sutrs))
422 wrbuf_printf(wrbuf, ZINT_FORMAT, sysno);
423 *output_format = input_format;
425 else if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml))
427 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
428 " sysno=\"" ZINT_FORMAT "\"/>\n",
430 *output_format = input_format;
/* presumably an empty wrbuf (neither syntax matched) selects the
   "no syntaxes available" result below -- the condition is not visible
   here, confirm */
432 *rec_lenp = wrbuf_len(wrbuf);
434 *rec_bufp = odr_strdup(odr, wrbuf_cstr(wrbuf));
436 ret = YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
437 wrbuf_destroy(wrbuf);
441 /* processing special elementsetname zebra::index:: for sort elements */
442 if (elemsetname && 0 == strncmp(elemsetname, "index", 5))
444 int ret = zebra_special_sort_fetch(zh, sysno, odr,
446 input_format, output_format,
450 /* not a sort index so we continue to get the full record */
454 /* every remaining element set needs the stored record: fetch it */
455 rec = rec_get(zh->reg->records, sysno);
458 yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
459 return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
462 /* processing special elementsetnames zebra::data */
463 if (elemsetname && 0 == strcmp(elemsetname, "data"))
465 struct ZebraRecStream stream;
466 RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec);
/* NOTE(review): the return value of zebra_create_record_stream() is
   discarded here, while that function can fail (missing file); stream
   is then used below regardless -- confirm and handle the error */
467 zebra_create_record_stream(zh, &rec, &stream);
468 *output_format = input_format;
/* copy recordSize bytes of record content into ODR memory */
469 *rec_lenp = recordAttr->recordSize;
470 *rec_bufp = (char *) odr_malloc(odr, *rec_lenp);
471 stream.readf(&stream, *rec_bufp, *rec_lenp);
472 stream.destroy(&stream);
477 /* only accept XML and SUTRS requests from now */
478 if (oid_oidcmp(input_format, yaz_oid_recsyn_xml)
479 && oid_oidcmp(input_format, yaz_oid_recsyn_sutrs))
481 yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
483 return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
487 /* processing special elementsetnames zebra::meta:: */
488 if (elemsetname && 0 == strcmp(elemsetname, "meta"))
491 WRBUF wrbuf = wrbuf_alloc();
492 RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec);
/* XML: a single empty <record> element with the metadata as attributes */
494 if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml))
496 *output_format = input_format;
498 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
499 " sysno=\"" ZINT_FORMAT "\"", sysno);
500 retrieve_puts_attr(wrbuf, "base", rec->info[recInfo_databaseName]);
501 retrieve_puts_attr(wrbuf, "file", rec->info[recInfo_filename]);
502 retrieve_puts_attr(wrbuf, "type", rec->info[recInfo_fileType]);
504 retrieve_puts_attr_int(wrbuf, "score", score);
507 " rank=\"" ZINT_FORMAT "\""
509 " set=\"zebra::%s\"/>\n",
510 recordAttr->staticrank,
511 recordAttr->recordSize,
/* SUTRS: the same fields, one "name value" line each */
514 else if (!oid_oidcmp(input_format, yaz_oid_recsyn_sutrs))
516 *output_format = input_format;
517 wrbuf_printf(wrbuf, "sysno " ZINT_FORMAT "\n", sysno);
518 retrieve_puts_str(wrbuf, "base", rec->info[recInfo_databaseName]);
519 retrieve_puts_str(wrbuf, "file", rec->info[recInfo_filename]);
520 retrieve_puts_str(wrbuf, "type", rec->info[recInfo_fileType]);
522 retrieve_puts_int(wrbuf, "score", score);
525 "rank " ZINT_FORMAT "\n"
528 recordAttr->staticrank,
529 recordAttr->recordSize,
532 *rec_lenp = wrbuf_len(wrbuf);
534 *rec_bufp = odr_strdup(odr, wrbuf_cstr(wrbuf));
536 ret = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
538 wrbuf_destroy(wrbuf);
543 /* processing special elementsetnames zebra::index:: */
544 if (elemsetname && 0 == strncmp(elemsetname, "index", 5))
546 int ret = zebra_special_index_fetch(zh, sysno, odr, rec,
548 input_format, output_format,
/* nothing above handled the element set name */
557 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* Fetch record sysno for presentation.
 *
 * The element set name is taken from comp with yaz_get_esn(). Names that
 * start with "zebra::" are delegated to zebra_special_fetch(). For every
 * other name the record is read with rec_get(), a stream over its
 * content is created, and the retrieve handler of the record's type
 * (looked up from rec->info[recInfo_fileType]) is called with a
 * recRetrieveCtrl describing the request.
 *
 * Outputs written in the visible lines:
 *   *basenamep      ODR copy of the record's database name
 *   *output_format, *rec_bufp, *rec_lenp, *addinfo
 *                   taken from the recRetrieveCtrl after the handler ran
 *   *addinfo        a "Could not handle record type ..." message when no
 *                   handler is found for the record type
 *
 * Diagnostics visible here: YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS
 * when rec_get() fails or the record type is unknown; otherwise
 * return_code carries the result of zebra_create_record_stream() or the
 * handler's retrieveCtrl.diagnostic.
 */
561 int zebra_record_fetch(ZebraHandle zh, zint sysno, int score,
562 zebra_snippets *hit_snippet, ODR odr,
563 const Odr_oid *input_format, Z_RecordComposition *comp,
564 const Odr_oid **output_format,
565 char **rec_bufp, int *rec_lenp, char **basenamep,
569 char *fname, *file_type, *basename;
570 const char *elemsetname;
571 struct ZebraRecStream stream;
572 RecordAttr *recordAttr;
578 elemsetname = yaz_get_esn(comp);
580 /* element set names starting with "zebra::" are served by zebra_special_fetch() */
581 if (elemsetname && 0 == strncmp(elemsetname, "zebra::", 7))
582 return zebra_special_fetch(zh, sysno, score, odr,
584 input_format, output_format,
588 /* processing all other element set names */
589 rec = rec_get(zh->reg->records, sysno);
592 yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
594 return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
598 recordAttr = rec_init_attr(zh->reg->zei, rec);
600 file_type = rec->info[recInfo_fileType];
601 fname = rec->info[recInfo_filename];
602 basename = rec->info[recInfo_databaseName];
/* give the caller its own copy of the database name, in ODR memory */
603 *basenamep = (char *) odr_malloc (odr, strlen(basename)+1);
604 strcpy (*basenamep, basename);
606 yaz_log(YLOG_DEBUG, "retrieve localno=" ZINT_FORMAT " score=%d",
609 return_code = zebra_create_record_stream(zh, &rec, &stream);
613 zebra_snippets *snippet;
614 zebra_rec_keys_t reckeys = zebra_rec_keys_open();
616 struct recRetrieveCtrl retrieveCtrl;
/* describe the request for the record type's retrieve handler */
618 retrieveCtrl.stream = &stream;
619 retrieveCtrl.fname = fname;
620 retrieveCtrl.localno = sysno;
621 retrieveCtrl.staticrank = recordAttr->staticrank;
622 retrieveCtrl.score = score;
623 retrieveCtrl.recordSize = recordAttr->recordSize;
624 retrieveCtrl.odr = odr;
/* output_format starts out equal to the requested format */
625 retrieveCtrl.input_format = retrieveCtrl.output_format = input_format;
626 retrieveCtrl.comp = comp;
627 retrieveCtrl.encoding = zh->record_encoding;
628 retrieveCtrl.diagnostic = 0;
629 retrieveCtrl.addinfo = 0;
630 retrieveCtrl.dh = zh->reg->dh;
631 retrieveCtrl.res = zh->res;
632 retrieveCtrl.rec_buf = 0;
633 retrieveCtrl.rec_len = -1;
634 retrieveCtrl.hit_snippet = hit_snippet;
635 retrieveCtrl.doc_snippet = zebra_snippets_create();
/* fill doc_snippet from the keys stored with the record */
637 zebra_rec_keys_set_buf(reckeys,
638 rec->info[recInfo_delKeys],
639 rec->size[recInfo_delKeys],
641 zebra_rec_keys_to_snippets(zh, reckeys, retrieveCtrl.doc_snippet);
642 zebra_rec_keys_close(reckeys);
645 /* for debugging purposes */
646 yaz_log(YLOG_LOG, "DOC SNIPPET:");
647 zebra_snippets_log(retrieveCtrl.doc_snippet, YLOG_LOG);
648 yaz_log(YLOG_LOG, "HIT SNIPPET:");
649 zebra_snippets_log(retrieveCtrl.hit_snippet, YLOG_LOG);
/* snippet is built from both the document and the hit snippets */
651 snippet = zebra_snippets_window(retrieveCtrl.doc_snippet,
652 retrieveCtrl.hit_snippet,
655 /* for debugging purposes */
656 yaz_log(YLOG_LOG, "WINDOW SNIPPET:");
657 zebra_snippets_log(snippet, YLOG_LOG);
/* find the handler for this record's type */
660 if (!(rt = recType_byName(zh->reg->recTypes, zh->res,
661 file_type, &clientData)))
663 char addinfo_str[100];
/* %.40s caps the type name at 40 characters, so the message fits
   addinfo_str */
665 sprintf(addinfo_str, "Could not handle record type %.40s",
668 *addinfo = odr_strdup(odr, addinfo_str);
669 return_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
/* run the handler and pass its results on to the caller */
673 (*rt->retrieve)(clientData, &retrieveCtrl);
674 return_code = retrieveCtrl.diagnostic;
676 *output_format = retrieveCtrl.output_format;
677 *rec_bufp = (char *) retrieveCtrl.rec_buf;
678 *rec_lenp = retrieveCtrl.rec_len;
679 *addinfo = retrieveCtrl.addinfo;
/* release the snippets and the record stream */
682 zebra_snippets_destroy(snippet);
683 zebra_snippets_destroy(retrieveCtrl.doc_snippet);
685 stream.destroy(&stream);
695 * indent-tabs-mode: nil
697 * vim: shiftwidth=4 tabstop=8 expandtab