+#define MAX_SYSNOS_PER_RECORD 40 /* NOTE(review): not referenced in the part
+   of the file visible here; presumably a cap on sysnos per record - confirm */
+/* Start of the XML <record> element in the Zebra namespace. The tag is left
+   without its closing '>': the users in this file append attributes and/or
+   the '>' themselves when formatting the header. */
+#define ZEBRA_XML_HEADER_STR "<record xmlns=\"http://www.indexdata.com/zebra/\""
+
+/*
+ * Set up `stream` so that the raw data of record `*rec` can be read.
+ *
+ * If the record carries stored data (size > 0), or has no file name, a
+ * memory stream over rec->info[recInfo_storeData] is created.  Otherwise the
+ * record's file is opened read-only and a file-descriptor stream is created
+ * from it together with recordAttr->recordOffset.  A file name that is not
+ * absolute is resolved against zh->path_reg when that is set.
+ *
+ * Returns 0 on success.  When the resulting path does not fit the local
+ * buffer, or the file cannot be opened, a warning is logged, rec_free(rec)
+ * is called and YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS is returned.
+ */
+static int zebra_create_record_stream(ZebraHandle zh,
+                                      Record *rec,
+                                      struct ZebraRecStream *stream)
+{
+    RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, *rec);
+
+    if ((*rec)->size[recInfo_storeData] > 0
+        || (*rec)->info[recInfo_filename] == 0)
+        zebra_create_stream_mem(stream, (*rec)->info[recInfo_storeData],
+                                (*rec)->size[recInfo_storeData]);
+    else
+    {
+        char full_rep[1024];
+        const char *fname = (*rec)->info[recInfo_filename];
+        const char *dir = 0;
+        size_t need;
+        int fd;
+
+        if (zh->path_reg && !yaz_is_abspath(fname))
+            dir = zh->path_reg;
+
+        /* bytes required: [dir + '/'] + fname + terminating NUL */
+        need = strlen(fname) + 1;
+        if (dir)
+            need += strlen(dir) + 1;
+        if (need > sizeof(full_rep))
+        {
+            /* log before rec_free: fname points into the record */
+            yaz_log(YLOG_WARN, "Retrieve fail; file name too long: %s",
+                    fname);
+            rec_free(rec);
+            return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
+        }
+
+        /* the length check above makes these copies safe */
+        full_rep[0] = '\0';
+        if (dir)
+        {
+            strcpy(full_rep, dir);
+            strcat(full_rep, "/");
+        }
+        strcat(full_rep, fname);
+
+        if ((fd = open(full_rep, O_BINARY|O_RDONLY)) == -1)
+        {
+            yaz_log(YLOG_WARN|YLOG_ERRNO, "Retrieve fail; missing file: %s",
+                    full_rep);
+            rec_free(rec);
+            return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
+        }
+        zebra_create_stream_fd(stream, fd, recordAttr->recordOffset);
+    }
+    return 0;
+}
+
+/* One entry of the singly linked list that parse_index_spec() builds from an
+   element specification string. */
+struct index_spec {
+ const char *index_name; /* text of the name field; always set */
+ const char *index_type; /* text after the first ':' of the entry; 0 if absent */
+ const char *extra; /* text after the second ':' of the entry; 0 if absent */
+ struct index_spec *next; /* following entry; 0 at the end of the list */
+};
+
+
+/* Return a pointer to the first ':' or ',' at or after p, or to the
+   terminating NUL when neither occurs. */
+static const char *index_spec_field_end(const char *p)
+{
+    while (*p != '\0' && *p != ',' && *p != ':')
+        p++;
+    return p;
+}
+
+/*
+ * Parse an element specification of the form
+ *
+ *     ::name[:type[:extra]][,name[:type[:extra]]]...
+ *
+ * into a list of index_spec entries.  The entries and their strings are
+ * obtained from nmem.  When elem does not start with "::" no entries are
+ * produced and 0 is returned.
+ *
+ * *error is set to 0, or to 1 when parsing stopped before the end of elem
+ * (which includes a non-empty elem that lacks the "::" prefix).
+ */
+struct index_spec *parse_index_spec(const char *elem, NMEM nmem,
+                                    int *error)
+{
+    struct index_spec *head = 0;
+    struct index_spec **tail = &head;
+    const char *cp = elem;
+
+    *error = 0;
+    if (cp[0] == ':' && cp[1] == ':')
+    {
+        cp++; /* now on the second ':' */
+
+        do
+        {
+            const char *start;
+            struct index_spec *node = nmem_malloc(nmem, sizeof(*node));
+
+            node->index_type = 0;
+            node->next = 0;
+            node->extra = 0;
+
+            /* append; the first append also sets head */
+            *tail = node;
+            tail = &node->next;
+
+            /* step over the second ':' or the ',' that ended the
+               previous entry */
+            start = ++cp;
+            cp = index_spec_field_end(cp);
+            node->index_name = nmem_strdupn(nmem, start, cp - start);
+
+            if (*cp == ':') /* type as well */
+            {
+                start = ++cp;
+                cp = index_spec_field_end(cp);
+                node->index_type = nmem_strdupn(nmem, start, cp - start);
+            }
+            if (*cp == ':') /* extra arguments */
+            {
+                start = ++cp;
+                cp = index_spec_field_end(cp);
+                node->extra = nmem_strdupn(nmem, start, cp - start);
+            }
+        }
+        while (*cp == ',');
+    }
+    if (*cp != '\0')
+        *error = 1;
+    return head;
+}
+
+/*
+ * Split an element set name of the form "::index" or "::index:type".
+ *
+ * On return *index / *index_len and *type / *type_len describe the two
+ * parts as (pointer, length) pairs pointing into elem; nothing is copied
+ * and the parts are not NUL-terminated individually.  A part that is not
+ * present is reported as pointer 0 and length 0.  Everything after the
+ * first ':' that follows the prefix is taken as the type.
+ *
+ * Returns 1 when elem is NULL, empty, or well formed; returns 0 when elem
+ * lacks the "::" prefix, has nothing after the prefix, or ends in ':'.
+ */
+static int parse_zebra_elem(const char *elem,
+                            const char **index, size_t *index_len,
+                            const char **type, size_t *type_len)
+{
+    const char *colon;
+
+    *index = 0;
+    *index_len = 0;
+
+    *type = 0;
+    *type_len = 0;
+
+    if (!elem || !*elem)
+        return 1;
+
+    /* require "::" followed by at least one more character; the tests
+       short-circuit, so nothing past the terminating NUL is read */
+    if (elem[0] != ':' || elem[1] != ':' || elem[2] == '\0')
+        return 0;
+
+    /* pick out info from string after '::' */
+    elem += 2;
+    colon = strchr(elem, ':');
+
+    if (!colon) /* index, no colon, no type */
+    {
+        *index = elem;
+        *index_len = strlen(elem);
+    }
+    else if (colon[1] == '\0') /* colon, but no following type */
+    {
+        return 0;
+    }
+    else /* index, colon and type */
+    {
+        *index = elem;
+        *index_len = colon - elem;
+        *type = colon + 1;
+        *type_len = strlen(colon + 1);
+    }
+    return 1;
+}
+
+
+/*
+ * Produce a pseudo record holding the sort-register entry of record sysno
+ * for the sort index named by elemsetname ("::index:type").
+ *
+ * Only the XML and SUTRS record syntaxes are accepted as input_format;
+ * *output_format is set to the syntax actually produced (or to 0 when the
+ * requested syntax is not supported).  On success the formatted record is
+ * copied into memory obtained from odr and returned through *rec_bufp /
+ * *rec_lenp.
+ *
+ * Returns 0 on success, a YAZ_BIB1_* diagnostic for an unsupported syntax
+ * or a malformed / over-long element set name, and -1 when no type was
+ * given or the index/type pair is not a sort index.
+ */
+int zebra_special_sort_fetch(ZebraHandle zh, zint sysno, ODR odr,
+                             const char *elemsetname,
+                             const Odr_oid *input_format,
+                             const Odr_oid **output_format,
+                             char **rec_bufp, int *rec_lenp)
+{
+    const char *retrieval_index;
+    size_t retrieval_index_len;
+    const char *retrieval_type;
+    size_t retrieval_type_len;
+    char retrieval_index_cstr[256];
+    char retrieval_type_cstr[256];
+    int ord;
+
+    /* only accept XML and SUTRS requests */
+    if (oid_oidcmp(input_format, yaz_oid_recsyn_xml)
+        && oid_oidcmp(input_format, yaz_oid_recsyn_sutrs))
+    {
+        yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
+                elemsetname);
+        *output_format = 0;
+        return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
+    }
+
+    if (!parse_zebra_elem(elemsetname,
+                          &retrieval_index, &retrieval_index_len,
+                          &retrieval_type, &retrieval_type_len))
+    {
+        return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
+    }
+
+    if (retrieval_type_len == 0)
+        return -1; /* must have a register type specified */
+
+    /* both parts are copied into fixed-size buffers below, so both
+       lengths have to be bounded, not just the index name */
+    if (!retrieval_index_len ||
+        retrieval_index_len >= sizeof(retrieval_index_cstr)-1 ||
+        retrieval_type_len >= sizeof(retrieval_type_cstr)-1)
+    {
+        return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
+    }
+
+    memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
+    retrieval_index_cstr[retrieval_index_len] = '\0';
+
+    memcpy(retrieval_type_cstr, retrieval_type, retrieval_type_len);
+    retrieval_type_cstr[retrieval_type_len] = '\0';
+
+    ord = zebraExplain_lookup_attr_str(zh->reg->zei,
+                                       zinfo_index_category_sort,
+                                       retrieval_type_cstr,
+                                       retrieval_index_cstr);
+    if (ord == -1)
+        return -1; /* is not a sort index */
+    else
+    {
+        char dst_buf[IT_MAX_WORD];
+        char str[IT_MAX_WORD];
+        const char *index_type;
+        const char *db = 0;
+        const char *string_index = 0;
+        WRBUF wrbuf = wrbuf_alloc();
+
+        zebra_sort_sysno(zh->reg->sort_index, sysno);
+        zebra_sort_type(zh->reg->sort_index, ord);
+        zebra_sort_read(zh->reg->sort_index, str);
+
+        zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db,
+                                &string_index);
+
+        /* same handling as zebra_special_index_fetch: a non-zero result
+           leaves dst_buf unusable, so emit an empty term instead */
+        if (zebra_term_untrans(zh, index_type, dst_buf, str))
+            *dst_buf = '\0'; /* untrans failed */
+
+        if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml))
+        {
+            *output_format = yaz_oid_recsyn_xml;
+            wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
+                         " sysno=\"" ZINT_FORMAT "\""
+                         " set=\"zebra::index",
+                         sysno);
+            /* elemsetname comes from the request; escape it so it cannot
+               break out of the attribute value (non-NULL here, because
+               a type was found in it above) */
+            wrbuf_xmlputs(wrbuf, elemsetname);
+            wrbuf_puts(wrbuf, "/\">\n");
+
+            wrbuf_printf(wrbuf, "  <index name=\"%s\"",
+                         string_index);
+            wrbuf_printf(wrbuf, " type=\"%s\">", index_type);
+            wrbuf_xmlputs(wrbuf, dst_buf);
+            wrbuf_printf(wrbuf, "</index>\n");
+            wrbuf_printf(wrbuf, "</record>\n");
+        }
+        else if (!oid_oidcmp(input_format, yaz_oid_recsyn_sutrs))
+        {
+            *output_format = yaz_oid_recsyn_sutrs;
+
+            wrbuf_printf(wrbuf, "%s %s %s\n", string_index, index_type,
+                         dst_buf);
+        }
+        *rec_lenp = wrbuf_len(wrbuf);
+        *rec_bufp = odr_malloc(odr, *rec_lenp);
+        memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
+        wrbuf_destroy(wrbuf);
+        return 0;
+    }
+}
+
+/*
+ * Produce a pseudo record listing the index keys kept with record rec
+ * (rec->info[recInfo_delKeys]).
+ *
+ * elemsetname may be NULL/empty (all keys are listed) or have the form
+ * "::index" / "::index:type", in which case only keys of that index (and
+ * type) are listed.  Only the XML and SUTRS record syntaxes are accepted as
+ * input_format; *output_format is set to input_format when output is
+ * produced, or to 0 when the syntax is not supported.  The formatted record
+ * is copied into memory obtained from odr and returned through *rec_bufp /
+ * *rec_lenp.
+ *
+ * Returns 0 on success or a YAZ_BIB1_* diagnostic: unsupported syntax,
+ * malformed element set name, over-long type, unknown index, or failure to
+ * rewind the key buffer.
+ */
+int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr,
+                              Record rec,
+                              const char *elemsetname,
+                              const Odr_oid *input_format,
+                              const Odr_oid **output_format,
+                              char **rec_bufp, int *rec_lenp)
+{
+    const char *retrieval_index;
+    size_t retrieval_index_len;
+    const char *retrieval_type;
+    size_t retrieval_type_len;
+    zebra_rec_keys_t keys;
+    int ret_code = 0;
+    char retrieval_type_cstr[256];
+
+    /* only accept XML and SUTRS requests */
+    if (oid_oidcmp(input_format, yaz_oid_recsyn_xml)
+        && oid_oidcmp(input_format, yaz_oid_recsyn_sutrs))
+    {
+        yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
+                elemsetname);
+        *output_format = 0;
+        return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
+    }
+
+    if (!parse_zebra_elem(elemsetname,
+                          &retrieval_index, &retrieval_index_len,
+                          &retrieval_type, &retrieval_type_len))
+        return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
+
+    if (retrieval_type_len)
+    {
+        /* the type is client supplied; refuse one that would not fit
+           the fixed-size copy below */
+        if (retrieval_type_len >= sizeof(retrieval_type_cstr) - 1)
+            return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
+        memcpy(retrieval_type_cstr, retrieval_type, retrieval_type_len);
+        retrieval_type_cstr[retrieval_type_len] = '\0';
+    }
+
+    if (retrieval_index_len)
+    {
+        char retrieval_index_cstr[256];
+
+        /* an index name too long for the buffer is not looked up here;
+           it simply matches no key in the loop below */
+        if (retrieval_index_len < sizeof(retrieval_index_cstr) -1)
+        {
+            memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
+            retrieval_index_cstr[retrieval_index_len] = '\0';
+
+            if (zebraExplain_lookup_attr_str(zh->reg->zei,
+                                             zinfo_index_category_index,
+                                             (retrieval_type_len == 0 ? 0 :
+                                              retrieval_type_cstr),
+                                             retrieval_index_cstr) == -1)
+                return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
+        }
+    }
+
+    keys = zebra_rec_keys_open();
+    zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys],
+                           rec->size[recInfo_delKeys], 0);
+
+    if (!zebra_rec_keys_rewind(keys))
+    {
+        ret_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
+    }
+    else
+    {
+        size_t slen;
+        const char *str;
+        struct it_key key_in;
+        WRBUF wrbuf = wrbuf_alloc();
+
+        if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml))
+        {
+            *output_format = input_format;
+            wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
+                         " sysno=\"" ZINT_FORMAT "\""
+                         " set=\"zebra::index",
+                         sysno);
+            /* elemsetname comes from the request and may be NULL:
+               escape it, and never hand NULL to a string formatter */
+            if (elemsetname)
+                wrbuf_xmlputs(wrbuf, elemsetname);
+            wrbuf_puts(wrbuf, "/\">\n");
+        }
+        else if (!oid_oidcmp(input_format, yaz_oid_recsyn_sutrs))
+            *output_format = input_format;
+
+        while (zebra_rec_keys_read(keys, &str, &slen, &key_in))
+        {
+            int i;
+            int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
+            const char *index_type = 0;
+            const char *db = 0;
+            const char *string_index = 0;
+            size_t string_index_len;
+            char dst_buf[IT_MAX_WORD];
+
+            zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db,
+                                    &string_index);
+            /* skip keys whose ord the lookup did not resolve, rather
+               than passing NULL to strlen/strcmp below */
+            if (!index_type || !string_index)
+                continue;
+            string_index_len = strlen(string_index);
+
+            /* process only if index is not defined,
+               or if defined and matching */
+            if (retrieval_index != 0
+                && (string_index_len != retrieval_index_len
+                    || memcmp(string_index, retrieval_index,
+                              string_index_len)))
+                continue;
+
+            /* process only if type is not defined, or is matching */
+            if (retrieval_type != 0
+                && strcmp(retrieval_type_cstr, index_type))
+                continue;
+
+            if (zebra_term_untrans(zh, index_type, dst_buf, str))
+                *dst_buf = '\0'; /* untrans failed */
+
+            if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml))
+            {
+                wrbuf_printf(wrbuf, "  <index name=\"%s\"",
+                             string_index);
+
+                wrbuf_printf(wrbuf, " type=\"%s\"", index_type);
+
+                wrbuf_printf(wrbuf, " seq=\"" ZINT_FORMAT "\">",
+                             key_in.mem[key_in.len -1]);
+                wrbuf_xmlputs(wrbuf, dst_buf);
+                wrbuf_printf(wrbuf, "</index>\n");
+            }
+            else
+            {
+                wrbuf_printf(wrbuf, "%s ", string_index);
+
+                wrbuf_printf(wrbuf, "%s", index_type);
+
+                for (i = 1; i < key_in.len; i++)
+                    wrbuf_printf(wrbuf, " " ZINT_FORMAT,
+                                 key_in.mem[i]);
+
+                wrbuf_printf(wrbuf, " %s", dst_buf);
+
+                wrbuf_printf(wrbuf, "\n");
+            }
+        }
+        if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml))
+            wrbuf_printf(wrbuf, "</record>\n");
+        *rec_lenp = wrbuf_len(wrbuf);
+        *rec_bufp = odr_malloc(odr, *rec_lenp);
+        memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
+        wrbuf_destroy(wrbuf);
+    }
+    zebra_rec_keys_close(keys);
+    return ret_code;
+}
+
+
+/* Append ` name="value"` to wrbuf, writing value through wrbuf_xmlputs.
+   Nothing is appended when value is NULL. */
+static void retrieve_puts_attr(WRBUF wrbuf, const char *name,
+                               const char *value)
+{
+    if (!value)
+        return;
+
+    wrbuf_printf(wrbuf, " %s=\"", name);
+    wrbuf_xmlputs(wrbuf, value);
+    wrbuf_puts(wrbuf, "\"");
+}
+/* Append ` name="value"` to wrbuf, with the int value formatted by %i. */
+static void retrieve_puts_attr_int(WRBUF wrbuf, const char *name,
+ const int value)
+{
+ wrbuf_printf(wrbuf, " %s=\"%i\"", name, value);
+}
+
+/* Append the line "name value\n" to wrbuf; a NULL value appends nothing. */
+static void retrieve_puts_str(WRBUF wrbuf, const char *name,
+                              const char *value)
+{
+    if (!value)
+        return;
+
+    wrbuf_printf(wrbuf, "%s %s\n", name, value);
+}
+/* Append the line "name value\n" to wrbuf, value formatted by %i. */
+static void retrieve_puts_int(WRBUF wrbuf, const char *name,
+ const int value)
+{
+ wrbuf_printf(wrbuf, "%s %i\n", name, value);
+}
+
+
+/*
+ * Write the words of the snippet list doc to wrbuf as an XML <record>.
+ *
+ * Consecutive words whose mark field is set are grouped into one <snippet>
+ * element carrying the name and type looked up for the word's ord; words
+ * whose match field is set are wrapped in <s>...</s>.  A word that opens a
+ * snippet is not printed when it is whitespace and not a match.
+ */
+static void snippet_xml_record(ZebraHandle zh, WRBUF wrbuf, zebra_snippets *doc)
+{
+    const zebra_snippet_word *w;
+    int prev_mark = 0; /* mark value of the previous word */
+
+    wrbuf_printf(wrbuf, "%s>\n", ZEBRA_XML_HEADER_STR);
+
+    w = zebra_snippets_constlist(doc);
+    while (w)
+    {
+        if (!w->mark)
+        {
+            /* leaving a marked run: close the element opened for it */
+            if (prev_mark == 1)
+                wrbuf_puts(wrbuf, "</snippet>\n");
+        }
+        else
+        {
+            const char *index_type;
+            const char *db = 0;
+            const char *string_index = 0;
+
+            zebraExplain_lookup_ord(zh->reg->zei, w->ord,
+                                    &index_type, &db, &string_index);
+
+            if (prev_mark == 0)
+            {
+                /* entering a marked run: open a new element */
+                wrbuf_printf(wrbuf, "  <snippet name=\"%s\"", string_index);
+                wrbuf_printf(wrbuf, " type=\"%s\">", index_type);
+            }
+            if (w->match)
+                wrbuf_puts(wrbuf, "<s>");
+            /* not printing leading ws */
+            if (prev_mark || !w->ws || w->match)
+                wrbuf_xmlputs(wrbuf, w->term);
+            if (w->match)
+                wrbuf_puts(wrbuf, "</s>");
+        }
+        prev_mark = w->mark;
+        w = w->next;
+    }
+
+    /* list ended inside a marked run */
+    if (prev_mark == 1)
+        wrbuf_puts(wrbuf, "</snippet>\n");
+
+    wrbuf_printf(wrbuf, "</record>");
+}
+
+/*
+ * Fetch record sysno and run extract_snippet() over its data, collecting
+ * the result in snippets.
+ *
+ * Returns 0 on success.  YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS is
+ * returned when the record cannot be fetched (a warning is logged) or its
+ * record type is unknown; a failure of zebra_create_record_stream() is
+ * passed on unchanged.
+ */
+int zebra_get_rec_snippets(ZebraHandle zh, zint sysno,
+                           zebra_snippets *snippets)
+{
+    int status = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
+    void *type_client_data;
+    RecType rec_type;
+    Record rec = rec_get(zh->reg->records, sysno);
+
+    if (!rec)
+    {
+        yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
+        return status;
+    }
+
+    rec_type = recType_byName(zh->reg->recTypes, zh->res,
+                              rec->info[recInfo_fileType],
+                              &type_client_data);
+    if (rec_type)
+    {
+        struct ZebraRecStream stream;
+
+        status = zebra_create_record_stream(zh, &rec, &stream);
+        if (status == 0)
+        {
+            extract_snippet(zh, snippets, &stream,
+                            rec_type, type_client_data);
+
+            stream.destroy(&stream);
+        }
+    }
+
+    rec_free(&rec);
+    return status;
+}
+
+static int snippet_fetch(ZebraHandle zh, const char *setname,
+ zint sysno, ODR odr,
+ const char *elemsetname,
+ const Odr_oid *input_format,
+ const Odr_oid **output_format,
+ char **rec_bufp, int *rec_lenp)
+{
+ zebra_snippets *rec_snippets = zebra_snippets_create();
+ int return_code = zebra_get_rec_snippets(zh, sysno, rec_snippets);
+
+ if (!return_code)
+ {
+ WRBUF wrbuf = wrbuf_alloc();
+ zebra_snippets *hit_snippet = zebra_snippets_create();
+
+ zebra_snippets_hit_vector(zh, setname, sysno, hit_snippet);