1 /* $Id: recgrs.c,v 1.102 2005-06-23 06:45:47 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 #include <sys/types.h>
32 #include <idzebra/recgrs.h>
34 #define GRS_MAX_WORD 512
36 struct source_parser {
44 static int sp_lex(struct source_parser *sp)
46 while (*sp->src == ' ')
50 while (*sp->src && !strchr("<>();,-: ", *sp->src))
59 sp->lookahead = *sp->src;
66 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
68 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
75 if (sp->lookahead != '(')
77 sp_lex(sp); /* skip ( */
80 if (!sp_expr(sp, n, wrd))
83 if (sp->lookahead != ',')
85 sp_lex(sp); /* skip , */
88 if (!sp_expr(sp, n, &tmp_w))
90 start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
92 if (sp->lookahead == ',')
94 sp_lex(sp); /* skip , */
97 if (!sp_expr(sp, n, &tmp_w))
99 len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
105 if (sp->lookahead != ')')
109 if (wrd->term_buf && wrd->term_len)
111 wrd->term_buf += start;
112 wrd->term_len -= start;
113 if (wrd->term_len > len)
119 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
124 if (sp->lookahead != '(')
126 sp_lex(sp); /* skip ( */
127 if (!sp_expr(sp, n, wrd))
129 while (sp->lookahead == ',')
133 sp_lex(sp); /* skip , */
135 if (!sp_expr(sp, n, &search_w))
137 for (i = 0; i<wrd->term_len; i++)
140 for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
141 if (wrd->term_buf[i+j] != search_w.term_buf[j])
143 if (j == search_w.term_len) /* match ? */
145 if (min_pos == -1 || i < min_pos)
151 if (sp->lookahead != ')')
155 min_pos = 0; /* the default if not found */
156 sprintf(num_str, "%d", min_pos);
157 wrd->term_buf = nmem_strdup(sp->nmem, num_str);
158 wrd->term_len = strlen(wrd->term_buf);
162 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
164 if (sp->lookahead != 't')
166 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
168 if (n->which == DATA1N_data)
170 wrd->term_buf = n->u.data.data;
171 wrd->term_len = n->u.data.len;
175 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
177 if (n->which == DATA1N_tag)
179 wrd->term_buf = n->u.tag.tag;
180 wrd->term_len = strlen(n->u.tag.tag);
184 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
188 if (sp->lookahead != '(')
192 if (!sp_expr(sp, n, &tmp_w))
197 if (n->which == DATA1N_tag)
199 data1_xattr *p = n->u.tag.attributes;
200 while (p && strlen(p->name) != tmp_w.term_len &&
201 memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
205 wrd->term_buf = p->value;
206 wrd->term_len = strlen(p->value);
209 if (sp->lookahead != ')')
213 else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
215 return sp_first(sp, n, wrd);
217 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
219 return sp_range(sp, n, wrd);
221 else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
223 wrd->term_buf = nmem_malloc(sp->nmem, sp->len);
224 memcpy(wrd->term_buf, sp->tok, sp->len);
225 wrd->term_len = sp->len;
228 else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
230 wrd->term_len = sp->len - 2;
231 wrd->term_buf = nmem_malloc(sp->nmem, wrd->term_len);
232 memcpy(wrd->term_buf, sp->tok+1, wrd->term_len);
244 static struct source_parser *source_parser_create()
246 struct source_parser *sp = xmalloc(sizeof(*sp));
248 sp->nmem = nmem_create();
252 static void source_parser_destroy(struct source_parser *sp)
256 nmem_destroy(sp->nmem);
260 static int sp_parse(struct source_parser *sp,
261 data1_node *n, RecWord *wrd, const char *src)
267 nmem_reset(sp->nmem);
270 return sp_expr(sp, n, wrd);
273 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
282 if (p->which == XPATH_PREDICATE_RELATION) {
283 if (p->u.relation.name[0]) {
284 if (*p->u.relation.name != '@') {
286 " Only attributes (@) are supported in xelm xpath predicates");
287 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
290 attname = p->u.relation.name + 1;
292 /* looking for the attribute with a specified name */
293 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
294 yaz_log(YLOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
296 if (!strcmp(attr->name, attname)) {
297 if (p->u.relation.op[0]) {
298 if (*p->u.relation.op != '=') {
300 "Only '=' relation is supported (%s)",p->u.relation.op);
301 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
304 yaz_log(YLOG_DEBUG," - value %s <-> %s",
305 p->u.relation.value, attr->value );
306 if (!strcmp(attr->value, p->u.relation.value)) {
311 /* attribute exists, no value specified */
316 yaz_log(YLOG_DEBUG, "return %d", res);
322 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
323 if (!strcmp(p->u.boolean.op,"and")) {
324 return d1_check_xpath_predicate(n, p->u.boolean.left)
325 && d1_check_xpath_predicate(n, p->u.boolean.right);
327 else if (!strcmp(p->u.boolean.op,"or")) {
328 return (d1_check_xpath_predicate(n, p->u.boolean.left)
329 || d1_check_xpath_predicate(n, p->u.boolean.right));
331 yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
342 New function, looking for xpath "element" definitions in abs, by
343 tagpath, using a kind of ugly regexp search. The DFA was built while
344 parsing abs, so here we just go through them and try to match
345 against the given tagpath. The first matching entry is returned.
349 Added support for enhanced xelm. Now [] predicates are considered
350 as well, when selecting indexing rules... (why the hell it's called
357 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
359 data1_absyn *abs = n->root->u.root.absyn;
360 data1_xpelement *xpe = abs->xp_elements;
363 struct xpath_location_step *xp;
365 char *pexpr = xmalloc(strlen(tagpath)+2);
368 sprintf (pexpr, "%s\n", tagpath);
369 yaz_log(YLOG_DEBUG, "Checking tagpath %s",tagpath);
370 for (; xpe; xpe = xpe->next)
372 struct DFA_state **dfaar = xpe->dfa->states;
373 struct DFA_state *s = dfaar[0];
374 struct DFA_tran *t = s->trans;
376 unsigned char c = *pexpr++;
379 if ((c >= t->ch[0] && c <= t->ch[1]) || (!t->ch[0]))
381 const char *p = pexpr;
384 if ((s = dfaar[t->to])->rule_no &&
385 (start_line || s->rule_nno))
390 for (t=s->trans, i=s->tran_no; --i >= 0; t++)
391 if ((unsigned) *p >= t->ch[0] && (unsigned) *p <= t->ch[1])
398 yaz_log(YLOG_DEBUG, " xpath match %s",xpe->xpath_expr);
400 yaz_log(YLOG_DEBUG, " xpath no match %s",xpe->xpath_expr);
405 /* we have to check the predicates up to the root node */
408 /* find the first tag up in the node structure */
409 for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent)
412 /* go from inside out in the node structure, while going
413 backwards through xpath location steps ... */
414 for (i = xpe->xpath_len - 1; i>0; i--)
416 yaz_log(YLOG_DEBUG, "Checking step %d: %s on tag %s",
417 i, xp[i].part, nn->u.tag.tag);
419 if (!d1_check_xpath_predicate(nn, xp[i].predicate))
421 yaz_log(YLOG_DEBUG, " Predicates didn't match");
426 if (nn->which == DATA1N_tag)
438 yaz_log(YLOG_DEBUG, "Got it");
439 return xpe->termlists;
446 1 start element (tag)
448 3 start attr (and attr-exact)
456 Now, if there is a matching xelm described in abs, for the
457 indexed element or the attribute, then the data is handled according
458 to those definitions...
460 modified by pop, 2002-12-13
463 /* add xpath index for an attribute */
464 static void index_xpath_attr (char *tag_path, char *name, char *value,
465 char *structure, struct recExtractCtrl *p,
469 wrd->index_name = ZEBRA_XPATH_ELM_BEGIN;
471 wrd->attrSet = VAL_IDXPATH;
474 wrd->index_type = '0';
475 wrd->term_buf = tag_path;
476 wrd->term_len = strlen(tag_path);
481 wrd->index_name = ZEBRA_XPATH_ATTR;
485 wrd->index_type = 'w';
486 wrd->term_buf = value;
487 wrd->term_len = strlen(value);
491 wrd->index_name = ZEBRA_XPATH_ELM_END;
495 wrd->index_type = '0';
496 wrd->term_buf = tag_path;
497 wrd->term_len = strlen(tag_path);
502 static void index_xpath(struct source_parser *sp, data1_node *n,
503 struct recExtractCtrl *p,
504 int level, RecWord *wrd,
514 char tag_path_full[1024];
517 int termlist_only = 1;
522 int xpath_is_start = 0;
528 yaz_log(YLOG_DEBUG, "index_xpath level=%d xpath_index=%s",
531 yaz_log(YLOG_DEBUG, "index_xpath level=%d use=%d", level, use);
533 if ((!n->root->u.root.absyn) ||
534 (n->root->u.root.absyn->enable_xpath_indexing)) {
541 wrd->term_buf = n->u.data.data;
542 wrd->term_len = n->u.data.len;
546 /* we have to fetch the whole path to the data tag */
547 for (nn = n; nn; nn = nn->parent)
549 if (nn->which == DATA1N_tag)
551 size_t tlen = strlen(nn->u.tag.tag);
552 if (tlen + flen > (sizeof(tag_path_full)-2))
554 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
556 tag_path_full[flen++] = '/';
559 if (nn->which == DATA1N_root)
563 tag_path_full[flen] = 0;
565 /* If we have a matching termlist... */
566 if (n->root->u.root.absyn &&
567 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
569 for (; tl; tl = tl->next)
571 /* need to copy recword because it may be changed */
573 wrd->index_type = *tl->structure;
574 memcpy (&wrd_tl, wrd, sizeof(*wrd));
576 sp_parse(sp, n, &wrd_tl, tl->source);
583 /* this is the ! case, so structure is for the xpath index */
585 wrd_tl.index_name = xpath_index;
587 wrd_tl.attrSet = VAL_IDXPATH;
588 wrd_tl.attrUse = use;
590 if (p->flagShowRecords)
593 printf("%*sXPath index", (level + 1) * 4, "");
594 printf (" XData:\"");
595 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
596 fputc (wrd_tl.term_buf[i], stdout);
598 if (wrd_tl.term_len > 40)
600 fputc ('\n', stdout);
603 (*p->tokenAdd)(&wrd_tl);
606 /* this is just the old fashioned attribute based index */
608 wrd_tl.index_name = tl->index_name;
610 wrd_tl.attrSet = (int) (tl->att->parent->reference);
611 wrd_tl.attrUse = tl->att->locals->local;
613 if (p->flagShowRecords)
616 printf("%*sIdx: [%s]", (level + 1) * 4, "",
619 printf("%s %s", tl->index_name, tl->source);
621 printf("%s:%s [%d] %s",
622 tl->att->parent->name,
623 tl->att->name, tl->att->value,
626 printf (" XData:\"");
627 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
628 fputc (wrd_tl.term_buf[i], stdout);
630 if (wrd_tl.term_len > 40)
632 fputc ('\n', stdout);
635 (*p->tokenAdd)(&wrd_tl);
639 /* xpath indexing is done, if there was no termlist given,
640 or no ! in the termlist, and default indexing is enabled... */
641 if (!p->flagShowRecords && !xpdone && !termlist_only)
644 wrd->index_name = xpath_index;
646 wrd->attrSet = VAL_IDXPATH;
649 wrd->index_type = 'w';
655 for (nn = n; nn; nn = nn->parent)
657 if (nn->which == DATA1N_tag)
659 size_t tlen = strlen(nn->u.tag.tag);
660 if (tlen + flen > (sizeof(tag_path_full)-2))
662 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
664 tag_path_full[flen++] = '/';
666 else if (nn->which == DATA1N_root)
671 wrd->index_type = '0';
672 wrd->term_buf = tag_path_full;
673 wrd->term_len = flen;
675 wrd->index_name = xpath_index;
677 wrd->attrSet = VAL_IDXPATH;
680 if (p->flagShowRecords)
682 printf("%*s tag=", (level + 1) * 4, "");
683 for (i = 0; i<wrd->term_len && i < 40; i++)
684 fputc (wrd->term_buf[i], stdout);
695 tag_path_full[flen] = 0;
697 /* Add tag start/end xpath index, only when there is a ! in
698 the appropriate xelm directive, or default xpath indexing
701 if (!(do_xpindex = 1 - termlist_only))
703 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n)))
705 for (; tl; tl = tl->next)
717 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
720 if (xpath_is_start == 1) /* only for the starting tag... */
722 #define MAX_ATTR_COUNT 50
723 data1_termlist *tll[MAX_ATTR_COUNT];
727 /* get termlists for attributes, and find out, if we have to do xpath indexing */
728 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
733 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
735 int do_xpindex = 1 - termlist_only;
737 char attr_tag_path_full[1024];
740 /* this could be cached as well */
741 sprintf (attr_tag_path_full, "@%s/%.*s",
742 xp->name, int_len, tag_path_full);
744 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
746 /* if there is a ! in the xelm termlist, or default indexing is on,
747 proceed with xpath idx */
750 for (; tl; tl = tl->next)
764 /* attribute (no value) */
765 wrd->index_type = '0';
767 wrd->index_name = ZEBRA_XPATH_ATTR;
771 wrd->term_buf = xp->name;
772 wrd->term_len = strlen(xp->name);
778 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
780 /* attribute value exact */
781 strcpy (comb, xp->name);
783 strcat (comb, xp->value);
786 wrd->index_name = ZEBRA_XPATH_ATTR;
790 wrd->index_type = '0';
791 wrd->term_buf = comb;
792 wrd->term_len = strlen(comb);
802 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
804 char attr_tag_path_full[1024];
808 sprintf (attr_tag_path_full, "@%s/%.*s",
809 xp->name, int_len, tag_path_full);
813 /* If there is a termlist given (=xelm directive) */
814 for (; tl; tl = tl->next)
822 /* add xpath index for the attribute */
823 index_xpath_attr (attr_tag_path_full, xp->name,
824 xp->value, tl->structure,
828 /* add attribute based index for the attribute */
832 wrd->index_name = tl->index_name;
835 (tl->att->parent->reference);
836 wrd->attrUse = tl->att->locals->local;
838 wrd->index_type = *tl->structure;
839 wrd->term_buf = xp->value;
840 wrd->term_len = strlen(xp->value);
846 /* if there was no termlist for the given path,
847 or the termlist didn't have a ! element, index
848 the attribute as "w" */
849 if ((!xpdone) && (!termlist_only))
851 index_xpath_attr (attr_tag_path_full, xp->name,
852 xp->value, "w", p, wrd);
861 static void index_termlist (struct source_parser *sp, data1_node *par,
863 struct recExtractCtrl *p, int level, RecWord *wrd)
865 data1_termlist *tlist = 0;
866 data1_datatype dtype = DATA1K_string;
869 * cycle up towards the root until we find a tag with an att..
870 * this has the effect of indexing locally defined tags with
871 * the attribute of their ancestor in the record.
874 while (!par->u.tag.element)
875 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
877 if (!par || !(tlist = par->u.tag.element->termlists))
879 if (par->u.tag.element->tag)
880 dtype = par->u.tag.element->tag->kind;
882 for (; tlist; tlist = tlist->next)
884 /* consider source */
886 assert(tlist->source);
887 sp_parse(sp, n, wrd, tlist->source);
889 if (wrd->term_buf && wrd->term_len)
891 if (p->flagShowRecords)
894 printf("%*sIdx: [%s]", (level + 1) * 4, "",
897 printf("%s %s", tlist->index_name, tlist->source);
899 printf("%s:%s [%d] %s",
900 tlist->att->parent->name,
901 tlist->att->name, tlist->att->value,
904 printf (" XData:\"");
905 for (i = 0; i<wrd->term_len && i < 40; i++)
906 fputc (wrd->term_buf[i], stdout);
908 if (wrd->term_len > 40)
910 fputc ('\n', stdout);
914 wrd->index_type = *tlist->structure;
916 wrd->index_name = tlist->index_name;
918 wrd->attrSet = (int) (tlist->att->parent->reference);
919 wrd->attrUse = tlist->att->locals->local;
927 static int dumpkeys_r(struct source_parser *sp,
928 data1_node *n, struct recExtractCtrl *p, int level,
931 for (; n; n = n->next)
933 if (p->flagShowRecords) /* display element description to user */
935 if (n->which == DATA1N_root)
937 printf("%*s", level * 4, "");
938 printf("Record type: '%s'\n", n->u.root.type);
940 else if (n->which == DATA1N_tag)
944 printf("%*s", level * 4, "");
945 if (!(e = n->u.tag.element))
946 printf("Local tag: '%s'\n", n->u.tag.tag);
949 printf("Elm: '%s' ", e->name);
952 data1_tag *t = e->tag;
954 printf("TagNam: '%s' ", t->names->name);
957 printf("%s[%d],", t->tagset->name, t->tagset->type);
960 if (t->which == DATA1T_numeric)
961 printf("%d)", t->value.numeric);
963 printf("'%s')", t->value.string);
970 if (n->which == DATA1N_tag)
972 index_termlist(sp, n, n, p, level, wrd);
973 /* index start tag */
975 if (n->root->u.root.absyn)
976 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN,
979 if (n->root->u.root.absyn)
980 index_xpath(sp, n, p, level, wrd, 1);
985 if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
989 if (n->which == DATA1N_data)
991 data1_node *par = get_parent_tag(p->dh, n);
993 if (p->flagShowRecords)
995 printf("%*s", level * 4, "");
997 if (n->u.data.len > 256)
998 printf("'%.170s ... %.70s'\n", n->u.data.data,
999 n->u.data.data + n->u.data.len-70);
1000 else if (n->u.data.len > 0)
1001 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
1007 index_termlist(sp, par, n, p, level, wrd);
1010 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA,
1013 index_xpath(sp, n, p, level, wrd, 1016);
1017 if (n->which == DATA1N_tag)
1021 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END,
1024 index_xpath(sp, n, p, level, wrd, 2);
1028 if (p->flagShowRecords && n->which == DATA1N_root)
1030 printf("%*s-------------\n\n", level * 4, "");
1036 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
1038 struct source_parser *sp = source_parser_create();
1039 int r = dumpkeys_r(sp, n, p, 0, wrd);
1040 source_parser_destroy(sp);
1044 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
1047 int oidtmp[OID_SIZE];
1050 oe.proto = PROTO_Z3950;
1051 oe.oclass = CLASS_SCHEMA;
1052 if (n->u.root.absyn)
1054 oe.value = n->u.root.absyn->reference;
1056 if ((oid_ent_to_oid (&oe, oidtmp)))
1057 (*p->schemaAdd)(p, oidtmp);
1059 (*p->init)(p, &wrd);
1061 return dumpkeys(n, p, &wrd);
1064 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
1066 data1_node *(*grs_read)(struct grs_read_info *))
1069 struct grs_read_info gri;
1071 int oidtmp[OID_SIZE];
1074 gri.readf = p->readf;
1075 gri.seekf = p->seekf;
1076 gri.tellf = p->tellf;
1079 gri.offset = p->offset;
1082 gri.clientData = clientData;
1084 n = (*grs_read)(&gri);
1086 return RECCTRL_EXTRACT_EOF;
1087 oe.proto = PROTO_Z3950;
1088 oe.oclass = CLASS_SCHEMA;
1090 if (!n->u.root.absyn)
1091 return RECCTRL_EXTRACT_ERROR;
1093 if (n->u.root.absyn)
1095 oe.value = n->u.root.absyn->reference;
1096 if ((oid_ent_to_oid (&oe, oidtmp)))
1097 (*p->schemaAdd)(p, oidtmp);
1099 data1_concat_text(p->dh, mem, n);
1101 /* ensure our data1 tree is UTF-8 */
1102 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1105 data1_pr_tree (p->dh, n, stdout);
1108 (*p->init)(p, &wrd);
1109 if (dumpkeys(n, p, &wrd) < 0)
1111 data1_free_tree(p->dh, n);
1112 return RECCTRL_EXTRACT_ERROR_GENERIC;
1114 data1_free_tree(p->dh, n);
1115 return RECCTRL_EXTRACT_OK;
1118 int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
1119 data1_node *(*grs_read)(struct grs_read_info *))
1122 NMEM mem = nmem_create ();
1123 ret = grs_extract_sub(clientData, p, mem, grs_read);
1129 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
1131 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
1132 char **addinfo, ODR o)
1134 data1_esetname *eset;
1135 Z_Espec1 *espec = 0;
1140 case Z_RecordComp_simple:
1141 if (c->u.simple->which != Z_ElementSetNames_generic)
1142 return 26; /* only generic form supported. Fix this later */
1143 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1144 c->u.simple->u.generic)))
1146 yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1147 *addinfo = odr_strdup(o, c->u.simple->u.generic);
1148 return 25; /* invalid esetname */
1150 yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
1151 c->u.simple->u.generic);
1154 case Z_RecordComp_complex:
1155 if (c->u.complex->generic)
1157 /* insert check for schema */
1158 if ((p = c->u.complex->generic->elementSpec))
1162 case Z_ElementSpec_elementSetName:
1164 data1_getesetbyname(dh, n->u.root.absyn,
1165 p->u.elementSetName)))
1167 yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
1168 p->u.elementSetName);
1169 *addinfo = odr_strdup(o, p->u.elementSetName);
1170 return 25; /* invalid esetname */
1172 yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
1173 p->u.elementSetName);
1176 case Z_ElementSpec_externalSpec:
1177 if (p->u.externalSpec->which == Z_External_espec1)
1179 yaz_log(YLOG_DEBUG, "Got Espec-1");
1180 espec = p->u.externalSpec-> u.espec1;
1184 yaz_log(YLOG_LOG, "Unknown external espec.");
1185 return 25; /* bad. what is proper diagnostic? */
1192 return 26; /* fix */
1196 yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1197 return data1_doespec1(dh, n, espec);
1201 yaz_log(YLOG_DEBUG, "Element: all match");
1206 /* Add Zebra info in separate namespace ...
1209 <metadata xmlns="http://www.indexdata.dk/zebra/">
1211 <localnumber>447</localnumber>
1212 <filename>records/genera.xml</filename>
1217 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1220 const char *idzebra_ns[3];
1221 const char *i2 = "\n ";
1222 const char *i4 = "\n ";
1225 idzebra_ns[0] = "xmlns";
1226 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1229 data1_mk_text (p->dh, mem, i2, top);
1231 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1233 data1_mk_text (p->dh, mem, "\n", top);
1235 data1_mk_text (p->dh, mem, i4, n);
1237 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1241 data1_mk_text (p->dh, mem, i4, n);
1242 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1244 data1_mk_text (p->dh, mem, i4, n);
1245 data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
1248 data1_mk_text (p->dh, mem, i4, n);
1249 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1251 data1_mk_text (p->dh, mem, i2, n);
1254 int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
1255 data1_node *(*grs_read)(struct grs_read_info *))
1257 data1_node *node = 0, *onode = 0, *top;
1260 int res, selected = 0;
1262 struct grs_read_info gri;
1263 const char *tagname;
1265 int requested_schema = VAL_NONE;
1266 data1_marctab *marctab;
1269 mem = nmem_create();
1270 gri.readf = p->readf;
1271 gri.seekf = p->seekf;
1272 gri.tellf = p->tellf;
1278 gri.clientData = clientData;
1280 yaz_log(YLOG_DEBUG, "grs_retrieve");
1281 node = (*grs_read)(&gri);
1288 data1_concat_text(p->dh, mem, node);
1291 data1_pr_tree (p->dh, node, stdout);
1293 top = data1_get_root_tag (p->dh, node);
1295 yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1296 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1298 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1300 dnew->u.data.what = DATA1I_text;
1301 dnew->u.data.data = dnew->lbuf;
1302 sprintf(dnew->u.data.data, "%d", p->recordSize);
1303 dnew->u.data.len = strlen(dnew->u.data.data);
1306 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1307 if (tagname && p->score >= 0 &&
1308 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1310 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1311 dnew->u.data.what = DATA1I_num;
1312 dnew->u.data.data = dnew->lbuf;
1313 sprintf(dnew->u.data.data, "%d", p->score);
1314 dnew->u.data.len = strlen(dnew->u.data.data);
1317 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1318 "localControlNumber");
1319 if (tagname && p->localno > 0 &&
1320 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1322 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1323 dnew->u.data.what = DATA1I_text;
1324 dnew->u.data.data = dnew->lbuf;
1326 sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
1327 dnew->u.data.len = strlen(dnew->u.data.data);
1330 if (p->input_format == VAL_TEXT_XML)
1331 zebra_xml_metadata (p, top, mem);
1334 data1_pr_tree (p->dh, node, stdout);
1336 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1337 p->comp->u.complex->generic &&
1338 p->comp->u.complex->generic->which == Z_Schema_oid &&
1339 p->comp->u.complex->generic->schema.oid)
1341 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1343 requested_schema = oe->value;
1345 /* If schema has been specified, map if possible, then check that
1346 * we got the right one
1348 if (requested_schema != VAL_NONE)
1350 yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1351 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1353 if (map->target_absyn_ref == requested_schema)
1356 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1365 if (node->u.root.absyn &&
1366 requested_schema != node->u.root.absyn->reference)
1368 p->diagnostic = 238;
1374 * Does the requested format match a known syntax-mapping? (this reflects
1375 * the overlap of schema and formatting which is inherent in the MARC
1378 yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1379 if (node->u.root.absyn)
1380 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1382 if (map->target_absyn_ref == p->input_format)
1385 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1394 yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1395 if (node->u.root.absyn &&
1396 node->u.root.absyn->reference != VAL_NONE &&
1397 p->input_format == VAL_GRS1)
1401 int oidtmp[OID_SIZE];
1403 oe.proto = PROTO_Z3950;
1404 oe.oclass = CLASS_SCHEMA;
1405 oe.value = node->u.root.absyn->reference;
1407 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1410 data1_handle dh = p->dh;
1414 for (ii = oid; *ii >= 0; ii++)
1418 sprintf(p, "%d", *ii);
1421 if ((dnew = data1_mk_tag_data_wd(dh, top,
1422 "schemaIdentifier", mem)))
1424 dnew->u.data.what = DATA1I_oid;
1425 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1426 memcpy(dnew->u.data.data, tmp, p - tmp);
1427 dnew->u.data.len = p - tmp;
1432 yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1433 if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
1436 p->diagnostic = res;
1438 data1_free_tree(p->dh, onode);
1439 data1_free_tree(p->dh, node);
1443 else if (p->comp && !res)
1447 data1_pr_tree (p->dh, node, stdout);
1449 yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1450 switch (p->output_format = (p->input_format != VAL_NONE ?
1451 p->input_format : VAL_SUTRS))
1455 data1_pr_tree (p->dh, node, stdout);
1457 /* default output encoding for XML is UTF-8 */
1458 data1_iconv (p->dh, mem, node,
1459 p->encoding ? p->encoding : "UTF-8",
1460 data1_get_encoding(p->dh, node));
1462 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1464 p->diagnostic = 238;
1467 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1468 memcpy (new_buf, p->rec_buf, p->rec_len);
1469 p->rec_buf = new_buf;
1473 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1475 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1477 p->diagnostic = 238; /* not available in requested syntax */
1479 p->rec_len = (size_t) (-1);
1482 /* ensure our data1 tree is UTF-8 */
1483 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1485 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1487 p->diagnostic = 238;
1489 p->rec_len = (size_t) (-1);
1492 /* ensure our data1 tree is UTF-8 */
1493 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1494 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1496 p->diagnostic = 238;
1498 p->rec_len = (size_t) (-1);
1502 data1_iconv (p->dh, mem, node, p->encoding,
1503 data1_get_encoding(p->dh, node));
1504 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1506 p->diagnostic = 238;
1509 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1510 memcpy (new_buf, p->rec_buf, p->rec_len);
1511 p->rec_buf = new_buf;
1516 data1_iconv (p->dh, mem, node, p->encoding,
1517 data1_get_encoding(p->dh, node));
1518 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1520 p->diagnostic = 238;
1523 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1524 memcpy (new_buf, p->rec_buf, p->rec_len);
1525 p->rec_buf = new_buf;
1529 if (!node->u.root.absyn)
1531 p->diagnostic = 238;
1534 for (marctab = node->u.root.absyn->marc; marctab;
1535 marctab = marctab->next)
1536 if (marctab->reference == p->input_format)
1540 p->diagnostic = 238;
1544 data1_iconv (p->dh, mem, node, p->encoding,
1545 data1_get_encoding(p->dh, node));
1546 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1547 selected, &p->rec_len)))
1548 p->diagnostic = 238;
1551 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1552 memcpy (new_buf, p->rec_buf, p->rec_len);
1553 p->rec_buf = new_buf;
1557 data1_free_tree(p->dh, node);
1559 data1_free_tree(p->dh, onode);