1 /* $Id: recgrs.c,v 1.109 2006-05-10 08:13:28 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 #include <sys/types.h>
32 #include <idzebra/recgrs.h>
34 #define GRS_MAX_WORD 512
36 struct source_parser {
/* Tokenizer step for source expressions.  Skips blanks in sp->src and then
   scans forward until one of the delimiter characters "<>();,-: " or the
   end of the string is reached.  On at least one path sp->lookahead
   receives the current character of sp->src.
   NOTE(review): sp_expr compares sp->lookahead with 't' and reads
   sp->tok / sp->len, so word tokens are presumably recorded here too;
   confirm against the complete body, including the return value. */
44 static int sp_lex(struct source_parser *sp)
46 while (*sp->src == ' ')
50 while (*sp->src && !strchr("<>();,-: ", *sp->src))
59 sp->lookahead = *sp->src;
66 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
/* Evaluates the argument list "(expr, start [, len])" of a range
   construct.  The first expression is evaluated into wrd; the second and
   the optional third are evaluated into a temporary word and converted
   with atoi_n to give start and len.  A closing ')' is required.  When
   wrd holds a non-empty term, the term buffer is advanced by start, its
   length reduced by start, and the length compared against len.
   NOTE(review): no check that start lies within the term is visible
   before term_buf/term_len are adjusted; confirm that out-of-range
   values cannot reach this point. */
68 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
75 if (sp->lookahead != '(')
77 sp_lex(sp); /* skip ( */
80 if (!sp_expr(sp, n, wrd))
83 if (sp->lookahead != ',')
85 sp_lex(sp); /* skip , */
88 if (!sp_expr(sp, n, &tmp_w))
90 start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
92 if (sp->lookahead == ',')
94 sp_lex(sp); /* skip , */
97 if (!sp_expr(sp, n, &tmp_w))
99 len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
105 if (sp->lookahead != ')')
109 if (wrd->term_buf && wrd->term_len)
111 wrd->term_buf += start;
112 wrd->term_len -= start;
113 if (wrd->term_len > len)
/* Evaluates the argument list "(expr, s1, s2, ...)" of a first construct.
   The first expression is evaluated into wrd.  Each further
   comma-separated expression is evaluated into search_w and looked for
   inside wrd's term by a byte-wise scan; the smallest matching offset is
   tracked in min_pos.  A closing ')' is required.  The result replaces
   wrd's term with the decimal text of min_pos (0 when nothing matched),
   copied into sp->nmem with nmem_strdup. */
119 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
124 if (sp->lookahead != '(')
126 sp_lex(sp); /* skip ( */
127 if (!sp_expr(sp, n, wrd))
129 while (sp->lookahead == ',')
133 sp_lex(sp); /* skip , */
135 if (!sp_expr(sp, n, &search_w))
137 for (i = 0; i<wrd->term_len; i++)
140 for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
141 if (wrd->term_buf[i+j] != search_w.term_buf[j])
143 if (j == search_w.term_len) /* match ? */
145 if (min_pos == -1 || i < min_pos)
151 if (sp->lookahead != ')')
155 min_pos = 0; /* the default if not found */
156 sprintf(num_str, "%d", min_pos);
157 wrd->term_buf = nmem_strdup(sp->nmem, num_str);
158 wrd->term_len = strlen(wrd->term_buf);
/* Evaluates one source expression for node n and stores the resulting
   term in wrd.  The current token (sp->tok, sp->len) selects the form:
     data        - the text of a DATA1N_data node
     tag         - the tag name of a DATA1N_tag node
     attr(expr)  - the value of the attribute of a DATA1N_tag node whose
                   name is given by the inner expression
     first(...)  - delegated to sp_first
     range(...)  - delegated to sp_range
     digits      - the token copied into sp->nmem
     'text'      - the token without its quotes, copied into sp->nmem
   NOTE(review): the attribute search loop continues only while BOTH the
   name length differs AND memcmp reports a difference, so it stops at the
   first attribute that merely has the same length (or the same prefix).
   The two tests look like they should be joined with ||; confirm.  Also
   confirm that p is checked for NULL before p->value is read. */
162 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
164 if (sp->lookahead != 't')
166 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
168 if (n->which == DATA1N_data)
170 wrd->term_buf = n->u.data.data;
171 wrd->term_len = n->u.data.len;
175 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
177 if (n->which == DATA1N_tag)
179 wrd->term_buf = n->u.tag.tag;
180 wrd->term_len = strlen(n->u.tag.tag);
184 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
188 if (sp->lookahead != '(')
192 if (!sp_expr(sp, n, &tmp_w))
197 if (n->which == DATA1N_tag)
199 data1_xattr *p = n->u.tag.attributes;
200 while (p && strlen(p->name) != tmp_w.term_len &&
201 memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
205 wrd->term_buf = p->value;
206 wrd->term_len = strlen(p->value);
209 if (sp->lookahead != ')')
213 else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
215 return sp_first(sp, n, wrd);
217 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
219 return sp_range(sp, n, wrd);
221 else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
224 wrd->term_len = sp->len;
225 b = nmem_malloc(sp->nmem, sp->len);
226 memcpy(b, sp->tok, sp->len);
230 else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
233 wrd->term_len = sp->len - 2;
234 b = nmem_malloc(sp->nmem, wrd->term_len);
235 memcpy(b, sp->tok+1, wrd->term_len);
/* Allocates a source_parser with xmalloc and gives it a fresh NMEM
   (nmem_create) for the strings produced while evaluating expressions.
   NOTE(review): initialisation of the remaining members and the return
   statement are presumably in the rest of the body; confirm. */
248 static struct source_parser *source_parser_create()
250 struct source_parser *sp = xmalloc(sizeof(*sp));
252 sp->nmem = nmem_create();
/* Tears down a parser made by source_parser_create: destroys its NMEM.
   NOTE(review): the xmalloc'ed parser object itself must be released as
   well; confirm that the complete body does so. */
256 static void source_parser_destroy(struct source_parser *sp)
260 nmem_destroy(sp->nmem);
/* Entry point for evaluating the source expression src against node n.
   The parser's NMEM is reset first, so terms allocated by a previous call
   are no longer valid afterwards.  The result is stored in wrd and the
   return value is that of sp_expr.
   NOTE(review): src is presumably installed as sp->src and the first
   token read before sp_expr is called; confirm. */
264 static int sp_parse(struct source_parser *sp,
265 data1_node *n, RecWord *wrd, const char *src)
271 nmem_reset(sp->nmem);
274 return sp_expr(sp, n, wrd);
/* Tests tag node n against XPath predicate p.
   Relation predicates: only attribute names (leading '@') and the '='
   operator are handled; anything else is logged with YLOG_WARN as
   ignored.  The node's attribute list is searched for the named
   attribute; with an operator present its value is compared to the
   predicate value with strcmp, without one the mere existence of the
   attribute is considered.
   Boolean predicates: "and" / "or" are evaluated recursively on the left
   and right operands with the usual short-circuit behaviour of && / ||;
   any other operator is logged as unknown and ignored.
   NOTE(review): the exact return values for the ignored and unmatched
   cases are set outside the code visible here; confirm. */
277 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
286 if (p->which == XPATH_PREDICATE_RELATION) {
287 if (p->u.relation.name[0]) {
288 if (*p->u.relation.name != '@') {
290 " Only attributes (@) are supported in xelm xpath predicates");
291 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
294 attname = p->u.relation.name + 1;
296 /* looking for the attribute with a specified name */
297 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
298 yaz_log(YLOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
300 if (!strcmp(attr->name, attname)) {
301 if (p->u.relation.op[0]) {
302 if (*p->u.relation.op != '=') {
304 "Only '=' relation is supported (%s)",p->u.relation.op);
305 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
308 yaz_log(YLOG_DEBUG," - value %s <-> %s",
309 p->u.relation.value, attr->value );
310 if (!strcmp(attr->value, p->u.relation.value)) {
315 /* attribute exists, no value specified */
320 yaz_log(YLOG_DEBUG, "return %d", res);
326 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
327 if (!strcmp(p->u.boolean.op,"and")) {
328 return d1_check_xpath_predicate(n, p->u.boolean.left)
329 && d1_check_xpath_predicate(n, p->u.boolean.right);
331 else if (!strcmp(p->u.boolean.op,"or")) {
332 return (d1_check_xpath_predicate(n, p->u.boolean.left)
333 || d1_check_xpath_predicate(n, p->u.boolean.right));
335 yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
/* Runs text through the DFA whose start state is dfaar[0].  For each
   input character the transitions of the current state are searched for
   one whose inclusive range ch[0]..ch[1] contains the character.
   NOTE(review): the state hand-over and the values returned on match and
   on failure are in parts of the body not shown with the visible loops;
   confirm before relying on a particular return convention. */
344 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
346 struct DFA_state *s = dfaar[0]; /* start state */
349 const char *p = text;
352 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
354 if (c >= t->ch[0] && c <= t->ch[1])
358 /* move to next state and return if we get a match */
366 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
367 if (c >= t->ch[0] && c <= t->ch[1])
377 New function, looking for xpath "element" definitions in abs, by
378 tagpath, using a kind of ugly regexp search. The DFA was built while
379 parsing abs, so here we just go through them and try to match
380 against the given tagpath. The first matching entry is returned.
384 Added support for enhanced xelm. Now [] predicates are considered
385 as well, when selecting indexing rules... (why the hell it's called
/* Looks up the termlists of the first xpath element definition in the
   abstract syntax of n's root that applies to tagpath.
   The path is formatted as "/<tagpath>\n" into a buffer obtained from
   xmalloc and matched against the DFA of each xp_elements entry with
   dfa_match_first.  For a DFA match the predicates of the xpath location
   steps are checked with d1_check_xpath_predicate, walking from n up
   through its ancestor tags while stepping backwards through the
   location steps.  On success the entry's termlists are returned.
   NOTE(review): the release of pexpr and the value returned when nothing
   matches are not among the visible lines; confirm that pexpr is freed
   on every return path. */
392 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
394 data1_absyn *abs = n->root->u.root.absyn;
395 data1_xpelement *xpe = abs->xp_elements;
398 struct xpath_location_step *xp;
400 char *pexpr = xmalloc(strlen(tagpath)+5);
403 sprintf (pexpr, "/%s\n", tagpath);
404 for (; xpe; xpe = xpe->next)
407 ok = dfa_match_first(xpe->dfa->states, pexpr);
411 /* we have to check the predicates up to the root node */
414 /* find the first tag up in the node structure */
415 for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent)
418 /* go from inside out in the node structure, while going
419 backwards through xpath location steps ... */
420 for (i = xpe->xpath_len - 1; i>0; i--)
422 yaz_log(YLOG_DEBUG, "Checking step %d: %s on tag %s",
423 i, xp[i].part, nn->u.tag.tag);
425 if (!d1_check_xpath_predicate(nn, xp[i].predicate))
427 yaz_log(YLOG_DEBUG, " Predicates didn't match");
432 if (nn->which == DATA1N_tag)
444 yaz_log(YLOG_DEBUG, "Got it");
445 return xpe->termlists;
452 1 start element (tag)
454 3 start attr (and attr-exact)
462 Now, if there is a matching xelm described in abs, for the
463 indexed element or the attribute, then the data is handled according
464 to those definitions...
466 modified by pop, 2002-12-13
/* Prepares three successive words in wrd for one attribute:
     1. element begin (ZEBRA_XPATH_ELM_BEGIN), type '0', term = tag_path
     2. attribute     (ZEBRA_XPATH_ATTR),      type 'w', term = value
     3. element end   (ZEBRA_XPATH_ELM_END),   type '0', term = tag_path
   NOTE(review): each set-up is presumably followed by a p->tokenAdd call,
   and the index_name / attrSet assignments are presumably alternative
   branches of a compile-time switch; neither is visible here.  The
   name and structure parameters are not used in the visible lines. */
469 /* add xpath index for an attribute */
470 static void index_xpath_attr (char *tag_path, char *name, char *value,
471 char *structure, struct recExtractCtrl *p,
475 wrd->index_name = ZEBRA_XPATH_ELM_BEGIN;
477 wrd->attrSet = VAL_IDXPATH;
480 wrd->index_type = '0';
481 wrd->term_buf = tag_path;
482 wrd->term_len = strlen(tag_path);
487 wrd->index_name = ZEBRA_XPATH_ATTR;
491 wrd->index_type = 'w';
492 wrd->term_buf = value;
493 wrd->term_len = strlen(value);
497 wrd->index_name = ZEBRA_XPATH_ELM_END;
501 wrd->index_type = '0';
502 wrd->term_buf = tag_path;
503 wrd->term_len = strlen(tag_path);
/* Writes the tag path of node n into tag_path_full (capacity max bytes).
   The walk starts at n and follows parent links, so the innermost tag
   comes first; every tag name is followed by '/'.  A tag that would not
   fit (name length plus current length exceeds max - 2) is tested for
   before copying, the walk has a dedicated test for the DATA1N_root
   node, and the result is NUL-terminated.
   NOTE(review): what happens when the size test fires or the root is
   reached (presumably the loop ends) is not visible; confirm. */
508 static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
513 /* we have to fetch the whole path to the data tag */
514 for (nn = n; nn; nn = nn->parent)
516 if (nn->which == DATA1N_tag)
518 size_t tlen = strlen(nn->u.tag.tag);
519 if (tlen + flen > (max - 2))
521 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
523 tag_path_full[flen++] = '/';
526 if (nn->which == DATA1N_root)
529 tag_path_full[flen] = 0;
/* XPath-oriented indexing of one node.
   Runs only when the root has no abstract syntax or the abstract syntax
   has enable_xpath_indexing set.
   Data: the node text becomes the term; the full tag path is built with
   mk_tag_path_full and, if xpath_termlist_by_tagpath yields a termlist,
   each entry is applied to a copy of wrd (wrd_tl) after evaluating its
   source expression with sp_parse.  Entries either feed the xpath index
   (the "!" case) or an ordinary attribute based index; words are handed
   to p->tokenAdd, or printed when p->flagShowRecords is set.  Without a
   termlist match the text may be indexed as type 'w' by default.
   Tags: the tag path itself is indexed as type '0'; for a start tag the
   attributes are processed as well - attribute name, "name=value" style
   exact value (built in comb), and per-attribute termlists looked up
   under the path "@<attr>/<tag path>".
   NOTE(review): attr_tag_path_full is 1024 bytes and is filled with an
   unbounded sprintf from xp->name plus tag_path_full (itself up to 1024
   bytes); this looks like a possible overflow - confirm or bound it.
   NOTE(review): tll[] has MAX_ATTR_COUNT entries; confirm that the index
   used for it is limited when a tag has more attributes.
   NOTE(review): termlist_only is initialised to 1; whether it is changed
   before the "!termlist_only" tests is not visible here. */
533 static void index_xpath(struct source_parser *sp, data1_node *n,
534 struct recExtractCtrl *p,
535 int level, RecWord *wrd,
545 char tag_path_full[1024];
546 int termlist_only = 1;
551 int xpath_is_start = 0;
557 yaz_log(YLOG_DEBUG, "index_xpath level=%d xpath_index=%s",
560 yaz_log(YLOG_DEBUG, "index_xpath level=%d use=%d", level, use);
562 if ((!n->root->u.root.absyn) ||
563 (n->root->u.root.absyn->enable_xpath_indexing)) {
570 wrd->term_buf = n->u.data.data;
571 wrd->term_len = n->u.data.len;
574 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
576 /* If we have a matching termlist... */
577 if (n->root->u.root.absyn &&
578 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
580 for (; tl; tl = tl->next)
582 /* need to copy recword because it may be changed */
584 wrd->index_type = *tl->structure;
585 memcpy (&wrd_tl, wrd, sizeof(*wrd));
587 sp_parse(sp, n, &wrd_tl, tl->source);
594 /* this is the ! case, so structure is for the xpath index */
596 wrd_tl.index_name = xpath_index;
598 wrd_tl.attrSet = VAL_IDXPATH;
599 wrd_tl.attrUse = use;
601 if (p->flagShowRecords)
604 printf("%*sXPath index", (level + 1) * 4, "");
605 printf (" XData:\"");
606 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
607 fputc (wrd_tl.term_buf[i], stdout);
609 if (wrd_tl.term_len > 40)
611 fputc ('\n', stdout);
614 (*p->tokenAdd)(&wrd_tl);
617 /* this is just the old fashioned attribute based index */
619 wrd_tl.index_name = tl->index_name;
621 wrd_tl.attrSet = (int) (tl->att->parent->reference);
622 wrd_tl.attrUse = tl->att->locals->local;
624 if (p->flagShowRecords)
627 printf("%*sIdx: [%s]", (level + 1) * 4, "",
630 printf("%s %s", tl->index_name, tl->source);
632 printf("%s:%s [%d] %s",
633 tl->att->parent->name,
634 tl->att->name, tl->att->value,
637 printf (" XData:\"");
638 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
639 fputc (wrd_tl.term_buf[i], stdout);
641 if (wrd_tl.term_len > 40)
643 fputc ('\n', stdout);
646 (*p->tokenAdd)(&wrd_tl);
650 /* xpath indexing is done, if there was no termlist given,
651 or no ! in the termlist, and default indexing is enabled... */
652 if (!p->flagShowRecords && !xpdone && !termlist_only)
655 wrd->index_name = xpath_index;
657 wrd->attrSet = VAL_IDXPATH;
660 wrd->index_type = 'w';
665 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
667 wrd->index_type = '0';
668 wrd->term_buf = tag_path_full;
669 wrd->term_len = strlen(tag_path_full);
671 wrd->index_name = xpath_index;
673 wrd->attrSet = VAL_IDXPATH;
676 if (p->flagShowRecords)
678 printf("%*s tag=", (level + 1) * 4, "");
679 for (i = 0; i<wrd->term_len && i < 40; i++)
680 fputc (wrd->term_buf[i], stdout);
691 /* Add tag start/end xpath index, only when there is a ! in
692 the appropriate xelm directive, or default xpath indexing
695 if (!(do_xpindex = 1 - termlist_only))
697 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n)))
699 for (; tl; tl = tl->next)
711 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
714 if (xpath_is_start == 1) /* only for the starting tag... */
716 #define MAX_ATTR_COUNT 50
717 data1_termlist *tll[MAX_ATTR_COUNT];
721 /* get termlists for attributes, and find out, if we have to do xpath indexing */
722 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
727 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
729 int do_xpindex = 1 - termlist_only;
731 char attr_tag_path_full[1024];
733 /* this could be cached as well */
734 sprintf (attr_tag_path_full, "@%s/%s",
735 xp->name, tag_path_full);
737 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
739 /* if there is a ! in the xelm termlist, or default indexing is on,
740 proceed with xpath idx */
743 for (; tl; tl = tl->next)
757 /* attribute (no value) */
758 wrd->index_type = '0';
760 wrd->index_name = ZEBRA_XPATH_ATTR;
764 wrd->term_buf = xp->name;
765 wrd->term_len = strlen(xp->name);
771 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
773 /* attribute value exact */
774 strcpy (comb, xp->name);
776 strcat (comb, xp->value);
779 wrd->index_name = ZEBRA_XPATH_ATTR;
783 wrd->index_type = '0';
784 wrd->term_buf = comb;
785 wrd->term_len = strlen(comb);
795 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
797 char attr_tag_path_full[1024];
800 sprintf (attr_tag_path_full, "@%s/%s",
801 xp->name, tag_path_full);
805 /* If there is a termlist given (=xelm directive) */
806 for (; tl; tl = tl->next)
814 /* add xpath index for the attribute */
815 index_xpath_attr (attr_tag_path_full, xp->name,
816 xp->value, tl->structure,
820 /* index attribute value (only path/@attr) */
824 wrd->index_name = tl->index_name;
827 (tl->att->parent->reference);
828 wrd->attrUse = tl->att->locals->local;
830 wrd->index_type = *tl->structure;
831 wrd->term_buf = xp->value;
832 wrd->term_len = strlen(xp->value);
838 /* if there was no termlist for the given path,
839 or the termlist didn't have a ! element, index
840 the attribute as "w" */
841 if ((!xpdone) && (!termlist_only))
843 index_xpath_attr (attr_tag_path_full, xp->name,
844 xp->value, "w", p, wrd);
/* Indexes node n according to the termlists of the element that applies
   to tag node par.  When par has no element of its own, the ancestors
   are searched (get_parent_tag) until a tag with an element is found;
   nothing is done when none exists or the element has no termlists.
   For every termlist entry the source expression is evaluated with
   sp_parse into wrd; a non-empty result is either printed (when
   p->flagShowRecords is set) or set up with the entry's structure and
   index name / attribute set and use for indexing.
   NOTE(review): the p->tokenAdd call that would hand the word over is
   not among the visible lines, and dtype is assigned but its use is not
   shown; confirm. */
853 static void index_termlist (struct source_parser *sp, data1_node *par,
855 struct recExtractCtrl *p, int level, RecWord *wrd)
857 data1_termlist *tlist = 0;
858 data1_datatype dtype = DATA1K_string;
861 * cycle up towards the root until we find a tag with an att..
862 * this has the effect of indexing locally defined tags with
863 * the attribute of their ancestor in the record.
866 while (!par->u.tag.element)
867 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
869 if (!par || !(tlist = par->u.tag.element->termlists))
871 if (par->u.tag.element->tag)
872 dtype = par->u.tag.element->tag->kind;
874 for (; tlist; tlist = tlist->next)
876 /* consider source */
878 assert(tlist->source);
879 sp_parse(sp, n, wrd, tlist->source);
881 if (wrd->term_buf && wrd->term_len)
883 if (p->flagShowRecords)
886 printf("%*sIdx: [%s]", (level + 1) * 4, "",
889 printf("%s %s", tlist->index_name, tlist->source);
891 printf("%s:%s [%d] %s",
892 tlist->att->parent->name,
893 tlist->att->name, tlist->att->value,
896 printf (" XData:\"");
897 for (i = 0; i<wrd->term_len && i < 40; i++)
898 fputc (wrd->term_buf[i], stdout);
900 if (wrd->term_len > 40)
902 fputc ('\n', stdout);
906 wrd->index_type = *tlist->structure;
908 wrd->index_name = tlist->index_name;
910 wrd->attrSet = (int) (tlist->att->parent->reference);
911 wrd->attrUse = tlist->att->locals->local;
/* Recursive tree walk that produces index keys for n and all of its
   following siblings (n->next), descending into n->child with level + 1.
   With p->flagShowRecords set, a description of root and tag nodes and
   the text of data nodes is printed, indented by level.
   Tag nodes: index_termlist is applied, and index_xpath is called for
   the element start before the children and for the element end after
   them.  Data nodes: index_termlist is applied using the parent tag from
   get_parent_tag, followed by index_xpath for the character data.
   The result of the recursive call is tested for a negative value.
   NOTE(review): the paired index_xpath calls with symbolic and numeric
   last arguments are presumably alternative compile-time branches, and
   the function's own return values are not visible; confirm. */
919 static int dumpkeys_r(struct source_parser *sp,
920 data1_node *n, struct recExtractCtrl *p, int level,
923 for (; n; n = n->next)
925 if (p->flagShowRecords) /* display element description to user */
927 if (n->which == DATA1N_root)
929 printf("%*s", level * 4, "");
930 printf("Record type: '%s'\n", n->u.root.type);
932 else if (n->which == DATA1N_tag)
936 printf("%*s", level * 4, "");
937 if (!(e = n->u.tag.element))
938 printf("Local tag: '%s'\n", n->u.tag.tag);
941 printf("Elm: '%s' ", e->name);
944 data1_tag *t = e->tag;
946 printf("TagNam: '%s' ", t->names->name);
949 printf("%s[%d],", t->tagset->name, t->tagset->type);
952 if (t->which == DATA1T_numeric)
953 printf("%d)", t->value.numeric);
955 printf("'%s')", t->value.string);
962 if (n->which == DATA1N_tag)
964 index_termlist(sp, n, n, p, level, wrd);
965 /* index start tag */
967 if (n->root->u.root.absyn)
968 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN,
971 if (n->root->u.root.absyn)
972 index_xpath(sp, n, p, level, wrd, 1);
977 if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
981 if (n->which == DATA1N_data)
983 data1_node *par = get_parent_tag(p->dh, n);
985 if (p->flagShowRecords)
987 printf("%*s", level * 4, "");
989 if (n->u.data.len > 256)
990 printf("'%.170s ... %.70s'\n", n->u.data.data,
991 n->u.data.data + n->u.data.len-70);
992 else if (n->u.data.len > 0)
993 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
999 index_termlist(sp, par, n, p, level, wrd);
1002 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA,
1005 index_xpath(sp, n, p, level, wrd, 1016);
1009 if (n->which == DATA1N_tag)
1013 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END,
1016 index_xpath(sp, n, p, level, wrd, 2);
1020 if (p->flagShowRecords && n->which == DATA1N_root)
1022 printf("%*s-------------\n\n", level * 4, "");
/* Generates index keys for the tree rooted at n: creates a temporary
   source parser, runs dumpkeys_r from level 0 with it, and destroys the
   parser again.
   NOTE(review): the return statement is not visible; presumably the
   result r of dumpkeys_r is passed back - confirm. */
1028 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
1030 struct source_parser *sp = source_parser_create();
1031 int r = dumpkeys_r(sp, n, p, 0, wrd);
1032 source_parser_destroy(sp);
/* Extracts index keys from an already built data1 tree n.
   When the root carries an abstract syntax, its reference is turned into
   a Z39.50 schema OID (PROTO_Z3950 / CLASS_SCHEMA) with oid_ent_to_oid
   and, if that succeeds, reported through p->schemaAdd.  The word
   structure is then initialised with p->init and the tree is processed
   by dumpkeys, whose result is returned. */
1036 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
1039 int oidtmp[OID_SIZE];
1042 oe.proto = PROTO_Z3950;
1043 oe.oclass = CLASS_SCHEMA;
1044 if (n->u.root.absyn)
1046 oe.value = n->u.root.absyn->reference;
1048 if ((oid_ent_to_oid (&oe, oidtmp)))
1049 (*p->schemaAdd)(p, oidtmp);
1051 (*p->init)(p, &wrd);
1053 return dumpkeys(n, p, &wrd);
/* Reads one record with the supplied grs_read callback and extracts its
   index keys.
   The reader is given the I/O callbacks and offset from p plus
   clientData.  The schema OID of the record's abstract syntax is
   reported through p->schemaAdd when it can be derived.  The tree's text
   nodes are concatenated (data1_concat_text), the tree is converted to
   UTF-8 (data1_iconv), and dumpkeys is run on it.
   Returns RECCTRL_EXTRACT_EOF or RECCTRL_EXTRACT_ERROR on the early-exit
   paths, RECCTRL_EXTRACT_ERROR_GENERIC when dumpkeys reports a negative
   result, otherwise RECCTRL_EXTRACT_OK; the tree is freed with
   data1_free_tree on the last two paths.
   NOTE(review): the conditions guarding the EOF return, the unconditional
   looking absyn test and the data1_pr_tree call are not visible
   (presumably a NULL check and compile-time switches); confirm. */
1056 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
1058 data1_node *(*grs_read)(struct grs_read_info *))
1061 struct grs_read_info gri;
1063 int oidtmp[OID_SIZE];
1066 gri.readf = p->readf;
1067 gri.seekf = p->seekf;
1068 gri.tellf = p->tellf;
1071 gri.offset = p->offset;
1074 gri.clientData = clientData;
1076 n = (*grs_read)(&gri);
1078 return RECCTRL_EXTRACT_EOF;
1079 oe.proto = PROTO_Z3950;
1080 oe.oclass = CLASS_SCHEMA;
1082 if (!n->u.root.absyn)
1083 return RECCTRL_EXTRACT_ERROR;
1085 if (n->u.root.absyn)
1087 oe.value = n->u.root.absyn->reference;
1088 if ((oid_ent_to_oid (&oe, oidtmp)))
1089 (*p->schemaAdd)(p, oidtmp);
1091 data1_concat_text(p->dh, mem, n);
1093 /* ensure our data1 tree is UTF-8 */
1094 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1097 data1_pr_tree (p->dh, n, stdout);
1100 (*p->init)(p, &wrd);
1101 if (dumpkeys(n, p, &wrd) < 0)
1103 data1_free_tree(p->dh, n);
1104 return RECCTRL_EXTRACT_ERROR_GENERIC;
1106 data1_free_tree(p->dh, n);
1107 return RECCTRL_EXTRACT_OK;
/* Public extraction entry point: creates an NMEM for the duration of the
   call and delegates to grs_extract_sub with the given reader callback.
   NOTE(review): the NMEM must be destroyed and ret returned after the
   call; those lines are not visible here - confirm. */
1110 int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
1111 data1_node *(*grs_read)(struct grs_read_info *))
1114 NMEM mem = nmem_create ();
1115 ret = grs_extract_sub(clientData, p, mem, grs_read);
1121 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/* Applies the record composition c to the tree n.
   Simple composition: only the generic element set name form is
   accepted (diagnostic 26 otherwise); the name is resolved with
   data1_getesetbyname and an unknown name yields diagnostic 25 with the
   name copied into *addinfo (allocated on ODR o).
   Complex composition: the generic element spec may name an element set
   (resolved the same way) or carry an external spec, of which only
   Espec-1 is accepted (diagnostic 25 otherwise); other forms yield
   diagnostic 26.
   When an Espec-1 has been found it is applied with data1_doespec1 and
   that result is returned.
   NOTE(review): how a resolved element set name leads to espec, and the
   value returned on the "all match" path, are not visible; confirm. */
1123 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
1124 char **addinfo, ODR o)
1126 data1_esetname *eset;
1127 Z_Espec1 *espec = 0;
1132 case Z_RecordComp_simple:
1133 if (c->u.simple->which != Z_ElementSetNames_generic)
1134 return 26; /* only generic form supported. Fix this later */
1135 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1136 c->u.simple->u.generic)))
1138 yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1139 *addinfo = odr_strdup(o, c->u.simple->u.generic);
1140 return 25; /* invalid esetname */
1142 yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
1143 c->u.simple->u.generic);
1146 case Z_RecordComp_complex:
1147 if (c->u.complex->generic)
1149 /* insert check for schema */
1150 if ((p = c->u.complex->generic->elementSpec))
1154 case Z_ElementSpec_elementSetName:
1156 data1_getesetbyname(dh, n->u.root.absyn,
1157 p->u.elementSetName)))
1159 yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
1160 p->u.elementSetName);
1161 *addinfo = odr_strdup(o, p->u.elementSetName);
1162 return 25; /* invalid esetname */
1164 yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
1165 p->u.elementSetName);
1168 case Z_ElementSpec_externalSpec:
1169 if (p->u.externalSpec->which == Z_External_espec1)
1171 yaz_log(YLOG_DEBUG, "Got Espec-1");
1172 espec = p->u.externalSpec-> u.espec1;
1176 yaz_log(YLOG_LOG, "Unknown external espec.");
1177 return 25; /* bad. what is proper diagnostic? */
1184 return 26; /* fix */
1188 yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1189 return data1_doespec1(dh, n, espec);
1193 yaz_log(YLOG_DEBUG, "Element: all match");
1198 /* Add Zebra info in separate namespace ...
1201 <metadata xmlns="http://www.indexdata.dk/zebra/">
1203 <localnumber>447</localnumber>
1204 <filename>records/genera.xml</filename>
/* Appends a Zebra metadata element to top for XML output.
   A tag named "idzebra" with the attribute
   xmlns="http://www.indexdata.dk/zebra/" is created under top, and child
   tags "size" (p->recordSize), "score" (p->score), "localnumber"
   (p->localno) and "filename" (p->fname) are created inside it.  The
   i2 / i4 strings are text nodes inserted between the tags for layout.
   NOTE(review): conditions that may guard the individual children are
   not visible here; confirm. */
1209 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1212 const char *idzebra_ns[3];
1213 const char *i2 = "\n ";
1214 const char *i4 = "\n ";
1217 idzebra_ns[0] = "xmlns";
1218 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1221 data1_mk_text (p->dh, mem, i2, top);
1223 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1225 data1_mk_text (p->dh, mem, "\n", top);
1227 data1_mk_text (p->dh, mem, i4, n);
1229 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1233 data1_mk_text (p->dh, mem, i4, n);
1234 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1236 data1_mk_text (p->dh, mem, i4, n);
1237 data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
1240 data1_mk_text (p->dh, mem, i4, n);
1241 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1243 data1_mk_text (p->dh, mem, i2, n);
/* Public retrieval entry point: reads a record with grs_read and renders
   it in the syntax requested by p->input_format.
   Visible stages, in order:
     - read the tree and concatenate adjacent text (data1_concat_text);
     - add system tags looked up with data1_systag_lookup: size, rank
       (when p->score >= 0) and sysno (when p->localno > 0);
     - for VAL_TEXT_XML input format, add Zebra metadata
       (zebra_xml_metadata);
     - when a complex composition names a schema OID, map the record with
       a matching maptab (data1_map_record) and set diagnostic 238 if the
       resulting schema differs from the requested one;
     - map with a maptab whose target equals p->input_format, if any;
     - for VAL_GRS1 with a known schema reference, add a
       "schemaIdentifier" tag holding the dotted OID;
     - apply the element specification with process_comp, recording a
       diagnostic and freeing the trees on failure;
     - convert to the output syntax (p->output_format, VAL_SUTRS when no
       input format is given) via the data1_nodeto* functions, converting
       the character set with data1_iconv first.  Diagnostic 238 is set
       when a conversion is not available.  Buffers returned with an
       explicit length are copied into ODR memory (odr_malloc on p->odr);
       the GRS-1, explain and summary results set rec_len to (size_t) -1.
   Finally the trees are released with data1_free_tree.
   NOTE(review): the case labels of the output switch, the handling of a
   failed read, and the function's return values are not visible here;
   confirm.  Inside the schemaIdentifier block a local named p shadows
   the recRetrieveCtrl parameter. */
1246 int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
1247 data1_node *(*grs_read)(struct grs_read_info *))
1249 data1_node *node = 0, *onode = 0, *top;
1252 int res, selected = 0;
1254 struct grs_read_info gri;
1255 const char *tagname;
1257 int requested_schema = VAL_NONE;
1258 data1_marctab *marctab;
1261 mem = nmem_create();
1262 gri.readf = p->readf;
1263 gri.seekf = p->seekf;
1264 gri.tellf = p->tellf;
1270 gri.clientData = clientData;
1272 yaz_log(YLOG_DEBUG, "grs_retrieve");
1273 node = (*grs_read)(&gri);
1280 data1_concat_text(p->dh, mem, node);
1283 data1_pr_tree (p->dh, node, stdout);
1285 top = data1_get_root_tag (p->dh, node);
1287 yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1288 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1290 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1292 dnew->u.data.what = DATA1I_text;
1293 dnew->u.data.data = dnew->lbuf;
1294 sprintf(dnew->u.data.data, "%d", p->recordSize);
1295 dnew->u.data.len = strlen(dnew->u.data.data);
1298 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1299 if (tagname && p->score >= 0 &&
1300 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1302 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1303 dnew->u.data.what = DATA1I_num;
1304 dnew->u.data.data = dnew->lbuf;
1305 sprintf(dnew->u.data.data, "%d", p->score);
1306 dnew->u.data.len = strlen(dnew->u.data.data);
1309 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1310 "localControlNumber");
1311 if (tagname && p->localno > 0 &&
1312 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1314 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1315 dnew->u.data.what = DATA1I_text;
1316 dnew->u.data.data = dnew->lbuf;
1318 sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
1319 dnew->u.data.len = strlen(dnew->u.data.data);
1322 if (p->input_format == VAL_TEXT_XML)
1323 zebra_xml_metadata (p, top, mem);
1326 data1_pr_tree (p->dh, node, stdout);
1328 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1329 p->comp->u.complex->generic &&
1330 p->comp->u.complex->generic->which == Z_Schema_oid &&
1331 p->comp->u.complex->generic->schema.oid)
1333 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1335 requested_schema = oe->value;
1337 /* If schema has been specified, map if possible, then check that
1338 * we got the right one
1340 if (requested_schema != VAL_NONE)
1342 yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1343 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1345 if (map->target_absyn_ref == requested_schema)
1348 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1357 if (node->u.root.absyn &&
1358 requested_schema != node->u.root.absyn->reference)
1360 p->diagnostic = 238;
1366 * Does the requested format match a known syntax-mapping? (this reflects
1367 * the overlap of schema and formatting which is inherent in the MARC
1370 yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1371 if (node->u.root.absyn)
1372 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1374 if (map->target_absyn_ref == p->input_format)
1377 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1386 yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1387 if (node->u.root.absyn &&
1388 node->u.root.absyn->reference != VAL_NONE &&
1389 p->input_format == VAL_GRS1)
1393 int oidtmp[OID_SIZE];
1395 oe.proto = PROTO_Z3950;
1396 oe.oclass = CLASS_SCHEMA;
1397 oe.value = node->u.root.absyn->reference;
1399 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1402 data1_handle dh = p->dh;
1406 for (ii = oid; *ii >= 0; ii++)
1410 sprintf(p, "%d", *ii);
1413 if ((dnew = data1_mk_tag_data_wd(dh, top,
1414 "schemaIdentifier", mem)))
1416 dnew->u.data.what = DATA1I_oid;
1417 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1418 memcpy(dnew->u.data.data, tmp, p - tmp);
1419 dnew->u.data.len = p - tmp;
1424 yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1425 if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
1428 p->diagnostic = res;
1430 data1_free_tree(p->dh, onode);
1431 data1_free_tree(p->dh, node);
1435 else if (p->comp && !res)
1439 data1_pr_tree (p->dh, node, stdout);
1441 yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1442 switch (p->output_format = (p->input_format != VAL_NONE ?
1443 p->input_format : VAL_SUTRS))
1447 data1_pr_tree (p->dh, node, stdout);
1449 /* default output encoding for XML is UTF-8 */
1450 data1_iconv (p->dh, mem, node,
1451 p->encoding ? p->encoding : "UTF-8",
1452 data1_get_encoding(p->dh, node));
1454 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1456 p->diagnostic = 238;
1459 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1460 memcpy (new_buf, p->rec_buf, p->rec_len);
1461 p->rec_buf = new_buf;
1465 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1467 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1469 p->diagnostic = 238; /* not available in requested syntax */
1471 p->rec_len = (size_t) (-1);
1474 /* ensure our data1 tree is UTF-8 */
1475 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1477 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1479 p->diagnostic = 238;
1481 p->rec_len = (size_t) (-1);
1484 /* ensure our data1 tree is UTF-8 */
1485 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1486 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1488 p->diagnostic = 238;
1490 p->rec_len = (size_t) (-1);
1494 data1_iconv (p->dh, mem, node, p->encoding,
1495 data1_get_encoding(p->dh, node));
1496 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1498 p->diagnostic = 238;
1501 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1502 memcpy (new_buf, p->rec_buf, p->rec_len);
1503 p->rec_buf = new_buf;
1508 data1_iconv (p->dh, mem, node, p->encoding,
1509 data1_get_encoding(p->dh, node));
1510 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1512 p->diagnostic = 238;
1515 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1516 memcpy (new_buf, p->rec_buf, p->rec_len);
1517 p->rec_buf = new_buf;
1521 if (!node->u.root.absyn)
1523 p->diagnostic = 238;
1526 for (marctab = node->u.root.absyn->marc; marctab;
1527 marctab = marctab->next)
1528 if (marctab->reference == p->input_format)
1532 p->diagnostic = 238;
1536 data1_iconv (p->dh, mem, node, p->encoding,
1537 data1_get_encoding(p->dh, node));
1538 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1539 selected, &p->rec_len)))
1540 p->diagnostic = 238;
1543 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1544 memcpy (new_buf, p->rec_buf, p->rec_len);
1545 p->rec_buf = new_buf;
1549 data1_free_tree(p->dh, node);
1551 data1_free_tree(p->dh, onode);
1559 * indent-tabs-mode: nil
1561 * vim: shiftwidth=4 tabstop=8 expandtab