1 /* $Id: recgrs.c,v 1.6 2006-09-28 18:38:47 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include <sys/types.h>
32 #include <idzebra/recgrs.h>
34 #define GRS_MAX_WORD 512
/* Parser state for the source expressions handled by sp_parse and sp_expr.
   Only the opening line of the definition is visible in this extract;
   code in this file accesses the members src, tok, len, lookahead and
   nmem. */
36 struct source_parser {
/* Lexer step for the source-expression parser.  The visible lines test for
   blanks at sp->src, scan forward until a character from the delimiter set
   (angle brackets, parentheses, semicolon, comma, minus, colon, blank) or
   the end of the string, and on one path store the current character in
   sp->lookahead.  Callers compare sp->lookahead with the letter t to
   detect a word token.
   NOTE(review): the loop bodies, the sp->tok / sp->len bookkeeping and the
   return value are not visible in this extract -- confirm against the
   full file. */
44 static int sp_lex(struct source_parser *sp)
46 while (*sp->src == ' ')
50 while (*sp->src && !strchr("<>();,-: ", *sp->src))
59 sp->lookahead = *sp->src;
66 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
/* Evaluates the argument list of range: ( expr , start [, len] ).
   The first expression is evaluated into wrd.  start and the optional len
   are evaluated into a scratch RecWord (tmp_w) and converted with atoi_n.
   When wrd holds a non-empty term, the term is advanced by start bytes and
   its length is compared against len (presumably to truncate it; the
   assignment is not visible).
   The visible checks test sp->lookahead for the expected punctuation and
   the result of each sp_expr call; the failure returns are not visible. */
68 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
75 if (sp->lookahead != '(')
77 sp_lex(sp); /* skip ( */
80 if (!sp_expr(sp, n, wrd))
83 if (sp->lookahead != ',')
85 sp_lex(sp); /* skip , */
88 if (!sp_expr(sp, n, &tmp_w))
90 start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
92 if (sp->lookahead == ',')
94 sp_lex(sp); /* skip , */
97 if (!sp_expr(sp, n, &tmp_w))
99 len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
105 if (sp->lookahead != ')')
109 if (wrd->term_buf && wrd->term_len)
/* NOTE(review): no visible line checks start against wrd->term_len.  A
   start larger than the term would move term_buf past the data and make
   term_len negative -- confirm whether a dropped line guards this. */
111 wrd->term_buf += start;
112 wrd->term_len -= start;
113 if (wrd->term_len > len)
/* Evaluates the argument list of first: ( expr , search [, search ...] ).
   The first expression is evaluated into wrd.  Each following expression
   is evaluated into search_w and wrd is scanned for an occurrence of that
   search term; the smallest matching offset seen so far is tracked in
   min_pos.  Finally the term of wrd is replaced by the decimal text of
   min_pos, allocated from sp->nmem; 0 is used as the default when nothing
   was found. */
119 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
124 if (sp->lookahead != '(')
126 sp_lex(sp); /* skip ( */
127 if (!sp_expr(sp, n, wrd))
129 while (sp->lookahead == ',')
133 sp_lex(sp); /* skip , */
135 if (!sp_expr(sp, n, &search_w))
/* try every start offset i in wrd; j counts how many bytes of the search
   term matched at that offset */
137 for (i = 0; i<wrd->term_len; i++)
140 for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
141 if (wrd->term_buf[i+j] != search_w.term_buf[j])
143 if (j == search_w.term_len) /* match ? */
145 if (min_pos == -1 || i < min_pos)
151 if (sp->lookahead != ')')
155 min_pos = 0; /* the default if not found */
156 sprintf(num_str, "%d", min_pos);
157 wrd->term_buf = nmem_strdup(sp->nmem, num_str);
158 wrd->term_len = strlen(wrd->term_buf);
/* Evaluates one source expression for node n and stores the resulting term
   in wrd.  The current token (sp->tok, sp->len) selects the form:
     data        - the data of a DATA1N_data node
     tag         - the tag name of a DATA1N_tag node
     attr(expr)  - the value of the attribute named by expr on a tag node
     first(...)  - delegated to sp_first
     range(...)  - delegated to sp_range
     digits      - the token text itself, copied into sp->nmem
     quoted text - the text between the single quotes, copied into sp->nmem
   NOTE(review): the return statements and the handling of non-matching
   node types are not visible in this extract. */
162 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
164 if (sp->lookahead != 't')
166 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
168 if (n->which == DATA1N_data)
170 wrd->term_buf = n->u.data.data;
171 wrd->term_len = n->u.data.len;
175 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
177 if (n->which == DATA1N_tag)
179 wrd->term_buf = n->u.tag.tag;
180 wrd->term_len = strlen(n->u.tag.tag);
184 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
188 if (sp->lookahead != '(')
192 if (!sp_expr(sp, n, &tmp_w))
197 if (n->which == DATA1N_tag)
199 data1_xattr *p = n->u.tag.attributes;
/* NOTE(review): with && the scan stops at the first attribute whose name
   has the same length as the requested name OR whose leading bytes compare
   equal, so a different attribute can be selected.  An exact name match
   would need || between the two tests -- confirm and fix in the full
   file. */
200 while (p && strlen(p->name) != tmp_w.term_len &&
201 memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
/* NOTE(review): p may be null after the scan; the guard, if any, is not
   visible here. */
205 wrd->term_buf = p->value;
206 wrd->term_len = strlen(p->value);
209 if (sp->lookahead != ')')
213 else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
215 return sp_first(sp, n, wrd);
217 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
219 return sp_range(sp, n, wrd);
221 else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
/* numeric literal: the copy is sp->len bytes with no terminator added in
   the visible lines */
224 wrd->term_len = sp->len;
225 b = nmem_malloc(sp->nmem, sp->len);
226 memcpy(b, sp->tok, sp->len);
230 else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
/* quoted literal: strip the two quote characters */
233 wrd->term_len = sp->len - 2;
234 b = nmem_malloc(sp->nmem, wrd->term_len);
235 memcpy(b, sp->tok+1, wrd->term_len);
/* Allocates a source_parser with xmalloc and gives it a fresh NMEM from
   nmem_create; the expression evaluators allocate their temporary strings
   from that NMEM.  Counterpart of source_parser_destroy. */
248 static struct source_parser *source_parser_create()
250 struct source_parser *sp = xmalloc(sizeof(*sp));
252 sp->nmem = nmem_create();
/* Tears down a parser made by source_parser_create by destroying its NMEM.
   NOTE(review): the release of sp itself is not visible in this extract --
   confirm. */
256 static void source_parser_destroy(struct source_parser *sp)
260 nmem_destroy(sp->nmem);
/* Evaluates the source expression src for node n and leaves the resulting
   term in wrd.  The parser NMEM is reset first (presumably invalidating
   buffers handed out by an earlier call -- confirm), then the result of
   sp_expr is returned.
   NOTE(review): the lines that load src into the parser and fetch the
   first token are not visible in this extract. */
264 static int sp_parse(struct source_parser *sp,
265 data1_node *n, RecWord *wrd, const char *src)
271 nmem_reset(sp->nmem);
274 return sp_expr(sp, n, wrd);
/* Tests the XPath predicate p against node n.
   Relation predicates: only attribute names (leading @) and the =
   operator are handled; other forms are logged with YLOG_WARN as ignored.
   The attributes of n are searched for the name, and when an operator is
   given the attribute value is compared with the predicate value; an
   attribute without operator counts as an existence test.
   Boolean predicates: and / or combine the recursive results for the left
   and right operands; any other operator is logged as ignored.
   NOTE(review): the return statements of the relation branch and of the
   fall-through cases are not visible in this extract -- confirm the exact
   result values. */
277 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
286 if (p->which == XPATH_PREDICATE_RELATION) {
287 if (p->u.relation.name[0]) {
288 if (*p->u.relation.name != '@') {
290 " Only attributes (@) are supported in xelm xpath predicates");
291 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
/* skip the leading @ to get the bare attribute name */
294 attname = p->u.relation.name + 1;
296 /* looking for the attribute with a specified name */
297 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
298 yaz_log(YLOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
300 if (!strcmp(attr->name, attname)) {
301 if (p->u.relation.op[0]) {
302 if (*p->u.relation.op != '=') {
304 "Only '=' relation is supported (%s)",p->u.relation.op);
305 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
308 yaz_log(YLOG_DEBUG," - value %s <-> %s",
309 p->u.relation.value, attr->value );
310 if (!strcmp(attr->value, p->u.relation.value)) {
315 /* attribute exists, no value specified */
320 yaz_log(YLOG_DEBUG, "return %d", res);
326 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
327 if (!strcmp(p->u.boolean.op,"and")) {
328 return d1_check_xpath_predicate(n, p->u.boolean.left)
329 && d1_check_xpath_predicate(n, p->u.boolean.right);
331 else if (!strcmp(p->u.boolean.op,"or")) {
332 return (d1_check_xpath_predicate(n, p->u.boolean.left)
333 || d1_check_xpath_predicate(n, p->u.boolean.right));
335 yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
/* Runs text through the DFA whose state array is dfaar, starting in
   dfaar[0].  For each input character the transitions of the current
   state are searched for a character range ch[0]..ch[1] that contains it.
   NOTE(review): the state advance and the accept / reject returns are in
   lines not visible here -- confirm the return convention (the caller in
   this file asserts the value is 0 or 1). */
344 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
346 struct DFA_state *s = dfaar[0]; /* start state */
349 const char *p = text;
352 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
354 if (c >= t->ch[0] && c <= t->ch[1])
358 /* move to next state and return if we get a match */
366 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
367 if (c >= t->ch[0] && c <= t->ch[1])
377 New function, looking for xpath "element" definitions in abs, by
378 tagpath, using a kind of ugly regexp search. The DFA was built while
379 parsing abs, so here we just go through them and try to match
380 against the given tagpath. The first matching entry is returned.
384 Added support for enhanced xelm. Now [] predicates are considered
385 as well, when selecting indexing rules... (why the hell it's called
/* Looks up the termlists of the first xelm entry in the abstract syntax of
   n (abs->xp_elements) that matches tagpath, taking [] predicates into
   account.
   - tagpath is wrapped as /tagpath followed by a newline and matched
     against the DFA of each entry; the result is cached in match_state for
     this entry and all later entries with the same regexp.
   - after a DFA match the location steps are walked backwards while the
     node chain is followed via the tag nodes, testing each step predicate
     with d1_check_xpath_predicate.
   Returns xpe->termlists for the first entry that passes.
   NOTE(review): pexpr comes from xmalloc; its release and the value
   returned when nothing matches are not visible in this extract --
   confirm. */
392 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
394 data1_absyn *abs = n->root->u.root.absyn;
396 data1_xpelement *xpe = 0;
399 struct xpath_location_step *xp;
/* room for the leading slash, the trailing newline and the terminator */
401 char *pexpr = xmalloc(strlen(tagpath)+5);
403 sprintf (pexpr, "/%s\n", tagpath);
405 for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
406 xpe->match_state = -1; /* don't know if it matches yet */
408 for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
411 int ok = xpe->match_state;
413 { /* don't know whether there is a match yet */
414 data1_xpelement *xpe1;
417 ok = dfa_match_first(xpe->dfa->states, pexpr);
420 /* mark this and following ones with same regexp */
421 for (xpe1 = xpe; xpe1; xpe1 = xpe1->next)
423 if (!strcmp(xpe1->regexp, xpe->regexp))
424 xpe1->match_state = ok;
428 assert (ok == 0 || ok == 1);
431 /* we have to check the predicates up to the root node */
434 /* find the first tag up in the node structure */
435 for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent)
438 /* go from inside out in the node structure, while going
439 backwards through xpath location steps ... */
440 for (i = xpe->xpath_len - 1; i>0; i--)
442 yaz_log(YLOG_DEBUG, "Checking step %d: %s on tag %s",
443 i, xp[i].part, nn->u.tag.tag);
445 if (!d1_check_xpath_predicate(nn, xp[i].predicate))
447 yaz_log(YLOG_DEBUG, " Predicates didn't match");
452 if (nn->which == DATA1N_tag)
464 yaz_log(YLOG_DEBUG, "Got it");
465 return xpe->termlists;
472 1 start element (tag)
474 3 start attr (and attr-exact)
482 Now, if there is a matching xelm described in abs, for the
483 indexed element or the attribute, then the data is handled according
484 to those definitions...
486 modified by pop, 2002-12-13
489 /* add xpath index for an attribute */
/* Prepares three index words in wrd, in this order: an element-begin word
   carrying tag_path, an attribute-cdata word carrying value, and an
   element-end word carrying tag_path again.
   NOTE(review): the calls that hand each word to the extractor are not
   visible in this extract, and in the visible lines the index type of the
   value word is fixed to w while name and structure are not referenced --
   confirm against the full file. */
490 static void index_xpath_attr (char *tag_path, char *name, char *value,
491 char *structure, struct recExtractCtrl *p,
494 wrd->index_name = ZEBRA_XPATH_ELM_BEGIN;
495 wrd->index_type = '0';
496 wrd->term_buf = tag_path;
497 wrd->term_len = strlen(tag_path);
501 wrd->index_name = ZEBRA_XPATH_ATTR_CDATA;
502 wrd->index_type = 'w';
503 wrd->term_buf = value;
504 wrd->term_len = strlen(value);
507 wrd->index_name = ZEBRA_XPATH_ELM_END;
508 wrd->index_type = '0';
509 wrd->term_buf = tag_path;
510 wrd->term_len = strlen(tag_path);
/* Builds the tag path of node n into tag_path_full (capacity max bytes).
   The node chain is followed through the parent links; the name of every
   tag node is appended followed by a slash, so the innermost tag comes
   first.  A tag that would not fit (tlen + flen > max - 2) ends the
   build, as does reaching the root node (presumably via break in lines
   not visible here).  The result is always terminated with a 0 byte. */
515 static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
520 /* we have to fetch the whole path to the data tag */
521 for (nn = n; nn; nn = nn->parent)
523 if (nn->which == DATA1N_tag)
525 size_t tlen = strlen(nn->u.tag.tag);
526 if (tlen + flen > (max - 2))
528 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
530 tag_path_full[flen++] = '/';
533 if (nn->which == DATA1N_root)
536 tag_path_full[flen] = 0;
/* Produces the XPath related index words for node n.
   Overview of the visible logic:
   - termlist_only starts as 1; the first test looks at whether the record
     has no abstract syntax or has xpath indexing enabled (the effect on
     termlist_only is in a line not visible here).
   - data nodes: the node data becomes the term; the tag path is built and
     a matching xelm termlist is looked up.  Every termlist entry is
     evaluated on a copy of wrd (wrd_tl) via sp_parse and is either shown
     (flagShowRecords) or passed to p->tokenAdd, under the xpath index name
     for the ! case or under the index name of the entry otherwise.
     Without such an entry, and when default indexing applies, the data is
     indexed as type w under xpath_index.
   - tag nodes: the tag path itself is the term (type 0); for the start
     tag (xpath_is_start == 1) the attributes are processed in two passes,
     first names and name=value combinations, then values per termlist or
     by default through index_xpath_attr.
   xpath_index and xpath_is_start are parameters whose declaration lines
   are not visible in this extract. */
540 static void index_xpath(struct source_parser *sp, data1_node *n,
541 struct recExtractCtrl *p,
542 int level, RecWord *wrd,
548 char tag_path_full[1024];
549 int termlist_only = 1;
552 if ((!n->root->u.root.absyn) ||
553 (n->root->u.root.absyn->xpath_indexing == DATA1_XPATH_INDEXING_ENABLE)) {
560 wrd->term_buf = n->u.data.data;
561 wrd->term_len = n->u.data.len;
564 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
566 /* If we have a matching termlist... */
567 if (n->root->u.root.absyn &&
568 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
570 for (; tl; tl = tl->next)
572 /* need to copy recword because it may be changed */
574 wrd->index_type = *tl->structure;
575 memcpy (&wrd_tl, wrd, sizeof(*wrd));
577 sp_parse(sp, n, &wrd_tl, tl->source);
580 /* this is the ! case, so structure is for the xpath index */
581 wrd_tl.index_name = xpath_index;
582 if (p->flagShowRecords)
585 printf("%*sXPath index", (level + 1) * 4, "");
586 printf (" XData:\"");
587 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
588 fputc (wrd_tl.term_buf[i], stdout);
590 if (wrd_tl.term_len > 40)
592 fputc ('\n', stdout);
595 (*p->tokenAdd)(&wrd_tl);
598 /* this is just the old fashioned attribute based index */
599 wrd_tl.index_name = tl->index_name;
600 if (p->flagShowRecords)
603 printf("%*sIdx: [%s]", (level + 1) * 4, "",
605 printf("%s %s", tl->index_name, tl->source);
606 printf (" XData:\"");
607 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
608 fputc (wrd_tl.term_buf[i], stdout);
610 if (wrd_tl.term_len > 40)
612 fputc ('\n', stdout);
615 (*p->tokenAdd)(&wrd_tl);
619 /* xpath indexing is done, if there was no termlist given,
620 or no ! in the termlist, and default indexing is enabled... */
621 if (!p->flagShowRecords && !xpdone && !termlist_only)
623 wrd->index_name = xpath_index;
624 wrd->index_type = 'w';
629 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
631 wrd->index_type = '0';
632 wrd->term_buf = tag_path_full;
633 wrd->term_len = strlen(tag_path_full);
634 wrd->index_name = xpath_index;
635 if (p->flagShowRecords)
637 printf("%*s tag=", (level + 1) * 4, "");
638 for (i = 0; i<wrd->term_len && i < 40; i++)
639 fputc (wrd->term_buf[i], stdout);
650 /* Add tag start/end xpath index, only when there is a ! in
651 the appropriate xelm directive, or default xpath indexing
654 if (!(do_xpindex = 1 - termlist_only))
656 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n)))
658 for (; tl; tl = tl->next)
666 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
669 if (xpath_is_start == 1) /* only for the starting tag... */
/* NOTE(review): tll is indexed per attribute below; no bound check against
   MAX_ATTR_COUNT is visible in this extract -- confirm that elements with
   more attributes cannot overrun the array. */
671 #define MAX_ATTR_COUNT 50
672 data1_termlist *tll[MAX_ATTR_COUNT];
676 /* get termlists for attributes, and find out, if we have to do xpath indexing */
677 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
682 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
684 int do_xpindex = 1 - termlist_only;
686 char attr_tag_path_full[1024];
688 /* this could be cached as well */
/* NOTE(review): tag_path_full may itself use up to 1024 bytes, so adding
   the attribute name with an unbounded sprintf can overflow
   attr_tag_path_full; a bounded formatter would be safer. */
689 sprintf (attr_tag_path_full, "@%s/%s",
690 xp->name, tag_path_full);
692 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
694 /* if there is a ! in the xelm termlist, or default indexing is on,
695 proceed with xpath idx */
698 for (; tl; tl = tl->next)
707 /* attribute (no value) */
708 wrd->index_type = '0';
709 wrd->index_name = ZEBRA_XPATH_ATTR_NAME;
710 wrd->term_buf = xp->name;
711 wrd->term_len = strlen(xp->name);
717 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
719 /* attribute value exact */
720 strcpy (comb, xp->name);
722 strcat (comb, xp->value);
724 wrd->index_name = ZEBRA_XPATH_ATTR_NAME;
725 wrd->index_type = '0';
726 wrd->term_buf = comb;
727 wrd->term_len = strlen(comb);
737 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
739 char attr_tag_path_full[1024];
/* NOTE(review): same unbounded sprintf into a 1024 byte buffer as in the
   first attribute pass. */
742 sprintf (attr_tag_path_full, "@%s/%s",
743 xp->name, tag_path_full);
747 /* If there is a termlist given (=xelm directive) */
748 for (; tl; tl = tl->next)
752 /* add xpath index for the attribute */
753 index_xpath_attr (attr_tag_path_full, xp->name,
754 xp->value, tl->structure,
758 /* index attribute value (only path/@attr) */
761 wrd->index_name = tl->index_name;
762 wrd->index_type = *tl->structure;
763 wrd->term_buf = xp->value;
764 wrd->term_len = strlen(xp->value);
770 /* if there was no termlist for the given path,
771 or the termlist didn't have a ! element, index
772 the attribute as "w" */
773 if ((!xpdone) && (!termlist_only))
775 index_xpath_attr (attr_tag_path_full, xp->name,
776 xp->value, "w", p, wrd);
/* Indexes node n according to the termlists of the element that belongs to
   par or, failing that, to the nearest ancestor tag that has an element
   (found with get_parent_tag).  For every termlist entry the source
   expression is evaluated on n with sp_parse; when the resulting term is
   non-empty it is either printed (flagShowRecords) or tagged with the
   structure character and index name of the entry.
   NOTE(review): the early returns and the call that submits the word are
   in lines not visible in this extract; a middle parameter (the node n
   used below) is declared on a line that is also not visible. */
785 static void index_termlist (struct source_parser *sp, data1_node *par,
787 struct recExtractCtrl *p, int level, RecWord *wrd)
789 data1_termlist *tlist = 0;
790 data1_datatype dtype = DATA1K_string;
793 * cycle up towards the root until we find a tag with an att..
794 * this has the effect of indexing locally defined tags with
795 * the attribute of their ancestor in the record.
798 while (!par->u.tag.element)
799 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
801 if (!par || !(tlist = par->u.tag.element->termlists))
803 if (par->u.tag.element->tag)
804 dtype = par->u.tag.element->tag->kind;
806 for (; tlist; tlist = tlist->next)
808 /* consider source */
810 assert(tlist->source);
811 sp_parse(sp, n, wrd, tlist->source);
813 if (wrd->term_buf && wrd->term_len)
815 if (p->flagShowRecords)
818 printf("%*sIdx: [%s]", (level + 1) * 4, "",
820 printf("%s %s", tlist->index_name, tlist->source);
821 printf (" XData:\"");
822 for (i = 0; i<wrd->term_len && i < 40; i++)
823 fputc (wrd->term_buf[i], stdout);
825 if (wrd->term_len > 40)
827 fputc ('\n', stdout);
831 wrd->index_type = *tlist->structure;
832 wrd->index_name = tlist->index_name;
/* Walks node n and its following siblings, recursing into children with
   level + 1, and feeds every node to the indexing helpers.
   - with flagShowRecords a description of root and tag nodes is printed,
     indented by four blanks per level.
   - tag nodes: index_termlist, then the element-begin xpath index when the
     record has an abstract syntax, then the children, and finally the
     element-end xpath index.
   - data nodes: optional printout of the data (shortened above 256 bytes),
     index_termlist with the parent tag and the cdata xpath index.
   A negative result from the recursive call is tested; the matching
   return and the final return value are not visible in this extract. */
839 static int dumpkeys_r(struct source_parser *sp,
840 data1_node *n, struct recExtractCtrl *p, int level,
843 for (; n; n = n->next)
845 if (p->flagShowRecords) /* display element description to user */
847 if (n->which == DATA1N_root)
849 printf("%*s", level * 4, "");
850 printf("Record type: '%s'\n", n->u.root.type);
852 else if (n->which == DATA1N_tag)
856 printf("%*s", level * 4, "");
857 if (!(e = n->u.tag.element))
858 printf("Local tag: '%s'\n", n->u.tag.tag);
861 printf("Elm: '%s' ", e->name);
864 data1_tag *t = e->tag;
866 printf("TagNam: '%s' ", t->names->name);
869 printf("%s[%d],", t->tagset->name, t->tagset->type);
872 if (t->which == DATA1T_numeric)
873 printf("%d)", t->value.numeric);
875 printf("'%s')", t->value.string);
882 if (n->which == DATA1N_tag)
884 index_termlist(sp, n, n, p, level, wrd);
885 /* index start tag */
886 if (n->root->u.root.absyn)
887 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN,
892 if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
896 if (n->which == DATA1N_data)
898 data1_node *par = get_parent_tag(p->dh, n);
900 if (p->flagShowRecords)
902 printf("%*s", level * 4, "");
/* long data: show the first 170 and the last 70 bytes only */
904 if (n->u.data.len > 256)
905 printf("'%.170s ... %.70s'\n", n->u.data.data,
906 n->u.data.data + n->u.data.len-70);
907 else if (n->u.data.len > 0)
908 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
914 index_termlist(sp, par, n, p, level, wrd);
916 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA,
920 if (n->which == DATA1N_tag)
923 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END,
927 if (p->flagShowRecords && n->which == DATA1N_root)
929 printf("%*s-------------\n\n", level * 4, "");
/* Creates a source parser, walks the tree from n with dumpkeys_r at level
   0 and destroys the parser again.
   NOTE(review): the return statement is not visible in this extract;
   presumably r is returned -- confirm. */
935 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
937 struct source_parser *sp = source_parser_create();
938 int r = dumpkeys_r(sp, n, p, 0, wrd);
939 source_parser_destroy(sp);
/* Extracts index keys from an already built data1 tree n.  The visible
   lines fill a schema oident from the absyn reference of the root, report
   its OID through p->schemaAdd when oid_ent_to_oid succeeds, and return
   the result of dumpkeys.
   NOTE(review): the guard around the absyn access and the set-up of wrd
   are not visible in this extract. */
943 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
946 int oidtmp[OID_SIZE];
949 oe.proto = PROTO_Z3950;
950 oe.oclass = CLASS_SCHEMA;
953 oe.value = n->u.root.absyn->reference;
955 if ((oid_ent_to_oid (&oe, oidtmp)))
956 (*p->schemaAdd)(p, oidtmp);
960 /* data1_pr_tree(p->dh, n, stdout); */
962 return dumpkeys(n, p, &wrd);
/* Reads one record through grs_read and extracts its index keys.
   Steps visible here: fill the grs_read_info from p and clientData, call
   the reader, register the schema OID of the abstract syntax via
   p->schemaAdd, merge text nodes, convert the tree to UTF-8, drop the
   idzebra subtree, initialise wrd through p->init and run dumpkeys.
   Return codes visible here: RECCTRL_EXTRACT_EOF (presumably when the
   reader yields no node), RECCTRL_EXTRACT_ERROR when the root has no
   abstract syntax, RECCTRL_EXTRACT_ERROR_GENERIC when dumpkeys reports a
   negative result, RECCTRL_EXTRACT_OK otherwise.
   The mem argument used below is declared on a line not visible in this
   extract. */
965 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
967 data1_node *(*grs_read)(struct grs_read_info *))
970 struct grs_read_info gri;
972 int oidtmp[OID_SIZE];
975 gri.stream = p->stream;
978 gri.clientData = clientData;
980 n = (*grs_read)(&gri);
982 return RECCTRL_EXTRACT_EOF;
983 oe.proto = PROTO_Z3950;
984 oe.oclass = CLASS_SCHEMA;
986 if (!n->u.root.absyn)
987 return RECCTRL_EXTRACT_ERROR;
991 oe.value = n->u.root.absyn->reference;
992 if ((oid_ent_to_oid (&oe, oidtmp)))
993 (*p->schemaAdd)(p, oidtmp);
995 data1_concat_text(p->dh, mem, n);
997 /* ensure our data1 tree is UTF-8 */
998 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1001 data1_remove_idzebra_subtree (p->dh, n);
1004 data1_pr_tree (p->dh, n, stdout);
1007 (*p->init)(p, &wrd);
1008 if (dumpkeys(n, p, &wrd) < 0)
1010 return RECCTRL_EXTRACT_ERROR_GENERIC;
1012 return RECCTRL_EXTRACT_OK;
/* Public entry point for key extraction: creates an NMEM for the record
   tree and delegates to grs_extract_sub.
   NOTE(review): the release of mem and the return of ret are not visible
   in this extract -- confirm. */
1015 int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
1016 data1_node *(*grs_read)(struct grs_read_info *))
1019 NMEM mem = nmem_create ();
1020 ret = grs_extract_sub(clientData, p, mem, grs_read);
1026 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/* Applies the record composition c to the tree n.
   - simple form: only generic element set names are supported (26
     otherwise); an unknown name yields 25 with the name copied into
     *addinfo using ODR o.
   - complex form: the generic element spec is either an element set name,
     looked up in the same way, or an external spec, of which only Espec-1
     is accepted (25 otherwise).
   When an Espec-1 was found the result of data1_doespec1 is returned.
   NOTE(review): the switch headers and the remaining returns are in lines
   not visible in this extract. */
1028 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
1029 char **addinfo, ODR o)
1031 data1_esetname *eset;
1032 Z_Espec1 *espec = 0;
1037 case Z_RecordComp_simple:
1038 if (c->u.simple->which != Z_ElementSetNames_generic)
1039 return 26; /* only generic form supported. Fix this later */
1040 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1041 c->u.simple->u.generic)))
1043 yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1044 *addinfo = odr_strdup(o, c->u.simple->u.generic);
1045 return 25; /* invalid esetname */
1047 yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
1048 c->u.simple->u.generic);
1051 case Z_RecordComp_complex:
1052 if (c->u.complex->generic)
1054 /* insert check for schema */
1055 if ((p = c->u.complex->generic->elementSpec))
1059 case Z_ElementSpec_elementSetName:
1061 data1_getesetbyname(dh, n->u.root.absyn,
1062 p->u.elementSetName)))
1064 yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
1065 p->u.elementSetName);
1066 *addinfo = odr_strdup(o, p->u.elementSetName);
1067 return 25; /* invalid esetname */
1069 yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
1070 p->u.elementSetName);
1073 case Z_ElementSpec_externalSpec:
1074 if (p->u.externalSpec->which == Z_External_espec1)
1076 yaz_log(YLOG_DEBUG, "Got Espec-1");
1077 espec = p->u.externalSpec-> u.espec1;
1081 yaz_log(YLOG_LOG, "Unknown external espec.");
1082 return 25; /* bad. what is proper diagnostic? */
1089 return 26; /* fix */
1093 yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1094 return data1_doespec1(dh, n, espec);
1098 yaz_log(YLOG_DEBUG, "Element: all match");
1103 /* Add Zebra info in separate namespace ...
1106 <metadata xmlns="http://www.indexdata.dk/zebra/">
1108 <localnumber>447</localnumber>
1109 <filename>records/genera.xml</filename>
/* Appends an idzebra element with the Zebra namespace (xmlns attribute)
   below top and fills it with the children size, score, localnumber and
   filename taken from p.  The i2 / i4 text nodes are inserted between the
   tags as whitespace for layout.
   NOTE(review): the conditions under which individual children are
   emitted and the mem parameter declaration are in lines not visible in
   this extract. */
1114 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1117 const char *idzebra_ns[3];
1118 const char *i2 = "\n ";
1119 const char *i4 = "\n ";
1122 idzebra_ns[0] = "xmlns";
1123 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1126 data1_mk_text (p->dh, mem, i2, top);
1128 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1130 data1_mk_text (p->dh, mem, "\n", top);
1132 data1_mk_text (p->dh, mem, i4, n);
1134 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1138 data1_mk_text (p->dh, mem, i4, n);
1139 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1141 data1_mk_text (p->dh, mem, i4, n);
1142 data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
1145 data1_mk_text (p->dh, mem, i4, n);
1146 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1148 data1_mk_text (p->dh, mem, i2, n);
/* Public entry point for record retrieval: reads one record through
   grs_read and renders it in the syntax requested in p.
   Stages visible in this extract, in order:
   1. read the record, merge text nodes, drop the idzebra subtree and
      locate the root tag;
   2. add system tags (size, rank, sysno) when data1_systag_lookup yields a
      tag name, and XML metadata when the input format is VAL_TEXT_XML;
   3. take the requested schema from a complex composition that carries a
      schema OID, map the record through a maptab whose target matches,
      and set diagnostic 238 when the resulting schema differs;
   4. map through a maptab whose target matches the input format;
   5. for VAL_GRS1 with a known schema reference, add a schemaIdentifier
      tag holding the OID rendered as text;
   6. apply the element specification with process_comp and store its
      result as the diagnostic when it reports one;
   7. convert the tree into the output format; a converter that yields no
      buffer sets diagnostic 238, and several branches copy the buffer into
      ODR memory (odr_malloc plus memcpy).
   NOTE(review): in stage 5 a local named p is used as a character cursor
   into tmp and hides the control parameter; dh is captured beforehand for
   that reason -- presumably declared in lines not visible here.
   NOTE(review): some data1_iconv calls pass p->encoding directly while the
   XML branch falls back to UTF-8 when it is unset; whether an unset
   encoding is acceptable in those calls is not visible here -- confirm.
   NOTE(review): clean-up of mem and the return value are not visible in
   this extract. */
1151 int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
1152 data1_node *(*grs_read)(struct grs_read_info *))
1154 data1_node *node = 0, *onode = 0, *top;
1157 int res, selected = 0;
1159 struct grs_read_info gri;
1160 const char *tagname;
1162 int requested_schema = VAL_NONE;
1163 data1_marctab *marctab;
1166 mem = nmem_create();
1167 gri.stream = p->stream;
1170 gri.clientData = clientData;
1172 yaz_log(YLOG_DEBUG, "grs_retrieve");
1173 node = (*grs_read)(&gri);
1180 data1_concat_text(p->dh, mem, node);
1182 data1_remove_idzebra_subtree (p->dh, node);
1185 data1_pr_tree (p->dh, node, stdout);
1187 top = data1_get_root_tag (p->dh, node);
1189 yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1190 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1192 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1194 dnew->u.data.what = DATA1I_text;
1195 dnew->u.data.data = dnew->lbuf;
1196 sprintf(dnew->u.data.data, "%d", p->recordSize);
1197 dnew->u.data.len = strlen(dnew->u.data.data);
1200 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1201 if (tagname && p->score >= 0 &&
1202 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1204 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1205 dnew->u.data.what = DATA1I_num;
1206 dnew->u.data.data = dnew->lbuf;
1207 sprintf(dnew->u.data.data, "%d", p->score);
1208 dnew->u.data.len = strlen(dnew->u.data.data);
1211 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1212 "localControlNumber");
1213 if (tagname && p->localno > 0 &&
1214 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1216 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1217 dnew->u.data.what = DATA1I_text;
1218 dnew->u.data.data = dnew->lbuf;
1220 sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
1221 dnew->u.data.len = strlen(dnew->u.data.data);
1224 if (p->input_format == VAL_TEXT_XML)
1225 zebra_xml_metadata (p, top, mem);
1228 data1_pr_tree (p->dh, node, stdout);
1230 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1231 p->comp->u.complex->generic &&
1232 p->comp->u.complex->generic->which == Z_Schema_oid &&
1233 p->comp->u.complex->generic->schema.oid)
1235 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1237 requested_schema = oe->value;
1239 /* If schema has been specified, map if possible, then check that
1240 * we got the right one
1242 if (requested_schema != VAL_NONE)
1244 yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1245 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1247 if (map->target_absyn_ref == requested_schema)
1250 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1259 if (node->u.root.absyn &&
1260 requested_schema != node->u.root.absyn->reference)
1262 p->diagnostic = 238;
1268 * Does the requested format match a known syntax-mapping? (this reflects
1269 * the overlap of schema and formatting which is inherent in the MARC
1272 yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1273 if (node->u.root.absyn)
1274 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1276 if (map->target_absyn_ref == p->input_format)
1279 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1288 yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1289 if (node->u.root.absyn &&
1290 node->u.root.absyn->reference != VAL_NONE &&
1291 p->input_format == VAL_GRS1)
1295 int oidtmp[OID_SIZE];
1297 oe.proto = PROTO_Z3950;
1298 oe.oclass = CLASS_SCHEMA;
1299 oe.value = node->u.root.absyn->reference;
1301 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1304 data1_handle dh = p->dh;
1308 for (ii = oid; *ii >= 0; ii++)
1312 sprintf(p, "%d", *ii);
1315 if ((dnew = data1_mk_tag_data_wd(dh, top,
1316 "schemaIdentifier", mem)))
1318 dnew->u.data.what = DATA1I_oid;
1319 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1320 memcpy(dnew->u.data.data, tmp, p - tmp);
1321 dnew->u.data.len = p - tmp;
1326 yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1327 if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
1330 p->diagnostic = res;
1334 else if (p->comp && !res)
1338 data1_pr_tree (p->dh, node, stdout);
1340 yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1341 switch (p->output_format = (p->input_format != VAL_NONE ?
1342 p->input_format : VAL_SUTRS))
1346 data1_pr_tree (p->dh, node, stdout);
1348 /* default output encoding for XML is UTF-8 */
1349 data1_iconv (p->dh, mem, node,
1350 p->encoding ? p->encoding : "UTF-8",
1351 data1_get_encoding(p->dh, node));
1353 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1355 p->diagnostic = 238;
1358 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1359 memcpy (new_buf, p->rec_buf, p->rec_len);
1360 p->rec_buf = new_buf;
1364 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1366 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1368 p->diagnostic = 238; /* not available in requested syntax */
1373 /* ensure our data1 tree is UTF-8 */
1374 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1376 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1378 p->diagnostic = 238;
1383 /* ensure our data1 tree is UTF-8 */
1384 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1385 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1387 p->diagnostic = 238;
1393 data1_iconv (p->dh, mem, node, p->encoding,
1394 data1_get_encoding(p->dh, node));
1395 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1397 p->diagnostic = 238;
1400 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1401 memcpy (new_buf, p->rec_buf, p->rec_len);
1402 p->rec_buf = new_buf;
1407 data1_iconv (p->dh, mem, node, p->encoding,
1408 data1_get_encoding(p->dh, node));
1409 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1411 p->diagnostic = 238;
1414 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1415 memcpy (new_buf, p->rec_buf, p->rec_len);
1416 p->rec_buf = new_buf;
1420 if (!node->u.root.absyn)
1422 p->diagnostic = 238;
1425 for (marctab = node->u.root.absyn->marc; marctab;
1426 marctab = marctab->next)
1427 if (marctab->reference == p->input_format)
1431 p->diagnostic = 238;
1435 data1_iconv (p->dh, mem, node, p->encoding,
1436 data1_get_encoding(p->dh, node));
1437 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1438 selected, &p->rec_len)))
1439 p->diagnostic = 238;
1442 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1443 memcpy (new_buf, p->rec_buf, p->rec_len);
1444 p->rec_buf = new_buf;
1454 * indent-tabs-mode: nil
1456 * vim: shiftwidth=4 tabstop=8 expandtab