1 /* This file is part of the Zebra server.
2 Copyright (C) 1995-2008 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 #include <yaz/yaz-util.h>
25 #include <yaz/marcdisp.h>
26 #include <idzebra/recgrs.h>
/* Debug switch for this file; defined as 0 here.
 * NOTE(review): presumably gates the diagnostic yaz_log()/fprintf() calls
 * further down -- the conditional-compilation lines are not visible in this
 * view, so confirm before relying on it. */
31 #define MARCOMP_DEBUG 0
/*
 * Read one ISO 2709 (MARC) record from p->stream into a local buffer and
 * build a data1 tree for it under a root created by data1_mk_root() for the
 * abstract syntax named in the filter configuration (mi->type).
 *
 * p        - reader context: stream callbacks (readf/tellf/endf), data1
 *            handle (p->dh), memory handle (p->mem) and the filter's
 *            client data.
 * marc_xml - selects the shape of the tree. The branch itself is not visible
 *            in this view, but the code below builds either a "record"
 *            element in the MARC21 slim namespace with "leader",
 *            "datafield", "controlfield" and "subfield" tags, or a top tag
 *            named after the abstract syntax with tags taken directly from
 *            the record bytes.
 *
 * NOTE(review): return statements, several declarations (buf, record_length,
 * outf, ...) and most braces are not visible here, so the exact failure
 * returns cannot be confirmed from this view.
 */
37 static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml)
39 struct marc_info *mi = (struct marc_info*) p->clientData;
44 int identifier_length;
47 int length_data_entry;
49 int length_implementation;
54 data1_node *res_root, *res_top;
56 data1_marctab *marctab;
/* A record begins with five bytes holding its length. */
58 if (p->stream->readf(p->stream, buf, 5) != 5)
/* The first byte must be an ASCII digit; otherwise it is logged as a bad
 * byte and one more byte is read into buf[4]. */
60 while (*buf < '0' || *buf > '9')
64 yaz_log(YLOG_WARN, "MARC: Skipping bad byte %d (0x%02X)",
65 *buf & 0xff, *buf & 0xff);
69 if (p->stream->readf(p->stream, buf+4, 1) != 1)
/* Parse the 5-digit record length; anything below 25 is reported. */
72 record_length = atoi_n (buf, 5);
73 if (record_length < 25)
75 yaz_log (YLOG_WARN, "MARC record length < 25, is %d", record_length);
/* Read the remainder of the record behind the five length bytes. */
79 read_bytes = p->stream->readf(p->stream, buf+5, record_length-5);
80 if (read_bytes < record_length-5)
82 yaz_log (YLOG_WARN, "Couldn't read whole MARC record");
85 /* skip until we meet a record separator */
86 while (buf[record_length-1] != ISO2709_RS)
/* NOTE(review): record_length is compared with an unsigned sizeof
 * expression; if it is a signed int (declaration not visible) the usual
 * arithmetic conversions apply. */
88 if (record_length > sizeof(buf)-2)
90 read_bytes = p->stream->readf(p->stream, buf+record_length, 1);
95 /* read one byte ahead to see if there is more ... */
96 read_bytes = p->stream->readf(p->stream, buf+record_length, 1);
/* The offset one byte before the current stream position is handed to the
 * stream's endf callback. */
99 off_t cur_offset = p->stream->tellf(p->stream);
102 off_t end_offset = cur_offset - 1;
103 p->stream->endf(p->stream, &end_offset);
/* The root node is created for the abstract syntax configured for the
 * filter. */
107 absynName = mi->type;
108 res_root = data1_mk_root (p->dh, p->mem, absynName);
111 yaz_log (YLOG_WARN, "cannot read MARC without an abstract syntax");
/* MARCXML-style top element: <record xmlns=...> with a <leader> child
 * holding the first 24 bytes of the record. */
117 const char *attr[] = { "xmlns", "http://www.loc.gov/MARC21/slim", 0};
119 res_top = data1_mk_tag (p->dh, p->mem, "record", attr, res_root);
121 lead = data1_mk_tag(p->dh, p->mem, "leader", 0, res_top);
122 data1_mk_text_n(p->dh, p->mem, buf, 24, lead);
/* Otherwise the top tag is named after the abstract syntax. */
125 res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root);
/* When the abstract syntax has a MARC table, keep a copy of the 24-byte
 * leader plus two NUL-terminated slices of it (bytes 6..9 and 17..19). */
127 if ((marctab = data1_absyn_getmarctab(p->dh, res_root)))
129 memcpy(marctab->leader, buf, 24);
130 memcpy(marctab->implementation_codes, buf+6, 4);
131 marctab->implementation_codes[4] = '\0';
132 memcpy(marctab->user_systems, buf+17, 3);
133 marctab->user_systems[3] = '\0';
/* Leader fields: indicator length at offset 10, identifier length at 11,
 * base address of data in the five digits at 12, directory entry part
 * lengths at 20, 21 and 22. A non-negative force_* value in the marctab is
 * used for indicator/identifier length (the else lines that presumably
 * select the leader value otherwise are not visible). */
136 if (marctab && marctab->force_indicator_length >= 0)
137 indicator_length = marctab->force_indicator_length;
139 indicator_length = atoi_n (buf+10, 1);
140 if (marctab && marctab->force_identifier_length >= 0)
141 identifier_length = marctab->force_identifier_length;
143 identifier_length = atoi_n (buf+11, 1);
144 base_address = atoi_n (buf+12, 5);
146 length_data_entry = atoi_n (buf+20, 1);
147 length_starting = atoi_n (buf+21, 1);
148 length_implementation = atoi_n (buf+22, 1);
/* First pass: walk the directory, which starts at offset 24, until the
 * field separator. Each entry is 3 tag bytes plus length_data_entry plus
 * length_starting bytes. */
150 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
152 int l = 3 + length_data_entry + length_starting;
153 if (entry_p + l >= record_length)
155 yaz_log(YLOG_WARN, "MARC: Directory offset %d: end of record.",
159 /* check for digits in length info */
161 if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
165 /* not all digits, so stop directory scan */
166 yaz_log(YLOG_LOG, "MARC: Bad directory");
169 entry_p += 3 + length_data_entry + length_starting;
/* Remember where the directory ended; the data is expected to start right
 * behind the terminating field separator. */
171 end_of_directory = entry_p;
172 if (base_address != entry_p+1)
174 yaz_log(YLOG_WARN, "MARC: Base address does not follow directory");
/* Second pass: create nodes for every directory entry between offset 24 and
 * end_of_directory. */
176 for (entry_p = 24; entry_p != end_of_directory; )
184 data1_node *parent = res_top;
/* The first three bytes of an entry are the field tag. */
186 memcpy (tag, buf+entry_p, 3);
193 res = data1_mk_tag_n (p->dh, p->mem, tag, 3, 0 /* attr */, parent);
/* NOTE(review): outf is not declared in the visible lines; the fprintf()
 * calls in this function look like debug output. */
196 fprintf (outf, "%s ", tag);
/* Field length and starting offset from the entry; i becomes the index of
 * the first data byte in buf and end_offset the index of the last one. */
198 data_length = atoi_n (buf+entry_p, length_data_entry);
199 entry_p += length_data_entry;
200 data_offset = atoi_n (buf+entry_p, length_starting);
201 entry_p += length_starting;
202 i = data_offset + base_address;
203 end_offset = i+data_length-1;
/* Reject entries whose data would fall outside the record. */
205 if (data_length <= 0 || data_offset < 0 || end_offset >= record_length)
207 yaz_log(YLOG_WARN, "MARC: Bad offsets in data. Skipping rest");
/* Fields whose tag does not start with "00" carry indicators when
 * indicator_length is non-zero. */
211 if (memcmp (tag, "00", 2) && indicator_length)
213 /* generate indicator node */
216 const char *attr[10];
223 res = data1_mk_tag(p->dh, p->mem, "datafield", attr, res);
/* One attribute per indicator; str1 receives the name "ind<j+1>". */
225 for (j = 0; j<indicator_length; j++)
227 char str1[18], str2[2];
228 sprintf (str1, "ind%d", j+1);
235 data1_tag_add_attr (p->dh, p->mem, res, attr);
/* Alternative form: a tag whose name is the indicator bytes themselves. */
243 res = data1_mk_tag_n (p->dh, p->mem,
244 buf+i, indicator_length, 0 /* attr */, res);
246 for (j = 0; j<indicator_length; j++)
247 fprintf (outf, "%c", buf[j+i]);
/* Step over the indicator bytes. */
250 i += indicator_length;
256 const char *attr[10];
262 res = data1_mk_tag(p->dh, p->mem, "controlfield", attr, res);
266 /* traverse sub fields */
/* NOTE(review): buf[i] is read before the i < end_offset test in this
 * condition. */
268 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
270 if (memcmp (tag, "00", 2) && identifier_length)
/* Copy the identifier bytes that follow the delimiter into code; the copy
 * is capped by j < 9 (the size of code is not visible here). */
279 for (j = 1; j<identifier_length && j < 9; j++)
280 code[j-1] = buf[i+j];
285 res = data1_mk_tag(p->dh, p->mem, "subfield",
290 res = data1_mk_tag_n (p->dh, p->mem,
291 buf+i+1, identifier_length-1,
292 0 /* attr */, parent);
295 fprintf (outf, " $");
296 for (j = 1; j<identifier_length; j++)
297 fprintf (outf, "%c", buf[j+i]);
/* Step over the subfield identifier. */
300 i += identifier_length;
/* Advance over the subfield data up to the next separator or end_offset. */
302 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
303 buf[i] != ISO2709_FS && i < end_offset)
306 fprintf (outf, "%c", buf[i]);
/* Text node for the bytes buf[i0..i-1], attached to the subfield node. */
310 data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res);
316 fprintf (outf, "%c", buf[i]);
/* Text node for the bytes buf[i0..i-1], attached directly to parent. */
323 data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, parent);
326 fprintf (outf, "\n");
328 fprintf (outf, "-- separator but not at end of field\n");
329 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
330 fprintf (outf, "-- no separator at end of field\n");
337 * Locate some data under this node. This routine should handle variants
/*
 * Return a pointer to text held at or below node n and report its length
 * through *len.
 *
 * For a DATA1N_data node, *len starts as the node's data length, leading
 * white space is skipped (the returned pointer is advanced by i) and
 * trailing white space is examined from the end using *len.
 *
 * NOTE(review): the bodies of the trimming loops, any adjustment of *len for
 * the skipped leading bytes, the descent through DATA1N_tag nodes and the
 * fallback result are not visible in this view.
 */
340 static char *get_data(data1_node *n, int *len)
346 if (n->which == DATA1N_data)
349 *len = n->u.data.len;
/* Find the first non-space byte. */
351 for (i = 0; i<*len; i++)
352 if (!d1_isspace(n->u.data.data[i]))
/* Look at trailing bytes while they are white space. */
354 while (*len && d1_isspace(n->u.data.data[*len - 1]))
358 return n->u.data.data + i;
360 if (n->which == DATA1N_tag)
362 else if (n->which == DATA1N_data)
/*
 * Search the sibling chain that starts at node for a node whose tag matches
 * name; yaz_matchstr() yields 0 on a match.
 *
 * NOTE(review): the return statements are not visible here -- presumably the
 * matching node, or NULL when the chain is exhausted. The loop reads
 * p->u.tag.tag without a visible check that the node is a tag node.
 */
372 static data1_node *lookup_subfield(data1_node *node, const char *name)
376 for (p=node; p; p=p->next)
378 if (!yaz_matchstr(p->u.tag.tag, name))
/*
 * Search the in-line subfield list that starts at pisf for an entry whose
 * name matches the requested name; yaz_matchstr() yields 0 on a match.
 *
 * NOTE(review): the second parameter and the return statements are not
 * visible here -- presumably the matching entry, or NULL when none matches.
 */
384 static inline_subfield *lookup_inline_subfield(inline_subfield *pisf,
389 for (p=pisf; p; p=p->next)
391 if (!yaz_matchstr(p->name, name))
/*
 * Append to buf the data of the in-line subfields in list pisf that are
 * selected by the subfield specification list psf.
 *
 *  - MC_SF entries look up a subfield by name and write its data, either
 *    whole or the slice starting at interval.start with length
 *    interval.end - interval.start. The entry's prefix and suffix are
 *    written around it unless they are "_".
 *  - MC_SFVARIANT entries recurse into the child specification.
 *  - MC_SFGROUP entries recurse into the child specification and bracket the
 *    output with " (" and ") ".
 *
 * NOTE(review): the return statement and the lines that advance pisf are not
 * visible in this view.
 */
397 static inline_subfield *cat_inline_subfield(mc_subfield *psf, WRBUF buf,
398 inline_subfield *pisf)
402 for (p = psf; p && pisf; p = p->next)
404 if (p->which == MC_SF)
406 inline_subfield *found = lookup_inline_subfield(pisf, p->name);
/* A prefix of "_" means "no prefix". */
410 if (strcmp(p->prefix, "_"))
412 wrbuf_puts(buf, " ");
413 wrbuf_puts(buf, p->prefix);
/* interval.start == -1 selects the whole subfield. */
415 if (p->interval.start == -1)
417 wrbuf_puts(buf, found->data);
421 wrbuf_write(buf, found->data+p->interval.start,
422 p->interval.end-p->interval.start);
/* A suffix of "_" means "no suffix". */
425 if (strcmp(p->suffix, "_"))
427 wrbuf_puts(buf, p->suffix);
428 wrbuf_puts(buf, " ");
431 yaz_log(YLOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name);
436 else if (p->which == MC_SFVARIANT)
438 inline_subfield *next;
441 next = cat_inline_subfield(p->u.child, buf, pisf);
447 else if (p->which == MC_SFGROUP)
/* Test the current subfield against each member of the group. The
 * comparison uses pp->name (the member being visited), as cat_subfield()
 * does; comparing with p->name made every iteration test the same value. */
452 for (pp = p->u.child, found = 0; pp; pp = pp->next)
454 if (!yaz_matchstr(pisf->name, pp->name))
462 wrbuf_puts(buf, " (");
463 pisf = cat_inline_subfield(p->u.child, buf, pisf);
464 wrbuf_puts(buf, ") ");
/*
 * Collect an in-line (embedded) field from the subfield chain and append the
 * parts selected by the field specification pf to buf.
 *
 * Subfields whose tag is not "1" are stepped over. An in-line field object
 * is then created and the following subfields are parsed into it until the
 * next subfield tagged "1" or the end of the chain. If the parsed field's
 * name matches pf->name, either its first subfield's data is written (when
 * pf has no subfield list) or, when the indicators match, the subfields
 * selected by pf->list are written followed by a newline separator.
 *
 * Indicator matching: a blank indicator in the data is treated as '_', and
 * '.' in the specification matches any value.
 *
 * The final log call passes wrbuf_cstr(buf) rather than the WRBUF handle
 * itself, because "%s" requires a NUL-terminated char pointer.
 *
 * NOTE(review): loop headers, braces and some declarations are not visible in
 * this view.
 */
471 static void cat_inline_field(mc_field *pf, WRBUF buf, data1_node *subfield)
473 if (!pf || !subfield)
479 inline_field *pif=NULL;
/* Not a "1" subfield: move on to the next sibling. */
482 if (yaz_matchstr(subfield->u.tag.tag, "1"))
484 subfield = subfield->next;
489 pif = inline_mk_field();
/* Feed each subfield (tag and data) to the in-line parser; a negative
 * result is a parse error. */
493 if ((i=inline_parse(pif, psubf->u.tag.tag, get_data(psubf, &len)))<0)
495 yaz_log(YLOG_WARN, "inline subfield ($%s): parse error",
497 inline_destroy_field(pif);
501 } while (psubf && yaz_matchstr(psubf->u.tag.tag, "1"));
505 if (pif && !yaz_matchstr(pif->name, pf->name))
/* No subfield list in the specification: take the first subfield's data. */
507 if (!pf->list && pif->list)
509 wrbuf_puts(buf, pif->list->data);
519 ind1 = (pif->ind1[0] == ' ') ? '_':pif->ind1[0];
520 ind2 = (pif->ind2[0] == ' ') ? '_':pif->ind2[0];
522 if (((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) &&
523 ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0])))
525 cat_inline_subfield(pf->list, buf, pif->list);
528 add separator for inline fields
532 wrbuf_puts(buf, "\n");
537 yaz_log(YLOG_WARN, "In-line field %s missed -- indicators do not match", pif->name);
541 inline_destroy_field(pif);
544 yaz_log(YLOG_LOG, "cat_inline_field(): got buffer {%s}", wrbuf_cstr(buf));
/*
 * Append to buf the data of the subfield nodes in the chain starting at
 * subfield that are selected by the subfield specification list psf.
 *
 *  - MC_SF entries look up a subfield node by name and write its data:
 *    via cat_inline_field() for in-line specifications, whole when
 *    interval.start is -1, or otherwise the slice starting at interval.start
 *    with length interval.end - interval.start. The entry's prefix and
 *    suffix are written around it unless they are "_". The scan then
 *    continues behind the node that was found.
 *  - MC_SFVARIANT entries recurse into the child specification.
 *  - MC_SFGROUP entries check the current subfield against the names in the
 *    child specification and recurse, bracketing the output with " (" and
 *    ") ".
 *
 * NOTE(review): the return statement and several conditions are not visible
 * in this view. wrbuf_puts() is given the pointer from get_data() and does
 * not use the length reported in len -- confirm the data is NUL-terminated
 * and that ignoring the trimmed length is intended.
 */
548 static data1_node *cat_subfield(mc_subfield *psf, WRBUF buf,
549 data1_node *subfield)
553 for (p = psf; p && subfield; p = p->next)
555 if (p->which == MC_SF)
557 data1_node *found = lookup_subfield(subfield, p->name);
/* A prefix of "_" means "no prefix". */
563 if (strcmp(p->prefix, "_"))
565 wrbuf_puts(buf, " ");
566 wrbuf_puts(buf, p->prefix);
571 cat_inline_field(p->u.in_line, buf, found);
/* interval.start == -1 selects the whole subfield. */
573 else if (p->interval.start == -1)
575 wrbuf_puts(buf, get_data(found, &len));
579 wrbuf_write(buf, get_data(found, &len)+p->interval.start,
580 p->interval.end-p->interval.start);
/* A suffix of "_" means "no suffix". */
583 if (strcmp(p->suffix, "_"))
585 wrbuf_puts(buf, p->suffix);
586 wrbuf_puts(buf, " ");
589 yaz_log(YLOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag);
/* Continue behind the subfield just consumed. */
591 subfield = found->next;
594 else if (p->which == MC_SFVARIANT)
598 next = cat_subfield(p->u.child, buf, subfield);
/* No progress was made by the recursive call. */
599 if (next == subfield)
604 else if (p->which == MC_SFGROUP)
/* Test the current subfield's tag against each member of the group. */
609 for (pp = p->u.child, found = 0; pp; pp = pp->next)
611 if (!yaz_matchstr(subfield->u.tag.tag, pp->name))
619 wrbuf_puts(buf, " (");
620 subfield = cat_subfield(p->u.child, buf, subfield);
621 wrbuf_puts(buf, ") ");
/*
 * Append to buf the data of one field node selected by the field
 * specification pf.
 *
 * The field's tag is compared with pf->name. When pf has no subfield list
 * and the field's child is a data node, the field data is written: whole
 * when interval.start is -1, otherwise the slice starting at interval.start
 * with length interval.end - interval.start. Otherwise the child tag is
 * taken as the indicator pair and compared with pf->ind1/pf->ind2 (blank is
 * treated as '_', '.' in the specification matches anything); when they
 * match, the subfields below it are handed to cat_subfield().
 *
 * The log calls pass wrbuf_cstr(buf) rather than the WRBUF handle itself,
 * because "%s" requires a NUL-terminated char pointer.
 *
 * NOTE(review): the return statements, braces and some conditions are not
 * visible in this view.
 */
628 static data1_node *cat_field(struct grs_read_info *p, mc_field *pf,
629 WRBUF buf, data1_node *field)
631 data1_node *subfield;
/* A non-zero yaz_matchstr() result means the tag does not match. */
638 if (yaz_matchstr(field->u.tag.tag, pf->name))
641 subfield = field->child;
647 check subfield without indicators
650 if (!pf->list && subfield->which == DATA1N_data)
/* interval.start == -1 selects the whole field. */
654 if (pf->interval.start == -1)
656 wrbuf_puts(buf, get_data(field, &len));
660 wrbuf_write(buf, get_data(field, &len)+pf->interval.start,
661 pf->interval.end-pf->interval.start);
665 yaz_log(YLOG_LOG, "cat_field(): got buffer {%s}", wrbuf_cstr(buf));
/* The child tag holds the two indicator characters. */
674 ind1 = (subfield->u.tag.tag[0] == ' ') ? '_':subfield->u.tag.tag[0];
675 ind2 = (subfield->u.tag.tag[1] == ' ') ? '_':subfield->u.tag.tag[1];
678 ((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) &&
679 ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0]))
683 yaz_log(YLOG_WARN, "Field %s missed -- does not match indicators", field->u.tag.tag);
/* Descend from the indicator node to the subfield nodes. */
688 subfield = subfield->child;
693 cat_subfield(pf->list, buf, subfield);
696 yaz_log(YLOG_LOG, "cat_field(): got buffer {%s}", wrbuf_cstr(buf));
/*
 * Examine the characters of s with isspace(); each byte is converted to
 * unsigned char before the call.
 *
 * NOTE(review): the loop and the return statements are not visible here --
 * presumably reports whether s consists only of white space.
 */
702 static int is_empty(char *s)
708 if (!isspace(*(unsigned char *)p))
/*
 * Evaluate one field-selection statement against the tree built for a record
 * and add the results as new nodes under the top node (root->child).
 *
 * p        - reader context supplying the data1 and memory handles.
 * mc_stmnt - the statement; parsing starts at its fourth character
 *            (mc_stmnt + 3), and the whole string names the tags created.
 * third parameter (not visible in this view) - the record root; it is used
 *            below as "root".
 *
 * A statement naming "ldr" produces one node holding a slice of the saved
 * leader. For other statements each matching field is rendered into a
 * buffer by cat_field(), and every newline-separated piece of that buffer
 * becomes a new tag with a text child.
 *
 * NOTE(review):
 *  - marctab->leader is dereferenced without a visible NULL check on
 *    marctab.
 *  - The leader slice length is interval.end - interval.start + 1, whereas
 *    the cat_* helpers use interval.end - interval.start.
 *  - strtok() keeps hidden static state and modifies the buffer it scans.
 *  - Braces, loops and early returns are not visible in this view.
 */
714 static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt,
717 data1_marctab *marctab = data1_absyn_getmarctab(p->dh, root);
718 data1_node *top = root->child;
/* Parse context for the statement text behind its first three characters. */
724 c = mc_mk_context(mc_stmnt+3);
733 mc_destroy_context(c);
738 yaz_log(YLOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt);
/* Leader pseudo-field. */
740 if (!yaz_matchstr(pf->name, "ldr"))
744 yaz_log(YLOG_LOG,"parse_data1_tree(): try LEADER from {%d} to {%d} positions",
745 pf->interval.start, pf->interval.end);
749 new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top);
750 data1_mk_text_n(p->dh, p->mem, marctab->leader+pf->interval.start,
751 pf->interval.end-pf->interval.start+1, new);
/* Ordinary field: match by tag name. */
760 if (!yaz_matchstr(field->u.tag.tag, pf->name))
765 yaz_log(YLOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag);
770 field = cat_field(p, pf, buf, field);
/* One new tag per newline-separated piece of the rendered buffer. */
774 for (pb = strtok(pb, "\n"); pb; pb = strtok(NULL, "\n"))
778 new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top);
779 data1_mk_text_n(p->dh, p->mem, pb, strlen(pb), new);
/* Release the parsed field specification and the parse context. */
789 mc_destroy_field(pf);
790 mc_destroy_context(c);
/*
 * Read one record through grs_read_iso2709() with marc_xml set to 1, then
 * walk the elements of the abstract syntax and run parse_data1_tree() for
 * every element whose tag is a string matching "mc?".
 *
 * NOTE(review): the return statement and any check on root are not visible
 * here -- presumably returns root.
 */
794 data1_node *grs_read_marcxml(struct grs_read_info *p)
796 data1_node *root = grs_read_iso2709(p, 1);
802 for (e = data1_absyn_getelements(p->dh, root); e; e=e->next)
804 data1_tag *tag = e->tag;
/* Only string-valued tags that match "mc?" are passed on as statements. */
806 if (tag && tag->which == DATA1T_string &&
807 !yaz_matchstr(tag->value.string, "mc?"))
808 parse_data1_tree(p, tag->value.string, root);
/*
 * Read one record through grs_read_iso2709() with marc_xml set to 0, then
 * walk the elements of the abstract syntax and run parse_data1_tree() for
 * every element whose tag is a string matching "mc?".
 *
 * NOTE(review): the return statement and any check on root are not visible
 * here -- presumably returns root.
 */
813 data1_node *grs_read_marc(struct grs_read_info *p)
815 data1_node *root = grs_read_iso2709(p, 0);
821 for (e = data1_absyn_getelements(p->dh, root); e; e=e->next)
823 data1_tag *tag = e->tag;
/* Only string-valued tags that match "mc?" are passed on as statements. */
825 if (tag && tag->which == DATA1T_string &&
826 !yaz_matchstr(tag->value.string, "mc?"))
827 parse_data1_tree(p, tag->value.string, root);
/*
 * Allocate the per-filter struct marc_info with xmalloc().
 * NOTE(review): initialisation of its fields and the return statement are
 * not visible here -- presumably the new object is returned as client data.
 */
832 static void *init_marc(Res res, RecType rt)
834 struct marc_info *p = xmalloc(sizeof(*p));
/*
 * Store the filter argument string in the client data's type field.
 * The copy is made only when args, including its terminating NUL, fits in
 * p->type.
 * NOTE(review): the return statements are not visible in this view, so the
 * result for an over-long argument cannot be confirmed.
 */
839 static ZEBRA_RES config_marc(void *clientData, Res res, const char *args)
841 struct marc_info *p = (struct marc_info*) clientData;
/* strlen(args) < sizeof(p->type) leaves room for the terminator. */
842 if (strlen(args) < sizeof(p->type))
843 strcpy(p->type, args);
/*
 * Tear down the filter's client data.
 * NOTE(review): only the cast is visible here -- presumably the struct
 * allocated in init_marc() is released.
 */
847 static void destroy_marc(void *clientData)
849 struct marc_info *p = (struct marc_info*) clientData;
/* Extract entry point: delegates to zebra_grs_extract() with grs_read_marc
 * as the record reader and returns its result. */
854 static int extract_marc(void *clientData, struct recExtractCtrl *ctrl)
856 return zebra_grs_extract(clientData, ctrl, grs_read_marc);
/* Retrieve entry point: delegates to zebra_grs_retrieve() with grs_read_marc
 * as the record reader and returns its result. */
859 static int retrieve_marc(void *clientData, struct recRetrieveCtrl *ctrl)
861 return zebra_grs_retrieve(clientData, ctrl, grs_read_marc);
/* Record type descriptor for this filter.
 * NOTE(review): the initializer fields are not visible in this view;
 * presumably they wire up the init/config/destroy/extract/retrieve functions
 * defined above. */
864 static struct recType marc_type = {
/* Extract entry point: delegates to zebra_grs_extract() with
 * grs_read_marcxml as the record reader and returns its result. */
874 static int extract_marcxml(void *clientData, struct recExtractCtrl *ctrl)
876 return zebra_grs_extract(clientData, ctrl, grs_read_marcxml);
/* Retrieve entry point: delegates to zebra_grs_retrieve() with
 * grs_read_marcxml as the record reader and returns its result. */
879 static int retrieve_marcxml(void *clientData, struct recRetrieveCtrl *ctrl)
881 return zebra_grs_retrieve(clientData, ctrl, grs_read_marcxml);
/* Record type descriptor for the MARCXML-shaped variant of this filter.
 * NOTE(review): the initializer fields are not visible in this view;
 * presumably they reference extract_marcxml() and retrieve_marcxml(). */
884 static struct recType marcxml_type = {
/* When IDZEBRA_STATIC_GRS_MARC is defined, the symbol is named
 * idzebra_filter_grs_marc.
 * NOTE(review): the declaration this name belongs to, the alternative name
 * and the matching #else/#endif are not visible in this view -- presumably
 * this is the filter table listing the record types above. */
895 #ifdef IDZEBRA_STATIC_GRS_MARC
896 idzebra_filter_grs_marc
910 * indent-tabs-mode: nil
912 * vim: shiftwidth=4 tabstop=8 expandtab