From f4d0d08e56874eeabd12b02a161db2b6fad29aa1 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 9 Jun 2005 22:08:09 +0000 Subject: [PATCH] Backport support for 'melm' directive. --- NEWS | 2 ++ data1/d1_absyn.c | 41 +++++++++++++++++++++++++--- doc/recordmodel.xml | 19 +++++++++++-- tab/Makefile.am | 4 +-- tab/marc21.abs | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 134 insertions(+), 7 deletions(-) create mode 100644 tab/marc21.abs diff --git a/NEWS b/NEWS index 338d857..7f86505 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,5 @@ +Added 'melm' directive to absyn format to simplify config files +for MARC-style databases. See tab/marc21.abs for an example. Added support for special slement set _sysno_ which returns a record ID for a record packed as a SUTRS record. diff --git a/data1/d1_absyn.c b/data1/d1_absyn.c index 2da5e89..af85c0a 100644 --- a/data1/d1_absyn.c +++ b/data1/d1_absyn.c @@ -1,4 +1,4 @@ -/* $Id: d1_absyn.c,v 1.9.2.1 2004-08-24 14:06:31 adam Exp $ +/* $Id: d1_absyn.c,v 1.9.2.2 2005-06-09 22:08:10 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -450,6 +450,33 @@ static int parse_termlists (data1_handle dh, data1_termlist ***tpp, return 0; } +/* quinn + * Converts a 'melm' field[$subfield] pattern to a simple xpath + */ +static int melm2xpath(char *melm, char *buf) +{ + char *dollar; + char *field = melm; + char *subfield; + char *fieldtype; + if ((dollar = index(melm, '$'))) { + *dollar = '\0'; + subfield = ++dollar; + } else + subfield = ""; + if (field[0] == '0' && field[1] == '0') + fieldtype = "controlfield"; + else + fieldtype = "datafield"; + sprintf(buf, "/*/%s[@tag=\"%s\"]", fieldtype, field); + if (*subfield) + sprintf(buf + strlen(buf), "/subfield[@code=\"%s\"]", subfield); + else if (field[0] != '0' || field[1] != '0') + strcat(buf, "/subfield"); + yaz_log(YLOG_DEBUG, "Created xpath: '%s'", buf); + return 0; +} + const char *data1_systag_lookup(data1_absyn *absyn, const char 
*tag, const char *default_value) { @@ -692,20 +719,28 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, pop, 2003-01-17 */ - else if (!strcmp(cmd, "xelm")) { + else if (!strcmp(cmd, "xelm") || !strcmp(cmd, "melm")) { int i; char *p, *xpath_expr, *termlists; const char *regexp; struct DFA *dfa = dfa = dfa_init(); data1_termlist **tp; + char melm_xpath[128]; if (argc < 3) { yaz_log(LOG_WARN, "%s:%d: Bad # of args to xelm", file, lineno); continue; } - xpath_expr = argv[1]; + + if (!strcmp(cmd, "melm")) { + if (melm2xpath(argv[1], melm_xpath) < 0) + continue; + xpath_expr = melm_xpath; + } else { + xpath_expr = argv[1]; + } termlists = argv[2]; regexp = mk_xpath_regexp(dh, xpath_expr); i = dfa_parse (dfa, &regexp); diff --git a/doc/recordmodel.xml b/doc/recordmodel.xml index b83fa67..408494f 100644 --- a/doc/recordmodel.xml +++ b/doc/recordmodel.xml @@ -1,5 +1,5 @@ - + The Record Model @@ -1040,7 +1040,22 @@ - + + melm field$subfield attributes + + + This directive is specifically for MARC-formatted records, + ingested either in the form of MARCXML documents, or in the + ISO2709/Z39.2 format using the grs.marcxml input filter. You can + specify indexing rules for any subfield, or you can leave off the + $subfield part and specify default rules + for all subfields of the given field (note: default rules should come + after any subfield-specific rules in the configuration file). The + attributes have the same syntax and meaning + as for the 'elm' directive above. 
+ + + encoding encodingname diff --git a/tab/Makefile.am b/tab/Makefile.am index 9ce2b6b..30d5761 100644 --- a/tab/Makefile.am +++ b/tab/Makefile.am @@ -1,4 +1,4 @@ -## $Id: Makefile.am,v 1.5 2004-05-21 13:25:17 adam Exp $ +## $Id: Makefile.am,v 1.5.2.1 2005-06-09 22:08:11 adam Exp $ tabdatadir = $(pkgdatadir)/tab tabdata_DATA = bib1.att dan1.att danbib.abs danmarc.abs danmarc.mar \ @@ -9,7 +9,7 @@ tabdata_DATA = bib1.att dan1.att danbib.abs danmarc.abs danmarc.mar \ nwi.flt refer.flt scan.chr sgml.flt soif.flt string.chr summary.abs \ summary.tag tagsetg.tag tagsetm.tag urx.chr usmarc.abs usmarc-b.est \ usmarc.flt usmarc.mar usmarc.tag var1.var wais.abs wais-b.est \ - wais-variant.est + wais-variant.est marc21.abs EXTRA_DIST = $(tabdata_DATA) diff --git a/tab/marc21.abs b/tab/marc21.abs new file mode 100644 index 0000000..a36ab78 --- /dev/null +++ b/tab/marc21.abs @@ -0,0 +1,75 @@ +# $Id: marc21.abs,v 1.2.2.1 2005-06-09 22:08:11 adam Exp $ + +# This is a fairly simple example of a set of MARC21 indexing rules. It +# results in a server which provides a passable Bath level 0 and 1 service +# (author, title, subject, keyword and exact services). Feel free to +# elaborate on it, and if you do, please consider sharing your additions. +# NOTE: This is designed to be used with the grs.marcxml input filter +# for ISO2709 (ANSI Z39.2) or grs.xml for MARCXML-formatted records. It +# won't work for the old grs.marc input filter, which yields a different +# internal structure. 
+ +name marc21 +attset bib1.att + +esetname F @ +esetname B @ + +marc usmarc.mar + +xpath disable + +all any + +melm 100 author,author:p +melm 110 author +melm 111 author +melm 130 title +melm 240 title,title:p +melm 242 title,title:p +melm 243 title,title:p +melm 245$c author +melm 245 title,title:p +melm 246 title,title:p +melm 247 title,title:p +melm 400$t title,author +melm 400 author +melm 410$t title,author +melm 410 author +melm 411$t title,author +melm 411 author +melm 440$a title,title:p +melm 440 title +melm 490$a title,title:p +melm 490 title +melm 600$t title +melm 600 subject-heading,subject-heading:p +melm 610$t title +melm 610 subject-heading +melm 611$t title +melm 611 subject-heading +melm 630 subject-heading +melm 650 subject-heading,subject-heading:p +melm 651 subject-heading,subject-heading:p +melm 653 subject-heading,subject-heading:p +melm 654 subject-heading +melm 655 subject-heading +melm 656 subject-heading +melm 657 subject-heading +melm 700$t title,author +melm 700$a author,author:p +melm 700 author +melm 710$t title,author +melm 710$a author,author:p +melm 710 author +melm 711$t title,author +melm 711 author +melm 730 title +melm 740 title +melm 800$t title,author +melm 800 author +melm 810$t title,author +melm 810 author +melm 811$t title,author +melm 811 author +melm 830 title -- 1.7.10.4