Added test of DOM filter and use of input chain + snippets.
author Adam Dickmeiss <adam@indexdata.dk>
Wed, 9 Jul 2008 08:58:14 +0000 (10:58 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Wed, 9 Jul 2008 08:58:14 +0000 (10:58 +0200)
test/xslt/Makefile.am
test/xslt/dcaddmeta.xsl [new file with mode: 0644]
test/xslt/dctoindex.xsl [new file with mode: 0644]
test/xslt/dom1.c
test/xslt/gpdctodc.xsl [new file with mode: 0644]
test/xslt/gutenberg-sample.xml [new file with mode: 0644]
test/xslt/gutenberg.xml [new file with mode: 0644]

index 60c8351..052f1b6 100644 (file)
@@ -14,6 +14,11 @@ EXTRA_DIST= \
      dom-config-del.xml \
      dom-brief.xsl \
      dom-snippet.xsl \
+     gutenberg.xml \
+     gutenberg-sample.xml \
+     dcaddmeta.xsl \
+     gpdctodc.xsl \
+     dctoindex.xsl \
      del-col.xml \
      id.xsl \
      index.xsl \
diff --git a/test/xslt/dcaddmeta.xsl b/test/xslt/dcaddmeta.xsl
new file mode 100644 (file)
index 0000000..c41a2f6
--- /dev/null
@@ -0,0 +1,17 @@
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:m="http://www.loc.gov/MARC21/slim"
+  xmlns:z="http://indexdata.com/zebra-2.0"
+  exclude-result-prefixes="m z"
+  version="1.0">
+  <xsl:output indent="yes" method="xml" version="1.0" encoding="UTF-8"/>
+  <!-- Copy the children of the root element into a new srw_dc:dc wrapper and append a z:meta element with element_set_name="snippet" -->
+  <xsl:template match="/*">
+    <srw_dc:dc xmlns:srw_dc="info:srw/schema/1/dc-schema"
+              xmlns:dc="http://purl.org/dc/elements/1.1/"
+             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+      <xsl:copy-of select="*"/>
+      <z:meta element_set_name="snippet"/>
+    </srw_dc:dc>
+  </xsl:template>
+
+</xsl:stylesheet>
diff --git a/test/xslt/dctoindex.xsl b/test/xslt/dctoindex.xsl
new file mode 100644 (file)
index 0000000..106c947
--- /dev/null
@@ -0,0 +1,62 @@
+<?xml version="1.0"?>
+
+<!-- This maps any root element containing elements in the DC namespace to an index structure
+-->
+
+<xsl:stylesheet
+  version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:pgterms="http://www.gutenberg.org/rdfterms/"
+  xmlns:z="http://indexdata.com/zebra-2.0"
+  exclude-result-prefixes="pgterms">
+
+  <!-- '|'-delimited local names of the elements that get a sort (:s) index in addition to their word index -->
+  <xsl:variable name="sort">|title|date|creator|</xsl:variable>
+  <!-- '|'-delimited local names of the elements that get a phrase (:p) index in addition to their word index -->
+  <xsl:variable name="phrase">|title|date|creator|</xsl:variable>
+
+  <xsl:output method="xml" indent="yes"/>
+
+  <xsl:template match="/ignore">
+    <z:record/> <!-- a root element named "ignore" produces an empty record: nothing is indexed -->
+  </xsl:template>
+
+  <xsl:template match="/*" priority="0">
+    <!-- Any other root element: the per-field indexes are nested under any:w, then the sort and phrase keys follow.
+         The explicit priority ranks this rule below match="/ignore" (default 0.5), which it would otherwise tie with. -->
+    <z:record>
+      <z:index name="any:w">
+        <xsl:apply-templates/>
+      </z:index>
+      <xsl:call-template name="special-indexes"/>
+      <!--
+      <z:index name="anywhere:w">
+        <xsl:value-of select="normalize-space()"/>
+      </z:index>
+      -->
+    </z:record>
+  </xsl:template>
+
+  <xsl:template match="/*/*[namespace-uri() = 'http://purl.org/dc/elements/1.1/']">
+    <z:index name="{local-name()}:w"> <!-- word index named after the DC element's local name -->
+      <xsl:value-of select="."/>
+    </z:index>
+  </xsl:template>
+
+  <!-- Emit a sort (:s) and/or phrase (:p) index for every child of the root element
+       whose local name is listed in $sort / $phrase.  The name is wrapped in '|'
+       before the lookup so that only whole list entries match, not substrings. -->
+  <xsl:template name="special-indexes">
+    <xsl:for-each select="/*/*">
+      <xsl:variable name="key" select="concat('|', local-name(.), '|')"/>
+      <xsl:if test="contains($sort, $key)">
+        <z:index name="{local-name(.)}:s"><xsl:value-of select="."/></z:index>
+      </xsl:if>
+      <xsl:if test="contains($phrase, $key)">
+        <z:index name="{local-name(.)}:p"><xsl:value-of select="."/></z:index>
+      </xsl:if>
+    </xsl:for-each>
+  </xsl:template>
+
+  <xsl:template match="text()"/> <!-- drop text not consumed by a rule above, instead of letting the built-in rule copy it -->
+</xsl:stylesheet>
index ded19e5..d68ec7a 100644 (file)
@@ -126,6 +126,18 @@ void tst(int argc, char **argv)
     YAZ_CHECK(tl_query(zh, "@attr 1=title 3", 1));
     YAZ_CHECK(tl_query(zh, "@attr 1=title b", 1));
 
+    zh = index_some(zs, "dom.gutenberg.xml", "gutenberg-sample.xml");
+    YAZ_CHECK(tl_query(zh, "selected", 1));
+
+    YAZ_CHECK_EQ(tl_fetch_first_compare(
+                     zh, "zebra::snippet", yaz_oid_recsyn_xml,
+                     "<record xmlns=\"http://www.indexdata.com/zebra/\">\n"
+                     "  <snippet name=\"any\" type=\"w\">etext/1338\n"
+                     "    Project Gutenberg\n"
+                     "    <s>Selected</s> Prose of Oscar Wilde</snippet>\n"
+                     "</record>"),
+                 ZEBRA_OK);
+
     zebra_close(zh);
 
 
diff --git a/test/xslt/gpdctodc.xsl b/test/xslt/gpdctodc.xsl
new file mode 100644 (file)
index 0000000..787364a
--- /dev/null
@@ -0,0 +1,100 @@
+<xsl:stylesheet
+  version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:pgterms="http://www.gutenberg.org/rdfterms/"
+  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+  xmlns:dcterms="http://purl.org/dc/terms/"
+  xmlns:dc="http://purl.org/dc/elements/1.1/"
+  exclude-result-prefixes="pgterms rdf dcterms">
+
+  <xsl:output method="xml" indent="yes"/>
+
+  <!-- Record element: wrap a pgterms:etext root in srw_dc:dc, emit a permalink identifier, then process its children -->
+  <xsl:template match="/pgterms:etext">
+    <srw_dc:dc
+       xmlns:srw_dc="info:srw/schema/1/dc-schema"
+       xmlns:dc="http://purl.org/dc/elements/1.1/"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xsi:schemaLocation="info:srw/schema/1/dc-schema
+       http://www.loc.gov/standards/sru/dc-schema.xsd">
+
+      <!-- Generate GP permalink: the rdf:ID from its sixth character on (drops a five-character prefix such as "etext") -->
+      <dc:identifier>
+        <xsl:text>http://www.gutenberg.org/etext/</xsl:text>
+       <xsl:value-of select="substring(@rdf:ID, 6)"/>
+      </dc:identifier>
+
+      <xsl:apply-templates/>
+
+    </srw_dc:dc>
+  </xsl:template>
+
+  <!-- Ignore other root elements by mapping them into empty DOM XML trees; priority 0 ranks this below /pgterms:etext (default 0.5) -->
+  <xsl:template match="/*" priority="0"/>
+
+  <!-- Any DC element (except the special cases below); the output name is rebuilt with this stylesheet's dc prefix, whatever prefix the source used -->
+  <xsl:template match="/pgterms:etext/*[namespace-uri() = 'http://purl.org/dc/elements/1.1/']">
+    <xsl:choose>
+      <xsl:when test="rdf:Bag">
+        <xsl:variable name="myname" select="concat('dc:', local-name())"/>
+        <xsl:for-each select="rdf:Bag/*">
+          <xsl:call-template name="cond-display">
+            <xsl:with-param name="name" select="$myname"/>
+            <xsl:with-param name="value" select="."/>
+          </xsl:call-template>
+        </xsl:for-each>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:call-template name="cond-display">
+          <xsl:with-param name="name" select="concat('dc:', local-name())"/>
+          <xsl:with-param name="value" select="."/>
+        </xsl:call-template>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  <!-- Display this element only if we know and like what kind of content it has.
+       name:  qualified name of the element to create.
+       value: node supplying the content.
+       A value that has text of its own, or that wraps a dcterms:LCSH, dcterms:W3CDTF
+       or dcterms:ISO639-2 child, becomes element $name; anything else is reported as
+       unknown-type.  The dcterms tests are anchored at $value so that the outcome
+       does not depend on the caller's context node. -->
+  <xsl:template name="cond-display">
+    <xsl:param name="name"/>
+    <xsl:param name="value"/>
+
+    <xsl:choose>
+      <xsl:when test="$value/text()
+                      or $value/dcterms:LCSH
+                      or $value/dcterms:W3CDTF
+                      or $value/dcterms:ISO639-2">
+        <xsl:element name="{$name}">
+          <xsl:value-of select="normalize-space($value)"/>
+        </xsl:element>
+      </xsl:when>
+      <xsl:otherwise>
+        <unknown-type name="{$name}" type="{local-name($value/*)}">
+          <xsl:value-of select="normalize-space($value)"/>
+        </unknown-type>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  <!-- Special cases: these elements also match the generic DC rule, so an explicit priority selects these rules. -->
+  <!-- dc:rights takes its value from the rdf:resource attribute -->
+  <xsl:template match="//dc:rights" priority="1">
+    <dc:rights><xsl:value-of select="@rdf:resource"/></dc:rights>
+  </xsl:template>
+
+  <!-- This is hardly a DC element -->
+  <xsl:template match="//dc:tableOfContents" priority="1">
+    <tableOfContents>
+      <xsl:value-of select="."/>
+    </tableOfContents>
+  </xsl:template>
+
+  <xsl:template match="text()"/> <!-- drop text not consumed by a rule above, instead of letting the built-in rule copy it -->
+
+</xsl:stylesheet>
+
diff --git a/test/xslt/gutenberg-sample.xml b/test/xslt/gutenberg-sample.xml
new file mode 100644 (file)
index 0000000..ca90edc
--- /dev/null
@@ -0,0 +1,35 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE rdf:RDF [
+  <!ENTITY pg  "Project Gutenberg">
+  <!ENTITY lic "http://www.gutenberg.org/license">
+  <!ENTITY f   "http://www.gutenberg.org/dirs/">
+]>
+
+
+<pgterms:etext rdf:ID="etext1338"
+xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xmlns:dc="http://purl.org/dc/elements/1.1/"
+         xmlns:dcterms="http://purl.org/dc/terms/"
+         xmlns:dcmitype="http://purl.org/dc/dcmitype/"
+         xmlns:cc="http://web.resource.org/cc/"
+         xmlns:pgterms="http://www.gutenberg.org/rdfterms/"
+         xml:base="http://www.gutenberg.org/feeds/catalog.rdf">
+  <dc:publisher>&pg;</dc:publisher>
+  <dc:title rdf:parseType="Literal">Selected Prose of Oscar Wilde</dc:title>
+  <dc:tableOfContents rdf:parseType="Literal">Preface by Robert Ross -- How They Struck a Contemporary -- The Quality of George Meredith -- Life in the Fallacious Model -- Life the Disciple -- Life the Plagiarist -- The Indispensable East -- The Influence of the Impressionists on Climate --</dc:tableOfContents>
+  <dc:creator rdf:parseType="Literal">Wilde, Oscar, 1854-1900</dc:creator>
+  <pgterms:friendlytitle rdf:parseType="Literal">Selected Prose of Oscar Wilde by Oscar Wilde</pgterms:friendlytitle>
+  <dc:language><dcterms:ISO639-2><rdf:value>en</rdf:value></dcterms:ISO639-2></dc:language>
+  <dc:subject>
+    <rdf:Bag>
+      <rdf:li><dcterms:LCSH><rdf:value>Essays</rdf:value></dcterms:LCSH></rdf:li>
+      <rdf:li><dcterms:LCSH><rdf:value>Short stories</rdf:value></dcterms:LCSH></rdf:li>
+    </rdf:Bag>
+  </dc:subject>
+  <dc:subject><dcterms:LCC><rdf:value>PR</rdf:value></dcterms:LCC></dc:subject>
+  <dc:created><dcterms:W3CDTF><rdf:value>1998-06-01</rdf:value></dcterms:W3CDTF></dc:created>
+  <dc:rights rdf:resource="&lic;" />
+</pgterms:etext>
diff --git a/test/xslt/gutenberg.xml b/test/xslt/gutenberg.xml
new file mode 100644 (file)
index 0000000..b2a8b4f
--- /dev/null
@@ -0,0 +1,22 @@
+<dom xmlns="http://indexdata.com/zebra-2.0">
+  <input>
+    <xmlreader level="0"/>
+    <xslt stylesheet="gpdctodc.xsl"/>
+  </input>
+
+  <extract>
+    <xslt stylesheet="dctoindex.xsl"/>
+  </extract>
+
+  <retrieve name="dc"/>
+
+  <retrieve name="dcsnippets">
+    <xslt stylesheet="dcaddmeta.xsl"/>
+    <meta/>
+  </retrieve>
+
+  <retrieve name="index">
+    <xslt stylesheet="dctoindex.xsl"/>
+  </retrieve>
+
+</dom>