Factor out record normalization
author Adam Dickmeiss <adam@indexdata.dk>
Wed, 7 Oct 2009 12:19:14 +0000 (14:19 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Wed, 7 Oct 2009 12:19:14 +0000 (14:19 +0200)
src/Makefile.am
src/logic.c
src/normalize_record.c [new file with mode: 0644]
src/normalize_record.h [new file with mode: 0644]
src/pazpar2.h
src/pazpar2_config.h
win/makefile

index fbb7b16..6c846ea 100644 (file)
@@ -27,7 +27,7 @@ libpazpar2_a_SOURCES = pazpar2_config.c pazpar2_config.h eventl.c eventl.h \
        charsets.c charsets.h \
        client.c client.h connection.c connection.h host.h parameters.h \
        dirent.c direntz.h marcmap.c marcmap.h marchash.c marchash.h \
-       jenkins_hash.c jenkins_hash.h
+       jenkins_hash.c jenkins_hash.h normalize_record.c normalize_record.h
 
 pazpar2_SOURCES = pazpar2.c
 pazpar2_LDADD = libpazpar2.a $(YAZLIB)
index 09fb3fd..e393214 100644 (file)
@@ -68,7 +68,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include "client.h"
 #include "settings.h"
 #include "normalize7bit.h"
-#include "marcmap.h"
 
 #define TERMLIST_HIGH_SCORE 25
 
@@ -244,52 +243,28 @@ static void insert_settings_values(struct session_database *sdb, xmlDoc *doc,
 xmlDoc *normalize_record(struct session_database *sdb, struct session *se,
                          const char *rec)
 {
-    struct database_retrievalmap *m;
     xmlDoc *rdoc = record_to_xml(sdb, rec);
+
     if (rdoc)
     {
-        for (m = sdb->map; m; m = m->next)
+        char *parms[MAX_XSLT_ARGS*2+1];
+        
+        insert_settings_parameters(sdb, se, parms);
+        
+        if (normalize_record_transform(sdb->map, &rdoc, (const char **)parms))
         {
-            xmlDoc *new = 0;
+            yaz_log(YLOG_WARN, "Normalize failed from %s", sdb->database->url);
+        }
+        else
+        {
+            insert_settings_values(sdb, rdoc, se->service);
             
+            if (global_parameters.dump_records)
             {
-                xmlNodePtr root = 0;
-                char *parms[MAX_XSLT_ARGS*2+1];
-
-                insert_settings_parameters(sdb, se, parms);
-
-                if (m->stylesheet)
-                {
-                    new = xsltApplyStylesheet(m->stylesheet, rdoc, (const char **) parms);
-                }
-                else if (m->marcmap)
-                {
-                    new = marcmap_apply(m->marcmap, rdoc);
-                }
-
-                root = xmlDocGetRootElement(new);
-
-                if (!new || !root || !(root->children))
-                {
-                    yaz_log(YLOG_WARN, "XSLT transformation failed from %s",
-                            sdb->database->url);
-                    xmlFreeDoc(new);
-                    xmlFreeDoc(rdoc);
-                    return 0;
-                }
+                yaz_log(YLOG_LOG, "Normalized record from %s", 
+                        sdb->database->url);
+                log_xml_doc(rdoc);
             }
-            
-            xmlFreeDoc(rdoc);
-            rdoc = new;
-        }
-
-        insert_settings_values(sdb, rdoc, se->service);
-
-        if (global_parameters.dump_records)
-        {
-            yaz_log(YLOG_LOG, "Normalized record from %s", 
-                    sdb->database->url);
-            log_xml_doc(rdoc);
         }
     }
     return rdoc;
@@ -320,9 +295,6 @@ static int prepare_map(struct session *se, struct session_database *sdb)
     }
     if ((s = session_setting_oneval(sdb, PZ_XSLT)))
     {
-        char **stylesheets;
-        struct database_retrievalmap **m = &sdb->map;
-        int num, i;
         char auto_stylesheet[256];
 
         if (!strcmp(s, "auto"))
@@ -347,46 +319,8 @@ static int prepare_map(struct session *se, struct session_database *sdb)
                 yaz_log(YLOG_WARN, "No pz:requestsyntax for auto stylesheet");
             }
         }
-        nmem_strsplit(se->session_nmem, ",", s, &stylesheets, &num);
-        for (i = 0; i < num; i++)
-        {
-            WRBUF fname = conf_get_fname(se->service, stylesheets[i]);
-            
-            (*m) = nmem_malloc(se->session_nmem, sizeof(**m));
-            (*m)->next = 0;
-            
-            // XSLT
-            if (!strcmp(&stylesheets[i][strlen(stylesheets[i])-4], ".xsl")) 
-            {    
-                (*m)->marcmap = NULL;
-                if (!((*m)->stylesheet =
-                      xsltParseStylesheetFile((xmlChar *) wrbuf_cstr(fname))))
-                {
-                    yaz_log(YLOG_FATAL|YLOG_ERRNO, "Unable to load stylesheet: %s",
-                            stylesheets[i]);
-                    wrbuf_destroy(fname);
-                    return -1;
-                }
-            }
-            // marcmap
-            else if (!strcmp(&stylesheets[i][strlen(stylesheets[i])-5], ".mmap"))
-            {
-                (*m)->stylesheet = NULL;
-                if (!((*m)->marcmap = marcmap_load(wrbuf_cstr(fname), se->session_nmem)))
-                {
-                    yaz_log(YLOG_FATAL|YLOG_ERRNO, "Unable to load marcmap: %s",
-                            stylesheets[i]);
-                    wrbuf_destroy(fname);
-                    return -1;
-                }
-            }
-            wrbuf_destroy(fname);
-            m = &(*m)->next;
-        }
+        sdb->map = normalize_record_create(se->service, s);
     }
-    if (!sdb->map)
-        yaz_log(YLOG_WARN, "No Normalization stylesheet for target %s",
-                sdb->database->url);
     return 0;
 }
 
@@ -618,10 +552,8 @@ static void session_init_databases_fun(void *context, struct database *db)
 // Doesn't free memory associated with sdb -- nmem takes care of that
 static void session_database_destroy(struct session_database *sdb)
 {
-    struct database_retrievalmap *m;
-
-    for (m = sdb->map; m; m = m->next)
-        xsltFreeStylesheet(m->stylesheet);
+    normalize_record_destroy(sdb->map);
+    sdb->map = 0;
 }
 
 // Initialize session_database list -- this represents this session's view
@@ -692,10 +624,7 @@ void session_apply_setting(struct session *se, char *dbname, char *setting,
     case PZ_XSLT:
         if (sdb->map)
         {
-            struct database_retrievalmap *m;
-            // We don't worry about the map structure -- it's in nmem
-            for (m = sdb->map; m; m = m->next)
-                xsltFreeStylesheet(m->stylesheet);
+            normalize_record_destroy(sdb->map);
             sdb->map = 0;
         }
         break;
diff --git a/src/normalize_record.c b/src/normalize_record.c
new file mode 100644 (file)
index 0000000..c7706e5
--- /dev/null
@@ -0,0 +1,176 @@
+/* This file is part of Pazpar2.
+   Copyright (C) 2006-2009 Index Data
+
+Pazpar2 is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+*/
+
+#include <string.h>
+
+#include <yaz/yaz-util.h>
+#include <yaz/nmem.h>
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "normalize_record.h"
+
+#include "pazpar2_config.h"
+
+#include "marcmap.h"
+#include <libxslt/xslt.h>
+#include <libxslt/transform.h>
+
+/* One step of a normalization chain. normalize_record_create() fills in
+   at most one of stylesheet / marcmap for each step; the other stays NULL. */
+struct normalize_step {
+    struct normalize_step *next;  /* following step, NULL at end of chain */
+    xsltStylesheet *stylesheet;   /* parsed XSLT stylesheet, or NULL */
+    struct marcmap *marcmap;      /* loaded marcmap, or NULL */
+};
+
+/* A complete normalization chain together with the memory it lives in. */
+struct normalize_record_s {
+    struct normalize_step *steps; /* head of the list of steps, in spec order */
+    char *spec;                   /* copy of the spec string, held in nmem */
+    NMEM nmem;                    /* NMEM handle used for spec and step nodes */
+};
+
+/* Returns the spec string that nt was created from, or a null pointer
+   when nt itself is null. The string is owned by nt. */
+const char *normalize_record_get_spec(normalize_record_t nt)
+{
+    return nt ? nt->spec : 0;
+}
+
+/* Returns non-zero when str ends with suffix. Strings shorter than the
+   suffix never match, so nothing before the start of str is read. */
+static int has_suffix(const char *str, const char *suffix)
+{
+    size_t len = strlen(str);
+    size_t slen = strlen(suffix);
+
+    return len >= slen && !strcmp(str + len - slen, suffix);
+}
+
+/*
+ * Builds a normalization chain from a comma-separated list of file names.
+ *
+ * service: configuration passed to conf_get_fname() to obtain the file
+ *          name that is actually loaded for each entry.
+ * spec:    comma-separated entries; an entry ending in ".xsl" is parsed
+ *          as an XSLT stylesheet, an entry ending in ".mmap" is loaded as
+ *          a marcmap. Any other entry is an error.
+ *
+ * Returns the new chain, to be released with normalize_record_destroy().
+ * Returns a null pointer (after logging) if one or more entries could not
+ * be loaded or had an unsupported extension; nothing is leaked in that case.
+ */
+normalize_record_t normalize_record_create(struct conf_service *service,
+                                           const char *spec)
+{
+    normalize_record_t nt = xmalloc(sizeof(*nt));
+    struct normalize_step **m;
+    int i, num;
+    int num_errors = 0;
+    char **stylesheets;
+
+    nt->nmem = nmem_create();
+    nt->spec = nmem_strdup(nt->nmem, spec);
+    nt->steps = 0;
+
+    m = &nt->steps;
+
+    nmem_strsplit(nt->nmem, ",", spec, &stylesheets, &num);
+    for (i = 0; i < num; i++)
+    {
+        WRBUF fname = conf_get_fname(service, stylesheets[i]);
+
+        /* Link the step in fully initialised, so the list is always
+           terminated and safe to walk, also on the error path. */
+        *m = nmem_malloc(nt->nmem, sizeof(**m));
+        (*m)->next = 0;
+        (*m)->marcmap = NULL;
+        (*m)->stylesheet = NULL;
+
+        if (has_suffix(stylesheets[i], ".xsl"))
+        {
+            // XSLT
+            if (!((*m)->stylesheet =
+                  xsltParseStylesheetFile((xmlChar *) wrbuf_cstr(fname))))
+            {
+                yaz_log(YLOG_FATAL|YLOG_ERRNO, "Unable to load stylesheet: %s",
+                        stylesheets[i]);
+                num_errors++;
+            }
+        }
+        else if (has_suffix(stylesheets[i], ".mmap"))
+        {
+            // marcmap
+            if (!((*m)->marcmap = marcmap_load(wrbuf_cstr(fname), nt->nmem)))
+            {
+                yaz_log(YLOG_FATAL|YLOG_ERRNO, "Unable to load marcmap: %s",
+                        stylesheets[i]);
+                num_errors++;
+            }
+        }
+        else
+        {
+            yaz_log(YLOG_FATAL, "Cannot handle stylesheet: %s", stylesheets[i]);
+            num_errors++;
+        }
+
+        wrbuf_destroy(fname);
+        m = &(*m)->next;
+    }
+
+    /* All entries are attempted so every problem gets logged; a chain
+       with any failed step is discarded as a whole. */
+    if (num_errors)
+    {
+        normalize_record_destroy(nt);
+        nt = 0;
+    }
+    return nt;
+}
+
+/* Releases a chain made by normalize_record_create(). A null nt is
+   accepted and ignored. */
+void normalize_record_destroy(normalize_record_t nt)
+{
+    struct normalize_step *step;
+
+    if (!nt)
+        return;
+
+    /* Stylesheets are released one by one through libxslt. The step nodes
+       and the spec string were allocated from nt->nmem and go away with
+       it; marcmaps were loaded with that same NMEM handle. */
+    step = nt->steps;
+    while (step)
+    {
+        if (step->stylesheet)
+            xsltFreeStylesheet(step->stylesheet);
+        step = step->next;
+    }
+    nmem_destroy(nt->nmem);
+
+    xfree(nt);
+}
+
+/*
+ * Runs *doc through every step of the chain, in order.
+ *
+ * nt:    the chain. A null chain is treated as having no steps: *doc is
+ *        left untouched and 0 is returned.
+ * doc:   in/out document. After each successful step the previous
+ *        document is freed and *doc is replaced by the step's result.
+ * parms: parameter array handed to xsltApplyStylesheet() for XSLT steps.
+ *
+ * Returns 0 when all steps succeeded. Returns -1 as soon as a step yields
+ * no document, no root element, or a root element without children; the
+ * input document is then freed and *doc is set to a null pointer, so the
+ * caller is never left holding a pointer to freed memory.
+ */
+int normalize_record_transform(normalize_record_t nt, xmlDoc **doc,
+    const char **parms)
+{
+    struct normalize_step *m;
+
+    if (!nt)
+        return 0;
+
+    for (m = nt->steps; m; m = m->next)
+    {
+        xmlNodePtr root = 0;
+        xmlDoc *new = 0;  /* stays null for a step with nothing to apply */
+
+        if (m->stylesheet)
+        {
+            new = xsltApplyStylesheet(m->stylesheet, *doc, parms);
+        }
+        else if (m->marcmap)
+        {
+            new = marcmap_apply(m->marcmap, *doc);
+        }
+
+        if (new)
+            root = xmlDocGetRootElement(new);
+
+        if (!root || !root->children)
+        {
+            if (new)
+                xmlFreeDoc(new);
+            xmlFreeDoc(*doc);
+            *doc = 0;
+            return -1;
+        }
+        xmlFreeDoc(*doc);
+        *doc = new;
+    }
+    return 0;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/src/normalize_record.h b/src/normalize_record.h
new file mode 100644 (file)
index 0000000..25f803f
--- /dev/null
@@ -0,0 +1,45 @@
+/* This file is part of Pazpar2.
+   Copyright (C) 2006-2009 Index Data
+
+Pazpar2 is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+*/
+
+#ifndef NORMALIZE_RECORD_H
+#define NORMALIZE_RECORD_H
+
+/* xmlDoc is used in the prototypes below; include its declaration so
+   this header can be included on its own. */
+#include <libxml/tree.h>
+
+/* Opaque handle for a chain of record normalization steps. */
+typedef struct normalize_record_s *normalize_record_t;
+
+struct conf_service;
+
+/* Creates a chain from spec, a comma-separated list of ".xsl" and ".mmap"
+   file names. Returns a null pointer if any entry cannot be loaded or has
+   another extension. Release the result with normalize_record_destroy(). */
+normalize_record_t normalize_record_create(struct conf_service *service,
+                                           const char *spec);
+
+/* Returns the spec string nt was created from, or a null pointer if nt
+   is null. */
+const char *normalize_record_get_spec(normalize_record_t nt);
+
+/* Frees nt and everything it owns. A null nt is ignored. */
+void normalize_record_destroy(normalize_record_t nt);
+
+/* Applies the steps of nt to *doc in order, replacing *doc with each
+   result. Returns 0 on success and -1 if a step fails; on failure the
+   document passed in has been freed and must not be used again. */
+int normalize_record_transform(normalize_record_t nt, xmlDoc **doc,
+    const char **parms);
+
+#endif
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
index b6771d1..f599f2d 100644 (file)
@@ -80,21 +80,13 @@ struct database_criterion {
     struct database_criterion *next;
 };
 
-// Normalization filter. Turns incoming record into internal representation
-// Simple sequence of stylesheets run in series.
-struct database_retrievalmap {
-    xsltStylesheet *stylesheet;
-    struct marcmap *marcmap;
-    struct database_retrievalmap *next;
-};
-
 // Represents a database as viewed from one session, possibly with settings overriden
 // for that session
 struct session_database
 {
     struct database *database;
     struct setting **settings;
-    struct database_retrievalmap *map;
+    normalize_record_t map;
     struct session_database *next;
 };
 
index 044eff7..1fd5d55 100644 (file)
@@ -20,8 +20,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #ifndef PAZPAR2_CONFIG_H
 #define PAZPAR2_CONFIG_H
 
-#include <libxslt/xslt.h>
-#include <libxslt/transform.h>
+#include "normalize_record.h"
 
 #include <yaz/nmem.h>
 #include "charsets.h"
index 2cd6557..8d3478d 100644 (file)
@@ -199,6 +199,7 @@ PAZPAR2_OBJS = \
    "$(OBJDIR)\jenkins_hash.obj" \
    "$(OBJDIR)\marcmap.obj" \
    "$(OBJDIR)\marchash.obj" \
+   "$(OBJDIR)\normalize_record.obj" \
    "$(OBJDIR)\connection.obj"