Retrieval transform operational
[metaproxy-moved-to-github.git] / src / filter_zoom.cpp
index c2c33b8..b304074 100644 (file)
@@ -21,18 +21,38 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <yaz/zoom.h>
 #include <metaproxy/package.hpp>
 #include <metaproxy/util.hpp>
+#include "torus.hpp"
+
+#include <libxslt/xsltutils.h>
+#include <libxslt/transform.h>
 
 #include <boost/thread/mutex.hpp>
 #include <boost/thread/condition.hpp>
+#include <yaz/oid_db.h>
 #include <yaz/diagbib1.h>
 #include <yaz/log.h>
 #include <yaz/zgdu.h>
+#include <yaz/querytowrbuf.h>
 
 namespace mp = metaproxy_1;
 namespace yf = mp::filter;
 
 namespace metaproxy_1 {
     namespace filter {
+        // Settings for one searchable target. Impl::parse_torus() fills the
+        // fields from the child elements of a torus <layer> element.
+        struct Zoom::Searchable {
+            std::string database;            // <id>; compared with the normalized requested database name
+            std::string target;              // <zurl>; address handed to Backend::connect()
+            std::string query_encoding;      // <queryEncoding>; sent as the "rpnCharset" option when non-empty
+            std::string sru;                 // <sru>; when non-empty the target is reached via "http://" + target
+            std::string request_syntax;      // <requestSyntax>; syntax requested from the backend for pz2 retrieval
+            std::string element_set;         // <elementSet>; element set used for pz2 retrieval ("F" when empty)
+            std::string record_encoding;     // <recordEncoding>; appended as "; charset=" to the record type
+            std::string transform_xsl_fname; // <transform>; file name of the XSLT stylesheet to load
+            bool use_turbomarc;              // <useTurboMarc>; selects record type "txml" instead of "xml"
+            bool piggyback;                  // <piggyback>; constructor default is true
+            Searchable();
+            ~Searchable();
+        };
         class Zoom::Backend {
             friend class Impl;
             friend class Frontend;
@@ -40,10 +60,18 @@ namespace metaproxy_1 {
             ZOOM_connection m_connection;
             ZOOM_resultset m_resultset;
             std::string m_frontend_database;
+            SearchablePtr sptr;
+            xsltStylesheetPtr xsp;
         public:
-            Backend();
+            Backend(SearchablePtr sptr);
             ~Backend();
-            void connect(std::string zurl);
+            void connect(std::string zurl, int *error, const char **addinfo);
+            void search_pqf(const char *pqf, Odr_int *hits,
+                            int *error, const char **addinfo);
+            void present(Odr_int start, Odr_int number, ZOOM_record *recs,
+                         int *error, const char **addinfo);
+            void set_option(const char *name, const char *value);
+            int get_error(const char **addinfo);
         };
         class Zoom::Frontend {
             friend class Impl;
@@ -51,28 +79,43 @@ namespace metaproxy_1 {
             bool m_is_virtual;
             bool m_in_use;
             yazpp_1::GDU m_init_gdu;
-            std::list<BackendPtr> m_backend_list;
+            BackendPtr m_backend;
             void handle_package(mp::Package &package);
             void handle_search(mp::Package &package);
             void handle_present(mp::Package &package);
-            BackendPtr get_backend_from_databases(std::string database);
+            BackendPtr get_backend_from_databases(std::string &database,
+                                                  int *error,
+                                                  const char **addinfo);
+            Z_Records *get_records(Odr_int start,
+                                   Odr_int number_to_present,
+                                   int *error,
+                                   const char **addinfo,
+                                   Odr_int *number_of_records_returned,
+                                   ODR odr, BackendPtr b,
+                                   Odr_oid *preferredRecordSyntax,
+                                   const char *element_set_name);
         public:
             Frontend(Impl *impl);
             ~Frontend();
         };
+        // Private implementation of the zoom filter: holds the configured
+        // searchables and the per-session Frontend objects.
         class Zoom::Impl {
+            // Frontend reads m_searchables when selecting a backend.
+            friend class Frontend;
         public:
             Impl();
             ~Impl();
             void process(metaproxy_1::Package & package);
-            void configure(const xmlNode * ptr);
+            void configure(const xmlNode * ptr, bool test_only);
         private:
             FrontendPtr get_frontend(mp::Package &package);
             void release_frontend(mp::Package &package);
+            // Adds one Searchable per <record>/<layer> found below ptr.
+            void parse_torus(const xmlNode *ptr);
+
+            // Targets known to this filter, in configuration order.
+            std::list<Zoom::SearchablePtr>m_searchables;
 
             std::map<mp::Session, FrontendPtr> m_clients;            
             boost::mutex m_mutex;
             boost::condition m_cond_session_ready;
+            // Used by configure() to fetch searchables for a <torus url="..."> element.
+            mp::Torus torus;
         };
     }
 }
@@ -89,7 +132,7 @@ yf::Zoom::~Zoom()
 
+// Forwards the configuration node and the test_only flag to the
+// implementation object.
 void yf::Zoom::configure(const xmlNode *xmlnode, bool test_only)
 {
-    m_p->configure(xmlnode);
+    m_p->configure(xmlnode, test_only);
 }
 
 void yf::Zoom::process(mp::Package &package) const
@@ -100,7 +143,7 @@ void yf::Zoom::process(mp::Package &package) const
 
 // define Implementation stuff
 
-yf::Zoom::Backend::Backend()
+yf::Zoom::Backend::Backend(SearchablePtr ptr) : sptr(ptr)
 {
     m_connection = ZOOM_connection_create(0);
     m_resultset = 0;
@@ -108,13 +151,58 @@ yf::Zoom::Backend::Backend()
 
+// Frees the XSLT stylesheet (when one is attached) and then the ZOOM
+// connection and result set.
+// NOTE(review): the constructor lines visible in this change do not
+// initialise xsp; it is only assigned in get_backend_from_databases()
+// right after construction -- confirm no other construction path exists.
 yf::Zoom::Backend::~Backend()
 {
+    if (xsp)
+        xsltFreeStylesheet(xsp);
     ZOOM_connection_destroy(m_connection);
     ZOOM_resultset_destroy(m_resultset);
 }
 
-void yf::Zoom::Backend::connect(std::string zurl)
+// Connects the ZOOM connection to zurl and reports the outcome.
+//   error    receives the ZOOM connection error code (0 means no error)
+//   addinfo  receives the additional-info string that belongs to the error
+void yf::Zoom::Backend::connect(std::string zurl,
+                                int *error, const char **addinfo)
 {
     ZOOM_connection_connect(m_connection, zurl.c_str(), 0);
+    *error = ZOOM_connection_error(m_connection, 0, addinfo);
+}
+
+void yf::Zoom::Backend::search_pqf(const char *pqf, Odr_int *hits,
+                                   int *error, const char **addinfo)
+{
+    m_resultset = ZOOM_connection_search_pqf(m_connection, pqf);
+    *error = ZOOM_connection_error(m_connection, 0, addinfo);
+    if (*error == 0)
+        *hits = ZOOM_resultset_size(m_resultset);
+    else
+        *hits = 0;
+}
+
+void yf::Zoom::Backend::present(Odr_int start, Odr_int number,
+                                ZOOM_record *recs,
+                                int *error, const char **addinfo)
+{
+    ZOOM_resultset_records(m_resultset, recs, start, number);
+    *error = ZOOM_connection_error(m_connection, 0, addinfo);
+}
+
+// Sets a ZOOM option on the connection and, when a result set exists,
+// on that result set as well.
+void yf::Zoom::Backend::set_option(const char *name, const char *value)
+{
+    ZOOM_connection_option_set(m_connection, name, value);
+    if (!m_resultset)
+        return;
+    ZOOM_resultset_option_set(m_resultset, name, value);
+}
+
+int yf::Zoom::Backend::get_error(const char **addinfo)
+{
+    return ZOOM_connection_error(m_connection, 0, addinfo);
+}
+
+// Defaults: TurboMarc off, piggyback on. The string members start empty.
+yf::Zoom::Searchable::Searchable()
+    : use_turbomarc(false), piggyback(true)
+{
+}
+
+// Nothing to release explicitly; the body is empty.
+yf::Zoom::Searchable::~Searchable()
+{
 }
 
 yf::Zoom::Frontend::Frontend(Impl *impl) : 
@@ -179,27 +267,381 @@ yf::Zoom::Impl::~Impl()
 { 
 }
 
-void yf::Zoom::Impl::configure(const xmlNode *xmlnode)
+void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
+{
+    if (!ptr1)
+        return ;
+    for (ptr1 = ptr1->children; ptr1; ptr1 = ptr1->next)
+    {
+        if (ptr1->type != XML_ELEMENT_NODE)
+            continue;
+        if (!strcmp((const char *) ptr1->name, "record"))
+        {
+            const xmlNode *ptr2 = ptr1;
+            for (ptr2 = ptr2->children; ptr2; ptr2 = ptr2->next)
+            {
+                if (ptr2->type != XML_ELEMENT_NODE)
+                    continue;
+                if (!strcmp((const char *) ptr2->name, "layer"))
+                {
+                    Zoom::SearchablePtr s(new Searchable);
+
+                    const xmlNode *ptr3 = ptr2;
+                    for (ptr3 = ptr3->children; ptr3; ptr3 = ptr3->next)
+                    {
+                        if (ptr3->type != XML_ELEMENT_NODE)
+                            continue;
+                        if (!strcmp((const char *) ptr3->name, "id"))
+                        {
+                            s->database = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name, "zurl"))
+                        {
+                            s->target = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name, "sru"))
+                        {
+                            s->sru = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "queryEncoding"))
+                        {
+                            s->query_encoding = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "piggyback"))
+                        {
+                            s->piggyback = mp::xml::get_bool(ptr3, true);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "requestSyntax"))
+                        {
+                            s->request_syntax = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "elementSet"))
+                        {
+                            s->element_set = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "recordEncoding"))
+                        {
+                            s->record_encoding = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "transform"))
+                        {
+                            s->transform_xsl_fname = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "useTurboMarc"))
+                        {
+                            yaz_log(YLOG_LOG, "seeing useTurboMarc");
+                            s->use_turbomarc = mp::xml::get_bool(ptr3, false);
+                            yaz_log(YLOG_LOG, "value=%s",
+                                    s->use_turbomarc ? "1" : "0");
+                                    
+                        }
+                    }
+                    if (s->database.length() && s->target.length())
+                    {
+                        yaz_log(YLOG_LOG, "add db=%s target=%s turbomarc=%s", 
+                                s->database.c_str(), s->target.c_str(),
+                                s->use_turbomarc ? "1" : "0");
+                        m_searchables.push_back(s);
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only)
 {
+    for (ptr = ptr->children; ptr; ptr = ptr->next)
+    {
+        if (ptr->type != XML_ELEMENT_NODE)
+            continue;
+        if (!strcmp((const char *) ptr->name, "records"))
+        {
+            parse_torus(ptr);
+        }
+        else if (!strcmp((const char *) ptr->name, "torus"))
+        {
+            std::string url;
+            const struct _xmlAttr *attr;
+            for (attr = ptr->properties; attr; attr = attr->next)
+            {
+                if (!strcmp((const char *) attr->name, "url"))
+                    url = mp::xml::get_text(attr->children);
+                else
+                    throw mp::filter::FilterException(
+                        "Bad attribute " + std::string((const char *)
+                                                       attr->name));
+            }
+            torus.read_searchables(url);
+            xmlDoc *doc = torus.get_doc();
+            if (doc)
+            {
+                xmlNode *ptr = xmlDocGetRootElement(doc);
+                parse_torus(ptr);
+            }
+        }
+        else
+        {
+            throw mp::filter::FilterException
+                ("Bad element " 
+                 + std::string((const char *) ptr->name)
+                 + " in zoom filter");
+        }
+    }
 }
 
+// Returns the backend to use for `database`.
+// The current backend is reused when it was opened under the same
+// frontend database name. Otherwise the normalized name is looked up in
+// the configured searchables, the optional XSLT stylesheet is loaded, and
+// a new backend is created and connected.
+//   error/addinfo  receive a Bib-1 diagnostic or the connect error.
+// An empty BackendPtr is returned when the database is unknown or the
+// stylesheet cannot be loaded. When connecting fails the new backend is
+// still returned, but it is not kept in m_backend.
 yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
-    std::string database)
+    std::string &database, int *error, const char **addinfo)
 {
     std::list<BackendPtr>::const_iterator map_it;
-    map_it = m_backend_list.begin();
-    for (; map_it != m_backend_list.end(); map_it++)
-        if ((*map_it)->m_frontend_database == database)
-            return *map_it;
+    // NOTE(review): map_it is no longer referenced by the added code.
+    if (m_backend && m_backend->m_frontend_database == database)
+        return m_backend;
+
+    std::list<Zoom::SearchablePtr>::iterator map_s =
+        m_p->m_searchables.begin();
 
-    BackendPtr b(new Backend);
+    std::string c_db = mp::util::database_name_normalize(database);
+
+    // Linear search for the searchable whose database equals the
+    // normalized name.
+    while (map_s != m_p->m_searchables.end())
+    {
+        if (c_db.compare((*map_s)->database) == 0)
+            break;
+        map_s++;
+    }
+    if (map_s == m_p->m_searchables.end())
+    {
+        *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
+        // Points into the caller's string (database is a reference), so
+        // the caller must keep it alive while addinfo is in use.
+        *addinfo = database.c_str();
+        BackendPtr b;
+        return b;
+    }
 
-    std::string url = "localhost:9999/" + database;
-    yaz_log(YLOG_LOG, "new backend url=%s", url.c_str());
-    b->connect(url);
+    // Load the stylesheet before the backend is created so that a
+    // failure leaves the current backend untouched.
+    xsltStylesheetPtr xsp = 0;
+    if ((*map_s)->transform_xsl_fname.length())
+    {
+        xmlDoc *xsp_doc = xmlParseFile((*map_s)->transform_xsl_fname.c_str());
+        if (!xsp_doc)
+        {
+            *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
+            *addinfo = "xmlParseFile failed";
+            BackendPtr b;
+            return b;
+        }
+        xsp = xsltParseStylesheetDoc(xsp_doc);
+        if (!xsp)
+        {
+            // NOTE(review): a stylesheet failure is reported as "database
+            // does not exist" -- confirm this diagnostic is intended.
+            *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
+            *addinfo = "xsltParseStylesheetDoc failed";
+            BackendPtr b;
+            // The document is freed here only on failure; presumably the
+            // stylesheet owns xsp_doc on success -- confirm against libxslt.
+            xmlFreeDoc(xsp_doc);
+            return b;
+        }
+    }
+
+    SearchablePtr sptr = *map_s;
+
+    // Drop the backend of the previously used database.
+    m_backend.reset();
+
+    BackendPtr b(new Backend(sptr));
+
+    // The backend's destructor frees the stylesheet from here on.
+    b->xsp = xsp;
+    b->m_frontend_database = database;
+
+    if (sptr->query_encoding.length())
+        b->set_option("rpnCharset", sptr->query_encoding.c_str());
+
+    // A non-empty sru setting switches the target to an http:// URL.
+    std::string url;
+    if (sptr->sru.length())
+    {
+        url = "http://" + sptr->target;
+        b->set_option("sru", sptr->sru.c_str());
+    }
+    else
+        url = sptr->target;
+
+    b->connect(url, error, addinfo);
+    if (*error == 0)
+    {
+        m_backend = b;
+    }
     return b;
 }
 
+Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
+                                           Odr_int number_to_present,
+                                           int *error,
+                                           const char **addinfo,
+                                           Odr_int *number_of_records_returned,
+                                           ODR odr,
+                                           BackendPtr b,
+                                           Odr_oid *preferredRecordSyntax,
+                                           const char *element_set_name)
+{
+    *number_of_records_returned = 0;
+    Z_Records *records = 0;
+    bool enable_pz2_transform = false;
+
+    if (start < 0 || number_to_present <= 0)
+        return records;
+    
+    if (number_to_present > 10000)
+        number_to_present = 10000;
+    
+    ZOOM_record *recs = (ZOOM_record *)
+        odr_malloc(odr, number_to_present * sizeof(*recs));
+
+    char oid_name_str[OID_STR_MAX];
+    const char *syntax_name = 0;
+
+    if (preferredRecordSyntax)
+    {
+        if (!oid_oidcmp(preferredRecordSyntax, yaz_oid_recsyn_xml)
+            && !strcmp(element_set_name, "pz2"))
+        {
+            if (b->sptr->request_syntax.length())
+            {
+                syntax_name = b->sptr->request_syntax.c_str();
+                enable_pz2_transform = true;
+            }
+        }
+        else
+        {
+            syntax_name =
+                yaz_oid_to_string_buf(preferredRecordSyntax, 0, oid_name_str);
+        }
+    }
+
+    yaz_log(YLOG_LOG, "enable_pz2_transform %s", enable_pz2_transform ?
+            "enabled" : "disabled");
+
+    b->set_option("preferredRecordSyntax", syntax_name);
+
+    if (enable_pz2_transform)
+    {
+        element_set_name = "F";
+        if (b->sptr->element_set.length())
+            element_set_name = b->sptr->element_set.c_str();
+    }
+
+    b->set_option("elementSetName", element_set_name);
+
+    b->present(start, number_to_present, recs, error, addinfo);
+
+    Odr_int i = 0;
+    if (!*error)
+    {
+        for (i = 0; i < number_to_present; i++)
+            if (!recs[i])
+                break;
+    }
+    if (i > 0)
+    {  // only return records if no error and at least one record
+        char *odr_database = odr_strdup(odr,
+                                        b->m_frontend_database.c_str());
+        Z_NamePlusRecordList *npl = (Z_NamePlusRecordList *)
+            odr_malloc(odr, sizeof(*npl));
+        *number_of_records_returned = i;
+        npl->num_records = i;
+        npl->records = (Z_NamePlusRecord **)
+            odr_malloc(odr, i * sizeof(*npl->records));
+        for (i = 0; i < number_to_present; i++)
+        {
+            Z_NamePlusRecord *npr = 0;
+            const char *addinfo;
+            int sur_error = ZOOM_record_error(recs[i], 0 /* msg */,
+                                              &addinfo, 0 /* diagset */);
+                
+            if (sur_error)
+            {
+                npr = zget_surrogateDiagRec(odr, odr_database, sur_error,
+                                            addinfo);
+            }
+            else if (enable_pz2_transform)
+            {
+                char rec_type_str[100];
+
+                strcpy(rec_type_str, b->sptr->use_turbomarc ?
+                       "txml" : "xml");
+                
+                // prevent buffer overflow ...
+                if (b->sptr->record_encoding.length() > 0 &&
+                    b->sptr->record_encoding.length() < 
+                    (sizeof(rec_type_str)-20))
+                {
+                    strcat(rec_type_str, "; charset=");
+                    strcat(rec_type_str, b->sptr->record_encoding.c_str());
+                }
+                
+                int rec_len;
+                const char *rec_buf = ZOOM_record_get(recs[i], rec_type_str,
+                                                      &rec_len);
+                if (rec_buf && b->xsp)
+                {
+                    xmlDoc *rec_doc = xmlParseMemory(rec_buf, rec_len);
+                    if (rec_doc)
+                    { 
+                        xmlDoc *rec_res;
+                        rec_res = xsltApplyStylesheet(b->xsp, rec_doc, 0);
+
+                        if (rec_res)
+                            xsltSaveResultToString((xmlChar **) &rec_buf, &rec_len,
+                                                   rec_res, b->xsp);
+                    }
+                }
+
+                if (rec_buf)
+                {
+                    npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr));
+                    npr->databaseName = odr_database;
+                    npr->which = Z_NamePlusRecord_databaseRecord;
+                    npr->u.databaseRecord =
+                        z_ext_record_xml(odr, rec_buf, rec_len);
+                }
+                else
+                {
+                    npr = zget_surrogateDiagRec(
+                        odr, odr_database, 
+                        YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS,
+                        rec_type_str);
+                }
+            }
+            else
+            {
+                Z_External *ext =
+                    (Z_External *) ZOOM_record_get(recs[i], "ext", 0);
+                if (ext)
+                {
+                    npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr));
+                    npr->databaseName = odr_database;
+                    npr->which = Z_NamePlusRecord_databaseRecord;
+                    npr->u.databaseRecord = ext;
+                }
+                else
+                {
+                    npr = zget_surrogateDiagRec(
+                        odr, odr_database, 
+                        YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS,
+                        "ZOOM_record, type ext");
+                }
+            }
+            npl->records[i] = npr;
+        }
+        records = (Z_Records*) odr_malloc(odr, sizeof(*records));
+        records->which = Z_Records_DBOSD;
+        records->u.databaseOrSurDiagnostics = npl;
+    }
+    return records;
+}
+    
+
 void yf::Zoom::Frontend::handle_search(mp::Package &package)
 {
     Z_GDU *gdu = package.request().get();
@@ -214,38 +656,102 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
         package.response() = apdu_res;
         return;
     }
-    BackendPtr b = get_backend_from_databases(sr->databaseNames[0]);
-    switch (sr->query->which)
+
+    int error = 0;
+    const char *addinfo = 0;
+    std::string db(sr->databaseNames[0]);
+    BackendPtr b = get_backend_from_databases(db, &error, &addinfo);
+    if (error)
     {
-    case Z_Query_type_1:
-    case Z_Query_type_101:
         apdu_res = 
             odr.create_searchResponse(
-                apdu_req,
-                YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
-                "search filter do not handle type-1/type-101 yet");
+                apdu_req, error, addinfo);
         package.response() = apdu_res;
-        break;
-    default:
+        return;
+    }
+
+    b->set_option("setname", "default");
+
+    Odr_int hits = 0;
+    Z_Query *query = sr->query;
+    if (query->which == Z_Query_type_1 || query->which == Z_Query_type_101)
+    {
+        WRBUF w = wrbuf_alloc();
+        yaz_rpnquery_to_wrbuf(w, query->u.type_1);
+
+        b->search_pqf(wrbuf_cstr(w), &hits, &error, &addinfo);
+        wrbuf_destroy(w);
+    }
+    else
+    {
         apdu_res = 
-            odr.create_searchResponse(
-                apdu_req,
-                YAZ_BIB1_QUERY_TYPE_UNSUPP, 0);
+            odr.create_searchResponse(apdu_req, YAZ_BIB1_QUERY_TYPE_UNSUPP, 0);
         package.response() = apdu_res;
         return;
     }
+    
+    const char *element_set_name = 0;
+    Odr_int number_to_present = 0;
+    if (!error)
+        mp::util::piggyback_sr(sr, hits, number_to_present, &element_set_name);
+    
+    Odr_int number_of_records_returned = 0;
+    Z_Records *records = get_records(
+        0, number_to_present, &error, &addinfo,
+        &number_of_records_returned, odr, b, sr->preferredRecordSyntax,
+        element_set_name);
+    apdu_res = odr.create_searchResponse(apdu_req, error, addinfo);
+    if (records)
+    {
+        apdu_res->u.searchResponse->records = records;
+        apdu_res->u.searchResponse->numberOfRecordsReturned =
+            odr_intdup(odr, number_of_records_returned);
+    }
+    apdu_res->u.searchResponse->resultCount = odr_intdup(odr, hits);
+    package.response() = apdu_res;
 }
 
+// Answers a Z39.50 present request from the backend kept in m_backend.
+// Without a backend the response carries "specified result set does not
+// exist"; a record composition other than the simple form is rejected
+// with "comp-spec parameter unsupported". Otherwise the records are
+// fetched through get_records() and attached to the present response.
 void yf::Zoom::Frontend::handle_present(mp::Package &package)
 {
     Z_GDU *gdu = package.request().get();
     Z_APDU *apdu_req = gdu->u.z3950;
+    Z_APDU *apdu_res = 0;
+    Z_PresentRequest *pr = apdu_req->u.presentRequest;
+
     mp::odr odr;
-    package.response() = odr.create_close(
-        apdu_req,
-        Z_Close_protocolError,
-        "zoom filter has not implemented present request yet");
-    package.session().close();
+    if (!m_backend)
+    {
+        package.response() = odr.create_presentResponse(
+            apdu_req, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, 0);
+        return;
+    }
+    // Stays null unless the request names a generic element set.
+    const char *element_set_name = 0;
+    Z_RecordComposition *comp = pr->recordComposition;
+    if (comp && comp->which != Z_RecordComp_simple)
+    {
+        package.response() = odr.create_presentResponse(
+            apdu_req, 
+            YAZ_BIB1_PRESENT_COMP_SPEC_PARAMETER_UNSUPP, 0);
+        return;
+    }
+    if (comp && comp->u.simple->which == Z_ElementSetNames_generic)
+        element_set_name = comp->u.simple->u.generic;
+    Odr_int number_of_records_returned = 0;
+    int error = 0;
+    const char *addinfo = 0;
+    // get_records() takes a zero-based start (it rejects start < 0), so
+    // the request's start point is reduced by one.
+    Z_Records *records = get_records(
+        *pr->resultSetStartPoint - 1, *pr->numberOfRecordsRequested,
+        &error, &addinfo, &number_of_records_returned, odr, m_backend,
+        pr->preferredRecordSyntax, element_set_name);
+
+    apdu_res = odr.create_presentResponse(apdu_req, error, addinfo);
+    if (records)
+    {
+        apdu_res->u.presentResponse->records = records;
+        apdu_res->u.presentResponse->numberOfRecordsReturned =
+            odr_intdup(odr, number_of_records_returned);
+    }
+    package.response() = apdu_res;
 }
 
 void yf::Zoom::Frontend::handle_package(mp::Package &package)