From: Adam Dickmeiss Date: Wed, 29 Feb 2012 13:17:41 +0000 (+0100) Subject: query_rewrite: charset conversion of query terms X-Git-Tag: v1.3.26~32 X-Git-Url: http://sru.miketaylor.org.uk/?a=commitdiff_plain;h=fbd550a4d263e71607b13109ad8f7d92dfcbc741;p=metaproxy-moved-to-github.git query_rewrite: charset conversion of query terms This is an optional feature. It is enabled by a 'charset' element in the configuration, with attributes 'from' and 'to' that specify the from/to encodings. The default 'from' encoding is UTF-8. --- diff --git a/etc/config5.xml b/etc/config5.xml index be9cc9e..d963e0c 100644 --- a/etc/config5.xml +++ b/etc/config5.xml @@ -13,15 +13,14 @@ F %Y%m%d-%H%M%S - my.log + B %Y%m%d-%H%M%S - my.log diff --git a/src/filter_query_rewrite.cpp b/src/filter_query_rewrite.cpp index 0538c7b..6b2bfb9 100644 --- a/src/filter_query_rewrite.cpp +++ b/src/filter_query_rewrite.cpp @@ -23,9 +23,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include "filter_query_rewrite.hpp" +#include #include #include #include +#include #include #include @@ -43,13 +45,14 @@ namespace metaproxy_1 { void configure(const xmlNode * ptr); private: xsltStylesheetPtr m_stylesheet; - }; + std::string charset_from; + std::string charset_to; + }; } } -yf::QueryRewrite::Rep::Rep() +yf::QueryRewrite::Rep::Rep() : m_stylesheet(0), charset_from("UTF-8") { - m_stylesheet = 0; } yf::QueryRewrite::Rep::~Rep() @@ -91,17 +94,17 @@ void yf::QueryRewrite::Rep::process(mp::Package &package) const mp::odr odr; Z_SearchRequest *req = apdu_req->u.searchRequest; - xmlDocPtr doc_input = 0; - yaz_query2xml(req->query, &doc_input); - - if (!doc_input) - { - error_code = YAZ_BIB1_MALFORMED_QUERY; - addinfo = "converion from Query to XML failed"; - } - else + if (m_stylesheet) { - if (m_stylesheet) + xmlDocPtr doc_input = 0; + yaz_query2xml(req->query, &doc_input); + + if (!doc_input) + { + error_code = YAZ_BIB1_MALFORMED_QUERY; + addinfo = "converion from Query to XML failed"; + } + else { 
xmlDocPtr doc_res = xsltApplyStylesheet(m_stylesheet, doc_input, 0); @@ -117,10 +120,27 @@ void yf::QueryRewrite::Rep::process(mp::Package &package) const &error_code, &addinfo); xmlFreeDoc(doc_res); } + xmlFreeDoc(doc_input); + } + } + if (charset_to.length() && charset_from.length() && + (req->query->which == Z_Query_type_1 + || req->query->which == Z_Query_type_101)) + { + yaz_iconv_t cd = yaz_iconv_open(charset_to.c_str(), + charset_from.c_str()); + if (cd) + { + int r = yaz_query_charset_convert_rpnquery_check( + req->query->u.type_1, odr, cd); + yaz_iconv_close(cd); + if (r) + { /* query could not be char converted */ + error_code = YAZ_BIB1_MALFORMED_QUERY; + addinfo = "could not convert query to target charset"; + } } - xmlFreeDoc(doc_input); } - package.request() = gdu; if (error_code) { Z_APDU *f_apdu = @@ -128,6 +148,7 @@ void yf::QueryRewrite::Rep::process(mp::Package &package) const package.response() = f_apdu; return; } + package.request() = gdu; } } package.move(); @@ -140,7 +161,7 @@ void mp::filter::QueryRewrite::Rep::configure(const xmlNode *ptr) if (ptr->type != XML_ELEMENT_NODE) continue; - if (mp::xml::check_element_mp(ptr, "xslt")) + if (mp::xml::is_element_mp(ptr, "xslt")) { if (m_stylesheet) { @@ -173,6 +194,25 @@ void mp::filter::QueryRewrite::Rep::configure(const xmlNode *ptr) + "' in query_rewrite filter"); } } + else if (mp::xml::is_element_mp(ptr, "charset")) + { + for (struct _xmlAttr *attr = ptr->properties; + attr; attr = attr->next) + { + if (!strcmp((const char *) attr->name, "from")) + { + charset_from = mp::xml::get_text(attr); + } + else if (!strcmp((const char *) attr->name, "to")) + { + charset_to = mp::xml::get_text(attr); + } + else + throw mp::filter::FilterException + ("Invalid attribute inside charset inside " + "query_rewrite filter"); + } + } else { throw mp::filter::FilterException diff --git a/xml/schema/filter_query_rewrite.rnc b/xml/schema/filter_query_rewrite.rnc index 2d91d24..77a1c3a 100644 --- 
a/xml/schema/filter_query_rewrite.rnc +++ b/xml/schema/filter_query_rewrite.rnc @@ -8,5 +8,8 @@ filter_query_rewrite = attribute name { xsd:NCName }?, element mp:xslt { attribute stylesheet { xsd:string } - } - + }?, + element mp:charset { + attribute from { xsd:string }?, + attribute to { xsd:string }? + }?