X-Git-Url: http://sru.miketaylor.org.uk/?a=blobdiff_plain;f=src%2Ffilter_zoom.cpp;h=44986d65392234d86cce0855c197e6045a3cc7d7;hb=ec6d4b11a9ea36bb2b02f8def619d3eaa59e00e3;hp=436444ef1e13aeb7745f900d887cc963685d7045;hpb=7dcf766ccdb4773635481a86b1a43c3c86c32d42;p=metaproxy-moved-to-github.git diff --git a/src/filter_zoom.cpp b/src/filter_zoom.cpp index 436444e..44986d6 100644 --- a/src/filter_zoom.cpp +++ b/src/filter_zoom.cpp @@ -19,12 +19,20 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "config.hpp" #include "filter_zoom.hpp" #include +#include #include #include #include "torus.hpp" +#include +#include + #include #include +#include +#include +#include +#include #include #include #include @@ -36,34 +44,49 @@ namespace yf = mp::filter; namespace metaproxy_1 { namespace filter { - struct Zoom::Searchable { + struct Zoom::Searchable : boost::noncopyable { + std::string authentication; + std::string cfAuth; + std::string cfProxy; + std::string cfSubDb; std::string database; std::string target; std::string query_encoding; std::string sru; + std::string request_syntax; + std::string element_set; + std::string record_encoding; + std::string transform_xsl_fname; + bool use_turbomarc; bool piggyback; + CCL_bibset ccl_bibset; Searchable(); ~Searchable(); }; - class Zoom::Backend { + class Zoom::Backend : boost::noncopyable { friend class Impl; friend class Frontend; std::string zurl; ZOOM_connection m_connection; ZOOM_resultset m_resultset; std::string m_frontend_database; + SearchablePtr sptr; + xsltStylesheetPtr xsp; public: - Backend(); + Backend(SearchablePtr sptr); ~Backend(); void connect(std::string zurl, int *error, const char **addinfo); void search_pqf(const char *pqf, Odr_int *hits, int *error, const char **addinfo); + void search_cql(const char *cql, Odr_int *hits, + int *error, const char **addinfo); void present(Odr_int start, Odr_int number, ZOOM_record *recs, int *error, const char **addinfo); void set_option(const char *name, const char *value); + const char *get_option(const char *name); int get_error(const char **addinfo); }; - class Zoom::Frontend { + class Zoom::Frontend : boost::noncopyable { friend class Impl; Impl *m_p; bool m_is_virtual; @@ -98,14 +121,14 @@ namespace metaproxy_1 { private: FrontendPtr get_frontend(mp::Package &package); void release_frontend(mp::Package &package); - void parse_torus(const xmlNode *ptr); - - std::listm_searchables; - + SearchablePtr parse_torus(const xmlNode *ptr); + struct cql_node *convert_cql_fields(struct cql_node *cn, ODR odr); std::map m_clients; boost::mutex m_mutex; boost::condition m_cond_session_ready; - mp::Torus torus; + std::string torus_url; + std::map fieldmap; + std::string xsldir; }; } } @@ -133,14 +156,17 @@ void yf::Zoom::process(mp::Package &package) const // define Implementation stuff -yf::Zoom::Backend::Backend() +yf::Zoom::Backend::Backend(SearchablePtr ptr) : sptr(ptr) { m_connection = ZOOM_connection_create(0); m_resultset = 0; + xsp = 0; } yf::Zoom::Backend::~Backend() { + if (xsp) + xsltFreeStylesheet(xsp); ZOOM_connection_destroy(m_connection); ZOOM_resultset_destroy(m_resultset); } @@ -163,6 +189,22 @@ void yf::Zoom::Backend::search_pqf(const char *pqf, Odr_int *hits, *hits = 0; } +void yf::Zoom::Backend::search_cql(const char *cql, Odr_int *hits, + int *error, const char **addinfo) +{ + ZOOM_query q = ZOOM_query_create(); + + ZOOM_query_cql(q, cql); + + m_resultset = ZOOM_connection_search(m_connection, q); + ZOOM_query_destroy(q); + *error = ZOOM_connection_error(m_connection, 0, addinfo); + if (*error == 0) + *hits = ZOOM_resultset_size(m_resultset); + else + *hits = 0; +} + void yf::Zoom::Backend::present(Odr_int start, Odr_int number, ZOOM_record *recs, int *error, const char **addinfo) @@ -178,6 +220,11 @@ void yf::Zoom::Backend::set_option(const char *name, const char *value) ZOOM_resultset_option_set(m_resultset, name, value); } +const char *yf::Zoom::Backend::get_option(const char *name) +{ + return ZOOM_connection_option_get(m_connection, name); +} + int yf::Zoom::Backend::get_error(const char **addinfo) { return ZOOM_connection_error(m_connection, 0, addinfo); @@ -186,10 +233,13 @@ int yf::Zoom::Backend::get_error(const char **addinfo) yf::Zoom::Searchable::Searchable() { piggyback = true; + use_turbomarc = true; + ccl_bibset = ccl_qual_mk(); } yf::Zoom::Searchable::~Searchable() { + ccl_qual_rm(&ccl_bibset); } yf::Zoom::Frontend::Frontend(Impl *impl) : @@ -254,10 +304,11 @@ yf::Zoom::Impl::~Impl() { } -void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1) +yf::Zoom::SearchablePtr yf::Zoom::Impl::parse_torus(const xmlNode *ptr1) { + SearchablePtr notfound; if (!ptr1) - return ; + return notfound; for (ptr1 = ptr1->children; ptr1; ptr1 = ptr1->next) { if (ptr1->type != XML_ELEMENT_NODE) @@ -271,79 +322,139 @@ void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1) continue; if (!strcmp((const char *) ptr2->name, "layer")) { - Zoom::Searchable s; + Zoom::SearchablePtr s(new Searchable); const xmlNode *ptr3 = ptr2; for (ptr3 = ptr3->children; ptr3; ptr3 = ptr3->next) { if (ptr3->type != XML_ELEMENT_NODE) continue; - if (!strcmp((const char *) ptr3->name, "id")) + if (!strcmp((const char *) ptr3->name, + "authentication")) + { + s->authentication = mp::xml::get_text(ptr3); + } + else if (!strcmp((const char *) ptr3->name, + "cfAuth")) + { + s->cfAuth = mp::xml::get_text(ptr3); + } + else if (!strcmp((const char *) ptr3->name, + "cfProxy")) + { + s->cfProxy = mp::xml::get_text(ptr3); + } + else if (!strcmp((const char *) ptr3->name, + "cfSubDb")) { - s.database = mp::xml::get_text(ptr3); + s->cfSubDb = mp::xml::get_text(ptr3); + } + else if (!strcmp((const char *) ptr3->name, "id")) + { + s->database = mp::xml::get_text(ptr3); } else if (!strcmp((const char *) ptr3->name, "zurl")) { - s.target = mp::xml::get_text(ptr3); + s->target = mp::xml::get_text(ptr3); } else if (!strcmp((const char *) ptr3->name, "sru")) { - s.sru = mp::xml::get_text(ptr3); + s->sru = mp::xml::get_text(ptr3); } else if (!strcmp((const char *) ptr3->name, "queryEncoding")) { - s.query_encoding = mp::xml::get_text(ptr3); + s->query_encoding = mp::xml::get_text(ptr3); } else if (!strcmp((const char *) ptr3->name, "piggyback")) { - s.piggyback = mp::xml::get_bool(ptr3, true); + s->piggyback = mp::xml::get_bool(ptr3, true); + } + else if (!strcmp((const char *) ptr3->name, + "requestSyntax")) + { + s->request_syntax = mp::xml::get_text(ptr3); + } + else if (!strcmp((const char *) ptr3->name, + "elementSet")) + { + s->element_set = mp::xml::get_text(ptr3); + } + else if (!strcmp((const char *) ptr3->name, + "recordEncoding")) + { + s->record_encoding = mp::xml::get_text(ptr3); + } + else if (!strcmp((const char *) ptr3->name, + "transform")) + { + s->transform_xsl_fname = mp::xml::get_text(ptr3); + } + else if (!strcmp((const char *) ptr3->name, + "useTurboMarc")) + { + ; // useTurboMarc is ignored + } + else if (!strncmp((const char *) ptr3->name, + "cclmap_", 7)) + { + std::string value = mp::xml::get_text(ptr3); + ccl_qual_fitem(s->ccl_bibset, value.c_str(), + (const char *) ptr3->name + 7); } } - if (s.database.length() && s.target.length()) - { - yaz_log(YLOG_LOG, "add db=%s target=%s", - s.database.c_str(), s.target.c_str()); - m_searchables.push_back(s); - } + return s; } } } } + return notfound; } - void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only) { for (ptr = ptr->children; ptr; ptr = ptr->next) { if (ptr->type != XML_ELEMENT_NODE) continue; - if (!strcmp((const char *) ptr->name, "records")) - { - parse_torus(ptr); - } else if (!strcmp((const char *) ptr->name, "torus")) { - std::string url; const struct _xmlAttr *attr; for (attr = ptr->properties; attr; attr = attr->next) { if (!strcmp((const char *) attr->name, "url")) - url = mp::xml::get_text(attr->children); + torus_url = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "xsldir")) + xsldir = mp::xml::get_text(attr->children); else throw mp::filter::FilterException( "Bad attribute " + std::string((const char *) attr->name)); } - torus.read_searchables(url); - xmlDoc *doc = torus.get_doc(); - if (doc) + } + else if (!strcmp((const char *) ptr->name, "fieldmap")) + { + const struct _xmlAttr *attr; + std::string ccl_field; + std::string cql_field; + for (attr = ptr->properties; attr; attr = attr->next) { - xmlNode *ptr = xmlDocGetRootElement(doc); - parse_torus(ptr); + if (!strcmp((const char *) attr->name, "ccl")) + ccl_field = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "cql")) + cql_field = mp::xml::get_text(attr->children); + else + throw mp::filter::FilterException( + "Bad attribute " + std::string((const char *) + attr->name)); } + if (cql_field.length()) + fieldmap[cql_field] = ccl_field; + } + else if (!strcmp((const char *) ptr->name, "records")) + { + yaz_log(YLOG_WARN, "records ignored!"); } else { @@ -362,43 +473,121 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases( if (m_backend && m_backend->m_frontend_database == database) return m_backend; - std::list::iterator map_s = - m_p->m_searchables.begin(); - - std::string c_db = mp::util::database_name_normalize(database); - - while (map_s != m_p->m_searchables.end()) + std::string db_args; + std::string cf_parm; + std::string torus_db; + size_t db_arg_pos = database.find(','); + if (db_arg_pos != std::string::npos) { - if (c_db.compare(map_s->database) == 0) - break; - map_s++; + torus_db = database.substr(0, db_arg_pos); + db_args = database.substr(db_arg_pos+1); } - if (map_s == m_p->m_searchables.end()) + else + torus_db = database; + + xmlDoc *doc = mp::get_searchable(m_p->torus_url, torus_db); + if (!doc) { *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST; *addinfo = database.c_str(); BackendPtr b; return b; } + SearchablePtr sptr = m_p->parse_torus(xmlDocGetRootElement(doc)); + xmlFreeDoc(doc); + if (!sptr) + { + *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST; + *addinfo = database.c_str(); + BackendPtr b; + return b; + } + + xsltStylesheetPtr xsp = 0; + if (sptr->transform_xsl_fname.length()) + { + std::string fname; + + if (m_p->xsldir.length()) + fname = m_p->xsldir + "/" + sptr->transform_xsl_fname; + else + fname = sptr->transform_xsl_fname; + xmlDoc *xsp_doc = xmlParseFile(fname.c_str()); + if (!xsp_doc) + { + *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR; + *addinfo = "xmlParseFile failed"; + BackendPtr b; + return b; + } + xsp = xsltParseStylesheetDoc(xsp_doc); + if (!xsp) + { + *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST; + *addinfo = "xsltParseStylesheetDoc failed"; + BackendPtr b; + xmlFreeDoc(xsp_doc); + return b; + } + } m_backend.reset(); - BackendPtr b(new Backend); + BackendPtr b(new Backend(sptr)); + b->xsp = xsp; b->m_frontend_database = database; + std::string authentication = sptr->authentication; + + if (sptr->query_encoding.length()) + b->set_option("rpnCharset", sptr->query_encoding.c_str()); + + if (sptr->cfAuth.length()) + { + b->set_option("user", sptr->cfAuth.c_str()); + if (authentication.length()) + { + size_t found = authentication.find('/'); + if (found != std::string::npos) + { + cf_parm += "user=" + mp::util::uri_encode(authentication.substr(0, found)) + + "&password=" + mp::util::uri_encode(authentication.substr(found+1)); + } + else + cf_parm += "user=" + mp::util::uri_encode(authentication); + } + } + else if (authentication.length()) + b->set_option("user", authentication.c_str()); - if (map_s->query_encoding.length()) - b->set_option("rpnCharset", map_s->query_encoding.c_str()); + if (sptr->cfProxy.length()) + { + if (cf_parm.length()) + cf_parm += "&"; + cf_parm += "proxy=" + mp::util::uri_encode(sptr->cfProxy); + } + if (sptr->cfSubDb.length()) + { + if (cf_parm.length()) + cf_parm += "&"; + cf_parm += "subdatabase=" + mp::util::uri_encode(sptr->cfSubDb); + } std::string url; - if (map_s->sru.length()) + if (sptr->sru.length()) { - url = "http://" + map_s->target; - b->set_option("sru", map_s->sru.c_str()); + url = "http://" + sptr->target; + b->set_option("sru", sptr->sru.c_str()); } else - url = map_s->target; - + { + url = sptr->target; + } + if (db_args.length()) + url += "," + db_args; + else if (cf_parm.length()) + url += "," + cf_parm; + yaz_log(YLOG_LOG, "url=%s", url.c_str()); b->connect(url, error, addinfo); if (*error == 0) { @@ -419,6 +608,7 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start, { *number_of_records_returned = 0; Z_Records *records = 0; + bool enable_pz2_transform = false; if (start < 0 || number_to_present <= 0) return records; @@ -433,10 +623,33 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start, const char *syntax_name = 0; if (preferredRecordSyntax) - syntax_name = - yaz_oid_to_string_buf(preferredRecordSyntax, 0, oid_name_str); + { + if (!oid_oidcmp(preferredRecordSyntax, yaz_oid_recsyn_xml) + && element_set_name && + !strcmp(element_set_name, "pz2")) + { + if (b->sptr->request_syntax.length()) + { + syntax_name = b->sptr->request_syntax.c_str(); + enable_pz2_transform = true; + } + } + else + { + syntax_name = + yaz_oid_to_string_buf(preferredRecordSyntax, 0, oid_name_str); + } + } + b->set_option("preferredRecordSyntax", syntax_name); - + + if (enable_pz2_transform) + { + element_set_name = "F"; + if (b->sptr->element_set.length()) + element_set_name = b->sptr->element_set.c_str(); + } + b->set_option("elementSetName", element_set_name); b->present(start, number_to_present, recs, error, addinfo); @@ -470,17 +683,73 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start, npr = zget_surrogateDiagRec(odr, odr_database, sur_error, addinfo); } + else if (enable_pz2_transform) + { + char rec_type_str[100]; + + strcpy(rec_type_str, b->sptr->use_turbomarc ? + "txml" : "xml"); + + // prevent buffer overflow ... + if (b->sptr->record_encoding.length() > 0 && + b->sptr->record_encoding.length() < + (sizeof(rec_type_str)-20)) + { + strcat(rec_type_str, "; charset="); + strcat(rec_type_str, b->sptr->record_encoding.c_str()); + } + + int rec_len; + const char *rec_buf = ZOOM_record_get(recs[i], rec_type_str, + &rec_len); + if (rec_buf && b->xsp) + { + xmlDoc *rec_doc = xmlParseMemory(rec_buf, rec_len); + if (rec_doc) + { + xmlDoc *rec_res; + rec_res = xsltApplyStylesheet(b->xsp, rec_doc, 0); + + if (rec_res) + xsltSaveResultToString((xmlChar **) &rec_buf, &rec_len, + rec_res, b->xsp); + } + } + + if (rec_buf) + { + npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr)); + npr->databaseName = odr_database; + npr->which = Z_NamePlusRecord_databaseRecord; + npr->u.databaseRecord = + z_ext_record_xml(odr, rec_buf, rec_len); + } + else + { + npr = zget_surrogateDiagRec( + odr, odr_database, + YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS, + rec_type_str); + } + } else { - npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr)); Z_External *ext = (Z_External *) ZOOM_record_get(recs[i], "ext", 0); - npr->databaseName = odr_database; if (ext) { + npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr)); + npr->databaseName = odr_database; npr->which = Z_NamePlusRecord_databaseRecord; npr->u.databaseRecord = ext; } + else + { + npr = zget_surrogateDiagRec( + odr, odr_database, + YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS, + "ZOOM_record, type ext"); + } } npl->records[i] = npr; } @@ -491,6 +760,38 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start, return records; } +struct cql_node *yf::Zoom::Impl::convert_cql_fields(struct cql_node *cn, + ODR odr) +{ + struct cql_node *r = 0; + if (!cn) + return 0; + switch (cn->which) + { + case CQL_NODE_ST: + if (cn->u.st.index) + { + std::map::const_iterator it; + it = fieldmap.find(cn->u.st.index); + if (it == fieldmap.end()) + return cn; + if (it->second.length()) + cn->u.st.index = odr_strdup(odr, it->second.c_str()); + else + cn->u.st.index = 0; + } + break; + case CQL_NODE_BOOL: + r = convert_cql_fields(cn->u.boolean.left, odr); + if (!r) + r = convert_cql_fields(cn->u.boolean.right, odr); + break; + case CQL_NODE_SORT: + r = convert_cql_fields(cn->u.sort.search, odr); + break; + } + return r; +} void yf::Zoom::Frontend::handle_search(mp::Package &package) { @@ -524,13 +825,73 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package) Odr_int hits = 0; Z_Query *query = sr->query; + WRBUF ccl_wrbuf = 0; + WRBUF pqf_wrbuf = 0; + if (query->which == Z_Query_type_1 || query->which == Z_Query_type_101) { - WRBUF w = wrbuf_alloc(); - yaz_rpnquery_to_wrbuf(w, query->u.type_1); + // RPN + pqf_wrbuf = wrbuf_alloc(); + yaz_rpnquery_to_wrbuf(pqf_wrbuf, query->u.type_1); + } + else if (query->which == Z_Query_type_2) + { + // CCL + ccl_wrbuf = wrbuf_alloc(); + wrbuf_write(ccl_wrbuf, (const char *) query->u.type_2->buf, + query->u.type_2->len); + } + else if (query->which == Z_Query_type_104 && + query->u.type_104->which == Z_External_CQL) + { + // CQL + const char *cql = query->u.type_104->u.cql; + CQL_parser cp = cql_parser_create(); + int r = cql_parser_string(cp, cql); + if (r) + { + cql_parser_destroy(cp); + apdu_res = + odr.create_searchResponse(apdu_req, + YAZ_BIB1_MALFORMED_QUERY, + "CQL syntax error"); + package.response() = apdu_res; + return; + } + struct cql_node *cn = cql_parser_result(cp); + struct cql_node *cn_error = m_p->convert_cql_fields(cn, odr); + if (cn_error) + { + // hopefully we are getting a ptr to a index+relation+term node + addinfo = 0; + if (cn_error->which == CQL_NODE_ST) + addinfo = cn_error->u.st.index; + + apdu_res = + odr.create_searchResponse(apdu_req, + YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, + addinfo); + package.response() = apdu_res; + return; + } + char ccl_buf[1024]; - b->search_pqf(wrbuf_cstr(w), &hits, &error, &addinfo); - wrbuf_destroy(w); + r = cql_to_ccl_buf(cn, ccl_buf, sizeof(ccl_buf)); + if (r == 0) + { + ccl_wrbuf = wrbuf_alloc(); + wrbuf_puts(ccl_wrbuf, ccl_buf); + } + cql_parser_destroy(cp); + if (r) + { + apdu_res = + odr.create_searchResponse(apdu_req, + YAZ_BIB1_MALFORMED_QUERY, + "CQL to CCL conversion error"); + package.response() = apdu_res; + return; + } } else { @@ -539,6 +900,80 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package) package.response() = apdu_res; return; } + + if (ccl_wrbuf) + { + // CCL to PQF + assert(pqf_wrbuf == 0); + int cerror, cpos; + struct ccl_rpn_node *cn; + yaz_log(YLOG_LOG, "CCL: %s", wrbuf_cstr(ccl_wrbuf)); + cn = ccl_find_str(b->sptr->ccl_bibset, wrbuf_cstr(ccl_wrbuf), + &cerror, &cpos); + wrbuf_destroy(ccl_wrbuf); + if (!cn) + { + char *addinfo = odr_strdup(odr, ccl_err_msg(cerror)); + int z3950_diag = YAZ_BIB1_MALFORMED_QUERY; + + switch (cerror) + { + case CCL_ERR_UNKNOWN_QUAL: + z3950_diag = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE; + break; + case CCL_ERR_TRUNC_NOT_LEFT: + case CCL_ERR_TRUNC_NOT_RIGHT: + case CCL_ERR_TRUNC_NOT_BOTH: + z3950_diag = YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE; + break; + } + apdu_res = + odr.create_searchResponse(apdu_req, z3950_diag, addinfo); + package.response() = apdu_res; + return; + } + pqf_wrbuf = wrbuf_alloc(); + ccl_pquery(pqf_wrbuf, cn); + ccl_rpn_delete(cn); + } + + assert(pqf_wrbuf); + if (b->get_option("sru")) + { + cql_transform_t cqlt = cql_transform_create(); + Z_RPNQuery *zquery; + WRBUF wrb = wrbuf_alloc(); + int status; + + zquery = p_query_rpn(odr, wrbuf_cstr(pqf_wrbuf)); + status = cql_transform_rpn2cql_wrbuf(cqlt, wrb, zquery); + + cql_transform_close(cqlt); + + if (status == 0) + { + yaz_log(YLOG_LOG, "search CQL: %s", wrbuf_cstr(wrb)); + b->search_cql(wrbuf_cstr(wrb), &hits, &error, &addinfo); + } + + wrbuf_destroy(wrb); + wrbuf_destroy(pqf_wrbuf); + if (status) + { + apdu_res = + odr.create_searchResponse(apdu_req, YAZ_BIB1_MALFORMED_QUERY, + "can not convert from RPN to CQL"); + package.response() = apdu_res; + return; + } + } + else + { + yaz_log(YLOG_LOG, "search PQF: %s", wrbuf_cstr(pqf_wrbuf)); + b->search_pqf(wrbuf_cstr(pqf_wrbuf), &hits, &error, &addinfo); + wrbuf_destroy(pqf_wrbuf); + } + const char *element_set_name = 0; Odr_int number_to_present = 0;