X-Git-Url: http://sru.miketaylor.org.uk/?a=blobdiff_plain;f=src%2Ffilter_http_rewrite.cpp;h=bf01abae23a461ebbb71a5897b326f3c086b5951;hb=bb6b58cbec0e41a8ba2403e540185e77882e8741;hp=3fd5fc2c51c3c3feb5b94c3fdc8583194bf4317b;hpb=3ea5cbf51b66f787daac04cd6c625c217e09080e;p=metaproxy-moved-to-github.git diff --git a/src/filter_http_rewrite.cpp b/src/filter_http_rewrite.cpp index 3fd5fc2..bf01aba 100644 --- a/src/filter_http_rewrite.cpp +++ b/src/filter_http_rewrite.cpp @@ -21,19 +21,18 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include #include "filter_http_rewrite.hpp" +#include "html_parser.hpp" #include #include +#include #include #include +#include #include -#if HAVE_SYS_TYPES_H -#include -#endif - namespace mp = metaproxy_1; namespace yf = mp::filter; @@ -47,7 +46,7 @@ namespace metaproxy_1 { const std::string search_replace( std::map & vars, const std::string & txt) const; - std::string sub_vars ( + std::string sub_vars( const std::map & vars) const; void parse_groups(); }; @@ -64,12 +63,15 @@ namespace metaproxy_1 { std::string header; std::string attr; std::string tag; + bool reqline; RulePtr rule; }; - class HttpRewrite::Section { + class HttpRewrite::Phase { public: + Phase(); std::list within_list; + int m_verbose; void rewrite_reqline(mp::odr & o, Z_HTTP_Request *hreq, std::map & vars) const; void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers, @@ -78,11 +80,29 @@ namespace metaproxy_1 { char **content_buf, int *content_len, std::map & vars) const; }; + class HttpRewrite::Event : public HTMLParserEvent { + void openTagStart(const char *tag, int tag_len); + void anyTagEnd(const char *tag, int tag_len, int close_it); + void attribute(const char *tag, int tag_len, + const char *attr, int attr_len, + const char *value, int val_len, + const char *sep); + void closeTag(const char *tag, int tag_len); + void text(const char *value, int len); + const Phase *m_phase; + WRBUF m_w; + std::stack::const_iterator> s_within; + std::map &m_vars; + public: + Event(const Phase *p, std::map &vars); + ~Event(); + const char *result(); + }; } } yf::HttpRewrite::HttpRewrite() : - req_section(new Section), res_section(new Section) + req_phase(new Phase), res_phase(new Phase) { } @@ -101,10 +121,10 @@ void yf::HttpRewrite::process(mp::Package & package) const { Z_HTTP_Request *hreq = gdu->u.HTTP_Request; mp::odr o; - req_section->rewrite_reqline(o, hreq, vars); + req_phase->rewrite_reqline(o, hreq, vars); yaz_log(YLOG_LOG, ">> Request headers"); - req_section->rewrite_headers(o, hreq->headers, vars); - req_section->rewrite_body(o, + req_phase->rewrite_headers(o, hreq->headers, vars); + req_phase->rewrite_body(o, &hreq->content_buf, &hreq->content_len, vars); package.request() = gdu; } @@ -116,14 +136,14 @@ void yf::HttpRewrite::process(mp::Package & package) const yaz_log(YLOG_LOG, "Response code %d", hres->code); mp::odr o; yaz_log(YLOG_LOG, "<< Respose headers"); - res_section->rewrite_headers(o, hres->headers, vars); - res_section->rewrite_body(o, &hres->content_buf, + res_phase->rewrite_headers(o, hres->headers, vars); + res_phase->rewrite_body(o, &hres->content_buf, &hres->content_len, vars); package.response() = gdu; } } -void yf::HttpRewrite::Section::rewrite_reqline (mp::odr & o, +void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq, std::map & vars) const { @@ -143,79 +163,227 @@ void yf::HttpRewrite::Section::rewrite_reqline (mp::odr & o, path += hreq->path; } - std::list::const_iterator it = within_list.begin(); - if (it != within_list.end()) - { - RulePtr rule = it->rule; - yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str()); - std::string npath = rule->test_patterns(vars, path); - if (!npath.empty()) + std::list::const_iterator it = within_list.begin(); + for (; it != within_list.end(); it++) + if (it->reqline) { - yaz_log(YLOG_LOG, "Rewritten request URL is %s", npath.c_str()); - hreq->path = odr_strdup(o, npath.c_str()); + RulePtr rule = it->rule; + yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str()); + std::string npath = rule->test_patterns(vars, path); + if (!npath.empty()) + { + yaz_log(YLOG_LOG, "Rewritten request URL is %s", npath.c_str()); + hreq->path = odr_strdup(o, npath.c_str()); + } } - } } -void yf::HttpRewrite::Section::rewrite_headers(mp::odr & o, +void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o, Z_HTTP_Header *headers, std::map & vars) const { - for (Z_HTTP_Header *header = headers; - header != 0; - header = header->next) + for (Z_HTTP_Header *header = headers; header; header = header->next) { - std::string sheader(header->name); - sheader += ": "; - sheader += header->value; - yaz_log(YLOG_LOG, "%s: %s", header->name, header->value); - std::list::const_iterator it = within_list.begin(); - if (it == within_list.end()) - continue; - RulePtr rule = it->rule; - - std::string out = rule->test_patterns(vars, sheader); - if (!out.empty()) + for (; it != within_list.end(); it++) { - size_t pos = out.find(": "); - if (pos == std::string::npos) + if (it->header.length() > 0 && + yaz_strcasecmp(it->header.c_str(), header->name) == 0) { - yaz_log(YLOG_LOG, "Header malformed during rewrite, ignoring"); - continue; + std::string sheader(header->name); + sheader += ": "; + sheader += header->value; + + RulePtr rule = it->rule; + std::string out = rule->test_patterns(vars, sheader); + if (!out.empty()) + { + size_t pos = out.find(": "); + if (pos == std::string::npos) + { + yaz_log(YLOG_LOG, "Header malformed during rewrite, ignoring"); + continue; + } + header->name = odr_strdup(o, out.substr(0, pos).c_str()); + header->value = odr_strdup(o, + out.substr(pos + 2, + std::string::npos).c_str()); + } } - header->name = odr_strdup(o, out.substr(0, pos).c_str()); - header->value = odr_strdup(o, out.substr(pos+2, - std::string::npos).c_str()); } } } -void yf::HttpRewrite::Section::rewrite_body(mp::odr & o, +void yf::HttpRewrite::Phase::rewrite_body(mp::odr & o, char **content_buf, int *content_len, std::map & vars) const { if (*content_buf) { + int i; + for (i = 0; i < *content_len; i++) + if ((*content_buf)[i] == 0) + return; // binary content. skip - std::list::const_iterator it = within_list.begin(); - if (it != within_list.end()) + HTMLParser parser; + Event ev(this, vars); + + parser.set_verbose(m_verbose); + + std::string buf(*content_buf, *content_len); + + parser.parse(ev, buf.c_str()); + const char *res = ev.result(); + *content_buf = odr_strdup(o, res); + *content_len = strlen(res); + } +} + +yf::HttpRewrite::Event::Event(const Phase *p, + std::map & vars + ) : m_phase(p), m_vars(vars) +{ + m_w = wrbuf_alloc(); +} + +yf::HttpRewrite::Event::~Event() +{ + wrbuf_destroy(m_w); +} + +const char *yf::HttpRewrite::Event::result() +{ + return wrbuf_cstr(m_w); +} + +void yf::HttpRewrite::Event::openTagStart(const char *tag, int tag_len) +{ + wrbuf_putc(m_w, '<'); + wrbuf_write(m_w, tag, tag_len); + + std::string t(tag, tag_len); + std::list::const_iterator it = m_phase->within_list.begin(); + for (; it != m_phase->within_list.end(); it++) + { + if (it->tag.length() > 0 && yaz_strcasecmp(it->tag.c_str(), + t.c_str()) == 0) { - RulePtr rule = it->rule; + std::vector attr; + boost::split(attr, it->attr, boost::is_any_of(",")); + size_t i; + for (i = 0; i < attr.size(); i++) + { + if (attr[i].compare("#text") == 0) + { + s_within.push(it); + return; + } + } + } + } +} + +void yf::HttpRewrite::Event::anyTagEnd(const char *tag, int tag_len, + int close_it) +{ + if (close_it) + { + if (!s_within.empty()) + { + std::list::const_iterator it = s_within.top(); + std::string t(tag, tag_len); + if (yaz_strcasecmp(it->tag.c_str(), t.c_str()) == 0) + s_within.pop(); + } + } + if (close_it) + wrbuf_putc(m_w, '/'); + wrbuf_putc(m_w, '>'); +} - std::string body(*content_buf); - std::string nbody = rule->test_patterns(vars, body); - if (!nbody.empty()) +void yf::HttpRewrite::Event::attribute(const char *tag, int tag_len, + const char *attr, int attr_len, + const char *value, int val_len, + const char *sep) +{ + std::list::const_iterator it = m_phase->within_list.begin(); + bool subst = false; + + for (; it != m_phase->within_list.end(); it++) + { + std::string t(tag, tag_len); + if (it->tag.length() == 0 || + yaz_strcasecmp(it->tag.c_str(), t.c_str()) == 0) + { + std::string a(attr, attr_len); + std::vector attr; + boost::split(attr, it->attr, boost::is_any_of(",")); + size_t i; + for (i = 0; i < attr.size(); i++) { - *content_buf = odr_strdup(o, nbody.c_str()); - *content_len = nbody.size(); + if (attr[i].compare("#text") && + yaz_strcasecmp(attr[i].c_str(), a.c_str()) == 0) + subst = true; } } + if (subst) + break; } + + wrbuf_putc(m_w, ' '); + wrbuf_write(m_w, attr, attr_len); + if (value) + { + wrbuf_puts(m_w, "="); + wrbuf_puts(m_w, sep); + + std::string output; + if (subst) + { + std::string input(value, val_len); + output = it->rule->test_patterns(m_vars, input); + } + if (output.empty()) + wrbuf_write(m_w, value, val_len); + else + wrbuf_puts(m_w, output.c_str()); + wrbuf_puts(m_w, sep); + } +} + +void yf::HttpRewrite::Event::closeTag(const char *tag, int tag_len) +{ + if (!s_within.empty()) + { + std::list::const_iterator it = s_within.top(); + std::string t(tag, tag_len); + if (yaz_strcasecmp(it->tag.c_str(), t.c_str()) == 0) + s_within.pop(); + } + wrbuf_puts(m_w, "::const_iterator it = m_phase->within_list.end(); + if (!s_within.empty()) + it = s_within.top(); + std::string output; + if (it != m_phase->within_list.end()) + { + std::string input(value, len); + output = it->rule->test_patterns(m_vars, input); + } + if (output.empty()) + wrbuf_write(m_w, value, len); + else + wrbuf_puts(m_w, output.c_str()); +} + + /** * Tests pattern from the vector in order and executes recipe on the first match. @@ -255,8 +423,7 @@ const std::string yf::HttpRewrite::Replace::search_replace( = group_index.find(i); if (it != group_index.end()) { //it is - if (!what[i].str().empty()) - vars[it->second] = what[i]; + vars[it->second] = what[i]; } } @@ -337,8 +504,8 @@ void yf::HttpRewrite::Replace::parse_groups() regex = res; } -std::string yf::HttpRewrite::Replace::sub_vars ( - const std::map & vars) const +std::string yf::HttpRewrite::Replace::sub_vars( + const std::map & vars) const { std::string out; bool esc = false; @@ -387,10 +554,19 @@ std::string yf::HttpRewrite::Replace::sub_vars ( return out; } +yf::HttpRewrite::Phase::Phase() : m_verbose(0) +{ +} -void yf::HttpRewrite::configure_section(const xmlNode *ptr, - Section §ion) +void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase) { + static const char *names[2] = { "verbose", 0 }; + std::string values[1]; + values[0] = "0"; + mp::xml::parse_attr(ptr, names, values); + + phase.m_verbose = atoi(values[0].c_str()); + std::map rules; for (ptr = ptr->children; ptr; ptr = ptr->next) { @@ -436,14 +612,13 @@ void yf::HttpRewrite::configure_section(const xmlNode *ptr, + std::string((const char *) p->name) + " in http_rewrite filter"); } - if (!rule->replace_list.empty()) - rules[values[0]] = rule; + rules[values[0]] = rule; } else if (!strcmp((const char *) ptr->name, "within")) { - static const char *names[5] = - { "header", "attr", "tag", "rule", 0 }; - std::string values[4]; + static const char *names[6] = + { "header", "attr", "tag", "rule", "reqline", 0 }; + std::string values[5]; mp::xml::parse_attr(ptr, names, values); Within w; w.header = values[0]; @@ -456,7 +631,8 @@ void yf::HttpRewrite::configure_section(const xmlNode *ptr, ("Reference to non-existing rule '" + values[3] + "' in http_rewrite filter"); w.rule = it->second; - section.within_list.push_back(w); + w.reqline = values[4] == "1"; + phase.within_list.push_back(w); } else { @@ -477,11 +653,11 @@ void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only, continue; else if (!strcmp((const char *) ptr->name, "request")) { - configure_section(ptr, *req_section); + configure_phase(ptr, *req_phase); } else if (!strcmp((const char *) ptr->name, "response")) { - configure_section(ptr, *res_section); + configure_phase(ptr, *res_phase); } else {