From fe8ab21a7dfdd452298ec356395b2956c25d18fa Mon Sep 17 00:00:00 2001 From: Jakub Skoczen Date: Mon, 10 Jun 2013 17:28:10 +0200 Subject: [PATCH] Refactor HTTP rewrite filter and tests --- src/filter_http_rewrite.cpp | 247 ++++++++++++++++++++++--------------------- src/filter_http_rewrite.hpp | 52 ++------- src/test_filter_rewrite.cpp | 51 +++++---- 3 files changed, 168 insertions(+), 182 deletions(-) diff --git a/src/filter_http_rewrite.cpp b/src/filter_http_rewrite.cpp index 249a757..55ae35c 100644 --- a/src/filter_http_rewrite.cpp +++ b/src/filter_http_rewrite.cpp @@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include -#include +#include #include #if HAVE_SYS_TYPES_H @@ -38,7 +38,47 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA namespace mp = metaproxy_1; namespace yf = mp::filter; -yf::HttpRewrite::HttpRewrite() +namespace metaproxy_1 { + namespace filter { + class HttpRewrite::RuleScope { + public: + std::vector tags; + std::vector attrs; + std::string content_type; + }; + class HttpRewrite::Rule { + public: + enum Section { METHOD, HEADER, BODY }; + std::string regex; + std::string recipe; + std::map group_index; + std::vector scopes; + Section section; + const std::string search_replace( + std::map & vars, + const std::string & txt) const; + std::string sub_vars ( + const std::map & vars) const; + void parse_groups(); + }; + class HttpRewrite::Rules { + public: + std::vector rules; + void rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq, + std::map & vars) const; + void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers, + std::map & vars) const; + void rewrite_body (mp::odr & o, + char **content_buf, int *content_len, + std::map & vars) const; + const std::string test_patterns( + std::map & vars, + const std::string & txt) const; + }; + } +} + +yf::HttpRewrite::HttpRewrite() : req_rules(new Rules), res_rules(new Rules) { } @@ -57,11 +97,12 @@ void yf::HttpRewrite::process(mp::Package & package) const { Z_HTTP_Request *hreq = gdu->u.HTTP_Request; mp::odr o; - rewrite_reqline(o, hreq, vars); + req_rules->rewrite_reqline(o, hreq, vars); yaz_log(YLOG_LOG, ">> Request headers"); - rewrite_headers(o, hreq->headers, vars, req_uri_pats, req_groups_bynum); - rewrite_body(o, &hreq->content_buf, &hreq->content_len, vars, - req_uri_pats, req_groups_bynum); + req_rules->rewrite_headers(o, hreq->headers, vars); + req_rules->rewrite_body(o, + &hreq->content_buf, &hreq->content_len, + vars); package.request() = gdu; } package.move(); @@ -72,14 +113,15 @@ void yf::HttpRewrite::process(mp::Package & package) const yaz_log(YLOG_LOG, "Response code %d", hres->code); mp::odr o; yaz_log(YLOG_LOG, "<< Respose headers"); - rewrite_headers(o, hres->headers, vars, res_uri_pats, res_groups_bynum); - rewrite_body(o, &hres->content_buf, &hres->content_len, vars, - res_uri_pats, res_groups_bynum); + res_rules->rewrite_headers(o, hres->headers, vars); + res_rules->rewrite_body(o, &hres->content_buf, + &hres->content_len, vars); package.response() = gdu; } } -void yf::HttpRewrite::rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq, +void yf::HttpRewrite::Rules::rewrite_reqline (mp::odr & o, + Z_HTTP_Request *hreq, std::map & vars) const { //rewrite the request line @@ -99,7 +141,7 @@ void yf::HttpRewrite::rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq, } yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str()); std::string npath = - test_patterns(vars, path, req_uri_pats, req_groups_bynum); + test_patterns(vars, path); if (!npath.empty()) { yaz_log(YLOG_LOG, "Rewritten request URL is %s", npath.c_str()); @@ -107,10 +149,9 @@ void yf::HttpRewrite::rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq, } } -void yf::HttpRewrite::rewrite_headers (mp::odr & o, Z_HTTP_Header *headers, - std::map & vars, - const spair_vec & uri_pats, - const std::vector > & groups_bynum) const +void yf::HttpRewrite::Rules::rewrite_headers(mp::odr & o, + Z_HTTP_Header *headers, + std::map & vars) const { for (Z_HTTP_Header *header = headers; header != 0; @@ -120,7 +161,7 @@ void yf::HttpRewrite::rewrite_headers (mp::odr & o, Z_HTTP_Header *headers, sheader += ": "; sheader += header->value; yaz_log(YLOG_LOG, "%s: %s", header->name, header->value); - std::string out = test_patterns(vars, sheader, uri_pats, groups_bynum); + std::string out = test_patterns(vars, sheader); if (!out.empty()) { size_t pos = out.find(": "); @@ -136,16 +177,16 @@ void yf::HttpRewrite::rewrite_headers (mp::odr & o, Z_HTTP_Header *headers, } } -void yf::HttpRewrite::rewrite_body (mp::odr & o, char **content_buf, int *content_len, - std::map & vars, - const spair_vec & uri_pats, - const std::vector > & groups_bynum) const +void yf::HttpRewrite::Rules::rewrite_body (mp::odr & o, + char **content_buf, + int *content_len, + std::map & vars) const { if (*content_buf) { std::string body(*content_buf); std::string nbody = - test_patterns(vars, body, uri_pats, groups_bynum); + test_patterns(vars, body); if (!nbody.empty()) { *content_buf = odr_strdup(o, nbody.c_str()); @@ -158,33 +199,24 @@ void yf::HttpRewrite::rewrite_body (mp::odr & o, char **content_buf, int *conten * Tests pattern from the vector in order and executes recipe on the first match. */ -const std::string yf::HttpRewrite::test_patterns( +const std::string yf::HttpRewrite::Rules::test_patterns( std::map & vars, - const std::string & txt, - const spair_vec & uri_pats, - const std::vector > & groups_bynum_vec) - const + const std::string & txt) const { - for (unsigned i = 0; i < uri_pats.size(); i++) + for (unsigned i = 0; i < rules.size(); i++) { - std::string out = search_replace(vars, txt, - uri_pats[i].first, uri_pats[i].second, - groups_bynum_vec[i]); + std::string out = rules[i].search_replace(vars, txt); if (!out.empty()) return out; } return ""; } - -const std::string yf::HttpRewrite::search_replace( +const std::string yf::HttpRewrite::Rule::search_replace( std::map & vars, - const std::string & txt, - const std::string & uri_re, - const std::string & uri_pat, - const std::map & groups_bynum) const + const std::string & txt) const { //exec regex against value - boost::regex re(uri_re); + boost::regex re(regex); boost::smatch what; std::string::const_iterator start, end; start = txt.begin(); @@ -197,8 +229,8 @@ const std::string yf::HttpRewrite::search_replace( { //check if the group is named std::map::const_iterator it - = groups_bynum.find(i); - if (it != groups_bynum.end()) + = group_index.find(i); + if (it != group_index.end()) { //it is if (!what[i].str().empty()) vars[it->second] = what[i]; @@ -206,7 +238,7 @@ const std::string yf::HttpRewrite::search_replace( } //prepare replacement string - std::string rvalue = sub_vars(uri_pat, vars); + std::string rvalue = sub_vars(vars); yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'", what.str(0).c_str(), rvalue.c_str()); out.append(start, what[0].first); @@ -219,77 +251,70 @@ const std::string yf::HttpRewrite::search_replace( return out; } -void yf::HttpRewrite::parse_groups( - const spair_vec & uri_pats, - std::vector > & groups_bynum_vec) +void yf::HttpRewrite::Rule::parse_groups() { - for (unsigned h = 0; h < uri_pats.size(); h++) + int gnum = 0; + bool esc = false; + const std::string & str = regex; + yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str()); + for (unsigned i = 0; i < str.size(); ++i) { - int gnum = 0; - bool esc = false; - //regex is first, subpat is second - std::string str = uri_pats[h].first; - //for each pair we have an indexing map - std::map groups_bynum; - for (unsigned i = 0; i < str.size(); ++i) + if (!esc && str[i] == '\\') { - if (!esc && str[i] == '\\') - { - esc = true; - continue; - } - if (!esc && str[i] == '(') //group starts + esc = true; + continue; + } + if (!esc && str[i] == '(') //group starts + { + gnum++; + if (i+1 < str.size() && str[i+1] == '?') //group with attrs { - gnum++; - if (i+1 < str.size() && str[i+1] == '?') //group with attrs + i++; + if (i+1 < str.size() && str[i+1] == ':') //non-capturing { + if (gnum > 0) gnum--; i++; - if (i+1 < str.size() && str[i+1] == ':') //non-capturing - { - if (gnum > 0) gnum--; - i++; - continue; - } - if (i+1 < str.size() && str[i+1] == 'P') //optional, python - i++; - if (i+1 < str.size() && str[i+1] == '<') //named + continue; + } + if (i+1 < str.size() && str[i+1] == 'P') //optional, python + i++; + if (i+1 < str.size() && str[i+1] == '<') //named + { + i++; + std::string gname; + bool term = false; + while (++i < str.size()) { - i++; - std::string gname; - bool term = false; - while (++i < str.size()) - { - if (str[i] == '>') { term = true; break; } - if (!isalnum(str[i])) - throw mp::filter::FilterException - ("Only alphanumeric chars allowed, found " - " in '" - + str - + "' at " - + boost::lexical_cast(i)); - gname += str[i]; - } - if (!term) + if (str[i] == '>') { term = true; break; } + if (!isalnum(str[i])) throw mp::filter::FilterException - ("Unterminated group name '" + gname - + " in '" + str +"'"); - groups_bynum[gnum] = gname; - yaz_log(YLOG_LOG, "Found named group '%s' at $%d", - gname.c_str(), gnum); + ("Only alphanumeric chars allowed, found " + " in '" + + str + + "' at " + + boost::lexical_cast(i)); + gname += str[i]; } + if (!term) + throw mp::filter::FilterException + ("Unterminated group name '" + gname + + " in '" + str +"'"); + group_index[gnum] = gname; + yaz_log(YLOG_LOG, "Found named group '%s' at $%d", + gname.c_str(), gnum); } } - esc = false; } - groups_bynum_vec.push_back(groups_bynum); + esc = false; } } -std::string yf::HttpRewrite::sub_vars (const std::string & in, - const std::map & vars) +std::string yf::HttpRewrite::Rule::sub_vars ( + const std::map & vars) const { std::string out; bool esc = false; + const std::string & in = recipe; for (unsigned i = 0; i < in.size(); ++i) { if (!esc && in[i] == '\\') @@ -334,20 +359,8 @@ std::string yf::HttpRewrite::sub_vars (const std::string & in, return out; } -void yf::HttpRewrite::configure( - const spair_vec req_uri_pats, - const spair_vec res_uri_pats) -{ - //TODO should we really copy them out? - this->req_uri_pats = req_uri_pats; - this->res_uri_pats = res_uri_pats; - //pick up names - parse_groups(req_uri_pats, req_groups_bynum); - parse_groups(res_uri_pats, res_groups_bynum); -} - - -static void configure_rules(const xmlNode *ptr, yf::HttpRewrite::spair_vec & dest) +void yf::HttpRewrite::configure_rules(const xmlNode *ptr, + Rules & rules) { for (ptr = ptr->children; ptr; ptr = ptr->next) { @@ -355,14 +368,14 @@ static void configure_rules(const xmlNode *ptr, yf::HttpRewrite::spair_vec & des continue; else if (!strcmp((const char *) ptr->name, "rewrite")) { - std::string from, to; + Rule rule; const struct _xmlAttr *attr; for (attr = ptr->properties; attr; attr = attr->next) { if (!strcmp((const char *) attr->name, "from")) - from = mp::xml::get_text(attr->children); + rule.regex = mp::xml::get_text(attr->children); else if (!strcmp((const char *) attr->name, "to")) - to = mp::xml::get_text(attr->children); + rule.recipe = mp::xml::get_text(attr->children); else throw mp::filter::FilterException ("Bad attribute " @@ -370,9 +383,10 @@ static void configure_rules(const xmlNode *ptr, yf::HttpRewrite::spair_vec & des + " in rewrite section of http_rewrite"); } yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'", - from.c_str(), to.c_str()); - if (!from.empty()) - dest.push_back(std::make_pair(from, to)); + rule.regex.c_str(), rule.recipe.c_str()); + rule.parse_groups(); + if (!rule.regex.empty()) + rules.rules.push_back(rule); } else { @@ -387,19 +401,17 @@ static void configure_rules(const xmlNode *ptr, yf::HttpRewrite::spair_vec & des void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only, const char *path) { - spair_vec req_uri_pats; - spair_vec res_uri_pats; for (ptr = ptr->children; ptr; ptr = ptr->next) { if (ptr->type != XML_ELEMENT_NODE) continue; else if (!strcmp((const char *) ptr->name, "request")) { - configure_rules(ptr, req_uri_pats); + configure_rules(ptr, *req_rules); } else if (!strcmp((const char *) ptr->name, "response")) { - configure_rules(ptr, res_uri_pats); + configure_rules(ptr, *res_rules); } else { @@ -409,7 +421,6 @@ void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only, + " in http_rewrite1 filter"); } } - configure(req_uri_pats, res_uri_pats); } static mp::filter::Base* filter_creator() diff --git a/src/filter_http_rewrite.hpp b/src/filter_http_rewrite.hpp index 11b43e6..d611142 100644 --- a/src/filter_http_rewrite.hpp +++ b/src/filter_http_rewrite.hpp @@ -20,59 +20,25 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #define FILTER_HTTP_REWRITE_HPP #include -#include -#include -#include +#include namespace mp = metaproxy_1; namespace metaproxy_1 { namespace filter { class HttpRewrite : public Base { + class Rules; + class Rule; + class RuleScope; + boost::scoped_ptr req_rules; + boost::scoped_ptr res_rules; + void configure_rules(const xmlNode *ptr, Rules & rules); public: - typedef std::pair string_pair; - typedef std::vector spair_vec; - typedef spair_vec::iterator spv_iter; HttpRewrite(); ~HttpRewrite(); void process(metaproxy_1::Package & package) const; - void configure(const xmlNode * ptr, bool test_only, - const char *path); - void configure(const spair_vec req_uri_pats, - const spair_vec res_uri_pats); - private: - spair_vec req_uri_pats; - spair_vec res_uri_pats; - std::vector > req_groups_bynum; - std::vector > res_groups_bynum; - void rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq, - std::map & vars) const; - void rewrite_headers (mp::odr & o, Z_HTTP_Header *headers, - std::map & vars, - const spair_vec & uri_pats, - const std::vector > & groups_bynum_vec) const; - void rewrite_body (mp::odr & o, char **content_buf, int *content_len, - std::map & vars, - const spair_vec & uri_pats, - const std::vector > - & groups_bynum) const; - const std::string test_patterns( - std::map & vars, - const std::string & txt, - const spair_vec & uri_pats, - const std::vector > - & groups_bynum) const; - const std::string search_replace( - std::map & vars, - const std::string & txt, - const std::string & uri_re, - const std::string & uri_pat, - const std::map & groups_bynum) const; - static void parse_groups( - const spair_vec & uri_pats, - std::vector > & groups_bynum_vec); - static std::string sub_vars (const std::string & in, - const std::map & vars); + void configure(const xmlNode * ptr, + bool test_only, const char *path); }; } } diff --git a/src/test_filter_rewrite.cpp b/src/test_filter_rewrite.cpp index d266274..cb9c47c 100644 --- a/src/test_filter_rewrite.cpp +++ b/src/test_filter_rewrite.cpp @@ -60,27 +60,36 @@ BOOST_AUTO_TEST_CASE( test_filter_rewrite_1 ) std::cout << "Running non-xml config test case" << std::endl; mp::RouterChain router; mp::filter::HttpRewrite fhr; - - //configure the filter - mp::filter::HttpRewrite::spair_vec vec_req; - vec_req.push_back(std::make_pair( - "(?http\\:\\/\\/s?)(?[^\\/?#]+)\\/(?[^\\/]+)" - "\\/(?[^\\/]+)(?.*)", - "${proto}${host}${path}" - )); - vec_req.push_back(std::make_pair( - "(?:Host\\: )(.*)", - "Host: ${host}" - )); - - mp::filter::HttpRewrite::spair_vec vec_res; - vec_res.push_back(std::make_pair( - "(?http\\:\\/\\/s?)(?[^\\/?# \"'>]+)\\/(?[^ \"'>]+)", - "${proto}${pxhost}/${pxpath}/${host}/${path}" - )); - - fhr.configure(vec_req, vec_res); - + + std::string xmlconf = + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + "\n" + ; + + std::cout << xmlconf; + + // reading and parsing XML conf + xmlDocPtr doc = xmlParseMemory(xmlconf.c_str(), xmlconf.size()); + BOOST_CHECK(doc); + xmlNode *root_element = xmlDocGetRootElement(doc); + fhr.configure(root_element, true, ""); + xmlFreeDoc(doc); + router.append(fhr); // create an http request -- 1.7.10.4