1 /* This file is part of Metaproxy.
2 Copyright (C) 2005-2013 Index Data
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 #include <metaproxy/filter.hpp>
21 #include <metaproxy/package.hpp>
22 #include <metaproxy/util.hpp>
23 #include "filter_http_rewrite.hpp"
28 #include <boost/regex.hpp>
29 #include <boost/lexical_cast.hpp>
35 #include <sys/types.h>
// Shorthand namespace aliases used by the definitions in this file:
// mp refers to metaproxy_1 and yf refers to metaproxy_1::filter.
38 namespace mp = metaproxy_1;
39 namespace yf = mp::filter;
41 namespace metaproxy_1 {
// RuleScope groups a list of tag names, a list of attribute names and a
// content type string.
// NOTE(review): presumably this narrows where a rule applies; no code visible
// in this view reads these members - confirm against the rest of the file.
43 class HttpRewrite::RuleScope {
// Tag names belonging to this scope.
45 std::vector<std::string> tags;
// Attribute names belonging to this scope.
46 std::vector<std::string> attrs;
// Content type associated with this scope.
47 std::string content_type;
// Rule describes a single rewrite rule: it can run a search-and-replace over
// a text (search_replace) and expand its replacement text from a variable
// map (sub_vars).
49 class HttpRewrite::Rule {
// Parts of an HTTP message a rule may refer to.
// NOTE(review): no visible code in this view uses Section - confirm usage.
51 enum Section { METHOD, HEADER, BODY };
// Maps a capture group number to the name of that group; filled by
// parse_groups() and consulted by search_replace().
54 std::map<int, std::string> group_index;
// Scopes attached to this rule (see RuleScope).
55 std::vector<RuleScope> scopes;
// Applies the rule to txt and returns the rewritten text; values of named
// groups that matched non-empty text are stored into vars.
57 const std::string search_replace(
58 std::map<std::string, std::string> & vars,
59 const std::string & txt) const;
// Builds the replacement string for this rule using the values in vars.
60 std::string sub_vars (
61 const std::map<std::string, std::string> & vars) const;
// Rules is an ordered collection of Rule objects together with the helpers
// that apply them to the request line, the headers and the body of an HTTP
// message.
64 class HttpRewrite::Rules {
// The rules, tried in vector order by test_patterns().
66 std::vector<Rule> rules;
// Rewrites the path of the request line of hreq; strings are copied with
// odr_strdup using o. vars collects named group values.
67 void rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq,
68 std::map<std::string, std::string> & vars) const;
// Rewrites each header of the linked list starting at headers.
69 void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers,
70 std::map<std::string, std::string> & vars) const;
// Rewrites the message body in place by updating *content_buf and
// *content_len.
71 void rewrite_body (mp::odr & o,
72 char **content_buf, int *content_len,
73 std::map<std::string, std::string> & vars) const;
// Runs the rules over txt in order and returns the first non-empty result.
74 const std::string test_patterns(
75 std::map<std::string, std::string> & vars,
76 const std::string & txt) const;
// Constructor: initialises req_rules and res_rules, each with a newly
// allocated, default-constructed Rules object (one set for requests, one for
// responses).
81 yf::HttpRewrite::HttpRewrite() : req_rules(new Rules), res_rules(new Rules)
// Destructor. Its body is not visible in this view.
// NOTE(review): how req_rules/res_rules are released depends on their
// declared type in the header - confirm.
85 yf::HttpRewrite::~HttpRewrite()
// Filter entry point. If the package request is an HTTP request, its request
// line, headers and body are rewritten with req_rules; if the package
// response is an HTTP response, its headers and body are rewritten with
// res_rules. The same vars map is passed to both phases, so values captured
// while rewriting the request are available when rewriting the response.
89 void yf::HttpRewrite::process(mp::Package & package) const
91 yaz_log(YLOG_LOG, "HttpRewrite begins....");
92 Z_GDU *gdu = package.request().get();
93 //map of request/response vars
94 std::map<std::string, std::string> vars;
// Request phase: only entered for a non-null GDU of type HTTP request.
96 if (gdu && gdu->which == Z_GDU_HTTP_Request)
98 Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
100 req_rules->rewrite_reqline(o, hreq, vars);
101 yaz_log(YLOG_LOG, ">> Request headers");
102 req_rules->rewrite_headers(o, hreq->headers, vars);
103 req_rules->rewrite_body(o,
104 &hreq->content_buf, &hreq->content_len,
// Store the (modified) GDU back as the package request.
106 package.request() = gdu;
// Response phase.
// NOTE(review): presumably the package is passed downstream between the two
// phases; that call is not visible in this view - confirm.
109 gdu = package.response().get();
110 if (gdu && gdu->which == Z_GDU_HTTP_Response)
112 Z_HTTP_Response *hres = gdu->u.HTTP_Response;
113 yaz_log(YLOG_LOG, "Response code %d", hres->code);
// NOTE(review): the log text below spells "Respose"; it is runtime output and
// is left unchanged here.
115 yaz_log(YLOG_LOG, "<< Respose headers");
116 res_rules->rewrite_headers(o, hres->headers, vars);
117 res_rules->rewrite_body(o, &hres->content_buf,
118 &hres->content_len, vars);
// Store the (modified) GDU back as the package response.
119 package.response() = gdu;
// Rewrites the path of the HTTP request line.
// A URL string is assembled in path, run through test_patterns(), and the
// result (npath) is logged and stored back into hreq->path as a copy made
// with odr_strdup using o.
//   o    - ODR handle passed to odr_strdup
//   hreq - request whose path is inspected and replaced
//   vars - receives named group values captured by the rules
123 void yf::HttpRewrite::Rules::rewrite_reqline (mp::odr & o,
124 Z_HTTP_Request *hreq,
125 std::map<std::string, std::string> & vars) const
127 //rewrite the request line
// True when hreq->path itself begins with "http://" (strstr finds the prefix
// at the very start of the string).
129 if (strstr(hreq->path, "http://") == hreq->path)
131 yaz_log(YLOG_LOG, "Path in the method line is absolute, "
132 "possibly a proxy request");
137 //TODO what about proto
// The value of the "Host" header is appended to the URL being built.
// NOTE(review): the lookup result is appended without a visible null check;
// if z_HTTP_header_lookup yields a null pointer for a missing Host header,
// this append is undefined behaviour - confirm and guard.
139 path += z_HTTP_header_lookup(hreq->headers, "Host");
142 yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
144 test_patterns(vars, path);
147 yaz_log(YLOG_LOG, "Rewritten request URL is %s", npath.c_str());
148 hreq->path = odr_strdup(o, npath.c_str());
// Rewrites HTTP headers one by one.
// For each header in the linked list, a single string is built from the
// header name and value, passed through test_patterns(), and the result is
// split again at the first ": " into a new name and value, both copied with
// odr_strdup using o.
//   o       - ODR handle passed to odr_strdup
//   headers - first node of the header list (traversed through ->next)
//   vars    - receives named group values captured by the rules
152 void yf::HttpRewrite::Rules::rewrite_headers(mp::odr & o,
153 Z_HTTP_Header *headers,
154 std::map<std::string, std::string> & vars) const
156 for (Z_HTTP_Header *header = headers;
158 header = header->next)
160 std::string sheader(header->name);
162 sheader += header->value;
163 yaz_log(YLOG_LOG, "%s: %s", header->name, header->value);
164 std::string out = test_patterns(vars, sheader);
// Locate the name/value separator in the rewritten text.
167 size_t pos = out.find(": ");
// No separator found: the rewritten text cannot be split into name and value.
168 if (pos == std::string::npos)
170 yaz_log(YLOG_LOG, "Header malformed during rewrite, ignoring");
// Text before the separator becomes the name; text after it (skipping the
// two separator characters) becomes the value.
173 header->name = odr_strdup(o, out.substr(0, pos).c_str());
174 header->value = odr_strdup(o, out.substr(pos+2,
175 std::string::npos).c_str());
// Rewrites the message body.
// The body is copied into a std::string, passed through test_patterns(), and
// the result (nbody) replaces *content_buf (copied with odr_strdup using o)
// and *content_len.
180 void yf::HttpRewrite::Rules::rewrite_body (mp::odr & o,
183 std::map<std::string, std::string> & vars) const
// The string is built from a C string pointer, so it ends at the first NUL
// byte; the current *content_len is not used in the visible lines.
// NOTE(review): bodies containing NUL bytes or lacking a terminator would not
// be handled correctly by this construction - confirm.
187 std::string body(*content_buf);
189 test_patterns(vars, body);
192 *content_buf = odr_strdup(o, nbody.c_str());
// size() is unsigned; the value is converted to int on assignment.
193 *content_len = nbody.size();
199 * Tests pattern from the vector in order and executes recipe on
// Tries the rules in vector order against txt and returns the output of the
// first rule whose search_replace() result is non-empty.
//   vars - passed through to each rule; receives named group values
//   txt  - text to rewrite
// NOTE(review): the value returned when no rule produces output is not
// visible in this view - confirm.
202 const std::string yf::HttpRewrite::Rules::test_patterns(
203 std::map<std::string, std::string> & vars,
204 const std::string & txt) const
206 for (unsigned i = 0; i < rules.size(); i++)
208 std::string out = rules[i].search_replace(vars, txt);
// First rule that yields non-empty output wins; later rules are not tried.
209 if (!out.empty()) return out;
// Applies this rule to txt.
// Every match of the rule's regex is located in turn; for each match the
// values of named capture groups are stored into vars, the replacement text
// is produced by sub_vars(vars), and the unmatched text preceding the match
// is appended to the output.
//   vars - updated with named group values (only non-empty captures)
//   txt  - text to search
214 const std::string yf::HttpRewrite::Rule::search_replace(
215 std::map<std::string, std::string> & vars,
216 const std::string & txt) const
218 //exec regex against value
// The regex object is constructed from the pattern string on every call.
219 boost::regex re(regex);
221 std::string::const_iterator start, end;
225 while (regex_search(start, end, what, re)) //find next full match
// Walk the capture groups of this match, starting at group 1.
228 for (i = 1; i < what.size(); ++i)
230 //check if the group is named
231 std::map<int, std::string>::const_iterator it
232 = group_index.find(i);
233 if (it != group_index.end())
// Empty captures do not overwrite a value already present in vars.
235 if (!what[i].str().empty())
236 vars[it->second] = what[i];
240 //prepare replacement string
241 std::string rvalue = sub_vars(vars);
242 yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'",
243 what.str(0).c_str(), rvalue.c_str());
// Copy the text between the previous position and the start of this match.
244 out.append(start, what[0].first);
246 start = what[0].second; //move search forward
248 //if we had a match cat the last part
// start differs from txt.begin() only after it has been advanced past a
// match above.
249 if (start != txt.begin())
250 out.append(start, end);
// Scans the rule's regex text for capture groups and records the names of
// named groups in group_index, keyed by group number (gnum).
// Recognised forms, per the checks below: "(?:" is treated as non-capturing,
// and a group name is read between '<' and '>' with an optional 'P' before
// the '<'.
// Throws mp::filter::FilterException when a group name contains a
// non-alphanumeric character or is not terminated by '>'.
254 void yf::HttpRewrite::Rule::parse_groups()
258 const std::string & str = regex;
259 yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str());
260 for (unsigned i = 0; i < str.size(); ++i)
// A backslash that is not itself escaped is detected here.
262 if (!esc && str[i] == '\\')
267 if (!esc && str[i] == '(') //group starts
270 if (i+1 < str.size() && str[i+1] == '?') //group with attrs
273 if (i+1 < str.size() && str[i+1] == ':') //non-capturing
// Non-capturing groups do not get a number, so the counter is decremented.
275 if (gnum > 0) gnum--;
279 if (i+1 < str.size() && str[i+1] == 'P') //optional, python
281 if (i+1 < str.size() && str[i+1] == '<') //named
// Consume the group name up to the closing '>'.
286 while (++i < str.size())
288 if (str[i] == '>') { term = true; break; }
// NOTE(review): isalnum is called with a plain char; for negative char
// values its behaviour is undefined - consider casting to unsigned char.
289 if (!isalnum(str[i]))
290 throw mp::filter::FilterException
291 ("Only alphanumeric chars allowed, found "
295 + boost::lexical_cast<std::string>(i));
// NOTE(review): the message below opens a quote before gname but does not
// close it before " in '".
299 throw mp::filter::FilterException
300 ("Unterminated group name '" + gname
301 + " in '" + str +"'");
// Remember the name of this group under its group number.
302 group_index[gnum] = gname;
303 yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
304 gname.c_str(), gnum);
// Builds the replacement string from the rule's recipe text.
// The recipe is scanned character by character; an unescaped '$' followed by
// '{' starts a variable reference that runs up to the closing '}' and is
// looked up in vars.
// Throws mp::filter::FilterException for an unterminated reference and for
// a malformed or trimmed reference.
//   vars - variable values available for substitution
312 std::string yf::HttpRewrite::Rule::sub_vars (
313 const std::map<std::string, std::string> & vars) const
317 const std::string & in = recipe;
318 for (unsigned i = 0; i < in.size(); ++i)
// A backslash that is not itself escaped is detected here.
320 if (!esc && in[i] == '\\')
325 if (!esc && in[i] == '$') //var
327 if (i+1 < in.size() && in[i+1] == '{') //ref prefix
// Consume the reference up to the closing '}'.
332 while (++i < in.size())
334 if (in[i] == '}') { term = true; break; }
337 if (!term) throw mp::filter::FilterException
338 ("Unterminated var ref in '"+in+"' at "
339 + boost::lexical_cast<std::string>(i));
340 std::map<std::string, std::string>::const_iterator it
// Taken when the referenced variable exists in vars.
342 if (it != vars.end())
349 throw mp::filter::FilterException
350 ("Malformed or trimmed var ref in '"
351 +in+"' at "+boost::lexical_cast<std::string>(i));
// Reads rewrite rules from the child elements of ptr.
// Each "rewrite" element contributes one rule: its "from" attribute supplies
// the regex and its "to" attribute supplies the recipe. A rule is stored
// only when its regex is non-empty.
// Throws mp::filter::FilterException for an attribute other than "from"/"to"
// and for an element other than "rewrite".
362 void yf::HttpRewrite::configure_rules(const xmlNode *ptr,
365 for (ptr = ptr->children; ptr; ptr = ptr->next)
// Nodes that are not elements get separate handling from the branches below.
367 if (ptr->type != XML_ELEMENT_NODE)
369 else if (!strcmp((const char *) ptr->name, "rewrite"))
372 const struct _xmlAttr *attr;
373 for (attr = ptr->properties; attr; attr = attr->next)
375 if (!strcmp((const char *) attr->name, "from"))
376 rule.regex = mp::xml::get_text(attr->children);
377 else if (!strcmp((const char *) attr->name, "to"))
378 rule.recipe = mp::xml::get_text(attr->children);
380 throw mp::filter::FilterException
382 + std::string((const char *) attr->name)
383 + " in rewrite section of http_rewrite");
385 yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'",
386 rule.regex.c_str(), rule.recipe.c_str());
// Rules without a pattern are dropped.
388 if (!rule.regex.empty())
389 rules.rules.push_back(rule);
// NOTE(review): this message names the filter "http_rewrite1" while the
// attribute error above says "http_rewrite" - confirm which is intended.
393 throw mp::filter::FilterException
395 + std::string((const char *) ptr->name)
396 + " in http_rewrite1 filter");
// Configures the filter from its XML node.
// A "request" child element is parsed into req_rules and a "response" child
// element into res_rules, both through configure_rules().
// Throws mp::filter::FilterException for any other element name.
401 void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
404 for (ptr = ptr->children; ptr; ptr = ptr->next)
// Nodes that are not elements get separate handling from the branches below.
406 if (ptr->type != XML_ELEMENT_NODE)
408 else if (!strcmp((const char *) ptr->name, "request"))
410 configure_rules(ptr, *req_rules);
412 else if (!strcmp((const char *) ptr->name, "response"))
414 configure_rules(ptr, *res_rules);
// NOTE(review): the message names the filter "http_rewrite1" - confirm that
// this is the intended filter name.
418 throw mp::filter::FilterException
420 + std::string((const char *) ptr->name)
421 + " in http_rewrite1 filter");
// Factory function: returns a newly allocated HttpRewrite filter as a
// pointer to the filter base class. The caller receives a raw pointer.
426 static mp::filter::Base* filter_creator()
428 return new mp::filter::HttpRewrite;
// Definition of the metaproxy_1_filter_struct object for this filter.
// NOTE(review): the initialiser fields are not visible in this view;
// presumably they include the filter name and filter_creator - confirm.
432 struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite = {
443 * c-file-style: "Stroustrup"
444 * indent-tabs-mode: nil
446 * vim: shiftwidth=4 tabstop=8 expandtab