1 /* This file is part of Metaproxy.
2 Copyright (C) 2005-2013 Index Data
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "html_parser.hpp"
24 #include <metaproxy/util.hpp>
26 #include <boost/lexical_cast.hpp>
30 #define BOOST_AUTO_TEST_MAIN
31 #define BOOST_TEST_DYN_LINK
33 #include <boost/test/auto_unit_test.hpp>
35 using namespace boost::unit_test;
36 namespace mp = metaproxy_1;
// Test helper: an mp::HTMLParserEvent implementation whose callbacks append
// what they receive to the string `out`; the test cases in this file compare
// `out` against an expected serialization.
// NOTE(review): this excerpt omits several lines of the class (declarations,
// braces, and anything appended around the values shown); only the visible
// statements are described here.
38 class MyEvent : public mp::HTMLParserEvent
// Opening tag begins: appends the tag name (tag_len bytes of tag) to out.
42 void openTagStart(const char *tag, int tag_len) {
44 out.append(tag, tag_len);
// One attribute of a tag: appends the attribute name (attr_len bytes) and
// then the value (val_len bytes) to out.
// NOTE(review): sep presumably carries the quoting/separator text for the
// value; its use is not visible in this excerpt -- confirm.
47 void attribute(const char *tag, int tag_len,
48 const char *attr, int attr_len,
49 const char *value, int val_len, const char *sep) {
51 out.append(attr, attr_len);
56 out.append(value, val_len);
// End of a tag; the body is not visible in this excerpt.
// NOTE(review): close_it presumably distinguishes "/>" from ">" -- confirm.
60 void anyTagEnd(const char *tag, int tag_len, int close_it) {
// Closing tag: appends the tag name (tag_len bytes of tag) to out.
65 void closeTag(const char *tag, int tag_len) {
67 out.append(tag, tag_len);
// Text content: appends len bytes of value to out unchanged.
69 void text(const char *value, int len) {
70 out.append(value, len);
// Test 1: a small HTML fragment with unquoted, single-quoted and
// double-quoted attribute values, a void element (<hr>) and a self-closed
// element (<a .../>). The recorded output e.out is compared with `expected`.
// NOTE(review): the lines that construct the parser/event objects and run the
// parse are not visible in this excerpt.
74 BOOST_AUTO_TEST_CASE( test_html_parser_1 )
// Input fragment (tail of the literal shown).
80 "<html><body><a t1=v1 t2='v2' t3=\"v3\">some text</a>"
81 "<hr><table ></table ><a href=\"x\"/></body></html>";
// Expected output differs from the input in one place only: the opening
// "<table >" is expected as "<table>", while the closing "</table >" keeps
// its space.
82 const char* expected =
83 "<html><body><a t1=v1 t2='v2' t3=\"v3\">some text</a>"
84 "<hr><table></table ><a href=\"x\"/></body></html>";
// Register the comparison with Boost.Test, then dump both strings to stdout
// on mismatch to make the difference easy to inspect.
89 BOOST_CHECK_EQUAL(std::string(expected), e.out);
90 if (std::string(expected) != e.out)
92 std::cout << "Expected" << std::endl;
93 std::cout << expected << std::endl;
94 std::cout << "Got" << std::endl;
95 std::cout << e.out << std::endl;
// Any std::exception reaching this handler has its message printed.
98 catch (std::exception & e)
100 std::cout << e.what();
101 std::cout << std::endl;
// Test 2: round-trip check on an HTML 4.01 error page with a DOCTYPE,
// upper-case tag names and newline/indentation text. The output is expected
// to equal the input exactly (expected = html).
// NOTE(review): only part of the input literal and none of the parser setup
// is visible in this excerpt.
106 BOOST_AUTO_TEST_CASE( test_html_parser_2 )
112 "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\n"
115 " <TITLE>YAZ 4.2.60</TITLE>\n"
118 " <P><A HREF=\"http://www.indexdata.com/yaz/\">YAZ</A> 4.2.60</P>\n"
119 " <P>Error: 404</P>\n"
120 " <P>Description: Not Found</P>\n"
// The recorded output must be identical to the input.
124 const char* expected = html;
// Register the comparison with Boost.Test, then dump both strings to stdout
// on mismatch.
129 BOOST_CHECK_EQUAL(std::string(expected), e.out);
130 if (std::string(expected) != e.out)
132 std::cout << "Expected" << std::endl;
133 std::cout << expected << std::endl;
134 std::cout << "Got" << std::endl;
135 std::cout << e.out << std::endl;
// Any std::exception reaching this handler has its message printed.
138 catch (std::exception & e)
140 std::cout << e.what();
141 std::cout << std::endl;
// Test 3: round-trip check on input that starts with an XML declaration and
// a multi-line DOCTYPE containing an ENTITY declaration. The output is
// expected to equal the input exactly (expected = html).
// NOTE(review): only part of the input literal and none of the parser setup
// is visible in this excerpt.
146 BOOST_AUTO_TEST_CASE( test_html_parser_3 )
152 "<?xml version=\"1.0\" strandalone=\"no\"?>\n"
153 "<!DOCTYPE book PUBLIC \"-//OASIS//DTD DocBook XML V4.4//EN\"\n"
154 " \"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd\"\n"
156 " <!ENTITY % local SYSTEM \"local.ent\">\n"
// The recorded output must be identical to the input.
161 const char* expected = html;
// Register the comparison with Boost.Test, then dump both strings to stdout
// on mismatch.
166 BOOST_CHECK_EQUAL(std::string(expected), e.out);
167 if (std::string(expected) != e.out)
169 std::cout << "Expected" << std::endl;
170 std::cout << expected << std::endl;
171 std::cout << "Got" << std::endl;
172 std::cout << e.out << std::endl;
// Any std::exception reaching this handler has its message printed.
175 catch (std::exception & e)
177 std::cout << e.what();
178 std::cout << std::endl;
// Test 4: round-trip check on irregular constructs: an XML declaration with
// extra text after the '?' ("? ax>"), a tag containing a stray '?'
// ("<x ? href/>"), and a comment whose body contains '>'. The output is
// expected to equal the input exactly (expected = html).
// NOTE(review): the parser setup is not visible in this excerpt.
183 BOOST_AUTO_TEST_CASE( test_html_parser_4 )
189 "<?xml version=\"1.0\" strandalone=\"no\"? ax>\n"
190 "<book><x ? href/><!-- hello > --></book>";
// The recorded output must be identical to the input.
192 const char* expected = html;
// Register the comparison with Boost.Test, then dump both strings to stdout
// on mismatch.
197 BOOST_CHECK_EQUAL(std::string(expected), e.out);
198 if (std::string(expected) != e.out)
200 std::cout << "Expected" << std::endl;
201 std::cout << expected << std::endl;
202 std::cout << "Got" << std::endl;
203 std::cout << e.out << std::endl;
// Any std::exception reaching this handler has its message printed.
206 catch (std::exception & e)
208 std::cout << e.what();
209 std::cout << std::endl;
// Test 5: round-trip check; the output is expected to equal the input
// exactly (expected = html).
// NOTE(review): the input literal and the parser setup are not visible in
// this excerpt, so what this case specifically exercises cannot be stated.
214 BOOST_AUTO_TEST_CASE( test_html_parser_5 )
// The recorded output must be identical to the input.
222 const char* expected = html;
// Register the comparison with Boost.Test, then dump both strings to stdout
// on mismatch.
227 BOOST_CHECK_EQUAL(std::string(expected), e.out);
228 if (std::string(expected) != e.out)
230 std::cout << "Expected" << std::endl;
231 std::cout << expected << std::endl;
232 std::cout << "Got" << std::endl;
233 std::cout << e.out << std::endl;
// Any std::exception reaching this handler has its message printed.
236 catch (std::exception & e)
238 std::cout << e.what();
239 std::cout << std::endl;
// Test 6: round-trip check on a <script> element whose content contains a
// '<' that does not start a real tag ("<x;"). The output is expected to
// equal the input exactly (expected = html).
// NOTE(review): the parser setup is not visible in this excerpt.
244 BOOST_AUTO_TEST_CASE( test_html_parser_6 )
250 "<html><script><x;</script></html>";
// The recorded output must be identical to the input.
252 const char* expected = html;
// Register the comparison with Boost.Test, then dump both strings to stdout
// on mismatch.
257 BOOST_CHECK_EQUAL(std::string(expected), e.out);
258 if (std::string(expected) != e.out)
260 std::cout << "Expected" << std::endl;
261 std::cout << expected << std::endl;
262 std::cout << "Got" << std::endl;
263 std::cout << e.out << std::endl;
// Any std::exception reaching this handler has its message printed.
266 catch (std::exception & e)
268 std::cout << e.what();
269 std::cout << std::endl;
278 * c-file-style: "Stroustrup"
279 * indent-tabs-mode: nil
281 * vim: shiftwidth=4 tabstop=8 expandtab