Optional attribute values for HTML parser
author Adam Dickmeiss <adam@indexdata.dk>
Mon, 1 Jul 2013 12:04:31 +0000 (14:04 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Mon, 1 Jul 2013 12:04:31 +0000 (14:04 +0200)
src/filter_http_rewrite.cpp
src/html_parser.cpp
src/test_html_parser.cpp

index 5a4ca83..bf01aba 100644 (file)
@@ -334,20 +334,23 @@ void yf::HttpRewrite::Event::attribute(const char *tag, int tag_len,
 
     wrbuf_putc(m_w, ' ');
     wrbuf_write(m_w, attr, attr_len);
-    wrbuf_puts(m_w, "=");
-    wrbuf_puts(m_w, sep);
-
-    std::string output;
-    if (subst)
+    if (value)
     {
-        std::string input(value, val_len);
-        output = it->rule->test_patterns(m_vars, input);
+        wrbuf_puts(m_w, "=");
+        wrbuf_puts(m_w, sep);
+
+        std::string output;
+        if (subst)
+        {
+            std::string input(value, val_len);
+            output = it->rule->test_patterns(m_vars, input);
+        }
+        if (output.empty())
+            wrbuf_write(m_w, value, val_len);
+        else
+            wrbuf_puts(m_w, output.c_str());
+        wrbuf_puts(m_w, sep);
     }
-    if (output.empty())
-        wrbuf_write(m_w, value, val_len);
-    else
-        wrbuf_puts(m_w, output.c_str());
-    wrbuf_puts(m_w, sep);
 }
 
 void yf::HttpRewrite::Event::closeTag(const char *tag, int tag_len)
index e704620..8c4426d 100644 (file)
@@ -109,33 +109,32 @@ int mp::HTMLParser::Rep::skipAttribute(HTMLParserEvent &event,
     if (!i)
         return skipSpace(cp);
     i += skipSpace(cp + i);
-    if (cp[i] != '=')
-        return 0;
-
-    i++;
-    i += skipSpace(cp + i);
-    if (cp[i] == '\"' || cp[i] == '\'')
-    {
-        *tr = cp[i];
-        v0 = ++i;
-        while (cp[i] != *tr && cp[i])
-            i++;
-        v1 = i;
-        if (cp[i])
-            i++;
-    }
-    else
+    if (cp[i] == '=')
     {
-        *tr = 0;
-        v0 = i;
-        while (cp[i] && !strchr(SPACECHR ">", cp[i]))
-            i++;
-        v1 = i;
+        i++;
+        i += skipSpace(cp + i);
+        if (cp[i] == '\"' || cp[i] == '\'')
+        {
+            *tr = cp[i];
+            v0 = ++i;
+            while (cp[i] != *tr && cp[i])
+                i++;
+            v1 = i;
+            if (cp[i])
+                i++;
+        }
+        else
+        {
+            *tr = 0;
+            v0 = i;
+            while (cp[i] && !strchr(SPACECHR ">", cp[i]))
+                i++;
+            v1 = i;
+        }
+        *value = cp + v0;
+        *val_len = v1 - v0;
+        i += skipSpace(cp + i);
     }
-    *value = cp + v0;
-    *val_len = v1 - v0;
-
-    i += skipSpace(cp + i);
     return i;
 }
 
index baf42bc..9840c1d 100644 (file)
@@ -49,10 +49,13 @@ public:
                    const char *value, int val_len, const char *sep) {
         out += " ";
         out.append(attr, attr_len);
-        out += "=";
-        out += sep;
-        out.append(value, val_len);
-        out += sep;
+        if (value)
+        {
+            out += "=";
+            out += sep;
+            out.append(value, val_len);
+            out += sep;
+        }
     }
     void anyTagEnd(const char *tag, int tag_len, int close_it) {
         if (close_it)
@@ -183,12 +186,12 @@ BOOST_AUTO_TEST_CASE( test_html_parser_4 )
     {
         mp::HTMLParser hp;
         const char* html =
-            "<\"?xml version=\"1.0\" strandalone=\"no\"?  ax>\n"
-            "<book></book>";  // <book badboy></book> does not work
+            "<?xml version=\"1.0\" strandalone=\"no\"?  ax>\n"
+            "<book><x ? href/></book>";
 
         const char* expected = html;
         MyEvent e;
-        hp.set_verbose(1);
+        hp.set_verbose(0);
         hp.parse(e, html);
 
         BOOST_CHECK_EQUAL(std::string(expected), e.out);
@@ -200,7 +203,37 @@ BOOST_AUTO_TEST_CASE( test_html_parser_4 )
             std::cout << e.out << std::endl;
         }
     }
-    catch (std::exception & e) 
+    catch (std::exception & e)
+    {
+        std::cout << e.what();
+        std::cout << std::endl;
+        BOOST_CHECK (false);
+    }
+}
+
+BOOST_AUTO_TEST_CASE( test_html_parser_5 )
+{
+    try
+    {
+        mp::HTMLParser hp;
+        const char* html =
+            "<x link/>";
+
+        const char* expected = html;
+        MyEvent e;
+        hp.set_verbose(0);
+        hp.parse(e, html);
+
+        BOOST_CHECK_EQUAL(std::string(expected), e.out);
+        if (std::string(expected) != e.out)
+        {
+            std::cout << "Expected" << std::endl;
+            std::cout << expected << std::endl;
+            std::cout << "Got" << std::endl;
+            std::cout << e.out << std::endl;
+        }
+    }
+    catch (std::exception & e)
     {
         std::cout << e.what();
         std::cout << std::endl;
@@ -208,6 +241,7 @@ BOOST_AUTO_TEST_CASE( test_html_parser_4 )
     }
 }
 
+
 /*
  * Local variables:
  * c-basic-offset: 4