diff src/xml.c @ 111835:05fc128a856f

Return CDATA sections (like <style>foo</style>) as text nodes. Also ignore blank HTML nodes.
author Lars Magne Ingebrigtsen <larsi@gnus.org>
date Mon, 06 Dec 2010 17:59:52 +0100
parents 5687cf9288cf
children 6378d1b57038
line wrap: on
line diff
--- a/src/xml.c	Mon Dec 06 11:51:37 2010 -0500
+++ b/src/xml.c	Mon Dec 06 17:59:52 2010 +0100
@@ -62,7 +62,7 @@
 
       return Fnreverse (result);
     }
-  else if (node->type == XML_TEXT_NODE)
+  else if (node->type == XML_TEXT_NODE || node->type == XML_CDATA_SECTION_NODE)
     {
       if (node->content)
 	return build_string (node->content);
@@ -105,7 +105,8 @@
     doc = htmlReadMemory (BYTE_POS_ADDR (CHAR_TO_BYTE (istart)),
 			  bytes, burl, "utf-8",
 			  HTML_PARSE_RECOVER|HTML_PARSE_NONET|
-			  HTML_PARSE_NOWARNING|HTML_PARSE_NOERROR);
+			  HTML_PARSE_NOWARNING|HTML_PARSE_NOERROR|
+			  HTML_PARSE_NOBLANKS);
   else
     doc = xmlReadMemory (BYTE_POS_ADDR (CHAR_TO_BYTE (istart)),
 			 bytes, burl, "utf-8",