From f4f0f80d2118437e5047ba266f92d7acb3c38fb7 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 11 Apr 2011 16:41:01 +0400 Subject: update HTMLPurifier; enable embedded flash video in articles --- lib/htmlpurifier/library/HTMLPurifier/Lexer.php | 46 ++++++++++++++++++++----- 1 file changed, 37 insertions(+), 9 deletions(-) mode change 100755 => 100644 lib/htmlpurifier/library/HTMLPurifier/Lexer.php (limited to 'lib/htmlpurifier/library/HTMLPurifier/Lexer.php') diff --git a/lib/htmlpurifier/library/HTMLPurifier/Lexer.php b/lib/htmlpurifier/library/HTMLPurifier/Lexer.php old mode 100755 new mode 100644 index 945886998..9bdbbbb25 --- a/lib/htmlpurifier/library/HTMLPurifier/Lexer.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Lexer.php @@ -73,12 +73,12 @@ class HTMLPurifier_Lexer HTMLPurifier_Lexer::create() is deprecated, please instead use %Core.LexerImpl", E_USER_WARNING); } else { - $lexer = $config->get('Core', 'LexerImpl'); + $lexer = $config->get('Core.LexerImpl'); } $needs_tracking = - $config->get('Core', 'MaintainLineNumbers') || - $config->get('Core', 'CollectErrors'); + $config->get('Core.MaintainLineNumbers') || + $config->get('Core.CollectErrors'); $inst = null; if (is_object($lexer)) { @@ -230,6 +230,17 @@ class HTMLPurifier_Lexer ); } + /** + * Special Internet Explorer conditional comments should be removed. + */ + protected static function removeIEConditional($string) { + return preg_replace( + '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings + '', + $string + ); + } + /** * Callback function for escapeCDATA() that does the work. 
* @@ -252,10 +263,12 @@ class HTMLPurifier_Lexer public function normalize($html, $config, $context) { // normalize newlines to \n - $html = str_replace("\r\n", "\n", $html); - $html = str_replace("\r", "\n", $html); + if ($config->get('Core.NormalizeNewlines')) { + $html = str_replace("\r\n", "\n", $html); + $html = str_replace("\r", "\n", $html); + } - if ($config->get('HTML', 'Trusted')) { + if ($config->get('HTML.Trusted')) { // escape convoluted CDATA $html = $this->escapeCommentedCDATA($html); } @@ -263,9 +276,19 @@ class HTMLPurifier_Lexer // escape CDATA $html = $this->escapeCDATA($html); + $html = $this->removeIEConditional($html); + // extract body from document if applicable - if ($config->get('Core', 'ConvertDocumentToFragment')) { - $html = $this->extractBody($html); + if ($config->get('Core.ConvertDocumentToFragment')) { + $e = false; + if ($config->get('Core.CollectErrors')) { + $e =& $context->get('ErrorCollector'); + } + $new_html = $this->extractBody($html); + if ($e && $new_html != $html) { + $e->send(E_WARNING, 'Lexer: Extracted body'); + } + $html = $new_html; } // expand entities that aren't the big five @@ -276,6 +299,11 @@ class HTMLPurifier_Lexer // represent non-SGML characters (horror, horror!) $html = HTMLPurifier_Encoder::cleanUTF8($html); + // if processing instructions are to removed, remove them now + if ($config->get('Core.RemoveProcessingInstructions')) { + $html = preg_replace('#<\?.+?\?>#s', '', $html); + } + return $html; } @@ -285,7 +313,7 @@ class HTMLPurifier_Lexer */ public function extractBody($html) { $matches = array(); - $result = preg_match('!<body[^>]*>(.+?)</body>!is', $html, $matches); + $result = preg_match('!<body[^>]*>(.*)</body>!is', $html, $matches); if ($result) { return $matches[1]; } else { -- cgit v1.2.3-54-g00ecf