From 5b18c93622e97b9a251f3b85bdb088022fd5c0f3 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 18 Mar 2013 20:59:48 +0400 Subject: tweak hotkey map notation to allow stuff like shift-arrows --- include/functions.php | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/functions.php') diff --git a/include/functions.php b/include/functions.php index 8c2ced801..2120a96b9 100644 --- a/include/functions.php +++ b/include/functions.php @@ -1928,23 +1928,23 @@ "(191)|/" => "search_dialog", // "article" => array( "s" => "toggle_mark", - "S" => "toggle_publ", + "*s" => "toggle_publ", "u" => "toggle_unread", - "T" => "edit_tags", - "D" => "dismiss_selected", - "X" => "dismiss_read", + "*t" => "edit_tags", + "*d" => "dismiss_selected", + "*x" => "dismiss_read", "o" => "open_in_new_window", "c p" => "catchup_below", "c n" => "catchup_above", - "N" => "article_scroll_down", - "P" => "article_scroll_up", - "a W" => "toggle_widescreen", + "*n" => "article_scroll_down", + "*p" => "article_scroll_up", + "a *w" => "toggle_widescreen", "e" => "email_article", "a q" => "close_article", // "article_selection" => array( "a a" => "select_all", "a u" => "select_unread", - "a U" => "select_marked", + "a *u" => "select_marked", "a p" => "select_published", "a i" => "select_invert", "a n" => "select_none", @@ -1955,9 +1955,9 @@ "f e" => "feed_edit", "f q" => "feed_catchup", "f x" => "feed_reverse", - "f D" => "feed_debug_update", - "f C" => "toggle_combined_mode", - "Q" => "catchup_all", + "f *d" => "feed_debug_update", + "f *c" => "toggle_combined_mode", + "*q" => "catchup_all", "x" => "cat_toggle_collapse", // "goto" => array( "g a" => "goto_all", @@ -1965,7 +1965,7 @@ "g s" => "goto_marked", "g p" => "goto_published", "g t" => "goto_tagcloud", - "g P" => "goto_prefs", + "g *p" => "goto_prefs", // "other" => array( "(9)|Tab" => "select_article_cursor", // tab "c l" => "create_label", -- cgit v1.2.3-54-g00ecf From 2130fe7a18d7e1863575feda9ec39654541eee77 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 18 Mar 2013 22:26:36 +0400 Subject: add experimental HOOK_SANITIZE --- classes/pluginhost.php | 1 + include/functions.php | 23 ++++++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include/functions.php') diff --git a/classes/pluginhost.php b/classes/pluginhost.php index 592629881..79544b01b 100644 --- a/classes/pluginhost.php +++ b/classes/pluginhost.php @@ -21,6 +21,7 @@ class PluginHost { const HOOK_RENDER_ARTICLE = 10; const HOOK_RENDER_ARTICLE_CDM = 11; const HOOK_FEED_FETCHED = 12; + const HOOK_SANITIZE = 13; const KIND_ALL = 1; const KIND_SYSTEM = 2; diff --git a/include/functions.php b/include/functions.php index 2120a96b9..ae9561337 100644 --- a/include/functions.php +++ b/include/functions.php @@ -2597,12 +2597,25 @@ //$node = $doc->getElementsByTagName('body')->item(0); - $doc->removeChild($doc->firstChild); //remove doctype - $res = $doc->saveHTML(); - $config = array('safe' => 1, 'deny_attribute' => 'style, width, height, class, id', 'comment' => 1, 'cdata' => 1, 'balance' => 0); - $spec = 'img=width,height'; - $res = htmLawed($res, $config, $spec); + $beforehooks = $res; + + global $pluginhost; + if ($pluginhost) { + foreach ($pluginhost->get_hooks($pluginhost::HOOK_SANITIZE) as $p) { + $res = $p->hook_sanitize($res); + } + } + + // nothing changed, use standard filters + if ($beforehooks == $res) { + $doc->removeChild($doc->firstChild); //remove doctype + $res = $doc->saveHTML(); + + $config = array('safe' => 1, 'deny_attribute' => 'style, width, height, class, id', 'comment' => 1, 'cdata' => 1, 'balance' => 0); + $spec = 'img=width,height'; + $res = htmLawed($res, $config, $spec); + } return $res; } -- cgit v1.2.3-54-g00ecf From be124dc2361539455121fa29c77bf60a85014b13 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Mar 2013 00:00:13 +0400 Subject: Revert "add experimental HOOK_SANITIZE" This reverts commit 2130fe7a18d7e1863575feda9ec39654541eee77. --- classes/pluginhost.php | 1 - include/functions.php | 23 +++++------------------ 2 files changed, 5 insertions(+), 19 deletions(-) (limited to 'include/functions.php') diff --git a/classes/pluginhost.php b/classes/pluginhost.php index 79544b01b..592629881 100644 --- a/classes/pluginhost.php +++ b/classes/pluginhost.php @@ -21,7 +21,6 @@ class PluginHost { const HOOK_RENDER_ARTICLE = 10; const HOOK_RENDER_ARTICLE_CDM = 11; const HOOK_FEED_FETCHED = 12; - const HOOK_SANITIZE = 13; const KIND_ALL = 1; const KIND_SYSTEM = 2; diff --git a/include/functions.php b/include/functions.php index ae9561337..2120a96b9 100644 --- a/include/functions.php +++ b/include/functions.php @@ -2597,25 +2597,12 @@ //$node = $doc->getElementsByTagName('body')->item(0); + $doc->removeChild($doc->firstChild); //remove doctype + $res = $doc->saveHTML(); - $beforehooks = $res; - - global $pluginhost; - if ($pluginhost) { - foreach ($pluginhost->get_hooks($pluginhost::HOOK_SANITIZE) as $p) { - $res = $p->hook_sanitize($res); - } - } - - // nothing changed, use standard filters - if ($beforehooks == $res) { - $doc->removeChild($doc->firstChild); //remove doctype - $res = $doc->saveHTML(); - - $config = array('safe' => 1, 'deny_attribute' => 'style, width, height, class, id', 'comment' => 1, 'cdata' => 1, 'balance' => 0); - $spec = 'img=width,height'; - $res = htmLawed($res, $config, $spec); - } + $config = array('safe' => 1, 'deny_attribute' => 'style, width, height, class, id', 'comment' => 1, 'cdata' => 1, 'balance' => 0); + $spec = 'img=width,height'; + $res = htmLawed($res, $config, $spec); return $res; } -- cgit v1.2.3-54-g00ecf From e2b0054be49b513a63b3870fcc65f45f30b5b338 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Mar 2013 01:10:02 +0400 Subject: experimental patch to enable compatibility with php safe_mode --- include/functions.php | 54 ++++++++++++++++++++++++++++++++++++++++++++-- include/sanity_check.php | 4 ++-- plugins/af_unburn/init.php | 53 +++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 105 insertions(+), 6 deletions(-) (limited to 'include/functions.php') diff --git a/include/functions.php b/include/functions.php index 2120a96b9..435ce24b4 100644 --- a/include/functions.php +++ b/include/functions.php @@ -286,11 +286,12 @@ global $fetch_last_error; if (function_exists('curl_init') && !ini_get("open_basedir")) { - $ch = curl_init($url); + //$ch = curl_init($url); + $ch = curl_init(geturl($url)); curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout ? $timeout : 15); curl_setopt($ch, CURLOPT_TIMEOUT, $timeout ? $timeout : 45); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + //curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_MAXREDIRS, 20); curl_setopt($ch, CURLOPT_BINARYTRANSFER, true); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); @@ -3906,6 +3907,55 @@ function implements_interface($class, $interface) { return in_array($interface, class_implements($class)); } + + function geturl($url){ + + (function_exists('curl_init')) ? '' : die('cURL Must be installed for geturl function to work. Ask your host to enable it or uncomment extension=php_curl.dll in php.ini'); + + $curl = curl_init(); + $header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,"; + $header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"; + $header[] = "Cache-Control: max-age=0"; + $header[] = "Connection: keep-alive"; + $header[] = "Keep-Alive: 300"; + $header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7"; + $header[] = "Accept-Language: en-us,en;q=0.5"; + $header[] = "Pragma: "; + + curl_setopt($curl, CURLOPT_URL, $url); + curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0 Firefox/5.0'); + curl_setopt($curl, CURLOPT_HTTPHEADER, $header); + curl_setopt($curl, CURLOPT_HEADER, true); + curl_setopt($curl, CURLOPT_REFERER, $url); + curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate'); + curl_setopt($curl, CURLOPT_AUTOREFERER, true); + curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); + //curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true); //CURLOPT_FOLLOWLOCATION Disabled... + curl_setopt($curl, CURLOPT_TIMEOUT, 60); + + $html = curl_exec($curl); + + $status = curl_getinfo($curl); + curl_close($curl); + + if($status['http_code']!=200){ + if($status['http_code'] == 301 || $status['http_code'] == 302) { + list($header) = explode("\r\n\r\n", $html, 2); + $matches = array(); + preg_match("/(Location:|URI:)[^(\n)]*/", $header, $matches); + $url = trim(str_replace($matches[1],"",$matches[0])); + $url_parsed = parse_url($url); + return (isset($url_parsed))? geturl($url, $referer):''; + } + $oline=''; + foreach($status as $key=>$eline){$oline.='['.$key.']'.$eline.' ';} + $line =$oline." \r\n ".$url."\r\n-----------------\r\n"; + $handle = @fopen('./curl.error.log', 'a'); + fwrite($handle, $line); + return FALSE; + } + return $url; + } function get_minified_js($files) { require_once 'lib/jshrink/Minifier.php'; diff --git a/include/sanity_check.php b/include/sanity_check.php index 4925486a3..0373196b3 100644 --- a/include/sanity_check.php +++ b/include/sanity_check.php @@ -125,9 +125,9 @@ array_push($errors, "PHP support for iconv is required to handle multiple charsets."); } - if (ini_get("safe_mode")) { + /* if (ini_get("safe_mode")) { array_push($errors, "PHP safe mode setting is not supported."); - } + } */ if ((PUBSUBHUBBUB_HUB || PUBSUBHUBBUB_ENABLED) && !function_exists("curl_init")) { array_push($errors, "PHP support for CURL is required for PubSubHubbub."); diff --git a/plugins/af_unburn/init.php b/plugins/af_unburn/init.php index b68796fb4..a0c51c97e 100644 --- a/plugins/af_unburn/init.php +++ b/plugins/af_unburn/init.php @@ -29,11 +29,11 @@ class Af_Unburn extends Plugin { if (strpos($article["plugin_data"], "unburn,$owner_uid:") === FALSE) { - $ch = curl_init($article["link"]); + $ch = curl_init(geturl($article["link"])); curl_setopt($ch, CURLOPT_TIMEOUT, 5); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_HEADER, true); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + //curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_USERAGENT, SELF_USER_AGENT); $contents = @curl_exec($ch); @@ -74,5 +74,54 @@ class Af_Unburn extends Plugin { return $article; } + + function geturl($url){ + + (function_exists('curl_init')) ? '' : die('cURL Must be installed for geturl function to work. Ask your host to enable it or uncomment extension=php_curl.dll in php.ini'); + + $curl = curl_init(); + $header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,"; + $header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"; + $header[] = "Cache-Control: max-age=0"; + $header[] = "Connection: keep-alive"; + $header[] = "Keep-Alive: 300"; + $header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7"; + $header[] = "Accept-Language: en-us,en;q=0.5"; + $header[] = "Pragma: "; + + curl_setopt($curl, CURLOPT_URL, $url); + curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0 Firefox/5.0'); + curl_setopt($curl, CURLOPT_HTTPHEADER, $header); + curl_setopt($curl, CURLOPT_HEADER, true); + curl_setopt($curl, CURLOPT_REFERER, $url); + curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate'); + curl_setopt($curl, CURLOPT_AUTOREFERER, true); + curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); + //curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true); //CURLOPT_FOLLOWLOCATION Disabled... + curl_setopt($curl, CURLOPT_TIMEOUT, 60); + + $html = curl_exec($curl); + + $status = curl_getinfo($curl); + curl_close($curl); + + if($status['http_code']!=200){ + if($status['http_code'] == 301 || $status['http_code'] == 302) { + list($header) = explode("\r\n\r\n", $html, 2); + $matches = array(); + preg_match("/(Location:|URI:)[^(\n)]*/", $header, $matches); + $url = trim(str_replace($matches[1],"",$matches[0])); + $url_parsed = parse_url($url); + return (isset($url_parsed))? geturl($url, $referer):''; + } + $oline=''; + foreach($status as $key=>$eline){$oline.='['.$key.']'.$eline.' ';} + $line =$oline." \r\n ".$url."\r\n-----------------\r\n"; + $handle = @fopen('./curl.error.log', 'a'); + fwrite($handle, $line); + return FALSE; + } + return $url; + } } ?> -- cgit v1.2.3-54-g00ecf From 7b6c1ca77161841ebe54e0e1dc88576f58937b19 Mon Sep 17 00:00:00 2001 From: Valdis Vitolins Date: Mon, 18 Mar 2013 23:13:30 +0200 Subject: Resource files for Latvian were added --- include/functions.php | 1 + 1 file changed, 1 insertion(+) (limited to 'include/functions.php') diff --git a/include/functions.php b/include/functions.php index 2120a96b9..ee5e5f965 100644 --- a/include/functions.php +++ b/include/functions.php @@ -51,6 +51,7 @@ "hu_HU" => "Magyar (Hungarian)", "it_IT" => "Italiano", "ja_JP" => "日本語 (Japanese)", + "lv_LV" => "Latviešu", "nb_NO" => "Norwegian bokmål", "pl_PL" => "Polski", "ru_RU" => "Русский", -- cgit v1.2.3-54-g00ecf From e88c1943577993ffc661e92456b3d20c3b4ce832 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Mar 2013 09:02:40 +0400 Subject: print_select, print_select_hash: remove line endings and other special symbols (closes #593) --- include/functions.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/functions.php') diff --git a/include/functions.php b/include/functions.php index 538e43118..43a9270eb 100644 --- a/include/functions.php +++ b/include/functions.php @@ -461,6 +461,8 @@ else $sel = ""; + $v = trim($v); + print ""; } print ""; @@ -474,6 +476,8 @@ else $sel = ""; + $v = trim($v); + print ""; } @@ -3908,7 +3912,7 @@ function implements_interface($class, $interface) { return in_array($interface, class_implements($class)); } - + function geturl($url){ (function_exists('curl_init')) ? '' : die('cURL Must be installed for geturl function to work. Ask your host to enable it or uncomment extension=php_curl.dll in php.ini'); -- cgit v1.2.3-54-g00ecf From f0bd8e6531756b151c051f8ce5099d59408e71db Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Mar 2013 09:25:36 +0400 Subject: rework image caching to work without permanent article content rewriting (refs #582) --- image.php | 35 ++++++++++++++++++++++++++++------- include/functions.php | 15 +++++++++++---- include/rssfuncs.php | 2 +- 3 files changed, 40 insertions(+), 12 deletions(-) (limited to 'include/functions.php') diff --git a/image.php b/image.php index 91490ea22..cccda64b7 100644 --- a/image.php +++ b/image.php @@ -4,14 +4,35 @@ require_once "config.php"; - $url = base64_decode($_GET['url']); + // backwards compatible wrapper for old-style image caching + /* if (isset($_GET['url'])) { + $url = base64_decode($_GET['url']); - $filename = CACHE_DIR . '/images/' . sha1($url) . '.png'; + $filename = CACHE_DIR . '/images/' . sha1($url) . '.png'; - if (file_exists($filename)) { - header("Content-type: image/png"); - echo file_get_contents($filename); - } else { - header("Location: $url"); + if (file_exists($filename)) { + header("Content-type: image/png"); + echo file_get_contents($filename); + } else { + header("Location: $url"); + } + + return; + } */ + + @$hash = basename($_GET['hash']); + + if ($hash) { + + $filename = CACHE_DIR . '/images/' . $hash . '.png'; + + if (file_exists($filename)) { + header("Content-type: image/png"); + echo file_get_contents($filename); + } else { + header($_SERVER["SERVER_PROTOCOL"]." 404 Not Found"); + echo "File not found."; + } } + ?> diff --git a/include/functions.php b/include/functions.php index 43a9270eb..a7ac07b53 100644 --- a/include/functions.php +++ b/include/functions.php @@ -2590,10 +2590,17 @@ $entry->setAttribute('href', rewrite_relative_url($site_url, $entry->getAttribute('href'))); - if ($entry->hasAttribute('src')) - if (preg_match('/^image.php\?i=[a-z0-9]+$/', $entry->getAttribute('src')) == 0) - $entry->setAttribute('src', - rewrite_relative_url($site_url, $entry->getAttribute('src'))); + if ($entry->hasAttribute('src')) { + $src = rewrite_relative_url($site_url, $entry->getAttribute('src')); + + $cached_filename = CACHE_DIR . '/images/' . sha1($src) . '.png'; + + if (file_exists($cached_filename)) { + $src = SELF_URL_PATH . '/image.php?hash=' . sha1($src); + } + + $entry->setAttribute('src', $src); + } } if (strtolower($entry->nodeName) == "a") { diff --git a/include/rssfuncs.php b/include/rssfuncs.php index 1180b0adb..df1d16986 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -581,7 +581,7 @@ } if ($cache_images && is_writable(CACHE_DIR . '/images')) - $entry_content = cache_images($entry_content, $site_url, $debug_enabled); + cache_images($entry_content, $site_url, $debug_enabled); $entry_content = db_escape_string($entry_content, false); -- cgit v1.2.3-54-g00ecf From a0f2a3e105074380f0f02882ab075ec48049f63a Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Mar 2013 09:33:04 +0400 Subject: enable automatic decoding of gzipped data when CURL is not available --- include/functions.php | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) (limited to 'include/functions.php') diff --git a/include/functions.php b/include/functions.php index a7ac07b53..ad87c90c2 100644 --- a/include/functions.php +++ b/include/functions.php @@ -352,6 +352,9 @@ $data = @file_get_contents($url); + $gzdecoded = gzdecode($data); + if ($gzdecoded) $data = $gzdecoded; + if (!$data && function_exists('error_get_last')) { $error = error_get_last(); $fetch_last_error = $error["message"]; @@ -3997,4 +4000,123 @@ return $rv; } + // http://php.net/gzencode + function gzdecode($data) { + $len = strlen($data); + if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) { + return null; // Not GZIP format (See RFC 1952) + } + $method = ord(substr($data,2,1)); // Compression method + $flags = ord(substr($data,3,1)); // Flags + if ($flags & 31 != $flags) { + // Reserved bits are set -- NOT ALLOWED by RFC 1952 + return null; + } + // NOTE: $mtime may be negative (PHP integer limitations) + $mtime = unpack("V", substr($data,4,4)); + $mtime = $mtime[1]; + $xfl = substr($data,8,1); + $os = substr($data,8,1); + $headerlen = 10; + $extralen = 0; + $extra = ""; + if ($flags & 4) { + // 2-byte length prefixed EXTRA data in header + if ($len - $headerlen - 2 < 8) { + return false; // Invalid format + } + $extralen = unpack("v",substr($data,8,2)); + $extralen = $extralen[1]; + if ($len - $headerlen - 2 - $extralen < 8) { + return false; // Invalid format + } + $extra = substr($data,10,$extralen); + $headerlen += 2 + $extralen; + } + + $filenamelen = 0; + $filename = ""; + if ($flags & 8) { + // C-style string file NAME data in header + if ($len - $headerlen - 1 < 8) { + return false; // Invalid format + } + $filenamelen = strpos(substr($data,8+$extralen),chr(0)); + if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) { + return false; // Invalid format + } + $filename = substr($data,$headerlen,$filenamelen); + $headerlen += $filenamelen + 1; + } + + $commentlen = 0; + $comment = ""; + if ($flags & 16) { + // C-style string COMMENT data in header + if ($len - $headerlen - 1 < 8) { + return false; // Invalid format + } + $commentlen = strpos(substr($data,8+$extralen+$filenamelen),chr(0)); + if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) { + return false; // Invalid header format + } + $comment = substr($data,$headerlen,$commentlen); + $headerlen += $commentlen + 1; + } + + $headercrc = ""; + if ($flags & 1) { + // 2-bytes (lowest order) of CRC32 on header present + if ($len - $headerlen - 2 < 8) { + return false; // Invalid format + } + $calccrc = crc32(substr($data,0,$headerlen)) & 0xffff; + $headercrc = unpack("v", substr($data,$headerlen,2)); + $headercrc = $headercrc[1]; + if ($headercrc != $calccrc) { + return false; // Bad header CRC + } + $headerlen += 2; + } + + // GZIP FOOTER - These be negative due to PHP's limitations + $datacrc = unpack("V",substr($data,-8,4)); + $datacrc = $datacrc[1]; + $isize = unpack("V",substr($data,-4)); + $isize = $isize[1]; + + // Perform the decompression: + $bodylen = $len-$headerlen-8; + if ($bodylen < 1) { + // This should never happen - IMPLEMENTATION BUG! + return null; + } + $body = substr($data,$headerlen,$bodylen); + $data = ""; + if ($bodylen > 0) { + switch ($method) { + case 8: + // Currently the only supported compression method: + $data = gzinflate($body); + break; + default: + // Unknown compression method + return false; + } + } else { + // I'm not sure if zero-byte body content is allowed. + // Allow it for now... Do nothing... + } + + // Verifiy decompressed size and CRC32: + // NOTE: This may fail with large data sizes depending on how + // PHP's integer limitations affect strlen() since $isize + // may be negative for large sizes. + if ($isize != strlen($data) || crc32($data) != $datacrc) { + // Bad format! Length or CRC doesn't match! + return false; + } + return $data; + } + ?> -- cgit v1.2.3-54-g00ecf From 254a3f56a901d94a99a1a425ceecf62e8fd06051 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Mar 2013 11:13:57 +0400 Subject: add prototype simple remover of baaaad tags based on domdocument --- include/functions.php | 40 ++- lib/htmLawed.php | 723 -------------------------------------------------- 2 files changed, 35 insertions(+), 728 deletions(-) delete mode 100644 lib/htmLawed.php (limited to 'include/functions.php') diff --git a/include/functions.php b/include/functions.php index ad87c90c2..341177b0a 100644 --- a/include/functions.php +++ b/include/functions.php @@ -2611,16 +2611,46 @@ } } - //$node = $doc->getElementsByTagName('body')->item(0); + $entries = $xpath->query('//iframe'); + foreach ($entries as $entry) { + $entry->setAttribute('sandbox', true); + } $doc->removeChild($doc->firstChild); //remove doctype + $doc = strip_harmful_tags($doc); $res = $doc->saveHTML(); + return $res; + } - $config = array('safe' => 1, 'deny_attribute' => 'style, width, height, class, id', 'comment' => 1, 'cdata' => 1, 'balance' => 0); - $spec = 'img=width,height'; - $res = htmLawed($res, $config, $spec); + function strip_harmful_tags($doc) { + $entries = $doc->getElementsByTagName("*"); - return $res; + $allowed_elements = array('p', 'br', 'div', 'table', 'tr', 'td', 'th', + 'ul', 'ol', 'li', 'blockquote', 'span', 'html', 'body', 'a', 'img', + 'iframe', 'video', 'audio', 'source'); + + $disallowed_attributes = array('id', 'style', 'class'); + + foreach ($entries as $entry) { + if (!in_array($entry->nodeName, $allowed_elements)) { + $entry->parentNode->removeChild($entry); + } + + if ($entry->hasAttributes()) { + foreach (iterator_to_array($entry->attributes) as $attr) { + + if (strpos($attr->nodeName, 'on') === 0) { + $entry->removeAttributeNode($attr); + } + + if (in_array($attr->nodeName, $disallowed_attributes)) { + $entry->removeAttributeNode($attr); + } + } + } + } + + return $doc; } function check_for_update($link) { diff --git a/lib/htmLawed.php b/lib/htmLawed.php deleted file mode 100644 index 3ef5a971e..000000000 --- a/lib/htmLawed.php +++ /dev/null @@ -1,723 +0,0 @@ -1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'applet'=>1, 'area'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'blockquote'=>1, 'br'=>1, 'button'=>1, 'caption'=>1, 'center'=>1, 'cite'=>1, 'code'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'del'=>1, 'dfn'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'dt'=>1, 'em'=>1, 'embed'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'isindex'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'object'=>1, 'ol'=>1, 'optgroup'=>1, 'option'=>1, 'p'=>1, 'param'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'table'=>1, 'tbody'=>1, 'td'=>1, 'textarea'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'tt'=>1, 'u'=>1, 'ul'=>1, 'var'=>1); // 86/deprecated+embed+ruby -if(!empty($C['safe'])){ - unset($e['applet'], $e['embed'], $e['iframe'], $e['object'], $e['script']); -} -$x = !empty($C['elements']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['elements']) : '*'; -if($x == '-*'){$e = array();} -elseif(strpos($x, '*') === false){$e = array_flip(explode(',', $x));} -else{ - if(isset($x[1])){ - preg_match_all('`(?:^|-|\+)[^\-+]+?(?=-|\+|$)`', $x, $m, PREG_SET_ORDER); - for($i=count($m); --$i>=0;){$m[$i] = $m[$i][0];} - foreach($m as $v){ - if($v[0] == '+'){$e[substr($v, 1)] = 1;} - if($v[0] == '-' && isset($e[($v = substr($v, 1))]) && !in_array('+'. $v, $m)){unset($e[$v]);} - } - } -} -$C['elements'] =& $e; -// config attrs -$x = !empty($C['deny_attribute']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['deny_attribute']) : ''; -$x = array_flip((isset($x[0]) && $x[0] == '*') ? explode('-', $x) : explode(',', $x. (!empty($C['safe']) ? ',on*' : ''))); -if(isset($x['on*'])){ - unset($x['on*']); - $x += array('onblur'=>1, 'onchange'=>1, 'onclick'=>1, 'ondblclick'=>1, 'onfocus'=>1, 'onkeydown'=>1, 'onkeypress'=>1, 'onkeyup'=>1, 'onmousedown'=>1, 'onmousemove'=>1, 'onmouseout'=>1, 'onmouseover'=>1, 'onmouseup'=>1, 'onreset'=>1, 'onselect'=>1, 'onsubmit'=>1); -} -$C['deny_attribute'] = $x; -// config URL -$x = (isset($C['schemes'][2]) && strpos($C['schemes'], ':')) ? strtolower($C['schemes']) : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https'; -$C['schemes'] = array(); -foreach(explode(';', str_replace(array(' ', "\t", "\r", "\n"), '', $x)) as $v){ - $x = $x2 = null; list($x, $x2) = explode(':', $v, 2); - if($x2){$C['schemes'][$x] = array_flip(explode(',', $x2));} -} -if(!isset($C['schemes']['*'])){$C['schemes']['*'] = array('file'=>1, 'http'=>1, 'https'=>1,);} -if(!empty($C['safe']) && empty($C['schemes']['style'])){$C['schemes']['style'] = array('!'=>1);} -$C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0; -if(!isset($C['base_url']) or !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $C['base_url'])){ - $C['base_url'] = $C['abs_url'] = 0; -} -// config rest -$C['and_mark'] = empty($C['and_mark']) ? 0 : 1; -$C['anti_link_spam'] = (isset($C['anti_link_spam']) && is_array($C['anti_link_spam']) && count($C['anti_link_spam']) == 2 && (empty($C['anti_link_spam'][0]) or hl_regex($C['anti_link_spam'][0])) && (empty($C['anti_link_spam'][1]) or hl_regex($C['anti_link_spam'][1]))) ? $C['anti_link_spam'] : 0; -$C['anti_mail_spam'] = isset($C['anti_mail_spam']) ? $C['anti_mail_spam'] : 0; -$C['balance'] = isset($C['balance']) ? (bool)$C['balance'] : 1; -$C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0); -$C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char']; -$C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0); -$C['css_expression'] = empty($C['css_expression']) ? 0 : 1; -$C['direct_list_nest'] = empty($C['direct_list_nest']) ? 0 : 1; -$C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1; -$C['hook'] = (!empty($C['hook']) && function_exists($C['hook'])) ? $C['hook'] : 0; -$C['hook_tag'] = (!empty($C['hook_tag']) && function_exists($C['hook_tag'])) ? $C['hook_tag'] : 0; -$C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6; -$C['lc_std_val'] = isset($C['lc_std_val']) ? (bool)$C['lc_std_val'] : 1; -$C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1; -$C['named_entity'] = isset($C['named_entity']) ? (bool)$C['named_entity'] : 1; -$C['no_deprecated_attr'] = isset($C['no_deprecated_attr']) ? $C['no_deprecated_attr'] : 1; -$C['parent'] = isset($C['parent'][0]) ? strtolower($C['parent']) : 'body'; -$C['show_setting'] = !empty($C['show_setting']) ? $C['show_setting'] : 0; -$C['style_pass'] = empty($C['style_pass']) ? 0 : 1; -$C['tidy'] = empty($C['tidy']) ? 0 : $C['tidy']; -$C['unique_ids'] = isset($C['unique_ids']) ? $C['unique_ids'] : 1; -$C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 0; - -if(isset($GLOBALS['C'])){$reC = $GLOBALS['C'];} -$GLOBALS['C'] = $C; -$S = is_array($S) ? $S : hl_spec($S); -if(isset($GLOBALS['S'])){$reS = $GLOBALS['S'];} -$GLOBALS['S'] = $S; - -$t = preg_replace('`[\x00-\x08\x0b-\x0c\x0e-\x1f]`', '', $t); -if($C['clean_ms_char']){ - $x = array("\x7f"=>'', "\x80"=>'€', "\x81"=>'', "\x83"=>'ƒ', "\x85"=>'…', "\x86"=>'†', "\x87"=>'‡', "\x88"=>'ˆ', "\x89"=>'‰', "\x8a"=>'Š', "\x8b"=>'‹', "\x8c"=>'Œ', "\x8d"=>'', "\x8e"=>'Ž', "\x8f"=>'', "\x90"=>'', "\x95"=>'•', "\x96"=>'–', "\x97"=>'—', "\x98"=>'˜', "\x99"=>'™', "\x9a"=>'š', "\x9b"=>'›', "\x9c"=>'œ', "\x9d"=>'', "\x9e"=>'ž', "\x9f"=>'Ÿ'); - $x = $x + ($C['clean_ms_char'] == 1 ? array("\x82"=>'‚', "\x84"=>'„', "\x91"=>'‘', "\x92"=>'’', "\x93"=>'“', "\x94"=>'”') : array("\x82"=>'\'', "\x84"=>'"', "\x91"=>'\'', "\x92"=>'\'', "\x93"=>'"', "\x94"=>'"')); - $t = strtr($t, $x); -} -if($C['cdata'] or $C['comment']){$t = preg_replace_callback('``sm', 'hl_cmtcd', $t);} -$t = preg_replace_callback('`&([A-Za-z][A-Za-z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`', 'hl_ent', str_replace('&', '&', $t)); -if($C['unique_ids'] && !isset($GLOBALS['hl_Ids'])){$GLOBALS['hl_Ids'] = array();} -if($C['hook']){$t = $C['hook']($t, $C, $S);} -if($C['show_setting'] && preg_match('`^[a-z][a-z0-9_]*$`i', $C['show_setting'])){ - $GLOBALS[$C['show_setting']] = array('config'=>$C, 'spec'=>$S, 'time'=>microtime()); -} -// main -$t = preg_replace_callback('`<(?:(?:\s|$)|(?:[^>]*(?:>|$)))|>`m', 'hl_tag', $t); -$t = $C['balance'] ? hl_bal($t, $C['keep_bad'], $C['parent']) : $t; -$t = (($C['cdata'] or $C['comment']) && strpos($t, "\x01") !== false) ? str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $t) : $t; -$t = $C['tidy'] ? hl_tidy($t, $C['tidy'], $C['parent']) : $t; -unset($C, $e); -if(isset($reC)){$GLOBALS['C'] = $reC;} -if(isset($reS)){$GLOBALS['S'] = $reS;} -return $t; -// eof -} - -function hl_attrval($t, $p){ -// check attr val against $S -$o = 1; $l = strlen($t); -foreach($p as $k=>$v){ - switch($k){ - case 'maxlen':if($l > $v){$o = 0;} - break; case 'minlen': if($l < $v){$o = 0;} - break; case 'maxval': if((float)($t) > $v){$o = 0;} - break; case 'minval': if((float)($t) < $v){$o = 0;} - break; case 'match': if(!preg_match($v, $t)){$o = 0;} - break; case 'nomatch': if(preg_match($v, $t)){$o = 0;} - break; case 'oneof': - $m = 0; - foreach(explode('|', $v) as $n){if($t == $n){$m = 1; break;}} - $o = $m; - break; case 'noneof': - $m = 1; - foreach(explode('|', $v) as $n){if($t == $n){$m = 0; break;}} - $o = $m; - break; default: - break; - } - if(!$o){break;} -} -return ($o ? $t : (isset($p['default']) ? $p['default'] : 0)); -// eof -} - -function hl_bal($t, $do=1, $in='div'){ -// balance tags -// by content -$cB = array('blockquote'=>1, 'form'=>1, 'map'=>1, 'noscript'=>1); // Block -$cE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty -$cF = array('button'=>1, 'del'=>1, 'div'=>1, 'dd'=>1, 'fieldset'=>1, 'iframe'=>1, 'ins'=>1, 'li'=>1, 'noscript'=>1, 'object'=>1, 'td'=>1, 'th'=>1); // Flow; later context-wise dynamic move of ins & del to $cI -$cI = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'caption'=>1, 'cite'=>1, 'code'=>1, 'dfn'=>1, 'dt'=>1, 'em'=>1, 'font'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'i'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'p'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rt'=>1, 's'=>1, 'samp'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); // Inline -$cN = array('a'=>array('a'=>1), 'button'=>array('a'=>1, 'button'=>1, 'fieldset'=>1, 'form'=>1, 'iframe'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'fieldset'=>array('fieldset'=>1), 'form'=>array('form'=>1), 'label'=>array('label'=>1), 'noscript'=>array('script'=>1), 'pre'=>array('big'=>1, 'font'=>1, 'img'=>1, 'object'=>1, 'script'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1), 'rb'=>array('ruby'=>1), 'rt'=>array('ruby'=>1)); // Illegal -$cN2 = array_keys($cN); -$cR = array('blockquote'=>1, 'dir'=>1, 'dl'=>1, 'form'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1); -$cS = array('colgroup'=>array('col'=>1), 'dir'=>array('li'=>1), 'dl'=>array('dd'=>1, 'dt'=>1), 'menu'=>array('li'=>1), 'ol'=>array('li'=>1), 'optgroup'=>array('option'=>1), 'option'=>array('#pcdata'=>1), 'rbc'=>array('rb'=>1), 'rp'=>array('#pcdata'=>1), 'rtc'=>array('rt'=>1), 'ruby'=>array('rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1), 'select'=>array('optgroup'=>1, 'option'=>1), 'script'=>array('#pcdata'=>1), 'table'=>array('caption'=>1, 'col'=>1, 'colgroup'=>1, 'tfoot'=>1, 'tbody'=>1, 'tr'=>1, 'thead'=>1), 'tbody'=>array('tr'=>1), 'tfoot'=>array('tr'=>1), 'textarea'=>array('#pcdata'=>1), 'thead'=>array('tr'=>1), 'tr'=>array('td'=>1, 'th'=>1), 'ul'=>array('li'=>1)); // Specific - immediate parent-child -if($GLOBALS['C']['direct_list_nest']){$cS['ol'] = $cS['ul'] += array('ol'=>1, 'ul'=>1);} -$cO = array('address'=>array('p'=>1), 'applet'=>array('param'=>1), 'blockquote'=>array('script'=>1), 'fieldset'=>array('legend'=>1, '#pcdata'=>1), 'form'=>array('script'=>1), 'map'=>array('area'=>1), 'object'=>array('param'=>1, 'embed'=>1)); // Other -$cT = array('colgroup'=>1, 'dd'=>1, 'dt'=>1, 'li'=>1, 'option'=>1, 'p'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1); // Omitable closing -// block/inline type; ins & del both type; #pcdata: text -$eB = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'del'=>1, 'dir'=>1, 'dl'=>1, 'div'=>1, 'fieldset'=>1, 'form'=>1, 'ins'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'isindex'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'table'=>1, 'ul'=>1); -$eI = array('#pcdata'=>1, 'a'=>1, 'abbr'=>1, 'acronym'=>1, 'applet'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'br'=>1, 'button'=>1, 'cite'=>1, 'code'=>1, 'del'=>1, 'dfn'=>1, 'em'=>1, 'embed'=>1, 'font'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'kbd'=>1, 'label'=>1, 'map'=>1, 'object'=>1, 'q'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'select'=>1, 'script'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); -$eN = array('a'=>1, 'big'=>1, 'button'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'label'=>1, 'object'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1); // Exclude from specific ele; $cN values -$eO = array('area'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'dt'=>1, 'legend'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'script'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'thead'=>1, 'th'=>1, 'tr'=>1); // Missing in $eB & $eI -$eF = $eB + $eI; - -// $in sets allowed child -$in = ((isset($eF[$in]) && $in != '#pcdata') or isset($eO[$in])) ? $in : 'div'; -if(isset($cE[$in])){ - return (!$do ? '' : str_replace(array('<', '>'), array('<', '>'), $t)); -} -if(isset($cS[$in])){$inOk = $cS[$in];} -elseif(isset($cI[$in])){$inOk = $eI; $cI['del'] = 1; $cI['ins'] = 1;} -elseif(isset($cF[$in])){$inOk = $eF; unset($cI['del'], $cI['ins']);} -elseif(isset($cB[$in])){$inOk = $eB; unset($cI['del'], $cI['ins']);} -if(isset($cO[$in])){$inOk = $inOk + $cO[$in];} -if(isset($cN[$in])){$inOk = array_diff_assoc($inOk, $cN[$in]);} - -$t = explode('<', $t); -$ok = $q = array(); // $q seq list of open non-empty ele -ob_start(); - -for($i=-1, $ci=count($t); ++$i<$ci;){ - // allowed $ok in parent $p - if($ql = count($q)){ - $p = array_pop($q); - $q[] = $p; - if(isset($cS[$p])){$ok = $cS[$p];} - elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;} - elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);} - elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);} - if(isset($cO[$p])){$ok = $ok + $cO[$p];} - if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);} - }else{$ok = $inOk; unset($cI['del'], $cI['ins']);} - // bad tags, & ele content - if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){ - echo '<', $s, $e, $a, '>'; - } - if(isset($x[0])){ - if(strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))){ - echo '
', $x, '
'; - } - elseif($do < 3 or isset($ok['#pcdata'])){echo $x;} - elseif(strpos($x, "\x02\x04")){ - foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){ - echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : '')); - } - }elseif($do > 4){echo preg_replace('`\S`', '', $x);} - } - // get markup - if(!preg_match('`^(/?)([a-z1-6]+)([^>]*)>(.*)`sm', $t[$i], $r)){$x = $t[$i]; continue;} - $s = null; $e = null; $a = null; $x = null; list($all, $s, $e, $a, $x) = $r; - // close tag - if($s){ - if(isset($cE[$e]) or !in_array($e, $q)){continue;} // Empty/unopen - if($p == $e){array_pop($q); echo ''; unset($e); continue;} // Last open - $add = ''; // Nesting - close open tags that need to be - for($j=-1, $cj=count($q); ++$j<$cj;){ - if(($d = array_pop($q)) == $e){break;} - else{$add .= "";} - } - echo $add, ''; unset($e); continue; - } - // open tag - // $cB ele needs $eB ele as child - if(isset($cB[$e]) && strlen(trim($x))){ - $t[$i] = "{$e}{$a}>"; - array_splice($t, $i+1, 0, 'div>'. $x); unset($e, $x); ++$ci; --$i; continue; - } - if((($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql)) && !isset($eB[$e]) && !isset($ok[$e])){ - array_splice($t, $i, 0, 'div>'); unset($e, $x); ++$ci; --$i; continue; - } - // if no open ele, $in = parent; mostly immediate parent-child relation should hold - if(!$ql or !isset($eN[$e]) or !array_intersect($q, $cN2)){ - if(!isset($ok[$e])){ - if($ql && isset($cT[$p])){echo ''; unset($e, $x); --$i;} - continue; - } - if(!isset($cE[$e])){$q[] = $e;} - echo '<', $e, $a, '>'; unset($e); continue; - } - // specific parent-child - if(isset($cS[$p][$e])){ - if(!isset($cE[$e])){$q[] = $e;} - echo '<', $e, $a, '>'; unset($e); continue; - } - // nesting - $add = ''; - $q2 = array(); - for($k=-1, $kc=count($q); ++$k<$kc;){ - $d = $q[$k]; - $ok2 = array(); - if(isset($cS[$d])){$q2[] = $d; continue;} - $ok2 = isset($cI[$d]) ? $eI : $eF; - if(isset($cO[$d])){$ok2 = $ok2 + $cO[$d];} - if(isset($cN[$d])){$ok2 = array_diff_assoc($ok2, $cN[$d]);} - if(!isset($ok2[$e])){ - if(!$k && !isset($inOk[$e])){continue 2;} - $add = ""; - for(;++$k<$kc;){$add = "{$add}";} - break; - } - else{$q2[] = $d;} - } - $q = $q2; - if(!isset($cE[$e])){$q[] = $e;} - echo $add, '<', $e, $a, '>'; unset($e); continue; -} - -// end -if($ql = count($q)){ - $p = array_pop($q); - $q[] = $p; - if(isset($cS[$p])){$ok = $cS[$p];} - elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;} - elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);} - elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);} - if(isset($cO[$p])){$ok = $ok + $cO[$p];} - if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);} -}else{$ok = $inOk; unset($cI['del'], $cI['ins']);} -if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){ - echo '<', $s, $e, $a, '>'; -} -if(isset($x[0])){ - if(strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))){ - echo '
', $x, '
'; - } - elseif($do < 3 or isset($ok['#pcdata'])){echo $x;} - elseif(strpos($x, "\x02\x04")){ - foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){ - echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : '')); - } - }elseif($do > 4){echo preg_replace('`\S`', '', $x);} -} -while(!empty($q) && ($e = array_pop($q))){echo '';} -$o = ob_get_contents(); -ob_end_clean(); -return $o; -// eof -} - -function hl_cmtcd($t){ -// comment/CDATA sec handler -$t = $t[0]; -global $C; -if(!($v = $C[$n = $t[3] == '-' ? 'comment' : 'cdata'])){return $t;} -if($v == 1){return '';} -if($n == 'comment'){ - if(substr(($t = preg_replace('`--+`', '-', substr($t, 4, -3))), -1) != ' '){$t .= ' ';} -} -else{$t = substr($t, 1, -1);} -$t = $v == 2 ? str_replace(array('&', '<', '>'), array('&', '<', '>'), $t) : $t; -return str_replace(array('&', '<', '>'), array("\x03", "\x04", "\x05"), ($n == 'comment' ? "\x01\x02\x04!--$t--\x05\x02\x01" : "\x01\x01\x04$t\x05\x01\x01")); -// eof -} - -function hl_ent($t){ -// entitity handler -global $C; -$t = $t[1]; -static $U = array('quot'=>1,'amp'=>1,'lt'=>1,'gt'=>1); -static $N = array('fnof'=>'402', 'Alpha'=>'913', 'Beta'=>'914', 'Gamma'=>'915', 'Delta'=>'916', 'Epsilon'=>'917', 'Zeta'=>'918', 'Eta'=>'919', 'Theta'=>'920', 'Iota'=>'921', 'Kappa'=>'922', 'Lambda'=>'923', 'Mu'=>'924', 'Nu'=>'925', 'Xi'=>'926', 'Omicron'=>'927', 'Pi'=>'928', 'Rho'=>'929', 'Sigma'=>'931', 'Tau'=>'932', 'Upsilon'=>'933', 'Phi'=>'934', 'Chi'=>'935', 'Psi'=>'936', 'Omega'=>'937', 'alpha'=>'945', 'beta'=>'946', 'gamma'=>'947', 'delta'=>'948', 'epsilon'=>'949', 'zeta'=>'950', 'eta'=>'951', 'theta'=>'952', 'iota'=>'953', 'kappa'=>'954', 'lambda'=>'955', 'mu'=>'956', 'nu'=>'957', 'xi'=>'958', 'omicron'=>'959', 'pi'=>'960', 'rho'=>'961', 'sigmaf'=>'962', 'sigma'=>'963', 'tau'=>'964', 'upsilon'=>'965', 'phi'=>'966', 'chi'=>'967', 'psi'=>'968', 'omega'=>'969', 'thetasym'=>'977', 'upsih'=>'978', 'piv'=>'982', 'bull'=>'8226', 'hellip'=>'8230', 'prime'=>'8242', 'Prime'=>'8243', 'oline'=>'8254', 'frasl'=>'8260', 'weierp'=>'8472', 'image'=>'8465', 'real'=>'8476', 'trade'=>'8482', 'alefsym'=>'8501', 'larr'=>'8592', 'uarr'=>'8593', 'rarr'=>'8594', 'darr'=>'8595', 'harr'=>'8596', 'crarr'=>'8629', 'lArr'=>'8656', 'uArr'=>'8657', 'rArr'=>'8658', 'dArr'=>'8659', 'hArr'=>'8660', 'forall'=>'8704', 'part'=>'8706', 'exist'=>'8707', 'empty'=>'8709', 'nabla'=>'8711', 'isin'=>'8712', 'notin'=>'8713', 'ni'=>'8715', 'prod'=>'8719', 'sum'=>'8721', 'minus'=>'8722', 'lowast'=>'8727', 'radic'=>'8730', 'prop'=>'8733', 'infin'=>'8734', 'ang'=>'8736', 'and'=>'8743', 'or'=>'8744', 'cap'=>'8745', 'cup'=>'8746', 'int'=>'8747', 'there4'=>'8756', 'sim'=>'8764', 'cong'=>'8773', 'asymp'=>'8776', 'ne'=>'8800', 'equiv'=>'8801', 'le'=>'8804', 'ge'=>'8805', 'sub'=>'8834', 'sup'=>'8835', 'nsub'=>'8836', 'sube'=>'8838', 'supe'=>'8839', 'oplus'=>'8853', 'otimes'=>'8855', 'perp'=>'8869', 'sdot'=>'8901', 'lceil'=>'8968', 'rceil'=>'8969', 'lfloor'=>'8970', 'rfloor'=>'8971', 'lang'=>'9001', 'rang'=>'9002', 'loz'=>'9674', 'spades'=>'9824', 'clubs'=>'9827', 'hearts'=>'9829', 'diams'=>'9830', 'apos'=>'39', 'OElig'=>'338', 'oelig'=>'339', 'Scaron'=>'352', 'scaron'=>'353', 'Yuml'=>'376', 'circ'=>'710', 'tilde'=>'732', 'ensp'=>'8194', 'emsp'=>'8195', 'thinsp'=>'8201', 'zwnj'=>'8204', 'zwj'=>'8205', 'lrm'=>'8206', 'rlm'=>'8207', 'ndash'=>'8211', 'mdash'=>'8212', 'lsquo'=>'8216', 'rsquo'=>'8217', 'sbquo'=>'8218', 'ldquo'=>'8220', 'rdquo'=>'8221', 'bdquo'=>'8222', 'dagger'=>'8224', 'Dagger'=>'8225', 'permil'=>'8240', 'lsaquo'=>'8249', 'rsaquo'=>'8250', 'euro'=>'8364', 'nbsp'=>'160', 'iexcl'=>'161', 'cent'=>'162', 'pound'=>'163', 'curren'=>'164', 'yen'=>'165', 'brvbar'=>'166', 'sect'=>'167', 'uml'=>'168', 'copy'=>'169', 'ordf'=>'170', 'laquo'=>'171', 'not'=>'172', 'shy'=>'173', 'reg'=>'174', 'macr'=>'175', 'deg'=>'176', 'plusmn'=>'177', 'sup2'=>'178', 'sup3'=>'179', 'acute'=>'180', 'micro'=>'181', 'para'=>'182', 'middot'=>'183', 'cedil'=>'184', 'sup1'=>'185', 'ordm'=>'186', 'raquo'=>'187', 'frac14'=>'188', 'frac12'=>'189', 'frac34'=>'190', 'iquest'=>'191', 'Agrave'=>'192', 'Aacute'=>'193', 'Acirc'=>'194', 'Atilde'=>'195', 'Auml'=>'196', 'Aring'=>'197', 'AElig'=>'198', 'Ccedil'=>'199', 'Egrave'=>'200', 'Eacute'=>'201', 'Ecirc'=>'202', 'Euml'=>'203', 'Igrave'=>'204', 'Iacute'=>'205', 'Icirc'=>'206', 'Iuml'=>'207', 'ETH'=>'208', 'Ntilde'=>'209', 'Ograve'=>'210', 'Oacute'=>'211', 'Ocirc'=>'212', 'Otilde'=>'213', 'Ouml'=>'214', 'times'=>'215', 'Oslash'=>'216', 'Ugrave'=>'217', 'Uacute'=>'218', 'Ucirc'=>'219', 'Uuml'=>'220', 'Yacute'=>'221', 'THORN'=>'222', 'szlig'=>'223', 'agrave'=>'224', 'aacute'=>'225', 'acirc'=>'226', 'atilde'=>'227', 'auml'=>'228', 'aring'=>'229', 'aelig'=>'230', 'ccedil'=>'231', 'egrave'=>'232', 'eacute'=>'233', 'ecirc'=>'234', 'euml'=>'235', 'igrave'=>'236', 'iacute'=>'237', 'icirc'=>'238', 'iuml'=>'239', 'eth'=>'240', 'ntilde'=>'241', 'ograve'=>'242', 'oacute'=>'243', 'ocirc'=>'244', 'otilde'=>'245', 'ouml'=>'246', 'divide'=>'247', 'oslash'=>'248', 'ugrave'=>'249', 'uacute'=>'250', 'ucirc'=>'251', 'uuml'=>'252', 'yacute'=>'253', 'thorn'=>'254', 'yuml'=>'255'); -if($t[0] != '#'){ - return ($C['and_mark'] ? "\x06" : '&'). (isset($U[$t]) ? $t : (isset($N[$t]) ? (!$C['named_entity'] ? '#'. ($C['hexdec_entity'] > 1 ? 'x'. dechex($N[$t]) : $N[$t]) : $t) : 'amp;'. $t)). ';'; -} -if(($n = ctype_digit($t = substr($t, 1)) ? intval($t) : hexdec(substr($t, 1))) < 9 or ($n > 13 && $n < 32) or $n == 11 or $n == 12 or ($n > 126 && $n < 160 && $n != 133) or ($n > 55295 && ($n < 57344 or ($n > 64975 && $n < 64992) or $n == 65534 or $n == 65535 or $n > 1114111))){ - return ($C['and_mark'] ? "\x06" : '&'). "amp;#{$t};"; -} -return ($C['and_mark'] ? "\x06" : '&'). '#'. (((ctype_digit($t) && $C['hexdec_entity'] < 2) or !$C['hexdec_entity']) ? $n : 'x'. dechex($n)). ';'; -// eof -} - -function hl_prot($p, $c=null){ -// check URL scheme -global $C; -$b = $a = ''; -if($c == null){$c = 'style'; $b = $p[1]; $a = $p[3]; $p = trim($p[2]);} -$c = isset($C['schemes'][$c]) ? $C['schemes'][$c] : $C['schemes']['*']; -static $d = 'denied:'; -if(isset($c['!']) && substr($p, 0, 7) != $d){$p = "$d$p";} -if(isset($c['*']) or !strcspn($p, '#?;') or (substr($p, 0, 7) == $d)){return "{$b}{$p}{$a}";} // All ok, frag, query, param -if(preg_match('`^([a-z\d\-+.&#; ]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $m) && !isset($c[strtolower($m[1])])){ // Denied prot - return "{$b}{$d}{$p}{$a}"; -} -if($C['abs_url']){ - if($C['abs_url'] == -1 && strpos($p, $C['base_url']) === 0){ // Make url rel - $p = substr($p, strlen($C['base_url'])); - }elseif(empty($m[1])){ // Make URL abs - if(substr($p, 0, 2) == '//'){$p = substr($C['base_url'], 0, strpos($C['base_url'], ':')+1). $p;} - elseif($p[0] == '/'){$p = preg_replace('`(^.+?://[^/]+)(.*)`', '$1', $C['base_url']). $p;} - elseif(strcspn($p, './')){$p = $C['base_url']. $p;} - else{ - preg_match('`^([a-zA-Z\d\-+.]+://[^/]+)(.*)`', $C['base_url'], $m); - $p = preg_replace('`(?<=/)\./`', '', $m[2]. $p); - while(preg_match('`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`', $p)){ - $p = preg_replace('`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`', '', $p); - } - $p = $m[1]. $p; - } - } -} -return "{$b}{$p}{$a}"; -// eof -} - -function hl_regex($p){ -// ?regex -if(empty($p)){return 0;} -if($t = ini_get('track_errors')){$o = isset($php_errormsg) ? $php_errormsg : null;} -else{ini_set('track_errors', 1);} -unset($php_errormsg); -if(($d = ini_get('display_errors'))){ini_set('display_errors', 0);} -preg_match($p, ''); -if($d){ini_set('display_errors', 1);} -$r = isset($php_errormsg) ? 0 : 1; -if($t){$php_errormsg = isset($o) ? $o : null;} -else{ini_set('track_errors', 0);} -return $r; -// eof -} - -function hl_spec($t){ -// final $spec -$s = array(); -$t = str_replace(array("\t", "\r", "\n", ' '), '', preg_replace('/"(?>(`.|[^"])*)"/sme', 'substr(str_replace(array(";", "|", "~", " ", ",", "/", "(", ")", \'`"\'), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", "\""), "$0"), 1, -1)', trim($t))); -for($i = count(($t = explode(';', $t))); --$i>=0;){ - $w = $t[$i]; - if(empty($w) or ($e = strpos($w, '=')) === false or !strlen(($a = substr($w, $e+1)))){continue;} - $y = $n = array(); - foreach(explode(',', $a) as $v){ - if(!preg_match('`^([a-z:\-\*]+)(?:\((.*?)\))?`i', $v, $m)){continue;} - if(($x = strtolower($m[1])) == '-*'){$n['*'] = 1; continue;} - if($x[0] == '-'){$n[substr($x, 1)] = 1; continue;} - if(!isset($m[2])){$y[$x] = 1; continue;} - foreach(explode('/', $m[2]) as $m){ - if(empty($m) or ($p = strpos($m, '=')) == 0 or $p < 5){$y[$x] = 1; continue;} - $y[$x][strtolower(substr($m, 0, $p))] = str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08"), array(";", "|", "~", " ", ",", "/", "(", ")"), substr($m, $p+1)); - } - if(isset($y[$x]['match']) && !hl_regex($y[$x]['match'])){unset($y[$x]['match']);} - if(isset($y[$x]['nomatch']) && !hl_regex($y[$x]['nomatch'])){unset($y[$x]['nomatch']);} - } - if(!count($y) && !count($n)){continue;} - foreach(explode(',', substr($w, 0, $e)) as $v){ - if(!strlen(($v = strtolower($v)))){continue;} - if(count($y)){$s[$v] = $y;} - if(count($n)){$s[$v]['n'] = $n;} - } -} -return $s; -// eof -} - -function hl_tag($t){ -// tag/attribute handler -global $C; -$t = $t[0]; -// invalid < > -if($t == '< '){return '< ';} -if($t == '>'){return '>';} -if(!preg_match('`^<(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>$`m', $t, $m)){ - return str_replace(array('<', '>'), array('<', '>'), $t); -}elseif(!isset($C['elements'][($e = strtolower($m[2]))])){ - return (($C['keep_bad']%2) ? str_replace(array('<', '>'), array('<', '>'), $t) : ''); -} -// attr string -$a = str_replace(array("\n", "\r", "\t"), ' ', trim($m[3])); -// tag transform -static $eD = array('applet'=>1, 'center'=>1, 'dir'=>1, 'embed'=>1, 'font'=>1, 'isindex'=>1, 'menu'=>1, 's'=>1, 'strike'=>1, 'u'=>1); // Deprecated -if($C['make_tag_strict'] && isset($eD[$e])){ - $trt = hl_tag2($e, $a, $C['make_tag_strict']); - if(!$e){return (($C['keep_bad']%2) ? str_replace(array('<', '>'), array('<', '>'), $t) : '');} -} -// close tag -static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty ele -if(!empty($m[1])){ - return (!isset($eE[$e]) ? (empty($C['hook_tag']) ? "" : $C['hook_tag']($e)) : (($C['keep_bad'])%2 ? str_replace(array('<', '>'), array('<', '>'), $t) : '')); -} - -// open tag & attr -static $aN = array('abbr'=>array('td'=>1, 'th'=>1), 'accept-charset'=>array('form'=>1), 'accept'=>array('form'=>1, 'input'=>1), 'accesskey'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'legend'=>1, 'textarea'=>1), 'action'=>array('form'=>1), 'align'=>array('caption'=>1, 'embed'=>1, 'applet'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'legend'=>1, 'table'=>1, 'hr'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'p'=>1, 'col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'alt'=>array('applet'=>1, 'area'=>1, 'img'=>1, 'input'=>1), 'archive'=>array('applet'=>1, 'object'=>1), 'axis'=>array('td'=>1, 'th'=>1), 'bgcolor'=>array('embed'=>1, 'table'=>1, 'tr'=>1, 'td'=>1, 'th'=>1), 'border'=>array('table'=>1, 'img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'cellpadding'=>array('table'=>1), 'cellspacing'=>array('table'=>1), 'char'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charoff'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charset'=>array('a'=>1, 'script'=>1), 'checked'=>array('input'=>1), 'cite'=>array('blockquote'=>1, 'q'=>1, 'del'=>1, 'ins'=>1), 'classid'=>array('object'=>1), 'clear'=>array('br'=>1), 'code'=>array('applet'=>1), 'codebase'=>array('object'=>1, 'applet'=>1), 'codetype'=>array('object'=>1), 'color'=>array('font'=>1), 'cols'=>array('textarea'=>1), 'colspan'=>array('td'=>1, 'th'=>1), 'compact'=>array('dir'=>1, 'dl'=>1, 'menu'=>1, 'ol'=>1, 'ul'=>1), 'coords'=>array('area'=>1, 'a'=>1), 'data'=>array('object'=>1), 'datetime'=>array('del'=>1, 'ins'=>1), 'declare'=>array('object'=>1), 'defer'=>array('script'=>1), 'dir'=>array('bdo'=>1), 'disabled'=>array('button'=>1, 'input'=>1, 'optgroup'=>1, 'option'=>1, 'select'=>1, 'textarea'=>1), 'enctype'=>array('form'=>1), 'face'=>array('font'=>1), 'flashvars'=>array('embed'=>1), 'for'=>array('label'=>1), 'frame'=>array('table'=>1), 'frameborder'=>array('iframe'=>1), 'headers'=>array('td'=>1, 'th'=>1), 'height'=>array('embed'=>1, 'iframe'=>1, 'td'=>1, 'th'=>1, 'img'=>1, 'object'=>1, 'applet'=>1), 'href'=>array('a'=>1, 'area'=>1), 'hreflang'=>array('a'=>1), 'hspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'ismap'=>array('img'=>1, 'input'=>1), 'label'=>array('option'=>1, 'optgroup'=>1), 'language'=>array('script'=>1), 'longdesc'=>array('img'=>1, 'iframe'=>1), 'marginheight'=>array('iframe'=>1), 'marginwidth'=>array('iframe'=>1), 'maxlength'=>array('input'=>1), 'method'=>array('form'=>1), 'model'=>array('embed'=>1), 'multiple'=>array('select'=>1), 'name'=>array('button'=>1, 'embed'=>1, 'textarea'=>1, 'applet'=>1, 'select'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'a'=>1, 'input'=>1, 'object'=>1, 'map'=>1, 'param'=>1), 'nohref'=>array('area'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'object'=>array('applet'=>1), 'onblur'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onchange'=>array('input'=>1, 'select'=>1, 'textarea'=>1), 'onfocus'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onreset'=>array('form'=>1), 'onselect'=>array('input'=>1, 'textarea'=>1), 'onsubmit'=>array('form'=>1), 'pluginspage'=>array('embed'=>1), 'pluginurl'=>array('embed'=>1), 'prompt'=>array('isindex'=>1), 'readonly'=>array('textarea'=>1, 'input'=>1), 'rel'=>array('a'=>1), 'rev'=>array('a'=>1), 'rows'=>array('textarea'=>1), 'rowspan'=>array('td'=>1, 'th'=>1), 'rules'=>array('table'=>1), 'scope'=>array('td'=>1, 'th'=>1), 'scrolling'=>array('iframe'=>1), 'selected'=>array('option'=>1), 'shape'=>array('area'=>1, 'a'=>1), 'size'=>array('hr'=>1, 'font'=>1, 'input'=>1, 'select'=>1), 'span'=>array('col'=>1, 'colgroup'=>1), 'src'=>array('embed'=>1, 'script'=>1, 'input'=>1, 'iframe'=>1, 'img'=>1), 'standby'=>array('object'=>1), 'start'=>array('ol'=>1), 'summary'=>array('table'=>1), 'tabindex'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'object'=>1, 'select'=>1, 'textarea'=>1), 'target'=>array('a'=>1, 'area'=>1, 'form'=>1), 'type'=>array('a'=>1, 'embed'=>1, 'object'=>1, 'param'=>1, 'script'=>1, 'input'=>1, 'li'=>1, 'ol'=>1, 'ul'=>1, 'button'=>1), 'usemap'=>array('img'=>1, 'input'=>1, 'object'=>1), 'valign'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'value'=>array('input'=>1, 'option'=>1, 'param'=>1, 'button'=>1, 'li'=>1), 'valuetype'=>array('param'=>1), 'vspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'width'=>array('embed'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'object'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'applet'=>1, 'col'=>1, 'colgroup'=>1, 'pre'=>1), 'wmode'=>array('embed'=>1), 'xml:space'=>array('pre'=>1, 'script'=>1, 'style'=>1)); // Ele-specific -static $aNE = array('checked'=>1, 'compact'=>1, 'declare'=>1, 'defer'=>1, 'disabled'=>1, 'ismap'=>1, 'multiple'=>1, 'nohref'=>1, 'noresize'=>1, 'noshade'=>1, 'nowrap'=>1, 'readonly'=>1, 'selected'=>1); // Empty -static $aNP = array('action'=>1, 'cite'=>1, 'classid'=>1, 'codebase'=>1, 'data'=>1, 'href'=>1, 'longdesc'=>1, 'model'=>1, 'pluginspage'=>1, 'pluginurl'=>1, 'usemap'=>1); // Need scheme check; excludes style, on* & src -static $aNU = array('class'=>array('param'=>1, 'script'=>1), 'dir'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'id'=>array('script'=>1), 'lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'xml:lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'onclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'ondblclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeydown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeypress'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeyup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousedown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousemove'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseout'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseover'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'style'=>array('param'=>1, 'script'=>1), 'title'=>array('param'=>1, 'script'=>1)); // Univ & exceptions - -if($C['lc_std_val']){ - // predef attr vals for $eAL & $aNE ele - static $aNL = array('all'=>1, 'baseline'=>1, 'bottom'=>1, 'button'=>1, 'center'=>1, 'char'=>1, 'checkbox'=>1, 'circle'=>1, 'col'=>1, 'colgroup'=>1, 'cols'=>1, 'data'=>1, 'default'=>1, 'file'=>1, 'get'=>1, 'groups'=>1, 'hidden'=>1, 'image'=>1, 'justify'=>1, 'left'=>1, 'ltr'=>1, 'middle'=>1, 'none'=>1, 'object'=>1, 'password'=>1, 'poly'=>1, 'post'=>1, 'preserve'=>1, 'radio'=>1, 'rect'=>1, 'ref'=>1, 'reset'=>1, 'right'=>1, 'row'=>1, 'rowgroup'=>1, 'rows'=>1, 'rtl'=>1, 'submit'=>1, 'text'=>1, 'top'=>1); - static $eAL = array('a'=>1, 'area'=>1, 'bdo'=>1, 'button'=>1, 'col'=>1, 'form'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'xml:space'=>1); - $lcase = isset($eAL[$e]) ? 1 : 0; -} - -$depTr = 0; -if($C['no_deprecated_attr']){ - // dep attr:applicable ele - static $aND = array('align'=>array('caption'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'object'=>1, 'p'=>1, 'table'=>1), 'bgcolor'=>array('table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1), 'border'=>array('img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'clear'=>array('br'=>1), 'compact'=>array('dl'=>1, 'ol'=>1, 'ul'=>1), 'height'=>array('td'=>1, 'th'=>1), 'hspace'=>array('img'=>1, 'object'=>1), 'language'=>array('script'=>1), 'name'=>array('a'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'map'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'size'=>array('hr'=>1), 'start'=>array('ol'=>1), 'type'=>array('li'=>1, 'ol'=>1, 'ul'=>1), 'value'=>array('li'=>1), 'vspace'=>array('img'=>1, 'object'=>1), 'width'=>array('hr'=>1, 'pre'=>1, 'td'=>1, 'th'=>1)); - static $eAD = array('a'=>1, 'br'=>1, 'caption'=>1, 'div'=>1, 'dl'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'object'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'script'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1, 'ul'=>1); - $depTr = isset($eAD[$e]) ? 1 : 0; -} - -// attr name-vals -if(strpos($a, "\x01") !== false){$a = preg_replace('`\x01[^\x01]*\x01`', '', $a);} // No comment/CDATA sec -$mode = 0; $a = trim($a, ' /'); $aA = array(); -while(strlen($a)){ - $w = 0; - switch($mode){ - case 0: // Name - if(preg_match('`^[a-zA-Z][\-a-zA-Z:]+`', $a, $m)){ - $nm = strtolower($m[0]); - $w = $mode = 1; $a = ltrim(substr_replace($a, '', 0, strlen($m[0]))); - } - break; case 1: - if($a[0] == '='){ // = - $w = 1; $mode = 2; $a = ltrim($a, '= '); - }else{ // No val - $w = 1; $mode = 0; $a = ltrim($a); - $aA[$nm] = ''; - } - break; case 2: // Val - if(preg_match('`^((?:"[^"]*")|(?:\'[^\']*\')|(?:\s*[^\s"\']+))(.*)`', $a, $m)){ - $a = ltrim($m[2]); $m = $m[1]; $w = 1; $mode = 0; - $aA[$nm] = trim(($m[0] == '"' or $m[0] == '\'') ? substr($m, 1, -1) : $m); - } - break; - } - if($w == 0){ // Parse errs, deal with space, " & ' - $a = preg_replace('`^(?:"[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*`', '', $a); - $mode = 0; - } -} -if($mode == 1){$aA[$nm] = '';} - -// clean attrs -global $S; -$rl = isset($S[$e]) ? $S[$e] : array(); -$a = array(); $nfr = 0; -foreach($aA as $k=>$v){ - if(((isset($C['deny_attribute']['*']) ? isset($C['deny_attribute'][$k]) : !isset($C['deny_attribute'][$k])) && (isset($aN[$k][$e]) or (isset($aNU[$k]) && !isset($aNU[$k][$e]))) && !isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])){ - if(isset($aNE[$k])){$v = $k;} - elseif(!empty($lcase) && (($e != 'button' or $e != 'input') or $k == 'type')){ // Rather loose but ?not cause issues - $v = (isset($aNL[($v2 = strtolower($v))])) ? $v2 : $v; - } - if($k == 'style' && !$C['style_pass']){ - if(false !== strpos($v, '&#')){ - static $sC = array(' '=>' ', ' '=>' ', 'E'=>'e', 'E'=>'e', 'e'=>'e', 'e'=>'e', 'X'=>'x', 'X'=>'x', 'x'=>'x', 'x'=>'x', 'P'=>'p', 'P'=>'p', 'p'=>'p', 'p'=>'p', 'S'=>'s', 'S'=>'s', 's'=>'s', 's'=>'s', 'I'=>'i', 'I'=>'i', 'i'=>'i', 'i'=>'i', 'O'=>'o', 'O'=>'o', 'o'=>'o', 'o'=>'o', 'N'=>'n', 'N'=>'n', 'n'=>'n', 'n'=>'n', 'U'=>'u', 'U'=>'u', 'u'=>'u', 'u'=>'u', 'R'=>'r', 'R'=>'r', 'r'=>'r', 'r'=>'r', 'L'=>'l', 'L'=>'l', 'l'=>'l', 'l'=>'l', '('=>'(', '('=>'(', ')'=>')', ')'=>')', ' '=>':', ' '=>':', '"'=>'"', '"'=>'"', '''=>"'", '''=>"'", '/'=>'/', '/'=>'/', '*'=>'*', '*'=>'*', '\'=>'\\', '\'=>'\\'); - $v = strtr($v, $sC); - } - $v = preg_replace_callback('`(url(?:\()(?: )*(?:\'|"|&(?:quot|apos);)?)(.+?)((?:\'|"|&(?:quot|apos);)?(?: )*(?:\)))`iS', 'hl_prot', $v); - $v = !$C['css_expression'] ? preg_replace('`expression`i', ' ', preg_replace('`\\\\\S|(/|(%2f))(\*|(%2a))`i', ' ', $v)) : $v; - }elseif(isset($aNP[$k]) or strpos($k, 'src') !== false or $k[0] == 'o'){ - $v = str_replace("\xad", ' ', (strpos($v, '&') !== false ? str_replace(array('­', '­', '­'), ' ', $v) : $v)); - $v = hl_prot($v, $k); - if($k == 'href'){ // X-spam - if($C['anti_mail_spam'] && strpos($v, 'mailto:') === 0){ - $v = str_replace('@', htmlspecialchars($C['anti_mail_spam']), $v); - }elseif($C['anti_link_spam']){ - $r1 = $C['anti_link_spam'][1]; - if(!empty($r1) && preg_match($r1, $v)){continue;} - $r0 = $C['anti_link_spam'][0]; - if(!empty($r0) && preg_match($r0, $v)){ - if(isset($a['rel'])){ - if(!preg_match('`\bnofollow\b`i', $a['rel'])){$a['rel'] .= ' nofollow';} - }elseif(isset($aA['rel'])){ - if(!preg_match('`\bnofollow\b`i', $aA['rel'])){$nfr = 1;} - }else{$a['rel'] = 'nofollow';} - } - } - } - } - if(isset($rl[$k]) && is_array($rl[$k]) && ($v = hl_attrval($v, $rl[$k])) === 0){continue;} - $a[$k] = str_replace('"', '"', $v); - } -} -if($nfr){$a['rel'] = isset($a['rel']) ? $a['rel']. ' nofollow' : 'nofollow';} - -// rqd attr -static $eAR = array('area'=>array('alt'=>'area'), 'bdo'=>array('dir'=>'ltr'), 'form'=>array('action'=>''), 'img'=>array('src'=>'', 'alt'=>'image'), 'map'=>array('name'=>''), 'optgroup'=>array('label'=>''), 'param'=>array('name'=>''), 'script'=>array('type'=>'text/javascript'), 'textarea'=>array('rows'=>'10', 'cols'=>'50')); -if(isset($eAR[$e])){ - foreach($eAR[$e] as $k=>$v){ - if(!isset($a[$k])){$a[$k] = isset($v[0]) ? $v : $k;} - } -} - -// depr attrs -if($depTr){ - $c = array(); - foreach($a as $k=>$v){ - if($k == 'style' or !isset($aND[$k][$e])){continue;} - if($k == 'align'){ - unset($a['align']); - if($e == 'img' && ($v == 'left' or $v == 'right')){$c[] = 'float: '. $v;} - elseif(($e == 'div' or $e == 'table') && $v == 'center'){$c[] = 'margin: auto';} - else{$c[] = 'text-align: '. $v;} - }elseif($k == 'bgcolor'){ - unset($a['bgcolor']); - $c[] = 'background-color: '. $v; - }elseif($k == 'border'){ - unset($a['border']); $c[] = "border: {$v}px"; - }elseif($k == 'bordercolor'){ - unset($a['bordercolor']); $c[] = 'border-color: '. $v; - }elseif($k == 'clear'){ - unset($a['clear']); $c[] = 'clear: '. ($v != 'all' ? $v : 'both'); - }elseif($k == 'compact'){ - unset($a['compact']); $c[] = 'font-size: 85%'; - }elseif($k == 'height' or $k == 'width'){ - unset($a[$k]); $c[] = $k. ': '. ($v[0] != '*' ? $v. (ctype_digit($v) ? 'px' : '') : 'auto'); - }elseif($k == 'hspace'){ - unset($a['hspace']); $c[] = "margin-left: {$v}px; margin-right: {$v}px"; - }elseif($k == 'language' && !isset($a['type'])){ - unset($a['language']); - $a['type'] = 'text/'. strtolower($v); - }elseif($k == 'name'){ - if($C['no_deprecated_attr'] == 2 or ($e != 'a' && $e != 'map')){unset($a['name']);} - if(!isset($a['id']) && preg_match('`[a-zA-Z][a-zA-Z\d.:_\-]*`', $v)){$a['id'] = $v;} - }elseif($k == 'noshade'){ - unset($a['noshade']); $c[] = 'border-style: none; border: 0; background-color: gray; color: gray'; - }elseif($k == 'nowrap'){ - unset($a['nowrap']); $c[] = 'white-space: nowrap'; - }elseif($k == 'size'){ - unset($a['size']); $c[] = 'size: '. $v. 'px'; - }elseif($k == 'start' or $k == 'value'){ - unset($a[$k]); - }elseif($k == 'type'){ - unset($a['type']); - static $ol_type = array('i'=>'lower-roman', 'I'=>'upper-roman', 'a'=>'lower-latin', 'A'=>'upper-latin', '1'=>'decimal'); - $c[] = 'list-style-type: '. (isset($ol_type[$v]) ? $ol_type[$v] : 'decimal'); - }elseif($k == 'vspace'){ - unset($a['vspace']); $c[] = "margin-top: {$v}px; margin-bottom: {$v}px"; - } - } - if(count($c)){ - $c = implode('; ', $c); - $a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;'). '; '. $c. ';': $c. ';'; - } -} -// unique ID -if($C['unique_ids'] && isset($a['id'])){ - if(!preg_match('`^[A-Za-z][A-Za-z0-9_\-.:]*$`', ($id = $a['id'])) or (isset($GLOBALS['hl_Ids'][$id]) && $C['unique_ids'] == 1)){unset($a['id']); - }else{ - while(isset($GLOBALS['hl_Ids'][$id])){$id = $C['unique_ids']. $id;} - $GLOBALS['hl_Ids'][($a['id'] = $id)] = 1; - } -} -// xml:lang -if($C['xml:lang'] && isset($a['lang'])){ - $a['xml:lang'] = isset($a['xml:lang']) ? $a['xml:lang'] : $a['lang']; - if($C['xml:lang'] == 2){unset($a['lang']);} -} -// for transformed tag -if(!empty($trt)){ - $a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;'). '; '. $trt : $trt; -} -// return with empty ele / -if(empty($C['hook_tag'])){ - $aA = ''; - foreach($a as $k=>$v){$aA .= " {$k}=\"{$v}\"";} - return "<{$e}{$aA}". (isset($eE[$e]) ? ' /' : ''). '>'; -} -else{return $C['hook_tag']($e, $a);} -// eof -} - -function hl_tag2(&$e, &$a, $t=1){ -// transform tag -if($e == 'center'){$e = 'div'; return 'text-align: center;';} -if($e == 'dir' or $e == 'menu'){$e = 'ul'; return '';} -if($e == 's' or $e == 'strike'){$e = 'span'; return 'text-decoration: line-through;';} -if($e == 'u'){$e = 'span'; return 'text-decoration: underline;';} -static $fs = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%'); -if($e == 'font'){ - $a2 = ''; - if(preg_match('`face\s*=\s*(\'|")([^=]+?)\\1`i', $a, $m) or preg_match('`face\s*=(\s*)(\S+)`i', $a, $m)){ - $a2 .= ' font-family: '. str_replace('"', '\'', trim($m[2])). ';'; - } - if(preg_match('`color\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m)){ - $a2 .= ' color: '. trim($m[2]). ';'; - } - if(preg_match('`size\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m) && isset($fs[($m = trim($m[2]))])){ - $a2 .= ' font-size: '. $fs[$m]. ';'; - } - $e = 'span'; return ltrim($a2); -} -if($t == 2){$e = 0; return 0;} -return ''; -// eof -} - -function hl_tidy($t, $w, $p){ -// Tidy/compact HTM -if(strpos(' pre,script,textarea', "$p,")){return $t;} -$t = str_replace(' ]*(?)\s+`', '`\s+`', '`(<\w[^>]*(?) `'), array(' $1', ' ', '$1'), preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)()`sm'), create_function('$m', 'return $m[1]. str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). $m[4];'), $t))); -if(($w = strtolower($w)) == -1){ - return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t); -} -$s = strpos(" $w", 't') ? "\t" : ' '; -$s = preg_match('`\d`', $w, $m) ? str_repeat($s, $m[0]) : str_repeat($s, ($s == "\t" ? 1 : 2)); -$N = preg_match('`[ts]([1-9])`', $w, $m) ? $m[1] : 0; -$a = array('br'=>1); -$b = array('button'=>1, 'input'=>1, 'option'=>1); -$c = array('caption'=>1, 'dd'=>1, 'dt'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'isindex'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'object'=>1, 'p'=>1, 'pre'=>1, 'td'=>1, 'textarea'=>1, 'th'=>1); -$d = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'colgroup'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'fieldset'=>1, 'form'=>1, 'hr'=>1, 'iframe'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1); -$T = explode('<', $t); -$X = 1; -while($X){ - $n = $N; - $t = $T; - ob_start(); - if(isset($d[$p])){echo str_repeat($s, ++$n);} - echo ltrim(array_shift($t)); - for($i=-1, $j=count($t); ++$i<$j;){ - $r = ''; list($e, $r) = explode('>', $t[$i]); - $x = $e[0] == '/' ? 0 : (substr($e, -1) == '/' ? 1 : ($e[0] != '!' ? 2 : -1)); - $y = !$x ? ltrim($e, '/') : ($x > 0 ? substr($e, 0, strcspn($e, ' ')) : 0); - $e = "<$e>"; - if(isset($d[$y])){ - if(!$x){ - if($n){echo "\n", str_repeat($s, --$n), "$e\n", str_repeat($s, $n);} - else{++$N; ob_end_clean(); continue 2;} - } - else{echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, ($x != 1 ? ++$n : $n));} - echo ltrim($r); continue; - } - $f = "\n". str_repeat($s, $n); - if(isset($c[$y])){ - if(!$x){echo $e, $f, ltrim($r);} - else{echo $f, $e, $r;} - }elseif(isset($b[$y])){echo $f, $e, $r; - }elseif(isset($a[$y])){echo $e, $f, ltrim($r); - }elseif(!$y){echo $f, $e, $f, ltrim($r); - }else{echo $e, $r;} - } - $X = 0; -} -$t = preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents()); -ob_end_clean(); -if(($l = strpos(" $w", 'r') ? (strpos(" $w", 'n') ? "\r\n" : "\r") : 0)){ - $t = str_replace("\n", $l, $t); -} -return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t); -// eof -} - -function hl_version(){ -// rel -return '1.1.14'; -// eof -} - -function kses($t, $h, $p=array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto')){ -// kses compat -foreach($h as $k=>$v){ - $h[$k]['n']['*'] = 1; -} -$C['cdata'] = $C['comment'] = $C['make_tag_strict'] = $C['no_deprecated_attr'] = $C['unique_ids'] = 0; -$C['keep_bad'] = 1; -$C['elements'] = count($h) ? strtolower(implode(',', array_keys($h))) : '-*'; -$C['hook'] = 'kses_hook'; -$C['schemes'] = '*:'. implode(',', $p); -return htmLawed($t, $C, $h); -// eof -} - -function kses_hook($t, &$C, &$S){ -// kses compat -return $t; -// eof -} \ No newline at end of file -- cgit v1.2.3-54-g00ecf From 57e0ae9d4bcd37c5c4f9eaebabeaf12eb833bf71 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Mar 2013 11:14:46 +0400 Subject: remove include of htmlawed file --- include/functions.php | 1 - 1 file changed, 1 deletion(-) (limited to 'include/functions.php') diff --git a/include/functions.php b/include/functions.php index 341177b0a..7fa200322 100644 --- a/include/functions.php +++ b/include/functions.php @@ -110,7 +110,6 @@ ini_set('user_agent', SELF_USER_AGENT); require_once 'lib/pubsubhubbub/publisher.php'; - require_once 'lib/htmLawed.php'; $tz_offset = -1; $utc_tz = new DateTimeZone('UTC'); -- cgit v1.2.3-54-g00ecf From 55783ca45c7d87a671796842386afd0f5eb31b89 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Mar 2013 12:42:39 +0400 Subject: remove duplicate gzdecode --- include/functions.php | 119 -------------------------------------------------- 1 file changed, 119 deletions(-) (limited to 'include/functions.php') diff --git a/include/functions.php b/include/functions.php index 7fa200322..0e5d15eaf 100644 --- a/include/functions.php +++ b/include/functions.php @@ -4029,123 +4029,4 @@ return $rv; } - // http://php.net/gzencode - function gzdecode($data) { - $len = strlen($data); - if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) { - return null; // Not GZIP format (See RFC 1952) - } - $method = ord(substr($data,2,1)); // Compression method - $flags = ord(substr($data,3,1)); // Flags - if ($flags & 31 != $flags) { - // Reserved bits are set -- NOT ALLOWED by RFC 1952 - return null; - } - // NOTE: $mtime may be negative (PHP integer limitations) - $mtime = unpack("V", substr($data,4,4)); - $mtime = $mtime[1]; - $xfl = substr($data,8,1); - $os = substr($data,8,1); - $headerlen = 10; - $extralen = 0; - $extra = ""; - if ($flags & 4) { - // 2-byte length prefixed EXTRA data in header - if ($len - $headerlen - 2 < 8) { - return false; // Invalid format - } - $extralen = unpack("v",substr($data,8,2)); - $extralen = $extralen[1]; - if ($len - $headerlen - 2 - $extralen < 8) { - return false; // Invalid format - } - $extra = substr($data,10,$extralen); - $headerlen += 2 + $extralen; - } - - $filenamelen = 0; - $filename = ""; - if ($flags & 8) { - // C-style string file NAME data in header - if ($len - $headerlen - 1 < 8) { - return false; // Invalid format - } - $filenamelen = strpos(substr($data,8+$extralen),chr(0)); - if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) { - return false; // Invalid format - } - $filename = substr($data,$headerlen,$filenamelen); - $headerlen += $filenamelen + 1; - } - - $commentlen = 0; - $comment = ""; - if ($flags & 16) { - // C-style string COMMENT data in header - if ($len - $headerlen - 1 < 8) { - return false; // Invalid format - } - $commentlen = strpos(substr($data,8+$extralen+$filenamelen),chr(0)); - if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) { - return false; // Invalid header format - } - $comment = substr($data,$headerlen,$commentlen); - $headerlen += $commentlen + 1; - } - - $headercrc = ""; - if ($flags & 1) { - // 2-bytes (lowest order) of CRC32 on header present - if ($len - $headerlen - 2 < 8) { - return false; // Invalid format - } - $calccrc = crc32(substr($data,0,$headerlen)) & 0xffff; - $headercrc = unpack("v", substr($data,$headerlen,2)); - $headercrc = $headercrc[1]; - if ($headercrc != $calccrc) { - return false; // Bad header CRC - } - $headerlen += 2; - } - - // GZIP FOOTER - These be negative due to PHP's limitations - $datacrc = unpack("V",substr($data,-8,4)); - $datacrc = $datacrc[1]; - $isize = unpack("V",substr($data,-4)); - $isize = $isize[1]; - - // Perform the decompression: - $bodylen = $len-$headerlen-8; - if ($bodylen < 1) { - // This should never happen - IMPLEMENTATION BUG! - return null; - } - $body = substr($data,$headerlen,$bodylen); - $data = ""; - if ($bodylen > 0) { - switch ($method) { - case 8: - // Currently the only supported compression method: - $data = gzinflate($body); - break; - default: - // Unknown compression method - return false; - } - } else { - // I'm not sure if zero-byte body content is allowed. - // Allow it for now... Do nothing... - } - - // Verifiy decompressed size and CRC32: - // NOTE: This may fail with large data sizes depending on how - // PHP's integer limitations affect strlen() since $isize - // may be negative for large sizes. - if ($isize != strlen($data) || crc32($data) != $datacrc) { - // Bad format! Length or CRC doesn't match! - return false; - } - return $data; - } - ?> -- cgit v1.2.3-54-g00ecf From 4f7d69e1856a611025f53eef273e5af039d9aa16 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Mar 2013 12:49:55 +0400 Subject: detect whether browser supports iframe.sandbox and allow iframes accordingly; allow object and embed elements --- classes/rpc.php | 1 + include/functions.php | 4 +++- js/tt-rss.js | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/functions.php') diff --git a/classes/rpc.php b/classes/rpc.php index 5d77b1ae8..b297bbade 100644 --- a/classes/rpc.php +++ b/classes/rpc.php @@ -285,6 +285,7 @@ class RPC extends Handler_Protected { function sanityCheck() { $_SESSION["hasAudio"] = $_REQUEST["hasAudio"] === "true"; + $_SESSION["hasSandbox"] = $_REQUEST["hasSandbox"] === "true"; $reply = array(); diff --git a/include/functions.php b/include/functions.php index 0e5d15eaf..50bdc13ae 100644 --- a/include/functions.php +++ b/include/functions.php @@ -2626,7 +2626,9 @@ $allowed_elements = array('p', 'br', 'div', 'table', 'tr', 'td', 'th', 'ul', 'ol', 'li', 'blockquote', 'span', 'html', 'body', 'a', 'img', - 'iframe', 'video', 'audio', 'source'); + 'video', 'audio', 'source', 'object', 'embed'); + + if ($_SESSION['hasSandbox']) array_push($allowed_elements, 'iframe'); $disallowed_attributes = array('id', 'style', 'class'); diff --git a/js/tt-rss.js b/js/tt-rss.js index 5ada64d31..5968f58eb 100644 --- a/js/tt-rss.js +++ b/js/tt-rss.js @@ -244,9 +244,11 @@ function init() { loading_set_progress(20); var hasAudio = !!((myAudioTag = document.createElement('audio')).canPlayType); + var hasSandbox = "sandbox" in document.createElement("iframe"); new Ajax.Request("backend.php", { - parameters: {op: "rpc", method: "sanityCheck", hasAudio: hasAudio}, + parameters: {op: "rpc", method: "sanityCheck", hasAudio: hasAudio, + hasSandbox: hasSandbox}, onComplete: function(transport) { backend_sanity_check_callback(transport); } }); -- cgit v1.2.3-54-g00ecf From 10b55a120ce1c937325921b699cd6264d5c35e68 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Mar 2013 14:32:26 +0400 Subject: strip_harmful_tags: expand the allowed list --- include/functions.php | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/functions.php') diff --git a/include/functions.php b/include/functions.php index 50bdc13ae..8dba74618 100644 --- a/include/functions.php +++ b/include/functions.php @@ -2624,9 +2624,15 @@ function strip_harmful_tags($doc) { $entries = $doc->getElementsByTagName("*"); - $allowed_elements = array('p', 'br', 'div', 'table', 'tr', 'td', 'th', - 'ul', 'ol', 'li', 'blockquote', 'span', 'html', 'body', 'a', 'img', - 'video', 'audio', 'source', 'object', 'embed'); + $allowed_elements = array('a', 'address', 'audio', + 'b', 'big', 'blockquote', 'body', 'br', 'cite', + 'code', 'dd', 'del', 'details', 'div', 'dl', + 'dt', 'em', 'footer', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', + 'header', 'html', 'i', 'iframe', 'img', 'ins', 'kbd', + 'li', 'nav', 'ol', 'p', 'pre', 'q', 's','small', + 'source', 'span', 'strike', 'strong', 'sub', 'summary', + 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', + 'tr', 'track', 'tt', 'u', 'ul', 'var', 'wbr', 'video' ); if ($_SESSION['hasSandbox']) array_push($allowed_elements, 'iframe'); -- cgit v1.2.3-54-g00ecf From 6e61104d78ee30e5927e61a1a2273ca99e60ad98 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Mar 2013 15:12:22 +0400 Subject: strip_harmful_tags: remove double iframe --- include/functions.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/functions.php') diff --git a/include/functions.php b/include/functions.php index 8dba74618..3793e0426 100644 --- a/include/functions.php +++ b/include/functions.php @@ -2628,7 +2628,7 @@ 'b', 'big', 'blockquote', 'body', 'br', 'cite', 'code', 'dd', 'del', 'details', 'div', 'dl', 'dt', 'em', 'footer', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', - 'header', 'html', 'i', 'iframe', 'img', 'ins', 'kbd', + 'header', 'html', 'i', 'img', 'ins', 'kbd', 'li', 'nav', 'ol', 'p', 'pre', 'q', 's','small', 'source', 'span', 'strike', 'strong', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', -- cgit v1.2.3-54-g00ecf From e9b86f0a1c6165f4f06b1f7909889abfea96b620 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Mar 2013 18:58:42 +0400 Subject: reimplement HOOK_SANITIZE (plugs before final removal of harmful tags) --- classes/pluginhost.php | 1 + include/functions.php | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/functions.php') diff --git a/classes/pluginhost.php b/classes/pluginhost.php index 592629881..79544b01b 100644 --- a/classes/pluginhost.php +++ b/classes/pluginhost.php @@ -21,6 +21,7 @@ class PluginHost { const HOOK_RENDER_ARTICLE = 10; const HOOK_RENDER_ARTICLE_CDM = 11; const HOOK_FEED_FETCHED = 12; + const HOOK_SANITIZE = 13; const KIND_ALL = 1; const KIND_SYSTEM = 2; diff --git a/include/functions.php b/include/functions.php index 3793e0426..d1743af7a 100644 --- a/include/functions.php +++ b/include/functions.php @@ -2615,6 +2615,14 @@ $entry->setAttribute('sandbox', true); } + global $pluginhost; + + if (isset($pluginhost)) { + foreach ($pluginhost->get_hooks($pluginhost::HOOK_SANITIZE) as $plugin) { + $doc = $plugin->hook_sanitize($doc, $site_url); + } + } + $doc->removeChild($doc->firstChild); //remove doctype $doc = strip_harmful_tags($doc); $res = $doc->saveHTML(); -- cgit v1.2.3-54-g00ecf From d2db81a5f67dfc13bab7cf379d1182f7aa794eb1 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Mar 2013 19:13:56 +0400 Subject: improve hotkey buffer scrolling speed; bind viewport scrolling to shift-arrows --- include/functions.php | 2 ++ js/tt-rss.js | 8 ++++++-- js/viewfeed.js | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/functions.php') diff --git a/include/functions.php b/include/functions.php index d1743af7a..4eff1149e 100644 --- a/include/functions.php +++ b/include/functions.php @@ -1946,6 +1946,8 @@ "c n" => "catchup_above", "*n" => "article_scroll_down", "*p" => "article_scroll_up", + "*(38)|Shift+up" => "article_scroll_up", + "*(40)|Shift+down" => "article_scroll_down", "a *w" => "toggle_widescreen", "e" => "email_article", "a q" => "close_article", diff --git a/js/tt-rss.js b/js/tt-rss.js index a8552d173..e9bc9d412 100644 --- a/js/tt-rss.js +++ b/js/tt-rss.js @@ -659,10 +659,14 @@ function hotkey_handler(e) { catchupRelativeToArticle(0); return false; case "article_scroll_down": - scrollArticle(50); + var ctr = $("content_insert") ? $("content_insert") : $("headlines-frame"); + + scrollArticle(ctr.offsetHeight/3); return false; case "article_scroll_up": - scrollArticle(-50); + var ctr = $("content_insert") ? $("content_insert") : $("headlines-frame"); + + scrollArticle(-ctr.offsetHeight/3); return false; case "close_article": closeArticlePanel(); diff --git a/js/viewfeed.js b/js/viewfeed.js index 622a8109f..9a16befff 100644 --- a/js/viewfeed.js +++ b/js/viewfeed.js @@ -560,11 +560,11 @@ function moveToPost(mode, noscroll) { var ctr = $("headlines-frame"); if (!noscroll && article && article.offsetTop < ctr.scrollTop) { - scrollArticle(-ctr.offsetHeight/2); + scrollArticle(-ctr.offsetHeight/3); } else if (!noscroll && prev_article && prev_article.offsetTop < ctr.scrollTop) { cdmExpandArticle(prev_id); - scrollArticle(-ctr.offsetHeight/2); + scrollArticle(-ctr.offsetHeight/3); } else if (prev_id) { cdmExpandArticle(prev_id); cdmScrollToArticleId(prev_id, noscroll); -- cgit v1.2.3-54-g00ecf From 2229e6ed6b07d4a28b04689a21c645bdb83652f7 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Mar 2013 21:57:29 +0400 Subject: strip_harmful_tags: allow article --- include/functions.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/functions.php') diff --git a/include/functions.php b/include/functions.php index 4eff1149e..affd4d4dc 100644 --- a/include/functions.php +++ b/include/functions.php @@ -2634,7 +2634,7 @@ function strip_harmful_tags($doc) { $entries = $doc->getElementsByTagName("*"); - $allowed_elements = array('a', 'address', 'audio', + $allowed_elements = array('a', 'address', 'audio', 'article', 'b', 'big', 'blockquote', 'body', 'br', 'cite', 'code', 'dd', 'del', 'details', 'div', 'dl', 'dt', 'em', 'footer', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', -- cgit v1.2.3-54-g00ecf