From ebec81a6fb2dff0b2fe6b569b021e057995ee6c7 Mon Sep 17 00:00:00 2001
From: Andrew Dolgov
Date: Fri, 19 Apr 2013 13:17:28 +0400
Subject: subscribe: verify XML before adding to the database; fetch: try to work around entity problems if initial parsing fails

---
 include/rssfuncs.php | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'include/rssfuncs.php')

diff --git a/include/rssfuncs.php b/include/rssfuncs.php
index 31d35bf8e..47d622169 100644
--- a/include/rssfuncs.php
+++ b/include/rssfuncs.php
@@ -316,6 +316,25 @@
 					_debug("update_rss_feed: fetch done.");
 				}
 
+				$error = verify_feed_xml($feed_data);
+
+				if ($error) {
+					if ($debug_enabled) {
+						_debug("update_rss_feed: error verifying XML, code: " . $error->code);
+					}
+
+					if ($error->code == 26) {
+						if ($debug_enabled) {
+							_debug("update_rss_feed: got error 26, trying to decode entities...");
+						}
+
+						$feed_data = html_entity_decode($feed_data, ENT_COMPAT, 'UTF-8');
+
+						$error = verify_feed_xml($feed_data);
+
+						if ($error) $feed_data = '';
+					}
+				}
 			}
 
 			if (!$feed_data) {
@@ -559,7 +578,7 @@
 					_debug("update_rss_feed: date $entry_timestamp [$entry_timestamp_fmt]");
 				}
 
-				$entry_title = html_entity_decode($item->get_title());
+				$entry_title = html_entity_decode($item->get_title(), ENT_COMPAT, 'UTF-8');
 
 				$entry_link = rewrite_relative_url($site_url, $item->get_link());
 
@@ -1421,5 +1440,13 @@
 			mb_strtolower(strip_tags($title), 'utf-8'));
 	}
 
+	function verify_feed_xml($feed_data) {
+		libxml_use_internal_errors(true);
+		$doc = new DOMDocument();
+		$doc->loadXML($feed_data);
+		$error = libxml_get_last_error();
+		libxml_clear_errors();
+		return $error;
+	}
 
 ?>
-- 
cgit v1.2.3-54-g00ecf