From cd07592c29391ca374f78a75a10465f894d50042 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 17:04:57 +0400 Subject: add basic tinyparser/atom --- classes/feedparser.php | 101 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 classes/feedparser.php (limited to 'classes/feedparser.php') diff --git a/classes/feedparser.php b/classes/feedparser.php new file mode 100644 index 000000000..274e871b9 --- /dev/null +++ b/classes/feedparser.php @@ -0,0 +1,101 @@ +doc = new DOMDocument(); + $this->doc->loadXML($data); + $this->error = $this->format_error(libxml_get_last_error()); + libxml_clear_errors(); + + $this->items = array(); + } + + function init() { + $root = $this->doc->firstChild; + + if ($root) { + switch ($root->tagName) { + case "rss": + $this->type = $this::FEED_RSS; + break; + case "feed": + $this->type = $this::FEED_ATOM; + break; + default: + $this->error = "Unknown/unsupported feed type"; + return; + } + + $xpath = new DOMXPath($this->doc); + + switch ($this->type) { + case $this::FEED_ATOM: + $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom'); + + $title = $xpath->query("//atom:feed/atom:title")->item(0); + + if ($title) { + $this->title = $title->nodeValue; + } + + $link = $xpath->query("//atom:feed/atom:link[not(@rel)]")->item(0); + + if ($link && $link->hasAttributes()) { + $this->link = $link->getAttribute("href"); + } + + $articles = $xpath->query("//atom:entry"); + + foreach ($articles as $article) { + array_push($this->items, new FeedItem_Atom($article)); + } + + break; + case $this::FEED_RDF: + + break; + case $this::FEED_RSS: + break; + } + } + } + + function format_error($error) { + if ($error) { + return sprintf("LibXML error %s at line %d (column %d): %s", + $error->code, $error->line, $error->column, + $error->message); + } else { + return ""; + } + } + + function error() { + return $this->error; + } + + function get_link() { + return $this->link; + } + + function get_title() { + return $this->title; + } + + function get_items() { + return $this->items; + } + +} ?> -- cgit v1.2.3-54-g00ecf From 04d2f9c831b14f7295a3475746b9096402a055f0 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 17:38:16 +0400 Subject: add basic rss support --- classes/feeditem/atom.php | 5 ++- classes/feeditem/rss.php | 107 ++++++++++++++++++++++++++++++++++++++++++++++ classes/feedparser.php | 28 ++++++++++-- include/rssfuncs.php | 4 +- 4 files changed, 138 insertions(+), 6 deletions(-) create mode 100644 classes/feeditem/rss.php (limited to 'classes/feedparser.php') diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php index 560484eda..0d888c443 100644 --- a/classes/feeditem/atom.php +++ b/classes/feeditem/atom.php @@ -17,8 +17,11 @@ class FeedItem_Atom { } function get_date() { + $updated = $this->elem->getElementsByTagName("updated")->item(0); - + if ($updated) { + return strtotime($updated->nodeValue); + } } function get_link() { diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php new file mode 100644 index 000000000..e7ea1df57 --- /dev/null +++ b/classes/feeditem/rss.php @@ -0,0 +1,107 @@ +elem = $elem; + } + + function get_id() { + return $this->get_link(); + } + + function get_date() { + $pubDate = $this->elem->getElementsByTagName("pubDate")->item(0); + + if ($pubDate) { + return strtotime($pubDate->nodeValue); + } + } + + function get_link() { + $link = $this->elem->getElementsByTagName("link")->item(0); + + if ($link) { + return $link->nodeValue; + } + } + + function get_title() { + $title = $this->elem->getElementsByTagName("title")->item(0); + + if ($title) { + return $title->nodeValue; + } + } + + function get_content() { + $content = $this->elem->getElementsByTagName("description")->item(0); + + if ($content) { + return $content->nodeValue; + } + } + + function get_description() { + $summary = $this->elem->getElementsByTagName("description")->item(0); + + if ($summary) { + return $summary->nodeValue; + } + } + + // todo + function get_comments_url() { + + } + + // todo + function get_comments_count() { + + } + + function get_categories() { + $categories = $this->elem->getElementsByTagName("category"); + $cats = array(); + + foreach ($categories as $cat) { + array_push($cats, $cat->nodeValue); + } + + return $cats; + } + + function get_enclosures() { + $enclosures = $this->elem->getElementsByTagName("enclosure"); + + $encs = array(); + + foreach ($enclosures as $enclosure) { + $enc = new FeedEnclosure(); + + $enc->type = $enclosure->getAttribute("type"); + $enc->link = $enclosure->getAttribute("url"); + $enc->length = $enclosure->getAttribute("length"); + + array_push($encs, $enc); + } + + return $encs; + } + + function get_author() { + $author = $this->elem->getElementsByTagName("author")->item(0); + + if ($author) { + $name = $author->getElementsByTagName("name")->item(0); + + if ($name) return $name->nodeValue; + + $email = $author->getElementsByTagName("email")->item(0); + + if ($email) return $email->nodeValue; + + } + } +} +?> diff --git a/classes/feedparser.php b/classes/feedparser.php index 274e871b9..fb6344cc0 100644 --- a/classes/feedparser.php +++ b/classes/feedparser.php @@ -24,10 +24,14 @@ class FeedParser { function init() { $root = $this->doc->firstChild; + $xpath = new DOMXPath($this->doc); + $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom'); + + $root = $xpath->query("(//atom:feed|//channel)")->item(0); if ($root) { switch ($root->tagName) { - case "rss": + case "channel": $this->type = $this::FEED_RSS; break; case "feed": @@ -38,11 +42,8 @@ class FeedParser { return; } - $xpath = new DOMXPath($this->doc); - switch ($this->type) { case $this::FEED_ATOM: - $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom'); $title = $xpath->query("//atom:feed/atom:title")->item(0); @@ -67,6 +68,25 @@ class FeedParser { break; case $this::FEED_RSS: + + $title = $xpath->query("//channel/title")->item(0); + + if ($title) { + $this->title = $title->nodeValue; + } + + $link = $xpath->query("//channel/link")->item(0); + + if ($link && $link->hasAttributes()) { + $this->link = $link->getAttribute("href"); + } + + $articles = $xpath->query("//channel/item"); + + foreach ($articles as $article) { + array_push($this->items, new FeedItem_RSS($article)); + } + break; } } diff --git a/include/rssfuncs.php b/include/rssfuncs.php index 3b7f7d2cf..ec0ca5a4f 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -549,7 +549,9 @@ $entry_timestamp = ""; - $entry_timestamp = strtotime($item->get_date()); + $entry_timestamp = $item->get_date(); + + _debug("orig date: " . $item->get_date(), $debug_enabled); if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) { $entry_timestamp = time(); -- cgit v1.2.3-54-g00ecf From b8f316dc288d759938aaae1e49d4bd2093dc99a2 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 17:56:21 +0400 Subject: change caching to save xml data, remove RDF init section --- classes/feedparser.php | 3 --- include/rssfuncs.php | 13 ++++++------- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'classes/feedparser.php') diff --git a/classes/feedparser.php b/classes/feedparser.php index fb6344cc0..ed284a043 100644 --- a/classes/feedparser.php +++ b/classes/feedparser.php @@ -63,9 +63,6 @@ class FeedParser { array_push($this->items, new FeedItem_Atom($article)); } - break; - case $this::FEED_RDF: - break; case $this::FEED_RSS: diff --git a/include/rssfuncs.php b/include/rssfuncs.php index ec0ca5a4f..21040e5a7 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -245,7 +245,7 @@ $date_feed_processed = date('Y-m-d H:i'); - $cache_filename = CACHE_DIR . "/simplepie/" . sha1($fetch_url) . ".feed"; + $cache_filename = CACHE_DIR . "/simplepie/" . sha1($fetch_url) . ".feed2"; // Ignore cache if new feed or manual update. $cache_age = ($no_cache || is_null($last_updated) || strpos($last_updated, '1970-01-01') === 0) ? 30 : get_feed_update_interval($feed) * 60; @@ -269,11 +269,10 @@ _debug("using local cache.", $debug_enabled); if ($cache_timestamp > $last_article_timestamp) { - @$rss_data = file_get_contents($cache_filename); + @$feed_data = file_get_contents($cache_filename); - if ($rss_data) { - $rss_hash = sha1($rss_data); - @$rss = unserialize($rss_data); + if ($feed_data) { + $rss_hash = sha1($feed_data); } } else if (!$force_refetch) { _debug("local cache valid and older than last_updated, nothing to do.", $debug_enabled); @@ -385,9 +384,9 @@ $rss_data = serialize($rss); $new_rss_hash = sha1($rss_data); - if ($new_rss_hash != $rss_hash) { + if ($new_rss_hash != $rss_hash && count($rss->get_items()) > 0 ) { _debug("saving $cache_filename", $debug_enabled); - //@file_put_contents($cache_filename, serialize($rss)); NOT YET + @file_put_contents($cache_filename, $rss_data); } } -- cgit v1.2.3-54-g00ecf From b9eee80e08fab8ebcc1484e8d62cd646ac451f43 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 19:08:04 +0400 Subject: parser: add get_links() --- classes/feedparser.php | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'classes/feedparser.php') diff --git a/classes/feedparser.php b/classes/feedparser.php index ed284a043..6d3a15802 100644 --- a/classes/feedparser.php +++ b/classes/feedparser.php @@ -6,6 +6,7 @@ class FeedParser { private $link; private $title; private $type; + private $xpath; const FEED_RDF = 0; const FEED_RSS = 1; @@ -26,6 +27,7 @@ class FeedParser { $root = $this->doc->firstChild; $xpath = new DOMXPath($this->doc); $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom'); + $this->xpath = $xpath; $root = $xpath->query("(//atom:feed|//channel)")->item(0); @@ -115,4 +117,27 @@ class FeedParser { return $this->items; } + function get_links($rel) { + $rv = array(); + + switch ($this->type) { + case $this::FEED_ATOM: + $links = $this->xpath->query("//atom:feed/atom:link"); + + foreach ($links as $link) { + if (!$rel || $link->hasAttribute('rel') && $link->getAttribute('rel') == $rel) { + array_push($rv, $link->getAttribute('href')); + } + } + break; + case $this::FEED_RSS: + $links = $this->xpath->query("//channel/link"); + if (!$rel || $link->hasAttribute('rel') && $link->getAttribute('rel') == $rel) { + array_push($rv, $link->getAttribute('href')); + } + break; + } + + return $rv; + } } ?> -- cgit v1.2.3-54-g00ecf From 3c8060aca07af57f97885466a369afdf4c2d79d5 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 19:09:07 +0400 Subject: fix get_links() for RSS --- classes/feedparser.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'classes/feedparser.php') diff --git a/classes/feedparser.php b/classes/feedparser.php index 6d3a15802..f127ba73d 100644 --- a/classes/feedparser.php +++ b/classes/feedparser.php @@ -132,8 +132,10 @@ class FeedParser { break; case $this::FEED_RSS: $links = $this->xpath->query("//channel/link"); - if (!$rel || $link->hasAttribute('rel') && $link->getAttribute('rel') == $rel) { - array_push($rv, $link->getAttribute('href')); + foreach ($links as $link) { + if (!$rel || $link->hasAttribute('rel') && $link->getAttribute('rel') == $rel) { + array_push($rv, $link->getAttribute('href')); + } } break; } -- cgit v1.2.3-54-g00ecf From 4c00e15b5d05b0d137af9a33bf79172bfeb0df24 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 19:40:43 +0400 Subject: pass xpath object to feeditem, support media-rss objects --- classes/feeditem/atom.php | 2 +- classes/feeditem/rss.php | 18 +++++++++++++++++- classes/feedparser.php | 5 +++-- 3 files changed, 21 insertions(+), 4 deletions(-) (limited to 'classes/feedparser.php') diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php index 0d888c443..7dc4ce5c1 100644 --- a/classes/feeditem/atom.php +++ b/classes/feeditem/atom.php @@ -2,7 +2,7 @@ class FeedItem_Atom { private $elem; - function __construct($elem) { + function __construct($elem, $doc, $xpath) { $this->elem = $elem; } diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php index d5667102d..e5e2a8e56 100644 --- a/classes/feeditem/rss.php +++ b/classes/feeditem/rss.php @@ -1,9 +1,11 @@ elem = $elem; + $this->xpath = $xpath; } function get_id() { @@ -92,6 +94,20 @@ class FeedItem_RSS { array_push($encs, $enc); } + $enclosures = $this->xpath->query("media:content", $this->elem); + + $encs = array(); + + foreach ($enclosures as $enclosure) { + $enc = new FeedEnclosure(); + + $enc->type = $enclosure->getAttribute("type"); + $enc->link = $enclosure->getAttribute("url"); + $enc->length = $enclosure->getAttribute("length"); + + array_push($encs, $enc); + } + return $encs; } diff --git a/classes/feedparser.php b/classes/feedparser.php index f127ba73d..eabbb5b28 100644 --- a/classes/feedparser.php +++ b/classes/feedparser.php @@ -27,6 +27,7 @@ class FeedParser { $root = $this->doc->firstChild; $xpath = new DOMXPath($this->doc); $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom'); + $xpath->registerNamespace('media', 'http://search.yahoo.com/mrss/'); $this->xpath = $xpath; $root = $xpath->query("(//atom:feed|//channel)")->item(0); @@ -62,7 +63,7 @@ class FeedParser { $articles = $xpath->query("//atom:entry"); foreach ($articles as $article) { - array_push($this->items, new FeedItem_Atom($article)); + array_push($this->items, new FeedItem_Atom($article, $this->doc, $this->xpath)); } break; @@ -83,7 +84,7 @@ class FeedParser { $articles = $xpath->query("//channel/item"); foreach ($articles as $article) { - array_push($this->items, new FeedItem_RSS($article)); + array_push($this->items, new FeedItem_RSS($article, $this->doc, $this->xpath)); } break; -- cgit v1.2.3-54-g00ecf From 852d4ac89084edbee98df89ab066266cd9df63c9 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 20:30:52 +0400 Subject: support RDF-XML feeds --- classes/feedparser.php | 34 ++++++++++++++++++++++++++++++++-- include/rssfuncs.php | 8 +++++--- 2 files changed, 37 insertions(+), 5 deletions(-) (limited to 'classes/feedparser.php') diff --git a/classes/feedparser.php b/classes/feedparser.php index eabbb5b28..8cb736a8b 100644 --- a/classes/feedparser.php +++ b/classes/feedparser.php @@ -28,12 +28,16 @@ class FeedParser { $xpath = new DOMXPath($this->doc); $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom'); $xpath->registerNamespace('media', 'http://search.yahoo.com/mrss/'); + $xpath->registerNamespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'); $this->xpath = $xpath; - $root = $xpath->query("(//atom:feed|//channel)")->item(0); + $root = $xpath->query("(//atom:feed|//channel|//rdf:rdf|//rdf:RDF)")->item(0); if ($root) { - switch ($root->tagName) { + switch (mb_strtolower($root->tagName)) { + case "rdf:rdf": + $this->type = $this::FEED_RDF; + break; case "channel": $this->type = $this::FEED_RSS; break; @@ -88,7 +92,33 @@ class FeedParser { } break; + case $this::FEED_RDF: + $xpath->registerNamespace('rssfake', 'http://purl.org/rss/1.0/'); + + $title = $xpath->query("//rssfake:channel/rssfake:title")->item(0); + + if ($title) { + $this->title = $title->nodeValue; + } + + $link = $xpath->query("//rssfake:channel/rssfake:link")->item(0); + + if ($link) { + $this->link = $link->nodeValue; + } + + $articles = $xpath->query("//rssfake:item"); + + foreach ($articles as $article) { + array_push($this->items, new FeedItem_RSS($article, $this->doc, $this->xpath)); + } + + break; + } + } else { + $this->error = "Unknown/unsupported feed type"; + return; } } diff --git a/include/rssfuncs.php b/include/rssfuncs.php index 229154696..57e63b873 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -448,10 +448,12 @@ $feed_title = db_escape_string($rss->get_title()); - _debug("registering title: $feed_title", $debug_enabled); + if ($feed_title) { + _debug("registering title: $feed_title", $debug_enabled); - db_query("UPDATE ttrss_feeds SET - title = '$feed_title' WHERE id = '$feed'"); + db_query("UPDATE ttrss_feeds SET + title = '$feed_title' WHERE id = '$feed'"); + } } if ($site_url && $orig_site_url != $site_url) { -- cgit v1.2.3-54-g00ecf From d4992d6b48ed458ae3ff39c5cd5ac19244ccd6a8 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 20:55:08 +0400 Subject: add support for dc:subject and slash:comments --- classes/feeditem/atom.php | 24 +++++++++++++++++++++++- classes/feeditem/rss.php | 13 ++++++++++--- classes/feedparser.php | 3 +++ include/rssfuncs.php | 8 +------- 4 files changed, 37 insertions(+), 11 deletions(-) (limited to 'classes/feedparser.php') diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php index 7dc4ce5c1..d21933897 100644 --- a/classes/feeditem/atom.php +++ b/classes/feeditem/atom.php @@ -1,9 +1,11 @@ elem = $elem; + $this->xpath = $xpath; } function get_id() { @@ -63,9 +65,12 @@ class FeedItem_Atom { } - // todo function get_comments_count() { + $comments = $this->xpath->query("slash:comments", $this->elem)->item(0); + if ($comments) { + return $comments->nodeValue; + } } function get_categories() { @@ -77,6 +82,11 @@ class FeedItem_Atom { array_push($cats, $cat->getAttribute("term")); } + $categories = $this->xpath->query("dc:subject", $this->elem); + + foreach ($categories as $cat) { + array_push($cats, $cat->nodeValue); + } return $cats; } @@ -100,6 +110,18 @@ class FeedItem_Atom { } } + $enclosures = $this->xpath->query("media:content", $this->elem); + + foreach ($enclosures as $enclosure) { + $enc = new FeedEnclosure(); + + $enc->type = $enclosure->getAttribute("type"); + $enc->link = $enclosure->getAttribute("url"); + $enc->length = $enclosure->getAttribute("length"); + + array_push($encs, $enc); + } + return $encs; } diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php index e5e2a8e56..7a3b0d342 100644 --- a/classes/feeditem/rss.php +++ b/classes/feeditem/rss.php @@ -63,9 +63,12 @@ class FeedItem_RSS { } - // todo function get_comments_count() { + $comments = $this->xpath->query("slash:comments", $this->elem)->item(0); + if ($comments) { + return $comments->nodeValue; + } } function get_categories() { @@ -76,6 +79,12 @@ class FeedItem_RSS { array_push($cats, $cat->nodeValue); } + $categories = $this->xpath->query("dc:subject", $this->elem); + + foreach ($categories as $cat) { + array_push($cats, $cat->nodeValue); + } + return $cats; } @@ -96,8 +105,6 @@ class FeedItem_RSS { $enclosures = $this->xpath->query("media:content", $this->elem); - $encs = array(); - foreach ($enclosures as $enclosure) { $enc = new FeedEnclosure(); diff --git a/classes/feedparser.php b/classes/feedparser.php index 8cb736a8b..f61c2a003 100644 --- a/classes/feedparser.php +++ b/classes/feedparser.php @@ -29,6 +29,9 @@ class FeedParser { $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom'); $xpath->registerNamespace('media', 'http://search.yahoo.com/mrss/'); $xpath->registerNamespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'); + $xpath->registerNamespace('slash', 'http://purl.org/rss/1.0/modules/slash/'); + $xpath->registerNamespace('dc', 'http://purl.org/dc/elements/1.1/'); + $this->xpath = $xpath; $root = $xpath->query("(//atom:feed|//channel|//rdf:rdf|//rdf:RDF)")->item(0); diff --git a/include/rssfuncs.php b/include/rssfuncs.php index d1175a303..f342bf7f1 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -581,13 +581,7 @@ $entry_comments = db_escape_string(mb_substr(trim($entry_comments), 0, 245)); $entry_author = db_escape_string(mb_substr(trim($entry_author), 0, 245)); - $num_comments = $item->get_comments_count(); - - if (is_array($num_comments) && is_array($num_comments[0])) { - $num_comments = (int) $num_comments[0]["data"]; - } else { - $num_comments = 0; - } + $num_comments = (int) $item->get_comments_count(); _debug("author $entry_author", $debug_enabled); _debug("num_comments: $num_comments", $debug_enabled); -- cgit v1.2.3-54-g00ecf From 8a95d630a9183bf2c7b79f9e1f015eec328d8804 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 22:05:59 +0400 Subject: fix rss content:encoded not used --- classes/feeditem/rss.php | 7 +++++++ classes/feedparser.php | 1 + 2 files changed, 8 insertions(+) (limited to 'classes/feedparser.php') diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php index 29c8cb6b4..5b43d0e8c 100644 --- a/classes/feeditem/rss.php +++ b/classes/feeditem/rss.php @@ -40,6 +40,13 @@ class FeedItem_RSS extends FeedItem_Common { if ($content) { return $content->nodeValue; } + + $content = $this->xpath->query("content:encoded", $this->elem)->item(0); + + if ($content) { + return $content->nodeValue; + } + } function get_description() { diff --git a/classes/feedparser.php b/classes/feedparser.php index f61c2a003..d60db8a28 100644 --- a/classes/feedparser.php +++ b/classes/feedparser.php @@ -31,6 +31,7 @@ class FeedParser { $xpath->registerNamespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'); $xpath->registerNamespace('slash', 'http://purl.org/rss/1.0/modules/slash/'); $xpath->registerNamespace('dc', 'http://purl.org/dc/elements/1.1/'); + $xpath->registerNamespace('content', 'http://purl.org/rss/1.0/modules/content/'); $this->xpath = $xpath; -- cgit v1.2.3-54-g00ecf