From 2e50f96901b828aefbe0adc2a43bb023d2653c71 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Sat, 17 May 2025 08:02:42 +0300 Subject: Revert "drop gocomics and other inactive af_comics filters" This reverts commit 5f064b4477e74f5de24bdb22a8cb499f2ab03805. --- plugins/af_comics/filters/af_comics_dilbert.php | 71 ++++++++++++++ plugins/af_comics/filters/af_comics_explosm.php | 28 ++++++ plugins/af_comics/filters/af_comics_gocomics.php | 98 +++++++++++++++++++ .../filters/af_comics_gocomics_farside.php | 108 +++++++++++++++++++++ plugins/af_comics/filters/af_comics_pvp.php | 31 ++++++ 5 files changed, 336 insertions(+) create mode 100644 plugins/af_comics/filters/af_comics_dilbert.php create mode 100644 plugins/af_comics/filters/af_comics_explosm.php create mode 100644 plugins/af_comics/filters/af_comics_gocomics.php create mode 100644 plugins/af_comics/filters/af_comics_gocomics_farside.php create mode 100644 plugins/af_comics/filters/af_comics_pvp.php diff --git a/plugins/af_comics/filters/af_comics_dilbert.php b/plugins/af_comics/filters/af_comics_dilbert.php new file mode 100644 index 000000000..35123b47d --- /dev/null +++ b/plugins/af_comics/filters/af_comics_dilbert.php @@ -0,0 +1,71 @@ + $article['link'], + 'useragent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0', + ]); + + if (!$res && UrlHelper::$fetch_last_error_content) + $res = UrlHelper::$fetch_last_error_content; + + $doc = new DOMDocument(); + + if ($res && $doc->loadHTML($res)) { + $xpath = new DOMXPath($doc); + + /** @var DOMElement|null $basenode (image container) */ + $basenode = $xpath->query('(//div[@class="img-comic-container"]/a[@class="img-comic-link"])')->item(0); + + // Get the comic title + $comic_title = $xpath->query('(//span[@class="comic-title-name"])')->item(0)->textContent; + + // Get tags from the article + $matches = $xpath->query('(//p[contains(@class, "comic-tags")][1]//a)'); + $tags = array(); + + foreach ($matches as $tag) { + // Only strings starting with a number sign are considered tags + if (str_starts_with($tag->textContent, '#')) { + $tags[] = mb_strtolower(substr($tag->textContent, 1), 'utf-8'); + } + } + + // Get the current comics transcript and set it + // as the title so it will be visible on mousover + $transcript = $xpath->query('(//div[starts-with(@id, "js-toggle-transcript-")]//p)')->item(0); + if ($transcript) { + $basenode->setAttribute("title", $transcript->textContent); + } + + if ($basenode) { + $article["content"] = $doc->saveHTML($basenode); + } + + // Add comic title to article type if not empty (mostly Sunday strips) + if ($comic_title) { + $article["title"] = $article["title"] . " - " . $comic_title; + } + + if (!empty($tags)) { + // Ignore existing tags and just replace them all + $article["tags"] = array_unique($tags); + } + + } + + return true; + } + + return false; + } +} +?> diff --git a/plugins/af_comics/filters/af_comics_explosm.php b/plugins/af_comics/filters/af_comics_explosm.php new file mode 100644 index 000000000..bb237f796 --- /dev/null +++ b/plugins/af_comics/filters/af_comics_explosm.php @@ -0,0 +1,28 @@ +loadHTML(UrlHelper::fetch(['url' => $article['link']]))) { + $xpath = new DOMXPath($doc); + $basenode = $xpath->query('//div[contains(@class, "MainComic__ComicImage")]//img')->item(0); + + if ($basenode) { + $article["content"] = $doc->saveHTML($basenode); + } + } + + return true; + } + + return false; + } +} diff --git a/plugins/af_comics/filters/af_comics_gocomics.php b/plugins/af_comics/filters/af_comics_gocomics.php new file mode 100644 index 000000000..3ae169c02 --- /dev/null +++ b/plugins/af_comics/filters/af_comics_gocomics.php @@ -0,0 +1,98 @@ +'; // Get _is_html() to return false. + else + return false; + } + + public function on_basic_info($url) { + if (preg_match('#^https?://www\.gocomics\.com/([-a-z0-9]+)$#i', $url, $matches)) + return ['title' => ucfirst($matches[1]), 'site_url' => $matches[0]]; + else + return false; + } + + public function on_fetch($url) { + if (preg_match('#^https?://(?:feeds\.feedburner\.com/uclick|www\.gocomics\.com)/([-a-z0-9]+)$#i', $url, $comic)) { + $site_url = 'https://www.gocomics.com/' . $comic[1]; + + // no date suffix here since /whatever/$TODAY redirects to /whatever + $article_link = $site_url; + + $body = UrlHelper::fetch(array('url' => $article_link, 'type' => 'text/html', 'followlocation' => false)); + + $feed_title = htmlspecialchars($comic[1]); + $site_url = htmlspecialchars($site_url); + + // add the date suffix here to uniquely identify the "article" and provide the permalink + $article_link = htmlspecialchars($article_link) . date('/Y/m/d'); + + $tpl = new Templator(); + + $tpl->readTemplateFromFile('templates/generated_feed.txt'); + + $tpl->setVariable('FEED_TITLE', $feed_title, true); + $tpl->setVariable('VERSION', Config::get_version(), true); + $tpl->setVariable('FEED_URL', htmlspecialchars($url), true); + $tpl->setVariable('SELF_URL', $site_url, true); + + if ($body) { + $doc = new DOMDocument(); + + if (@$doc->loadHTML($body)) { + $xpath = new DOMXPath($doc); + + /** @var DOMElement|null $node */ + $node = $xpath->query('//button[@aria-label="Expand comic"]/img')->item(0); + + if ($node) { + $title = $xpath->query('//h1')->item(0); + + if ($title) { + $title = clean(trim($title->nodeValue)); + } else { + $title = date('l, F d, Y'); + } + + foreach (['srcset', 'sizes', 'data-srcset', 'width'] as $attr ) { + $node->removeAttribute($attr); + } + + $tpl->setVariable('ARTICLE_ID', $article_link, true); + $tpl->setVariable('ARTICLE_LINK', $article_link, true); + $tpl->setVariable('ARTICLE_UPDATED_ATOM', date('c', mktime(11, 0, 0)), true); + $tpl->setVariable('ARTICLE_TITLE', htmlspecialchars($title), true); + $tpl->setVariable('ARTICLE_EXCERPT', '', true); + $tpl->setVariable('ARTICLE_CONTENT', $doc->saveHTML($node), true); + + $tpl->setVariable('ARTICLE_AUTHOR', '', true); + $tpl->setVariable('ARTICLE_SOURCE_LINK', $site_url, true); + $tpl->setVariable('ARTICLE_SOURCE_TITLE', $feed_title, true); + + $tpl->addBlock('entry'); + } + } + } + + $tpl->addBlock('feed'); + + if ($tpl->generateOutputToString($tmp_data)) + return $tmp_data; + + } + + return false; + } + +} diff --git a/plugins/af_comics/filters/af_comics_gocomics_farside.php b/plugins/af_comics/filters/af_comics_gocomics_farside.php new file mode 100644 index 000000000..e2951eb36 --- /dev/null +++ b/plugins/af_comics/filters/af_comics_gocomics_farside.php @@ -0,0 +1,108 @@ +'; // Get _is_html() to return false. + else + return false; + } + + public function on_basic_info($url) { + if (preg_match("#^https?://www.thefarside.com/#", $url)) + return ['title' => "The Far Side", 'site_url' => 'https://www.thefarside.com']; + else + return false; + } + + public function on_fetch($url) { + if (preg_match("#^https?://www\.thefarside\.com#", $url)) { + + $article_link = htmlspecialchars("https://www.thefarside.com" . date('/Y/m/d')); + + $tpl = new Templator(); + + $tpl->readTemplateFromFile('templates/generated_feed.txt'); + + $tpl->setVariable('FEED_TITLE', "The Far Side", true); + $tpl->setVariable('VERSION', Config::get_version(), true); + $tpl->setVariable('FEED_URL', htmlspecialchars($url), true); + $tpl->setVariable('SELF_URL', htmlspecialchars($url), true); + + $body = UrlHelper::fetch(['url' => $article_link, 'type' => 'text/html', 'followlocation' => false]); + + if ($body) { + $doc = new DOMDocument(); + + if (@$doc->loadHTML($body)) { + $xpath = new DOMXPath($doc); + + $content_node = $xpath->query('//*[contains(@class,"js-daily-dose")]')->item(0); + + if ($content_node) { + $imgs = $xpath->query('//img[@data-src]', $content_node); + + $cache = DiskCache::instance("images"); + + foreach ($imgs as $img) { + $image_url = $img->getAttribute('data-src'); + $local_filename = sha1($image_url); + + if ($image_url) { + $img->setAttribute('src', $image_url); + + // try to cache image locally because they just 401 us otherwise + if (!$cache->exists($local_filename)) { + Debug::log("[Af_Comics_Gocomics_FarSide] caching: $image_url", Debug::LOG_VERBOSE); + $res = $cache->download($image_url, sha1($image_url), ["http_referrer" => $image_url]); + Debug::log("[Af_Comics_Gocomics_FarSide] cache result: $res", Debug::LOG_VERBOSE); + } + } + } + + $junk_elems = $xpath->query("//*[@data-shareable-popover]"); + + foreach ($junk_elems as $junk) + $junk->parentNode->removeChild($junk); + + $title = $xpath->query('//h3')->item(0); + + if ($title) { + $title = clean(trim($title->nodeValue)); + } else { + $title = date('l, F d, Y'); + } + + $tpl->setVariable('ARTICLE_ID', htmlspecialchars($article_link), true); + $tpl->setVariable('ARTICLE_LINK', htmlspecialchars($article_link), true); + $tpl->setVariable('ARTICLE_UPDATED_ATOM', date('c', mktime(11, 0, 0)), true); + $tpl->setVariable('ARTICLE_TITLE', htmlspecialchars($title), true); + $tpl->setVariable('ARTICLE_EXCERPT', '', true); + $tpl->setVariable('ARTICLE_CONTENT', "

" . $doc->saveHTML($content_node) . "

", true); + + $tpl->setVariable('ARTICLE_AUTHOR', '', true); + $tpl->setVariable('ARTICLE_SOURCE_LINK', htmlspecialchars($article_link), true); + $tpl->setVariable('ARTICLE_SOURCE_TITLE', "The Far Side", true); + + $tpl->addBlock('entry'); + } + } + } + + $tpl->addBlock('feed'); + + if ($tpl->generateOutputToString($tmp_data)) + return $tmp_data; + } + + return false; + } +} diff --git a/plugins/af_comics/filters/af_comics_pvp.php b/plugins/af_comics/filters/af_comics_pvp.php new file mode 100644 index 000000000..907eee1a8 --- /dev/null +++ b/plugins/af_comics/filters/af_comics_pvp.php @@ -0,0 +1,31 @@ + $article['link'], + 'useragent' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)', + ]); + + $doc = new DOMDocument(); + + if ($res && $doc->loadHTML($res)) { + $xpath = new DOMXPath($doc); + $basenode = $xpath->query('//section[@class="comic-art"]')->item(0); + + if ($basenode) { + $article["content"] = $doc->saveHTML($basenode); + } + } + + return true; + } + + return false; + } +} -- cgit v1.2.3-54-g00ecf