From efe6fbd3fa97d4dcf22cdaa72561cc91421bcfe2 Mon Sep 17 00:00:00 2001 From: vjkcxl <349-vjkcxl@users.noreply.gitlab.tt-rss.org> Date: Mon, 1 Sep 2025 14:06:10 -0500 Subject: Update ComicPress logic This updates the logic to work across a variety of additional sites. Additionally, it grabs the author's comments from comics, such as the text on Buttersafe. This does not update the list of supported comics. There are too many comic sites to enumerate all of them anyway. --- plugins/af_comics/filters/af_comics_comicpress.php | 85 ++++++++++++---------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/plugins/af_comics/filters/af_comics_comicpress.php b/plugins/af_comics/filters/af_comics_comicpress.php index 0afa19906..54ad515d7 100755 --- a/plugins/af_comics/filters/af_comics_comicpress.php +++ b/plugins/af_comics/filters/af_comics_comicpress.php @@ -9,59 +9,66 @@ class Af_Comics_ComicPress extends Af_ComicFilter { function process(&$article) { if (str_contains($article["guid"], "bunicomic.com") || - str_contains($article["guid"], "buttersafe.com") || - str_contains($article["guid"], "extrafabulouscomics.com") || - str_contains($article["guid"], "danbydraws.com") || - str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") || - str_contains($article["guid"], "happyjar.com") || - str_contains($article["guid"], "nedroid.com") || - str_contains($article["guid"], "stonetoss.com") || - str_contains($article["guid"], "csectioncomics.com")) { + str_contains($article["guid"], "buttersafe.com") || + str_contains($article["guid"], "extrafabulouscomics.com") || + str_contains($article["guid"], "danbydraws.com") || + str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") || + str_contains($article["guid"], "happyjar.com") || + str_contains($article["guid"], "nedroid.com") || + str_contains($article["guid"], "stonetoss.com") || + str_contains($article["guid"], "csectioncomics.com")) { - // lol at people who block clients by user agent - // oh noes my ad revenue Q_Q + $res = UrlHelper::fetch(["url" => $article["link"]]); - $res = UrlHelper::fetch(["url" => $article["link"], - "useragent" => "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"]); + $doc = new DOMDocument(); - $doc = new DOMDocument(); + if ($res && $doc->loadHTML($res)) { + $xpath = new DOMXPath($doc); + $img = $xpath->query('//div[@id="comic"]')->item(0); + $text = $xpath->query('//div[@class="entry" or @class="entry-content"]')->item(0); - if ($res && $doc->loadHTML($res)) { - $xpath = new DOMXPath($doc); - $basenode = $xpath->query('//div[@id="comic"]|//img[contains(@class, "wp-post-image")]/..')->item(0); + if ($img || $text) { + $article["content"] = ''; - if ($basenode) { - $article["content"] = $doc->saveHTML($basenode); - return true; + if ($img) { + $this->cleanup($xpath, $img); + $article["content"] .= $doc->saveHTML($img); } - /** @var DOMElement|null $webtoon_link (buni specific) */ - $webtoon_link = $xpath->query("//a[contains(@href,'www.webtoons.com')]")->item(0); + if ($text) { + $this->cleanup($xpath, $text); + $article["content"] .= $doc->saveHTML($text); + } - if ($webtoon_link) { + return true; + } + } + } - $res = UrlHelper::fetch(["url" => $webtoon_link->getAttribute("href"), - "useragent" => "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"]); + return false; + } - if (@$doc->loadHTML($res)) { - $xpath = new DOMXPath($doc); - $basenode = $xpath->query('//div[@id="_viewerBox"]')->item(0); + private function cleanup($xpath, $content_node) { + $toUpdates = $xpath->query('//img[@data-src]', $content_node); + $this->move_all_attributes($toUpdates, 'data-src', 'src'); - if ($basenode) { - $imgs = $xpath->query("//img[@data-url]", $basenode); + $toUpdates = $xpath->query('//img[@data-srcset]', $content_node); + $this->move_all_attributes($toUpdates, 'data-srcset', 'srcset'); - foreach ($imgs as $img) { - $img->setAttribute("src", $img->getAttribute("data-url")); - } + $toUpdates = $xpath->query('//img[@data-sizes]', $content_node); + $this->move_all_attributes($toUpdates, 'data-sizes', 'sizes'); - $article["content"] = $doc->saveHTML($basenode); - return true; - } - } - } - } + $toRemoves = $xpath->query('.//*[contains(@class, "sharedaddy") or contains(@class, "relatedposts") or contains(@class, "donation_table") or contains(@class, "above-comic") or contains(@class, "oli_")]', $content_node); + foreach ($toRemoves as $toRemove) { + $toRemove->parentNode->removeChild($toRemove); } + } - return false; + private function move_all_attributes($toUpdates, $srcName, $dstName) { + foreach ($toUpdates as $toUpdate) { + $attributeValue = $toUpdate->getAttribute($srcName); + $toUpdate->setAttribute($dstName, $attributeValue); + $toUpdate->removeAttribute($srcName); + } } } -- cgit v1.2.3-54-g00ecf