diff options
| author | Andrew Dolgov <fox@fakecake.org> | 2025-09-02 21:24:44 +0300 |
|---|---|---|
| committer | Andrew Dolgov <fox@fakecake.org> | 2025-09-02 21:24:44 +0300 |
| commit | be3ee920b145ecad8fb468c00c25e53ced68da5c (patch) | |
| tree | ad439a4b3c2781c3b064b26fa9cd36d9e2292b44 | |
| parent | 98dbf4973380a525cbdeb542786838de10ad6d08 (diff) | |
| parent | c914d0710f052ef111243e3b7d3dffc89fef69c5 (diff) | |
Merge branch 'update-comicpress' into 'master'
Update ComicPress logic
See merge request tt-rss/tt-rss!183
| -rwxr-xr-x | plugins/af_comics/filters/af_comics_comicpress.php | 73 | ||||
| -rw-r--r-- | plugins/af_comics/filters/af_comics_danbydraws.php | 27 |
2 files changed, 68 insertions, 32 deletions
diff --git a/plugins/af_comics/filters/af_comics_comicpress.php b/plugins/af_comics/filters/af_comics_comicpress.php index 0afa19906..f62d4889c 100755 --- a/plugins/af_comics/filters/af_comics_comicpress.php +++ b/plugins/af_comics/filters/af_comics_comicpress.php @@ -4,64 +4,73 @@ class Af_Comics_ComicPress extends Af_ComicFilter { function supported() { return array("Buni", "Buttersafe", "Happy Jar", "CSection", "Extra Fabulous Comics", "Nedroid", "Stonetoss", - "Danby Draws", "Powerup Comics"); + "Powerup Comics"); } function process(&$article) { if (str_contains($article["guid"], "bunicomic.com") || str_contains($article["guid"], "buttersafe.com") || str_contains($article["guid"], "extrafabulouscomics.com") || - str_contains($article["guid"], "danbydraws.com") || str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") || str_contains($article["guid"], "happyjar.com") || str_contains($article["guid"], "nedroid.com") || str_contains($article["guid"], "stonetoss.com") || str_contains($article["guid"], "csectioncomics.com")) { - // lol at people who block clients by user agent - // oh noes my ad revenue Q_Q - - $res = UrlHelper::fetch(["url" => $article["link"], - "useragent" => "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"]); + $res = UrlHelper::fetch(["url" => $article["link"]]); $doc = new DOMDocument(); if ($res && $doc->loadHTML($res)) { $xpath = new DOMXPath($doc); - $basenode = $xpath->query('//div[@id="comic"]|//img[contains(@class, "wp-post-image")]/..')->item(0); + $img = $xpath->query('//div[@id="comic"]')->item(0); + $text = $xpath->query('//div[@class="entry" or @class="entry-content"]')->item(0); - if ($basenode) { - $article["content"] = $doc->saveHTML($basenode); - return true; - } + if ($img || $text) { + $article["content"] = ''; - /** @var DOMElement|null $webtoon_link (buni specific) */ - $webtoon_link = $xpath->query("//a[contains(@href,'www.webtoons.com')]")->item(0); + if ($img) { + $this->cleanup($xpath, $img); + $article["content"] .= $doc->saveHTML($img); + } - if ($webtoon_link) { + if ($text) { + $this->cleanup($xpath, $text); + $article["content"] .= $doc->saveHTML($text); + } + + return true; + } + } + } - $res = UrlHelper::fetch(["url" => $webtoon_link->getAttribute("href"), - "useragent" => "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"]); + return false; + } - if (@$doc->loadHTML($res)) { - $xpath = new DOMXPath($doc); - $basenode = $xpath->query('//div[@id="_viewerBox"]')->item(0); + private function cleanup(DOMXPath $xpath, DOMNode $content_node): void { + $toUpdates = $xpath->query('//img[@data-src]', $content_node); + $this->move_all_attributes($toUpdates, 'data-src', 'src'); - if ($basenode) { - $imgs = $xpath->query("//img[@data-url]", $basenode); + $toUpdates = $xpath->query('//img[@data-srcset]', $content_node); + $this->move_all_attributes($toUpdates, 'data-srcset', 'srcset'); - foreach ($imgs as $img) { - $img->setAttribute("src", $img->getAttribute("data-url")); - } + $toUpdates = $xpath->query('//img[@data-sizes]', $content_node); + $this->move_all_attributes($toUpdates, 'data-sizes', 'sizes'); - $article["content"] = $doc->saveHTML($basenode); - return true; - } - } - } - } + $toRemoves = $xpath->query('.//*[contains(@class, "sharedaddy") or contains(@class, "relatedposts") or contains(@class, "donation_table") or contains(@class, "above-comic") or contains(@class, "oli_")]', $content_node); + foreach ($toRemoves as $toRemove) { + $toRemove->parentNode->removeChild($toRemove); } + } - return false; + /** + * @param DOMNodeList<DOMNode> $toUpdates + */ + private function move_all_attributes(DOMNodeList $toUpdates, string $srcName, string $dstName): void { + foreach ($toUpdates as $toUpdate) { + $attributeValue = $toUpdate->getAttribute($srcName); + $toUpdate->setAttribute($dstName, $attributeValue); + $toUpdate->removeAttribute($srcName); + } } } diff --git a/plugins/af_comics/filters/af_comics_danbydraws.php b/plugins/af_comics/filters/af_comics_danbydraws.php new file mode 100644 index 000000000..3ecc93180 --- /dev/null +++ b/plugins/af_comics/filters/af_comics_danbydraws.php @@ -0,0 +1,27 @@ +<?php +class Af_Comics_DanbyDraws extends Af_ComicFilter { + + function supported() { + return array("Danby Draws"); + } + + function process(&$article) { + if (str_contains($article["link"], "danbydraws.com")) { + $res = UrlHelper::fetch(["url" => $article["link"]]); + + $doc = new DOMDocument(); + + if ($res && $doc->loadHTML($res)) { + $xpath = new DOMXPath($doc); + $basenode = $xpath->query('//div[@id="comic"]|//img[contains(@class, "wp-post-image")]/..')->item(0); + + if ($basenode) { + $article["content"] = $doc->saveHTML($basenode); + return true; + } + } + } + + return false; + } +} |