diff options
| author | vjkcxl <349-vjkcxl@users.noreply.gitlab.tt-rss.org> | 2025-09-01 14:06:10 -0500 |
|---|---|---|
| committer | vjkcxl <349-vjkcxl@users.noreply.gitlab.tt-rss.org> | 2025-09-01 14:06:10 -0500 |
| commit | efe6fbd3fa97d4dcf22cdaa72561cc91421bcfe2 (patch) | |
| tree | 340b8454c1aa4bfa38384819e2f554b9a2a60c45 | |
| parent | 98dbf4973380a525cbdeb542786838de10ad6d08 (diff) | |
Update ComicPress logic
This updates the logic to work across a variety of additional sites.
Additionally, it grabs the author's comments from comics, such as the text on Buttersafe.
This does not update the list of supported comics.
There are too many comic sites to enumerate all of them anyway.
| -rwxr-xr-x | plugins/af_comics/filters/af_comics_comicpress.php | 85 |
1 file changed, 46 insertions, 39 deletions
diff --git a/plugins/af_comics/filters/af_comics_comicpress.php b/plugins/af_comics/filters/af_comics_comicpress.php index 0afa19906..54ad515d7 100755 --- a/plugins/af_comics/filters/af_comics_comicpress.php +++ b/plugins/af_comics/filters/af_comics_comicpress.php @@ -9,59 +9,66 @@ class Af_Comics_ComicPress extends Af_ComicFilter { function process(&$article) { if (str_contains($article["guid"], "bunicomic.com") || - str_contains($article["guid"], "buttersafe.com") || - str_contains($article["guid"], "extrafabulouscomics.com") || - str_contains($article["guid"], "danbydraws.com") || - str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") || - str_contains($article["guid"], "happyjar.com") || - str_contains($article["guid"], "nedroid.com") || - str_contains($article["guid"], "stonetoss.com") || - str_contains($article["guid"], "csectioncomics.com")) { + str_contains($article["guid"], "buttersafe.com") || + str_contains($article["guid"], "extrafabulouscomics.com") || + str_contains($article["guid"], "danbydraws.com") || + str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") || + str_contains($article["guid"], "happyjar.com") || + str_contains($article["guid"], "nedroid.com") || + str_contains($article["guid"], "stonetoss.com") || + str_contains($article["guid"], "csectioncomics.com")) { - // lol at people who block clients by user agent - // oh noes my ad revenue Q_Q + $res = UrlHelper::fetch(["url" => $article["link"]]); - $res = UrlHelper::fetch(["url" => $article["link"], - "useragent" => "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"]); + $doc = new DOMDocument(); - $doc = new DOMDocument(); + if ($res && $doc->loadHTML($res)) { + $xpath = new DOMXPath($doc); + $img = $xpath->query('//div[@id="comic"]')->item(0); + $text = $xpath->query('//div[@class="entry" or @class="entry-content"]')->item(0); - if ($res && $doc->loadHTML($res)) { - $xpath = new DOMXPath($doc); - $basenode = 
$xpath->query('//div[@id="comic"]|//img[contains(@class, "wp-post-image")]/..')->item(0); + if ($img || $text) { + $article["content"] = ''; - if ($basenode) { - $article["content"] = $doc->saveHTML($basenode); - return true; + if ($img) { + $this->cleanup($xpath, $img); + $article["content"] .= $doc->saveHTML($img); } - /** @var DOMElement|null $webtoon_link (buni specific) */ - $webtoon_link = $xpath->query("//a[contains(@href,'www.webtoons.com')]")->item(0); + if ($text) { + $this->cleanup($xpath, $text); + $article["content"] .= $doc->saveHTML($text); + } - if ($webtoon_link) { + return true; + } + } + } - $res = UrlHelper::fetch(["url" => $webtoon_link->getAttribute("href"), - "useragent" => "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"]); + return false; + } - if (@$doc->loadHTML($res)) { - $xpath = new DOMXPath($doc); - $basenode = $xpath->query('//div[@id="_viewerBox"]')->item(0); + private function cleanup($xpath, $content_node) { + $toUpdates = $xpath->query('//img[@data-src]', $content_node); + $this->move_all_attributes($toUpdates, 'data-src', 'src'); - if ($basenode) { - $imgs = $xpath->query("//img[@data-url]", $basenode); + $toUpdates = $xpath->query('//img[@data-srcset]', $content_node); + $this->move_all_attributes($toUpdates, 'data-srcset', 'srcset'); - foreach ($imgs as $img) { - $img->setAttribute("src", $img->getAttribute("data-url")); - } + $toUpdates = $xpath->query('//img[@data-sizes]', $content_node); + $this->move_all_attributes($toUpdates, 'data-sizes', 'sizes'); - $article["content"] = $doc->saveHTML($basenode); - return true; - } - } - } - } + $toRemoves = $xpath->query('.//*[contains(@class, "sharedaddy") or contains(@class, "relatedposts") or contains(@class, "donation_table") or contains(@class, "above-comic") or contains(@class, "oli_")]', $content_node); + foreach ($toRemoves as $toRemove) { + $toRemove->parentNode->removeChild($toRemove); } + } - return false; + private function 
move_all_attributes($toUpdates, $srcName, $dstName) { + foreach ($toUpdates as $toUpdate) { + $attributeValue = $toUpdate->getAttribute($srcName); + $toUpdate->setAttribute($dstName, $attributeValue); + $toUpdate->removeAttribute($srcName); + } } } |