summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: vjkcxl <349-vjkcxl@users.noreply.gitlab.tt-rss.org> 2025-09-01 14:06:10 -0500
committer: vjkcxl <349-vjkcxl@users.noreply.gitlab.tt-rss.org> 2025-09-01 14:06:10 -0500
commit: efe6fbd3fa97d4dcf22cdaa72561cc91421bcfe2 (patch)
tree: 340b8454c1aa4bfa38384819e2f554b9a2a60c45
parent: 98dbf4973380a525cbdeb542786838de10ad6d08 (diff)
Update ComicPress logic
This updates the logic to work across a variety of additional sites. Additionally, it grabs the author's comments from comics, such as the text on Buttersafe. This does not update the list of supported comics. There are too many comic sites to enumerate all of them anyway.
-rwxr-xr-x plugins/af_comics/filters/af_comics_comicpress.php 85
1 file changed, 46 insertions, 39 deletions
diff --git a/plugins/af_comics/filters/af_comics_comicpress.php b/plugins/af_comics/filters/af_comics_comicpress.php
index 0afa19906..54ad515d7 100755
--- a/plugins/af_comics/filters/af_comics_comicpress.php
+++ b/plugins/af_comics/filters/af_comics_comicpress.php
@@ -9,59 +9,66 @@ class Af_Comics_ComicPress extends Af_ComicFilter {
function process(&$article) {
if (str_contains($article["guid"], "bunicomic.com") ||
- str_contains($article["guid"], "buttersafe.com") ||
- str_contains($article["guid"], "extrafabulouscomics.com") ||
- str_contains($article["guid"], "danbydraws.com") ||
- str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") ||
- str_contains($article["guid"], "happyjar.com") ||
- str_contains($article["guid"], "nedroid.com") ||
- str_contains($article["guid"], "stonetoss.com") ||
- str_contains($article["guid"], "csectioncomics.com")) {
+ str_contains($article["guid"], "buttersafe.com") ||
+ str_contains($article["guid"], "extrafabulouscomics.com") ||
+ str_contains($article["guid"], "danbydraws.com") ||
+ str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") ||
+ str_contains($article["guid"], "happyjar.com") ||
+ str_contains($article["guid"], "nedroid.com") ||
+ str_contains($article["guid"], "stonetoss.com") ||
+ str_contains($article["guid"], "csectioncomics.com")) {
- // lol at people who block clients by user agent
- // oh noes my ad revenue Q_Q
+ $res = UrlHelper::fetch(["url" => $article["link"]]);
- $res = UrlHelper::fetch(["url" => $article["link"],
- "useragent" => "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"]);
+ $doc = new DOMDocument();
- $doc = new DOMDocument();
+ if ($res && $doc->loadHTML($res)) {
+ $xpath = new DOMXPath($doc);
+ $img = $xpath->query('//div[@id="comic"]')->item(0);
+ $text = $xpath->query('//div[@class="entry" or @class="entry-content"]')->item(0);
- if ($res && $doc->loadHTML($res)) {
- $xpath = new DOMXPath($doc);
- $basenode = $xpath->query('//div[@id="comic"]|//img[contains(@class, "wp-post-image")]/..')->item(0);
+ if ($img || $text) {
+ $article["content"] = '';
- if ($basenode) {
- $article["content"] = $doc->saveHTML($basenode);
- return true;
+ if ($img) {
+ $this->cleanup($xpath, $img);
+ $article["content"] .= $doc->saveHTML($img);
}
- /** @var DOMElement|null $webtoon_link (buni specific) */
- $webtoon_link = $xpath->query("//a[contains(@href,'www.webtoons.com')]")->item(0);
+ if ($text) {
+ $this->cleanup($xpath, $text);
+ $article["content"] .= $doc->saveHTML($text);
+ }
- if ($webtoon_link) {
+ return true;
+ }
+ }
+ }
- $res = UrlHelper::fetch(["url" => $webtoon_link->getAttribute("href"),
- "useragent" => "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"]);
+ return false;
+ }
- if (@$doc->loadHTML($res)) {
- $xpath = new DOMXPath($doc);
- $basenode = $xpath->query('//div[@id="_viewerBox"]')->item(0);
+ private function cleanup($xpath, $content_node) {
+ $toUpdates = $xpath->query('//img[@data-src]', $content_node);
+ $this->move_all_attributes($toUpdates, 'data-src', 'src');
- if ($basenode) {
- $imgs = $xpath->query("//img[@data-url]", $basenode);
+ $toUpdates = $xpath->query('//img[@data-srcset]', $content_node);
+ $this->move_all_attributes($toUpdates, 'data-srcset', 'srcset');
- foreach ($imgs as $img) {
- $img->setAttribute("src", $img->getAttribute("data-url"));
- }
+ $toUpdates = $xpath->query('//img[@data-sizes]', $content_node);
+ $this->move_all_attributes($toUpdates, 'data-sizes', 'sizes');
- $article["content"] = $doc->saveHTML($basenode);
- return true;
- }
- }
- }
- }
+ $toRemoves = $xpath->query('.//*[contains(@class, "sharedaddy") or contains(@class, "relatedposts") or contains(@class, "donation_table") or contains(@class, "above-comic") or contains(@class, "oli_")]', $content_node);
+ foreach ($toRemoves as $toRemove) {
+ $toRemove->parentNode->removeChild($toRemove);
}
+ }
- return false;
+ private function move_all_attributes($toUpdates, $srcName, $dstName) {
+ foreach ($toUpdates as $toUpdate) {
+ $attributeValue = $toUpdate->getAttribute($srcName);
+ $toUpdate->setAttribute($dstName, $attributeValue);
+ $toUpdate->removeAttribute($srcName);
+ }
}
}