diff options
Diffstat (limited to 'classes')
| -rw-r--r-- | classes/Pref_Filters.php | 119 | ||||
| -rw-r--r-- | classes/Sanitizer.php | 89 |
2 files changed, 162 insertions, 46 deletions
diff --git a/classes/Pref_Filters.php b/classes/Pref_Filters.php index cfad881a4..a6063d898 100644 --- a/classes/Pref_Filters.php +++ b/classes/Pref_Filters.php @@ -70,6 +70,7 @@ class Pref_Filters extends Handler_Protected { $offset = (int) clean($_REQUEST["offset"]); $limit = (int) clean($_REQUEST["limit"]); + // catchall fake filter which includes all rules $filter = [ 'enabled' => true, 'match_any_rule' => checkbox_to_sql_bool($_REQUEST['match_any_rule'] ?? false), @@ -94,7 +95,6 @@ class Pref_Filters extends Handler_Protected { if (is_array($rule)) { $rule['type'] = $filter_types[$rule['filter_type']]; - unset($rule['filter_type']); array_push($filter['rules'], $rule); $scope_inner_qparts = []; @@ -103,7 +103,10 @@ class Pref_Filters extends Handler_Protected { foreach ($rule["feed_id"] as $feed_id) { if (str_starts_with("$feed_id", "CAT:")) { $cat_id = (int) substr("$feed_id", 4); - array_push($scope_inner_qparts, "cat_id = " . $cat_id); + if ($cat_id > 0) + array_push($scope_inner_qparts, "cat_id = " . $cat_id); + else + array_push($scope_inner_qparts, "cat_id IS NULL"); } else if (is_numeric($feed_id) && $feed_id > 0) { array_push($scope_inner_qparts, "feed_id = " . (int)$feed_id); } @@ -137,25 +140,101 @@ class Pref_Filters extends Handler_Protected { ]; foreach ($entries as $entry) { - $rc = RSSUtils::get_article_filters(array($filter), $entry['title'], $entry['content'], $entry['link'], - $entry['author'], explode(",", $entry['tag_cache'])); + + // temporary filter which will be used to compare against returned article + $feed_filter = $filter; + $feed_filter['rules'] = []; + + // only add rules which match result from specific feed or category ID or rules matching all feeds + // @phpstan-ignore foreach.emptyArray + foreach ($filter['rules'] as $rule) { + foreach ($rule['feed_id'] as $rule_feed) { + if (($rule_feed === 'CAT:0' && $entry['cat_id'] === null) || // rule matches Uncategorized + $rule_feed === 'CAT:' . $entry['cat_id'] || // rule matches category + (int)$rule_feed === $entry['feed_id'] || // rule matches feed + $rule_feed === '0') { // rule matches all feeds + + $feed_filter['rules'][] = $rule; + } + } + } + + $matched_rules = []; + + $entry_tags = explode(",", $entry['tag_cache']); + + $rc = RSSUtils::get_article_filters([$feed_filter], $entry['title'], $entry['content'], $entry['link'], + $entry['author'], $entry_tags, $matched_rules); if (count($rc) > 0) { - $entry["content_preview"] = truncate_string(strip_tags($entry["content"]), 200, '…'); + $content_preview = ""; + + $matches = []; + $rules = []; + + $entry_title = $entry["title"]; + + // technically only one rule may match *here* because we're testing a single (fake) filter defined above + // let's keep this forward-compatible in case we'll want to return multiple rules for whatever reason + foreach ($matched_rules as $rule) { + $can_highlight_content = false; + $can_highlight_title = false; + + $rule_regexp_match = mb_substr(strip_tags($rule['regexp_matches'][0]), 0, 200); - $excerpt_length = 100; + $matches[] = $rule_regexp_match; - PluginHost::getInstance()->chain_hooks_callback(PluginHost::HOOK_QUERY_HEADLINES, - function ($result) use (&$entry) { - $entry = $result; - }, - $entry, $excerpt_length); + $rules[] = self::_get_rule_name($rule, ''); + + if (in_array($rule['type'], ['content', 'both'])) { + // also stripping [\r\n\t] to match what's done for content in RSSUtils#get_article_filters() + $entry_content_text = strip_tags(preg_replace("/[\r\n\t]/", "", $entry["content"])); + + $match_index = mb_strpos($entry_content_text, $rule_regexp_match); + $content_preview = truncate_string(mb_substr($entry_content_text, $match_index), 200); + + if ($match_index > 0) + $content_preview = '…' . $content_preview; + + } else if ($rule['type'] == 'link') { + $content_preview = $entry['link']; + } else if ($rule['type'] == 'author') { + $content_preview = $entry['author']; + } else if ($rule['type'] == 'tag') { + $content_preview = '<i class="material-icons">label_outline</i> ' . implode(', ', $entry_tags); + } else { + $content_preview = "—"; + } + + switch ($rule['type']) { + case "both": + $can_highlight_title = true; + $can_highlight_content = true; + break; + case "title": + $can_highlight_title = true; + break; + case "content": + case "link": + case "author": + case "tag": + $can_highlight_content = true; + break; + } + + if ($can_highlight_content) + $content_preview = Sanitizer::highlight_words_str($content_preview, $matches); + + if ($can_highlight_title) + $entry_title = Sanitizer::highlight_words_str($entry_title, $matches); + } $rv['items'][] = [ - 'title' => $entry['title'], + 'title' => $entry_title, 'feed_title' => $entry['feed_title'], 'date' => mb_substr($entry['date_entered'], 0, 16), - 'content_preview' => $entry['content_preview'], + 'content_preview' => $content_preview, + 'rules' => $rules ]; } } @@ -369,7 +448,7 @@ class Pref_Filters extends Handler_Protected { /** * @param array<string, mixed>|null $rule */ - private function _get_rule_name(?array $rule = null): string { + private function _get_rule_name(?array $rule = null, string $format = 'html'): string { if (!$rule) $rule = json_decode(clean($_REQUEST["rule"]), true); $feeds = $rule["feed_id"]; @@ -404,10 +483,14 @@ class Pref_Filters extends Handler_Protected { $inverse = isset($rule["inverse"]) ? "inverse" : ""; - return "<span class='filterRule $inverse'>" . - T_sprintf("%s on %s in %s %s", htmlspecialchars($rule["reg_exp"]), - "<span class='field'>$filter_type</span>", "<span class='feed'>$feed</span>", isset($rule["inverse"]) ? __("(inverse)") : "") . "</span>"; - } + if ($format === 'html') + return "<span class='filterRule $inverse'>" . + T_sprintf("%s on %s in %s %s", htmlspecialchars($rule["reg_exp"]), + "<span class='field'>$filter_type</span>", "<span class='feed'>$feed</span>", isset($rule["inverse"]) ? __("(inverse)") : "") . "</span>"; + else + return T_sprintf("%s on %s in %s %s", $rule["reg_exp"], + $filter_type, $feed, isset($rule["inverse"]) ? __("(inverse)") : ""); + } function printRuleName(): void { print $this->_get_rule_name(json_decode(clean($_REQUEST["rule"]), true)); diff --git a/classes/Sanitizer.php b/classes/Sanitizer.php index 94d6fe621..2ae07d8d3 100644 --- a/classes/Sanitizer.php +++ b/classes/Sanitizer.php @@ -59,6 +59,65 @@ class Sanitizer { return parse_url(Config::get_self_url(), PHP_URL_SCHEME) == 'https'; } + /** @param array<string> $words */ + public static function highlight_words_str(string $str, array $words) : string { + $doc = new DOMDocument(); + + if ($doc->loadHTML('<?xml encoding="UTF-8"><span>' . $str . '</span>')) { + $xpath = new DOMXPath($doc); + + if (self::highlight_words($doc, $xpath, $words)) { + $res = $doc->saveHTML(); + + /* strip everything outside of <body>...</body> */ + $res_frag = array(); + + if (preg_match('/<body>(.*)<\/body>/is', $res, $res_frag)) { + return $res_frag[1]; + } else { + return $res; + } + } + } + + return $str; + } + + /** @param array<string> $words */ + public static function highlight_words(DOMDocument &$doc, DOMXPath $xpath, array $words) : bool { + $rv = false; + + foreach ($words as $word) { + + // http://stackoverflow.com/questions/4081372/highlight-keywords-in-a-paragraph + $elements = $xpath->query("//*/text()"); + + foreach ($elements as $child) { + + $fragment = $doc->createDocumentFragment(); + $text = $child->textContent; + + while (($pos = mb_stripos($text, $word)) !== false) { + $fragment->appendChild(new DOMText(mb_substr($text, 0, (int)$pos))); + $word = mb_substr($text, (int)$pos, mb_strlen($word)); + $highlight = $doc->createElement('span'); + $highlight->appendChild(new DOMText($word)); + $highlight->setAttribute('class', 'highlight'); + $fragment->appendChild($highlight); + $text = mb_substr($text, $pos + mb_strlen($word)); + } + + if (!empty($text)) $fragment->appendChild(new DOMText($text)); + + $child->parentNode->replaceChild($fragment, $child); + + $rv = true; + } + } + + return $rv; + } + /** * @param array<int, string>|null $highlight_words Words to highlight in the HTML output. * @@ -197,34 +256,8 @@ class Sanitizer { $div->appendChild($entry); } - if (is_array($highlight_words)) { - foreach ($highlight_words as $word) { - - // http://stackoverflow.com/questions/4081372/highlight-keywords-in-a-paragraph - - $elements = $xpath->query("//*/text()"); - - foreach ($elements as $child) { - - $fragment = $doc->createDocumentFragment(); - $text = $child->textContent; - - while (($pos = mb_stripos($text, $word)) !== false) { - $fragment->appendChild(new DOMText(mb_substr($text, 0, (int)$pos))); - $word = mb_substr($text, (int)$pos, mb_strlen($word)); - $highlight = $doc->createElement('span'); - $highlight->appendChild(new DOMText($word)); - $highlight->setAttribute('class', 'highlight'); - $fragment->appendChild($highlight); - $text = mb_substr($text, $pos + mb_strlen($word)); - } - - if (!empty($text)) $fragment->appendChild(new DOMText($text)); - - $child->parentNode->replaceChild($fragment, $child); - } - } - } + if (is_array($highlight_words)) + self::highlight_words($doc, $xpath, $highlight_words); $res = $doc->saveHTML(); |