summaryrefslogtreecommitdiff
path: root/classes
diff options
context:
space:
mode:
Diffstat (limited to 'classes')
-rw-r--r-- classes/Pref_Filters.php 119
-rw-r--r-- classes/Sanitizer.php 89
2 files changed, 162 insertions, 46 deletions
diff --git a/classes/Pref_Filters.php b/classes/Pref_Filters.php
index cfad881a4..a6063d898 100644
--- a/classes/Pref_Filters.php
+++ b/classes/Pref_Filters.php
@@ -70,6 +70,7 @@ class Pref_Filters extends Handler_Protected {
$offset = (int) clean($_REQUEST["offset"]);
$limit = (int) clean($_REQUEST["limit"]);
+ // catchall fake filter which includes all rules
$filter = [
'enabled' => true,
'match_any_rule' => checkbox_to_sql_bool($_REQUEST['match_any_rule'] ?? false),
@@ -94,7 +95,6 @@ class Pref_Filters extends Handler_Protected {
if (is_array($rule)) {
$rule['type'] = $filter_types[$rule['filter_type']];
- unset($rule['filter_type']);
array_push($filter['rules'], $rule);
$scope_inner_qparts = [];
@@ -103,7 +103,10 @@ class Pref_Filters extends Handler_Protected {
foreach ($rule["feed_id"] as $feed_id) {
if (str_starts_with("$feed_id", "CAT:")) {
$cat_id = (int) substr("$feed_id", 4);
- array_push($scope_inner_qparts, "cat_id = " . $cat_id);
+ if ($cat_id > 0)
+ array_push($scope_inner_qparts, "cat_id = " . $cat_id);
+ else
+ array_push($scope_inner_qparts, "cat_id IS NULL");
} else if (is_numeric($feed_id) && $feed_id > 0) {
array_push($scope_inner_qparts, "feed_id = " . (int)$feed_id);
}
@@ -137,25 +140,101 @@ class Pref_Filters extends Handler_Protected {
];
foreach ($entries as $entry) {
- $rc = RSSUtils::get_article_filters(array($filter), $entry['title'], $entry['content'], $entry['link'],
- $entry['author'], explode(",", $entry['tag_cache']));
+
+ // temporary filter which will be used to compare against returned article
+ $feed_filter = $filter;
+ $feed_filter['rules'] = [];
+
+ // only add rules which match result from specific feed or category ID or rules matching all feeds
+ // @phpstan-ignore foreach.emptyArray
+ foreach ($filter['rules'] as $rule) {
+ foreach ($rule['feed_id'] as $rule_feed) {
+ if (($rule_feed === 'CAT:0' && $entry['cat_id'] === null) || // rule matches Uncategorized
+ $rule_feed === 'CAT:' . $entry['cat_id'] || // rule matches category
+ (int)$rule_feed === $entry['feed_id'] || // rule matches feed
+ $rule_feed === '0') { // rule matches all feeds
+
+ $feed_filter['rules'][] = $rule;
+ }
+ }
+ }
+
+ $matched_rules = [];
+
+ $entry_tags = explode(",", $entry['tag_cache']);
+
+ $rc = RSSUtils::get_article_filters([$feed_filter], $entry['title'], $entry['content'], $entry['link'],
+ $entry['author'], $entry_tags, $matched_rules);
if (count($rc) > 0) {
- $entry["content_preview"] = truncate_string(strip_tags($entry["content"]), 200, '…');
+ $content_preview = "";
+
+ $matches = [];
+ $rules = [];
+
+ $entry_title = $entry["title"];
+
+ // technically only one rule may match *here* because we're testing a single (fake) filter defined above
+ // let's keep this forward-compatible in case we'll want to return multiple rules for whatever reason
+ foreach ($matched_rules as $rule) {
+ $can_highlight_content = false;
+ $can_highlight_title = false;
+
+ $rule_regexp_match = mb_substr(strip_tags($rule['regexp_matches'][0]), 0, 200);
- $excerpt_length = 100;
+ $matches[] = $rule_regexp_match;
- PluginHost::getInstance()->chain_hooks_callback(PluginHost::HOOK_QUERY_HEADLINES,
- function ($result) use (&$entry) {
- $entry = $result;
- },
- $entry, $excerpt_length);
+ $rules[] = self::_get_rule_name($rule, '');
+
+ if (in_array($rule['type'], ['content', 'both'])) {
+ // also stripping [\r\n\t] to match what's done for content in RSSUtils#get_article_filters()
+ $entry_content_text = strip_tags(preg_replace("/[\r\n\t]/", "", $entry["content"]));
+
+ $match_index = mb_strpos($entry_content_text, $rule_regexp_match);
+ $content_preview = truncate_string(mb_substr($entry_content_text, $match_index), 200);
+
+ if ($match_index > 0)
+ $content_preview = '…' . $content_preview;
+
+ } else if ($rule['type'] == 'link') {
+ $content_preview = $entry['link'];
+ } else if ($rule['type'] == 'author') {
+ $content_preview = $entry['author'];
+ } else if ($rule['type'] == 'tag') {
+ $content_preview = '<i class="material-icons">label_outline</i> ' . implode(', ', $entry_tags);
+ } else {
+ $content_preview = "&mdash;";
+ }
+
+ switch ($rule['type']) {
+ case "both":
+ $can_highlight_title = true;
+ $can_highlight_content = true;
+ break;
+ case "title":
+ $can_highlight_title = true;
+ break;
+ case "content":
+ case "link":
+ case "author":
+ case "tag":
+ $can_highlight_content = true;
+ break;
+ }
+
+ if ($can_highlight_content)
+ $content_preview = Sanitizer::highlight_words_str($content_preview, $matches);
+
+ if ($can_highlight_title)
+ $entry_title = Sanitizer::highlight_words_str($entry_title, $matches);
+ }
$rv['items'][] = [
- 'title' => $entry['title'],
+ 'title' => $entry_title,
'feed_title' => $entry['feed_title'],
'date' => mb_substr($entry['date_entered'], 0, 16),
- 'content_preview' => $entry['content_preview'],
+ 'content_preview' => $content_preview,
+ 'rules' => $rules
];
}
}
@@ -369,7 +448,7 @@ class Pref_Filters extends Handler_Protected {
/**
* @param array<string, mixed>|null $rule
*/
- private function _get_rule_name(?array $rule = null): string {
+ private function _get_rule_name(?array $rule = null, string $format = 'html'): string {
if (!$rule) $rule = json_decode(clean($_REQUEST["rule"]), true);
$feeds = $rule["feed_id"];
@@ -404,10 +483,14 @@ class Pref_Filters extends Handler_Protected {
$inverse = isset($rule["inverse"]) ? "inverse" : "";
- return "<span class='filterRule $inverse'>" .
- T_sprintf("%s on %s in %s %s", htmlspecialchars($rule["reg_exp"]),
- "<span class='field'>$filter_type</span>", "<span class='feed'>$feed</span>", isset($rule["inverse"]) ? __("(inverse)") : "") . "</span>";
- }
+ if ($format === 'html')
+ return "<span class='filterRule $inverse'>" .
+ T_sprintf("%s on %s in %s %s", htmlspecialchars($rule["reg_exp"]),
+ "<span class='field'>$filter_type</span>", "<span class='feed'>$feed</span>", isset($rule["inverse"]) ? __("(inverse)") : "") . "</span>";
+ else
+ return T_sprintf("%s on %s in %s %s", $rule["reg_exp"],
+ $filter_type, $feed, isset($rule["inverse"]) ? __("(inverse)") : "");
+ }
function printRuleName(): void {
print $this->_get_rule_name(json_decode(clean($_REQUEST["rule"]), true));
diff --git a/classes/Sanitizer.php b/classes/Sanitizer.php
index 94d6fe621..2ae07d8d3 100644
--- a/classes/Sanitizer.php
+++ b/classes/Sanitizer.php
@@ -59,6 +59,65 @@ class Sanitizer {
return parse_url(Config::get_self_url(), PHP_URL_SCHEME) == 'https';
}
+ /**
+  * Highlights the given words inside an HTML string.
+  *
+  * The string is wrapped in a <span>, parsed as UTF-8 HTML and passed to self::highlight_words().
+  * When that call reports success, the serialized contents of <body> are returned (or the whole
+  * serialized document if no <body> element can be located in the output). If parsing fails or
+  * highlight_words() returns false, the input string is returned unchanged.
+  *
+  * @param array<string> $words
+  */
+ public static function highlight_words_str(string $str, array $words) : string {
+     $doc = new DOMDocument();
+
+     if (!$doc->loadHTML('<?xml encoding="UTF-8"><span>' . $str . '</span>'))
+         return $str;
+
+     $xpath = new DOMXPath($doc);
+
+     if (!self::highlight_words($doc, $xpath, $words))
+         return $str;
+
+     $html = $doc->saveHTML();
+
+     /* strip everything outside of <body>...</body> */
+     $body_parts = [];
+
+     if (preg_match('/<body>(.*)<\/body>/is', $html, $body_parts))
+         return $body_parts[1];
+
+     return $html;
+ }
+
+ /**
+  * Wraps every case-insensitive occurrence of each word found in the text nodes of $doc
+  * in <span class="highlight">, modifying the document in place.
+  *
+  * Empty words are skipped: on PHP 8+ mb_stripos() matches an empty needle at offset 0, which
+  * never consumes any text and would make the inner loop spin forever.
+  *
+  * @param array<string> $words
+  * @return bool true if at least one text node was processed (even when nothing matched in it),
+  *              false if there were no usable words or no text nodes
+  */
+ public static function highlight_words(DOMDocument &$doc, DOMXPath $xpath, array $words) : bool {
+     $rv = false;
+
+     foreach ($words as $word) {
+         $word = (string) $word;
+         $word_length = mb_strlen($word);
+
+         if ($word_length === 0)
+             continue;
+
+         // http://stackoverflow.com/questions/4081372/highlight-keywords-in-a-paragraph
+         $elements = $xpath->query("//*/text()");
+
+         foreach ($elements as $child) {
+
+             $fragment = $doc->createDocumentFragment();
+             $text = $child->textContent;
+
+             while (($pos = mb_stripos($text, $word)) !== false) {
+                 $fragment->appendChild(new DOMText(mb_substr($text, 0, $pos)));
+
+                 // keep the casing found in the document; do not overwrite the search word itself
+                 $matched = mb_substr($text, $pos, $word_length);
+
+                 $highlight = $doc->createElement('span');
+                 $highlight->appendChild(new DOMText($matched));
+                 $highlight->setAttribute('class', 'highlight');
+                 $fragment->appendChild($highlight);
+
+                 $text = mb_substr($text, $pos + $word_length);
+             }
+
+             // strict comparison: empty() would also discard a remaining "0"
+             if ($text !== '') $fragment->appendChild(new DOMText($text));
+
+             $child->parentNode->replaceChild($fragment, $child);
+
+             $rv = true;
+         }
+     }
+
+     return $rv;
+ }
+
/**
* @param array<int, string>|null $highlight_words Words to highlight in the HTML output.
*
@@ -197,34 +256,8 @@ class Sanitizer {
$div->appendChild($entry);
}
- if (is_array($highlight_words)) {
- foreach ($highlight_words as $word) {
-
- // http://stackoverflow.com/questions/4081372/highlight-keywords-in-a-paragraph
-
- $elements = $xpath->query("//*/text()");
-
- foreach ($elements as $child) {
-
- $fragment = $doc->createDocumentFragment();
- $text = $child->textContent;
-
- while (($pos = mb_stripos($text, $word)) !== false) {
- $fragment->appendChild(new DOMText(mb_substr($text, 0, (int)$pos)));
- $word = mb_substr($text, (int)$pos, mb_strlen($word));
- $highlight = $doc->createElement('span');
- $highlight->appendChild(new DOMText($word));
- $highlight->setAttribute('class', 'highlight');
- $fragment->appendChild($highlight);
- $text = mb_substr($text, $pos + mb_strlen($word));
- }
-
- if (!empty($text)) $fragment->appendChild(new DOMText($text));
-
- $child->parentNode->replaceChild($fragment, $child);
- }
- }
- }
+ if (is_array($highlight_words))
+ self::highlight_words($doc, $xpath, $highlight_words);
$res = $doc->saveHTML();