/**
 * Normalizes raw feed categories into a sorted, de-duplicated list of tags.
 *
 * Each raw value may contain several tags separated by commas (e.g. a single
 * dc:subject element holding "foo, bar"). Every tag is lowercased, trimmed,
 * passed through clean(), stripped of commas and quotes, and limited to
 * 250 characters. Numeric tags are prefixed with "t:"; tags that end up
 * empty are dropped.
 *
 * The result is sorted so the tag order stays stable between feed updates.
 * Keys of the returned array are not sequential (asort() and array_unique()
 * both preserve keys), so callers should iterate it rather than index it.
 *
 * @param array $cats raw category strings as extracted from the feed item
 * @return array normalized unique tags in sorted order
 */
static function normalize_categories($cats) {
	$tags = [];

	foreach ($cats as $rawcat) {
		foreach (explode(",", $rawcat) as $srccat) {
			// clean() is defined elsewhere in the project
			// NOTE(review): presumably it strips markup -- confirm.
			$cat = clean(trim(mb_strtolower($srccat)));

			// strip separators/quotes before any further checks, so that
			// a quoted number or a quotes-only value is classified by what
			// is actually going to be stored
			$cat = trim(preg_replace('/[,\'\"]/', "", $cat));

			// "a,,b", trailing commas and quotes-only values produce no tag
			if ($cat === '')
				continue;

			// we don't support numeric tags
			if (is_numeric($cat))
				$cat = 't:' . $cat;

			if (DB_TYPE == "mysql") {
				// replace characters outside the BMP with U+FFFD
				// NOTE(review): presumably because the MySQL schema cannot
				// store 4-byte UTF-8 sequences -- confirm.
				$replaced = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $cat);

				// preg_replace() returns null on error (e.g. invalid UTF-8
				// with the /u modifier); keep the tag as-is in that case
				if ($replaced !== null)
					$cat = $replaced;
			}

			if (mb_strlen($cat) > 250)
				$cat = mb_substr($cat, 0, 250);

			$tags[] = $cat;
		}
	}

	asort($tags);

	return array_unique($tags);
}