From 117efb6fa497ce873c9e18368d2a9fc61eaa8ef8 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 19 Jan 2015 12:52:15 +0300 Subject: add trgm plugin for postgresql --- plugins/af_psql_trgm/init.php | 202 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 plugins/af_psql_trgm/init.php (limited to 'plugins/af_psql_trgm/init.php') diff --git a/plugins/af_psql_trgm/init.php b/plugins/af_psql_trgm/init.php new file mode 100644 index 000000000..a1ee040e8 --- /dev/null +++ b/plugins/af_psql_trgm/init.php @@ -0,0 +1,202 @@ + 1) $similarity = 1; + + if ($min_title_length < 0) $min_title_length = 0; + + $similarity = sprintf("%.2f", $similarity); + + $this->host->set($this, "similarity", $similarity); + $this->host->set($this, "min_title_length", $min_title_length); + + echo T_sprintf("Data saved (%s)", $similarity); + } + + function init($host) { + $this->host = $host; + + $host->add_hook($host::HOOK_ARTICLE_FILTER, $this); + $host->add_hook($host::HOOK_PREFS_TAB, $this); + $host->add_hook($host::HOOK_PREFS_EDIT_FEED, $this); + $host->add_hook($host::HOOK_PREFS_SAVE_FEED, $this); + + } + + function hook_prefs_tab($args) { + if ($args != "prefFeeds") return; + + print "
"; + + if (DB_TYPE != "pgsql") { + print_error("Database type not supported."); + } + + $result = db_query("select 'similarity'::regproc"); + + if (db_num_rows($result) == 0) { + print_error("pg_trgm extension not found."); + } + + $similarity = $this->host->get($this, "similarity"); + $min_title_length = $this->host->get($this, "min_title_length"); + + if (!$similarity) $similarity = '0.75'; + if (!$min_title_length) $min_title_length = '32'; + + print "
"; + + print ""; + + print ""; + print ""; + print ""; + + print_notice("PostgreSQL trigram extension returns string similarity as a floating point number (0-1). Setting it too low might produce false positives, zero disables checking."); + + print "
"; + print_notice("Only data in other feeds is checked, i.e. sequential duplicate posts in one feed will not be detected by this plugin."); + + print "
"; + print_notice("Enable the plugin for specific feeds in the feed editor."); + + print "

" . __("Global settings") . "

"; + + print ""; + + print ""; + print ""; + print ""; + print ""; + + + print "
".__("Minimum similarity:")." +
".__("Minimum title length:")." +
"; + + print "

"; + + print "

"; + + print "
"; + } + + //PluginHost::getInstance()->run_hooks(PluginHost::HOOK_PREFS_EDIT_FEED, + // "hook_prefs_edit_feed", $feed_id); + // PluginHost::getInstance()->run_hooks(PluginHost::HOOK_PREFS_SAVE_FEED, + // "hook_prefs_save_feed", $feed_id); + + function hook_prefs_edit_feed($feed_id) { + print "
".__("Similarity (pg_trgm)")."
"; + print "
"; + + $enabled_feeds = $this->host->get($this, "enabled_feeds"); + if (!array($enabled_feeds)) $enabled_feeds = array(); + + $key = array_search($feed_id, $enabled_feeds); + $checked = $key !== FALSE ? "checked" : ""; + + print "
 "; + + print "
"; + } + + function hook_prefs_save_feed($feed_id) { + $enabled_feeds = $this->host->get($this, "enabled_feeds"); + if (!is_array($enabled_feeds)) $enabled_feeds = array(); + + $enable = checkbox_to_sql_bool($_POST["trgm_similarity_enabled"]) == 'true'; + $key = array_search($feed_id, $enabled_feeds); + + if ($enable) { + if ($key === FALSE) { + array_push($enabled_feeds, $feed_id); + } + } else { + if ($key !== FALSE) { + unset($enabled_feeds[$key]); + } + } + + $this->host->set($this, "enabled_feeds", $enabled_feeds); + } + + function hook_article_filter($article) { + + if (DB_TYPE != "pgsql") return $article; + + $result = db_query("select 'similarity'::regproc"); + if (db_num_rows($result) == 0) return $article; + + $enabled_feeds = $this->host->get($this, "enabled_feeds"); + $key = array_search($article["feed"]["id"], $enabled_feeds); + if ($key === FALSE) return $article; + + $similarity = (float) $this->host->get($this, "similarity"); + if ($similarity < 0.01) return $article; + + $min_title_length = (int) $this->host->get($this, "min_length"); + if (mb_strlen($article["title"]) < $min_title_length) return $article; + + $owner_uid = $article["owner_uid"]; + $feed_id = $article["feed"]["id"]; + + $title_escaped = db_escape_string($article["title"]); + + $result = db_query("SELECT MAX(SIMILARITY(title, '$title_escaped')) AS ms + FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND + date_entered >= NOW() - interval '1 day' AND + feed_id != $feed_id AND + owner_uid = $owner_uid"); + + $similarity_result = db_fetch_result($result, 0, "ms"); + + //_debug("similarity result: $similarity_result"); + + if ($similarity_result >= $similarity) { + $article["force_catchup"] = true; + } + + return $article; + + } + + function api_version() { + return 2; + } + +} +?> -- cgit v1.2.3-54-g00ecf From 97600ddd978e20dde613085ea03453ddd102b8fc Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 19 Jan 2015 12:56:36 +0300 Subject: af_psql_trgm: allow dupe checking within one feed --- plugins/af_psql_trgm/init.php | 9 --------- 1 file changed, 9 deletions(-) (limited to 'plugins/af_psql_trgm/init.php') diff --git a/plugins/af_psql_trgm/init.php b/plugins/af_psql_trgm/init.php index a1ee040e8..95b78dec1 100644 --- a/plugins/af_psql_trgm/init.php +++ b/plugins/af_psql_trgm/init.php @@ -80,9 +80,6 @@ class Af_Psql_Trgm extends Plugin { print_notice("PostgreSQL trigram extension returns string similarity as a floating point number (0-1). Setting it too low might produce false positives, zero disables checking."); - print "
"; - print_notice("Only data in other feeds is checked, i.e. sequential duplicate posts in one feed will not be detected by this plugin."); - print "
"; print_notice("Enable the plugin for specific feeds in the feed editor."); @@ -112,11 +109,6 @@ class Af_Psql_Trgm extends Plugin { print ""; } - //PluginHost::getInstance()->run_hooks(PluginHost::HOOK_PREFS_EDIT_FEED, - // "hook_prefs_edit_feed", $feed_id); - // PluginHost::getInstance()->run_hooks(PluginHost::HOOK_PREFS_SAVE_FEED, - // "hook_prefs_save_feed", $feed_id); - function hook_prefs_edit_feed($feed_id) { print "
".__("Similarity (pg_trgm)")."
"; print "
"; @@ -179,7 +171,6 @@ class Af_Psql_Trgm extends Plugin { $result = db_query("SELECT MAX(SIMILARITY(title, '$title_escaped')) AS ms FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND date_entered >= NOW() - interval '1 day' AND - feed_id != $feed_id AND owner_uid = $owner_uid"); $similarity_result = db_fetch_result($result, 0, "ms"); -- cgit v1.2.3-54-g00ecf From 9a121298e15856638be6f3e44593ab06a172bc17 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 19 Jan 2015 14:22:41 +0300 Subject: add some stuff to the trgm plugin --- plugins/af_psql_trgm/init.php | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'plugins/af_psql_trgm/init.php') diff --git a/plugins/af_psql_trgm/init.php b/plugins/af_psql_trgm/init.php index 95b78dec1..f55aa1156 100644 --- a/plugins/af_psql_trgm/init.php +++ b/plugins/af_psql_trgm/init.php @@ -2,7 +2,6 @@ class Af_Psql_Trgm extends Plugin { private $host; - private $filters = array(); function about() { return array(1.0, @@ -98,7 +97,6 @@ class Af_Psql_Trgm extends Plugin { placeholder=\"32\" required=\"1\" name=\"min_title_length\" value=\"$min_title_length\">"; - print ""; print "

"; } -- cgit v1.2.3-54-g00ecf From 167fb03f3e94d8a86c986e262952a6e124c12470 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 19 Jan 2015 14:59:33 +0300 Subject: trgm: add disabled for now button part --- plugins/af_psql_trgm/init.php | 47 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'plugins/af_psql_trgm/init.php') diff --git a/plugins/af_psql_trgm/init.php b/plugins/af_psql_trgm/init.php index f55aa1156..a451dce23 100644 --- a/plugins/af_psql_trgm/init.php +++ b/plugins/af_psql_trgm/init.php @@ -33,9 +33,56 @@ class Af_Psql_Trgm extends Plugin { $host->add_hook($host::HOOK_PREFS_TAB, $this); $host->add_hook($host::HOOK_PREFS_EDIT_FEED, $this); $host->add_hook($host::HOOK_PREFS_SAVE_FEED, $this); + //$host->add_hook($host::HOOK_ARTICLE_BUTTON, $this); } + /* function get_js() { + return file_get_contents(__DIR__ . "/init.js"); + } + + function showrelated() { + $id = (int) db_escape_string($_REQUEST['param']); + $owner_uid = $_SESSION["uid"]; + + $result = db_query("SELECT title FROM ttrss_entries, ttrss_user_entries + WHERE ref_id = id AND id = $id AND owner_uid = $owner_uid"); + + $title = db_fetch_result($result, 0, "title"); + + print "

$title

"; + + $title = db_escape_string($title); + $result = db_query("SELECT id,title,updated + FROM ttrss_entries, ttrss_user_entries + WHERE owner_uid = $owner_uid AND + id = ref_id AND + id != $id AND + date_entered >= NOW() - INTERVAL '1 day' AND + SIMILARITY(title, '$title') >= 0.5 + LIMIT 30"); + + print "
    "; + + while ($line = db_fetch_assoc($result)) { + print "
  • "; + print "
    " . smart_date_time($line["updated"]) + . "
    "; + print $line["title"]; + print "
  • "; + } + + print "
"; + + } */ + + /* function hook_article_button($line) { + return ""; + } */ + function hook_prefs_tab($args) { if ($args != "prefFeeds") return; -- cgit v1.2.3-54-g00ecf From f52879fed516925a1cc026401671bcb083880e13 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 19 Jan 2015 15:46:15 +0300 Subject: trgm: add basic related stories dialog --- css/tt-rss.css | 4 ++++ plugins/af_psql_trgm/init.js | 6 ++--- plugins/af_psql_trgm/init.php | 54 +++++++++++++++++++++++++++++++------------ 3 files changed, 46 insertions(+), 18 deletions(-) (limited to 'plugins/af_psql_trgm/init.php') diff --git a/css/tt-rss.css b/css/tt-rss.css index 3db8dc1d5..693541a7c 100644 --- a/css/tt-rss.css +++ b/css/tt-rss.css @@ -341,6 +341,10 @@ div.prefHelp { color : #555; } +.small { + font-size : 11px; +} + #main-toolbar > * { white-space : nowrap; display : table-cell; diff --git a/plugins/af_psql_trgm/init.js b/plugins/af_psql_trgm/init.js index 8affb72d4..621ccb746 100644 --- a/plugins/af_psql_trgm/init.js +++ b/plugins/af_psql_trgm/init.js @@ -3,11 +3,11 @@ function showTrgmRelated(id) { var query = "backend.php?op=pluginhandler&plugin=af_psql_trgm&method=showrelated¶m=" + param_escape(id); - if (dijit.byId("editNoteDlg")) - dijit.byId("editNoteDlg").destroyRecursive(); + if (dijit.byId("trgmRelatedDlg")) + dijit.byId("trgmRelatedDlg").destroyRecursive(); dialog = new dijit.Dialog({ - id: "editNoteDlg", + id: "trgmRelatedDlg", title: __("Related articles"), style: "width: 600px", execute: function() { diff --git a/plugins/af_psql_trgm/init.php b/plugins/af_psql_trgm/init.php index a451dce23..97187a602 100644 --- a/plugins/af_psql_trgm/init.php +++ b/plugins/af_psql_trgm/init.php @@ -33,11 +33,11 @@ class Af_Psql_Trgm extends Plugin { $host->add_hook($host::HOOK_PREFS_TAB, $this); $host->add_hook($host::HOOK_PREFS_EDIT_FEED, $this); $host->add_hook($host::HOOK_PREFS_SAVE_FEED, $this); - //$host->add_hook($host::HOOK_ARTICLE_BUTTON, $this); + $host->add_hook($host::HOOK_ARTICLE_BUTTON, $this); } - /* function get_js() { + function get_js() { return file_get_contents(__DIR__ . "/init.js"); } @@ -53,35 +53,59 @@ class Af_Psql_Trgm extends Plugin { print "

$title

"; $title = db_escape_string($title); - $result = db_query("SELECT id,title,updated - FROM ttrss_entries, ttrss_user_entries - WHERE owner_uid = $owner_uid AND - id = ref_id AND - id != $id AND - date_entered >= NOW() - INTERVAL '1 day' AND - SIMILARITY(title, '$title') >= 0.5 - LIMIT 30"); + $result = db_query("SELECT ttrss_entries.id AS id, + feed_id, + ttrss_entries.title AS title, + updated, link, + ttrss_feeds.title AS feed_title, + SIMILARITY(ttrss_entries.title, '$title') AS sm + FROM + ttrss_entries, ttrss_user_entries LEFT JOIN ttrss_feeds ON (ttrss_feeds.id = feed_id) + WHERE + ttrss_entries.id = ref_id AND + ttrss_user_entries.owner_uid = $owner_uid AND + ttrss_entries.id != $id AND + date_entered >= NOW() - INTERVAL '1 week' + ORDER BY + sm DESC, date_entered DESC + LIMIT 10"); print "
    "; while ($line = db_fetch_assoc($result)) { print "
  • "; - print "
    " . smart_date_time($line["updated"]) + print "
    " . + smart_date_time(strtotime($line["updated"])) . "
    "; - print $line["title"]; + + print ""; + + $article_link = htmlspecialchars($line["link"]); + print " ". + $line["title"].""; + + print " (". + htmlspecialchars($line["feed_title"]).")"; + print "
  • "; } print "
"; - } */ + print "
"; + print ""; + print "
"; - /* function hook_article_button($line) { + + } + + function hook_article_button($line) { return ""; - } */ + } function hook_prefs_tab($args) { if ($args != "prefFeeds") return; -- cgit v1.2.3-54-g00ecf From d806953471684c45b3ebe93de46fb4294f55ca49 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 19 Jan 2015 16:42:10 +0300 Subject: trgm: extend relative articles search interval --- plugins/af_psql_trgm/init.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'plugins/af_psql_trgm/init.php') diff --git a/plugins/af_psql_trgm/init.php b/plugins/af_psql_trgm/init.php index 97187a602..559a5a71c 100644 --- a/plugins/af_psql_trgm/init.php +++ b/plugins/af_psql_trgm/init.php @@ -65,7 +65,7 @@ class Af_Psql_Trgm extends Plugin { ttrss_entries.id = ref_id AND ttrss_user_entries.owner_uid = $owner_uid AND ttrss_entries.id != $id AND - date_entered >= NOW() - INTERVAL '1 week' + date_entered >= NOW() - INTERVAL '2 weeks' ORDER BY sm DESC, date_entered DESC LIMIT 10"); -- cgit v1.2.3-54-g00ecf From 94238b143a031d731341e1e344e7032f19f397ca Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 19 Jan 2015 18:18:03 +0300 Subject: related: do not consider negative scored headlines --- plugins/af_psql_trgm/init.php | 1 + 1 file changed, 1 insertion(+) (limited to 'plugins/af_psql_trgm/init.php') diff --git a/plugins/af_psql_trgm/init.php b/plugins/af_psql_trgm/init.php index 559a5a71c..02a0faba3 100644 --- a/plugins/af_psql_trgm/init.php +++ b/plugins/af_psql_trgm/init.php @@ -65,6 +65,7 @@ class Af_Psql_Trgm extends Plugin { ttrss_entries.id = ref_id AND ttrss_user_entries.owner_uid = $owner_uid AND ttrss_entries.id != $id AND + score >= 0 AND date_entered >= NOW() - INTERVAL '2 weeks' ORDER BY sm DESC, date_entered DESC -- cgit v1.2.3-54-g00ecf