From 891e36f57e2e3955a4f13019e8fdc58fce826434 Mon Sep 17 00:00:00 2001 From: justauser Date: Thu, 27 Jun 2013 11:18:23 -0400 Subject: Changes to add a new hook: HOOK_QUERY_HEADLINES. An example is provided. --- classes/pluginhost.php | 1 + 1 file changed, 1 insertion(+) (limited to 'classes/pluginhost.php') diff --git a/classes/pluginhost.php b/classes/pluginhost.php index 53adf01f9..a1bd1b364 100644 --- a/classes/pluginhost.php +++ b/classes/pluginhost.php @@ -37,6 +37,7 @@ class PluginHost { const HOOK_PREFS_EDIT_FEED = 20; const HOOK_PREFS_SAVE_FEED = 21; const HOOK_FETCH_FEED = 22; + const HOOK_QUERY_HEADLINES = 23; const KIND_ALL = 1; const KIND_SYSTEM = 2; -- cgit v1.2.3-54-g00ecf From 8e47022036cdba907add2a375bf0c23a4043746f Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Fri, 2 Aug 2013 14:06:18 +0400 Subject: add hook_house_keeping --- classes/pluginhost.php | 1 + include/rssfuncs.php | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'classes/pluginhost.php') diff --git a/classes/pluginhost.php b/classes/pluginhost.php index a1bd1b364..b7b852278 100644 --- a/classes/pluginhost.php +++ b/classes/pluginhost.php @@ -38,6 +38,7 @@ class PluginHost { const HOOK_PREFS_SAVE_FEED = 21; const HOOK_FETCH_FEED = 22; const HOOK_QUERY_HEADLINES = 23; + const HOOK_HOUSE_KEEPING = 24; const KIND_ALL = 1; const KIND_SYSTEM = 2; diff --git a/include/rssfuncs.php b/include/rssfuncs.php index c0d81c351..e6866182b 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -1378,5 +1378,10 @@ $rc = cleanup_tags( 14, 50000); _debug("Cleaned $rc cached tags."); + + foreach ($pluginhost->get_hooks(PluginHost::HOOK_HOUSE_KEEPING) as $plugin) { + $plugin->hook_house_keeping(); + } + } ?> -- cgit v1.2.3-54-g00ecf From a96bb3d88ae2bc8f869c825de28ddb89851023a2 Mon Sep 17 00:00:00 2001 From: Dave Zaikos Date: Sat, 25 Jan 2014 02:18:27 -0500 Subject: Fixed a bug in PluginHost::del_hook() where the parameters passed to array_search() were in the incorrect order. --- classes/pluginhost.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'classes/pluginhost.php') diff --git a/classes/pluginhost.php b/classes/pluginhost.php index b7b852278..472f20186 100644 --- a/classes/pluginhost.php +++ b/classes/pluginhost.php @@ -99,7 +99,7 @@ class PluginHost { function del_hook($type, $sender) { if (is_array($this->hooks[$type])) { - $key = array_Search($this->hooks[$type], $sender); + $key = array_Search($sender, $this->hooks[$type]); if ($key !== FALSE) { unset($this->hooks[$type][$key]); } -- cgit v1.2.3-54-g00ecf From 84e36b61a380d7d72fe7b78afe41825aa828eff7 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Fri, 14 Mar 2014 15:06:37 +0400 Subject: make_init_params: add plugins --- classes/pluginhost.php | 10 ++++++++++ include/functions2.php | 1 + 2 files changed, 11 insertions(+) (limited to 'classes/pluginhost.php') diff --git a/classes/pluginhost.php b/classes/pluginhost.php index 472f20186..19f8b563b 100644 --- a/classes/pluginhost.php +++ b/classes/pluginhost.php @@ -75,6 +75,16 @@ class PluginHost { return $this->dbh; } + function get_plugin_names() { + $names = array(); + + foreach ($this->plugins as $p) { + array_push($names, get_class($p)); + } + + return $names; + } + function get_plugins() { return $this->plugins; } diff --git a/include/functions2.php b/include/functions2.php index 2cf4af22e..ca58ea6c5 100644 --- a/include/functions2.php +++ b/include/functions2.php @@ -18,6 +18,7 @@ $params["bw_limit"] = (int) $_SESSION["bw_limit"]; $params["label_base_index"] = (int) LABEL_BASE_INDEX; $params["theme"] = get_pref("USER_CSS_THEME", false, false); + $params["plugins"] = implode(", ", PluginHost::getInstance()->get_plugin_names()); $params["php_platform"] = PHP_OS; $params["php_version"] = PHP_VERSION; -- cgit v1.2.3-54-g00ecf From baaf4c3043f428ec009dd48b85e1c2d6cb8abee4 Mon Sep 17 00:00:00 2001 From: Rob Hoelz Date: Mon, 14 Apr 2014 23:18:33 -0500 Subject: Make search mechanism pluggable Currently, TinyTinyRSS can use raw SQL or the Sphinx search engine for searching. It would be nice if other search engines (such as Xapian) could be used, or if features of the underlying SQL engine (such as MySQL's FULLTEXT indexes) could be leveraged. This commit makes searching into a plugin hook, falling back to the builtin behavior if no search plugin is active. The Sphinx search behavior has been broken out into a plugin. --- classes/feeds.php | 2 +- classes/pluginhost.php | 1 + config.php-dist | 4 - include/functions2.php | 50 +- include/sanity_check.php | 5 - include/sanity_config.php | 2 +- lib/sphinxapi.php | 1691 ----------------------------------- plugins/search_sphinx/init.php | 60 ++ plugins/search_sphinx/sphinxapi.php | 1691 +++++++++++++++++++++++++++++++++++ 9 files changed, 1760 insertions(+), 1746 deletions(-) delete mode 100644 lib/sphinxapi.php create mode 100644 plugins/search_sphinx/init.php create mode 100644 plugins/search_sphinx/sphinxapi.php (limited to 'classes/pluginhost.php') diff --git a/classes/feeds.php b/classes/feeds.php index ca470c032..89acb0d0b 100644 --- a/classes/feeds.php +++ b/classes/feeds.php @@ -1147,7 +1147,7 @@ class Feeds extends Handler_Protected { print "
"; - if (!SPHINX_ENABLED) { + if (count(PluginHost::getInstance()->get_hooks(PluginHost::HOOK_SEARCH)) == 0) { print ""; diff --git a/classes/pluginhost.php b/classes/pluginhost.php index 19f8b563b..4582c314a 100644 --- a/classes/pluginhost.php +++ b/classes/pluginhost.php @@ -39,6 +39,7 @@ class PluginHost { const HOOK_FETCH_FEED = 22; const HOOK_QUERY_HEADLINES = 23; const HOOK_HOUSE_KEEPING = 24; + const HOOK_SEARCH = 25; const KIND_ALL = 1; const KIND_SYSTEM = 2; diff --git a/config.php-dist b/config.php-dist index 999f00773..2d7a6d195 100644 --- a/config.php-dist +++ b/config.php-dist @@ -108,10 +108,6 @@ // *** Sphinx search *** // ********************* - define('SPHINX_ENABLED', false); - // Enable fulltext search using Sphinx (http://www.sphinxsearch.com) - // Please see http://tt-rss.org/wiki/SphinxSearch for more information. - define('SPHINX_SERVER', 'localhost:9312'); // Hostname:port combination for the Sphinx server. diff --git a/include/functions2.php b/include/functions2.php index ca58ea6c5..22c602362 100644 --- a/include/functions2.php +++ b/include/functions2.php @@ -397,20 +397,15 @@ $search_words = array(); if ($search) { + foreach (PluginHost::getInstance()->get_hooks(PluginHost::HOOK_SEARCH) as $plugin) { + list($search_query_part, $search_words) = $plugin->hook_search($search); + } - if (SPHINX_ENABLED) { - $ids = join(",", @sphinx_search($search, 0, 500)); - - if ($ids) - $search_query_part = "ref_id IN ($ids) AND "; - else - $search_query_part = "ref_id = -1 AND "; - - } else { + // fall back in case of no plugins + if (!$search_query_part) { list($search_query_part, $search_words) = search_to_sql($search); - $search_query_part .= " AND "; } - + $search_query_part .= " AND "; } else { $search_query_part = ""; } @@ -1994,39 +1989,6 @@ } } - function sphinx_search($query, $offset = 0, $limit = 30) { - require_once 'lib/sphinxapi.php'; - - $sphinxClient = new SphinxClient(); - - $sphinxpair = explode(":", SPHINX_SERVER, 2); - - $sphinxClient->SetServer($sphinxpair[0], (int)$sphinxpair[1]); - $sphinxClient->SetConnectTimeout(1); - - $sphinxClient->SetFieldWeights(array('title' => 70, 'content' => 30, - 'feed_title' => 20)); - - $sphinxClient->SetMatchMode(SPH_MATCH_EXTENDED2); - $sphinxClient->SetRankingMode(SPH_RANK_PROXIMITY_BM25); - $sphinxClient->SetLimits($offset, $limit, 1000); - $sphinxClient->SetArrayResult(false); - $sphinxClient->SetFilter('owner_uid', array($_SESSION['uid'])); - - $result = $sphinxClient->Query($query, SPHINX_INDEX); - - $ids = array(); - - if (is_array($result['matches'])) { - foreach (array_keys($result['matches']) as $int_id) { - $ref_id = $result['matches'][$int_id]['attrs']['ref_id']; - array_push($ids, $ref_id); - } - } - - return $ids; - } - function cleanup_tags($days = 14, $limit = 1000) { if (DB_TYPE == "pgsql") { diff --git a/include/sanity_check.php b/include/sanity_check.php index 1aa581bfc..6bec43051 100644 --- a/include/sanity_check.php +++ b/include/sanity_check.php @@ -146,11 +146,6 @@ array_push($errors, "PHP support for CURL is required for PubSubHubbub."); } - if (SPHINX_ENABLED && class_exists("SphinxClient")) { - array_push($errors, "Your PHP has a separate systemwide Sphinx client installed which conflicts with the client library used by tt-rss. Either remove the system library or disable Sphinx support."); - - } - if (!class_exists("DOMDocument")) { array_push($errors, "PHP support for DOMDocument is required, but was not found."); } diff --git a/include/sanity_config.php b/include/sanity_config.php index 99e83e0d9..76fba4b81 100644 --- a/include/sanity_config.php +++ b/include/sanity_config.php @@ -1,3 +1,3 @@ +$requred_defines = array( 'DB_TYPE', 'DB_HOST', 'DB_USER', 'DB_NAME', 'DB_PASS', 'MYSQL_CHARSET', 'SELF_URL_PATH', 'FEED_CRYPT_KEY', 'SINGLE_USER_MODE', 'SIMPLE_UPDATE_MODE', 'PHP_EXECUTABLE', 'LOCK_DIRECTORY', 'CACHE_DIR', 'ICONS_DIR', 'ICONS_URL', 'AUTH_AUTO_CREATE', 'AUTH_AUTO_LOGIN', 'FORCE_ARTICLE_PURGE', 'PUBSUBHUBBUB_HUB', 'PUBSUBHUBBUB_ENABLED', 'ENABLE_REGISTRATION', 'REG_NOTIFY_ADDRESS', 'REG_MAX_USERS', 'SESSION_COOKIE_LIFETIME', 'SESSION_CHECK_ADDRESS', 'SMTP_FROM_NAME', 'SMTP_FROM_ADDRESS', 'DIGEST_SUBJECT', 'SMTP_SERVER', 'SMTP_LOGIN', 'SMTP_PASSWORD', 'SMTP_SECURE', 'CHECK_FOR_NEW_VERSION', 'DETECT_ARTICLE_LANGUAGE', 'ENABLE_GZIP_OUTPUT', 'PLUGINS', 'LOG_DESTINATION', 'CONFIG_VERSION'); ?> diff --git a/lib/sphinxapi.php b/lib/sphinxapi.php deleted file mode 100644 index 90643d3c7..000000000 --- a/lib/sphinxapi.php +++ /dev/null @@ -1,1691 +0,0 @@ -=8 ) - { - $v = (int)$v; - return pack ( "NN", $v>>32, $v&0xFFFFFFFF ); - } - - // x32, int - if ( is_int($v) ) - return pack ( "NN", $v < 0 ? -1 : 0, $v ); - - // x32, bcmath - if ( function_exists("bcmul") ) - { - if ( bccomp ( $v, 0 ) == -1 ) - $v = bcadd ( "18446744073709551616", $v ); - $h = bcdiv ( $v, "4294967296", 0 ); - $l = bcmod ( $v, "4294967296" ); - return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit - } - - // x32, no-bcmath - $p = max(0, strlen($v) - 13); - $lo = abs((float)substr($v, $p)); - $hi = abs((float)substr($v, 0, $p)); - - $m = $lo + $hi*1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912 - $q = floor($m/4294967296.0); - $l = $m - ($q*4294967296.0); - $h = $hi*2328.0 + $q; // (10 ^ 13) / (1 << 32) = 2328 - - if ( $v<0 ) - { - if ( $l==0 ) - $h = 4294967296.0 - $h; - else - { - $h = 4294967295.0 - $h; - $l = 4294967296.0 - $l; - } - } - return pack ( "NN", $h, $l ); -} - -/// pack 64-bit unsigned -function sphPackU64 ( $v ) -{ - assert ( is_numeric($v) ); - - // x64 - if ( PHP_INT_SIZE>=8 ) - { - assert ( $v>=0 ); - - // x64, int - if ( is_int($v) ) - return pack ( "NN", $v>>32, $v&0xFFFFFFFF ); - - // x64, bcmath - if ( function_exists("bcmul") ) - { - $h = bcdiv ( $v, 4294967296, 0 ); - $l = bcmod ( $v, 4294967296 ); - return pack ( "NN", $h, $l ); - } - - // x64, no-bcmath - $p = max ( 0, strlen($v) - 13 ); - $lo = (int)substr ( $v, $p ); - $hi = (int)substr ( $v, 0, $p ); - - $m = $lo + $hi*1316134912; - $l = $m % 4294967296; - $h = $hi*2328 + (int)($m/4294967296); - - return pack ( "NN", $h, $l ); - } - - // x32, int - if ( is_int($v) ) - return pack ( "NN", 0, $v ); - - // x32, bcmath - if ( function_exists("bcmul") ) - { - $h = bcdiv ( $v, "4294967296", 0 ); - $l = bcmod ( $v, "4294967296" ); - return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit - } - - // x32, no-bcmath - $p = max(0, strlen($v) - 13); - $lo = (float)substr($v, $p); - $hi = (float)substr($v, 0, $p); - - $m = $lo + $hi*1316134912.0; - $q = floor($m / 4294967296.0); - $l = $m - ($q * 4294967296.0); - $h = $hi*2328.0 + $q; - - return pack ( "NN", $h, $l ); -} - -// unpack 64-bit unsigned -function sphUnpackU64 ( $v ) -{ - list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) ); - - if ( PHP_INT_SIZE>=8 ) - { - if ( $hi<0 ) $hi += (1<<32); // because php 5.2.2 to 5.2.5 is totally fucked up again - if ( $lo<0 ) $lo += (1<<32); - - // x64, int - if ( $hi<=2147483647 ) - return ($hi<<32) + $lo; - - // x64, bcmath - if ( function_exists("bcmul") ) - return bcadd ( $lo, bcmul ( $hi, "4294967296" ) ); - - // x64, no-bcmath - $C = 100000; - $h = ((int)($hi / $C) << 32) + (int)($lo / $C); - $l = (($hi % $C) << 32) + ($lo % $C); - if ( $l>$C ) - { - $h += (int)($l / $C); - $l = $l % $C; - } - - if ( $h==0 ) - return $l; - return sprintf ( "%d%05d", $h, $l ); - } - - // x32, int - if ( $hi==0 ) - { - if ( $lo>0 ) - return $lo; - return sprintf ( "%u", $lo ); - } - - $hi = sprintf ( "%u", $hi ); - $lo = sprintf ( "%u", $lo ); - - // x32, bcmath - if ( function_exists("bcmul") ) - return bcadd ( $lo, bcmul ( $hi, "4294967296" ) ); - - // x32, no-bcmath - $hi = (float)$hi; - $lo = (float)$lo; - - $q = floor($hi/10000000.0); - $r = $hi - $q*10000000.0; - $m = $lo + $r*4967296.0; - $mq = floor($m/10000000.0); - $l = $m - $mq*10000000.0; - $h = $q*4294967296.0 + $r*429.0 + $mq; - - $h = sprintf ( "%.0f", $h ); - $l = sprintf ( "%07.0f", $l ); - if ( $h=="0" ) - return sprintf( "%.0f", (float)$l ); - return $h . $l; -} - -// unpack 64-bit signed -function sphUnpackI64 ( $v ) -{ - list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) ); - - // x64 - if ( PHP_INT_SIZE>=8 ) - { - if ( $hi<0 ) $hi += (1<<32); // because php 5.2.2 to 5.2.5 is totally fucked up again - if ( $lo<0 ) $lo += (1<<32); - - return ($hi<<32) + $lo; - } - - // x32, int - if ( $hi==0 ) - { - if ( $lo>0 ) - return $lo; - return sprintf ( "%u", $lo ); - } - // x32, int - elseif ( $hi==-1 ) - { - if ( $lo<0 ) - return $lo; - return sprintf ( "%.0f", $lo - 4294967296.0 ); - } - - $neg = ""; - $c = 0; - if ( $hi<0 ) - { - $hi = ~$hi; - $lo = ~$lo; - $c = 1; - $neg = "-"; - } - - $hi = sprintf ( "%u", $hi ); - $lo = sprintf ( "%u", $lo ); - - // x32, bcmath - if ( function_exists("bcmul") ) - return $neg . bcadd ( bcadd ( $lo, bcmul ( $hi, "4294967296" ) ), $c ); - - // x32, no-bcmath - $hi = (float)$hi; - $lo = (float)$lo; - - $q = floor($hi/10000000.0); - $r = $hi - $q*10000000.0; - $m = $lo + $r*4967296.0; - $mq = floor($m/10000000.0); - $l = $m - $mq*10000000.0 + $c; - $h = $q*4294967296.0 + $r*429.0 + $mq; - if ( $l==10000000 ) - { - $l = 0; - $h += 1; - } - - $h = sprintf ( "%.0f", $h ); - $l = sprintf ( "%07.0f", $l ); - if ( $h=="0" ) - return $neg . sprintf( "%.0f", (float)$l ); - return $neg . $h . $l; -} - - -function sphFixUint ( $value ) -{ - if ( PHP_INT_SIZE>=8 ) - { - // x64 route, workaround broken unpack() in 5.2.2+ - if ( $value<0 ) $value += (1<<32); - return $value; - } - else - { - // x32 route, workaround php signed/unsigned braindamage - return sprintf ( "%u", $value ); - } -} - - -/// sphinx searchd client class -class SphinxClient -{ - var $_host; ///< searchd host (default is "localhost") - var $_port; ///< searchd port (default is 9312) - var $_offset; ///< how many records to seek from result-set start (default is 0) - var $_limit; ///< how many records to return from result-set starting at offset (default is 20) - var $_mode; ///< query matching mode (default is SPH_MATCH_ALL) - var $_weights; ///< per-field weights (default is 1 for all fields) - var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE) - var $_sortby; ///< attribute to sort by (defualt is "") - var $_min_id; ///< min ID to match (default is 0, which means no limit) - var $_max_id; ///< max ID to match (default is 0, which means no limit) - var $_filters; ///< search filters - var $_groupby; ///< group-by attribute name - var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with) - var $_groupsort; ///< group-by sorting clause (to sort groups in result set with) - var $_groupdistinct;///< group-by count-distinct attribute - var $_maxmatches; ///< max matches to retrieve - var $_cutoff; ///< cutoff to stop searching at (default is 0) - var $_retrycount; ///< distributed retries count - var $_retrydelay; ///< distributed retries delay - var $_anchor; ///< geographical anchor point - var $_indexweights; ///< per-index weights - var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25) - var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit) - var $_fieldweights; ///< per-field-name weights - var $_overrides; ///< per-query attribute values overrides - var $_select; ///< select-list (attributes or expressions, with optional aliases) - - var $_error; ///< last error message - var $_warning; ///< last warning message - var $_connerror; ///< connection error vs remote error flag - - var $_reqs; ///< requests array for multi-query - var $_mbenc; ///< stored mbstring encoding - var $_arrayresult; ///< whether $result["matches"] should be a hash or an array - var $_timeout; ///< connect timeout - - ///////////////////////////////////////////////////////////////////////////// - // common stuff - ///////////////////////////////////////////////////////////////////////////// - - /// create a new client object and fill defaults - function SphinxClient () - { - // per-client-object settings - $this->_host = "localhost"; - $this->_port = 9312; - $this->_path = false; - $this->_socket = false; - - // per-query settings - $this->_offset = 0; - $this->_limit = 20; - $this->_mode = SPH_MATCH_ALL; - $this->_weights = array (); - $this->_sort = SPH_SORT_RELEVANCE; - $this->_sortby = ""; - $this->_min_id = 0; - $this->_max_id = 0; - $this->_filters = array (); - $this->_groupby = ""; - $this->_groupfunc = SPH_GROUPBY_DAY; - $this->_groupsort = "@group desc"; - $this->_groupdistinct= ""; - $this->_maxmatches = 1000; - $this->_cutoff = 0; - $this->_retrycount = 0; - $this->_retrydelay = 0; - $this->_anchor = array (); - $this->_indexweights= array (); - $this->_ranker = SPH_RANK_PROXIMITY_BM25; - $this->_maxquerytime= 0; - $this->_fieldweights= array(); - $this->_overrides = array(); - $this->_select = "*"; - - $this->_error = ""; // per-reply fields (for single-query case) - $this->_warning = ""; - $this->_connerror = false; - - $this->_reqs = array (); // requests storage (for multi-query case) - $this->_mbenc = ""; - $this->_arrayresult = false; - $this->_timeout = 0; - } - - function __destruct() - { - if ( $this->_socket !== false ) - fclose ( $this->_socket ); - } - - /// get last error message (string) - function GetLastError () - { - return $this->_error; - } - - /// get last warning message (string) - function GetLastWarning () - { - return $this->_warning; - } - - /// get last error flag (to tell network connection errors from searchd errors or broken responses) - function IsConnectError() - { - return $this->_connerror; - } - - /// set searchd host name (string) and port (integer) - function SetServer ( $host, $port = 0 ) - { - assert ( is_string($host) ); - if ( $host[0] == '/') - { - $this->_path = 'unix://' . $host; - return; - } - if ( substr ( $host, 0, 7 )=="unix://" ) - { - $this->_path = $host; - return; - } - - assert ( is_int($port) ); - $this->_host = $host; - $this->_port = $port; - $this->_path = ''; - - } - - /// set server connection timeout (0 to remove) - function SetConnectTimeout ( $timeout ) - { - assert ( is_numeric($timeout) ); - $this->_timeout = $timeout; - } - - - function _Send ( $handle, $data, $length ) - { - if ( feof($handle) || fwrite ( $handle, $data, $length ) !== $length ) - { - $this->_error = 'connection unexpectedly closed (timed out?)'; - $this->_connerror = true; - return false; - } - return true; - } - - ///////////////////////////////////////////////////////////////////////////// - - /// enter mbstring workaround mode - function _MBPush () - { - $this->_mbenc = ""; - if ( ini_get ( "mbstring.func_overload" ) & 2 ) - { - $this->_mbenc = mb_internal_encoding(); - mb_internal_encoding ( "latin1" ); - } - } - - /// leave mbstring workaround mode - function _MBPop () - { - if ( $this->_mbenc ) - mb_internal_encoding ( $this->_mbenc ); - } - - /// connect to searchd server - function _Connect () - { - if ( $this->_socket!==false ) - { - // we are in persistent connection mode, so we have a socket - // however, need to check whether it's still alive - if ( !@feof ( $this->_socket ) ) - return $this->_socket; - - // force reopen - $this->_socket = false; - } - - $errno = 0; - $errstr = ""; - $this->_connerror = false; - - if ( $this->_path ) - { - $host = $this->_path; - $port = 0; - } - else - { - $host = $this->_host; - $port = $this->_port; - } - - if ( $this->_timeout<=0 ) - $fp = @fsockopen ( $host, $port, $errno, $errstr ); - else - $fp = @fsockopen ( $host, $port, $errno, $errstr, $this->_timeout ); - - if ( !$fp ) - { - if ( $this->_path ) - $location = $this->_path; - else - $location = "{$this->_host}:{$this->_port}"; - - $errstr = trim ( $errstr ); - $this->_error = "connection to $location failed (errno=$errno, msg=$errstr)"; - $this->_connerror = true; - return false; - } - - // send my version - // this is a subtle part. we must do it before (!) reading back from searchd. - // because otherwise under some conditions (reported on FreeBSD for instance) - // TCP stack could throttle write-write-read pattern because of Nagle. - if ( !$this->_Send ( $fp, pack ( "N", 1 ), 4 ) ) - { - fclose ( $fp ); - $this->_error = "failed to send client protocol version"; - return false; - } - - // check version - list(,$v) = unpack ( "N*", fread ( $fp, 4 ) ); - $v = (int)$v; - if ( $v<1 ) - { - fclose ( $fp ); - $this->_error = "expected searchd protocol version 1+, got version '$v'"; - return false; - } - - return $fp; - } - - /// get and check response packet from searchd server - function _GetResponse ( $fp, $client_ver ) - { - $response = ""; - $len = 0; - - $header = fread ( $fp, 8 ); - if ( strlen($header)==8 ) - { - list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) ); - $left = $len; - while ( $left>0 && !feof($fp) ) - { - $chunk = fread ( $fp, min ( 8192, $left ) ); - if ( $chunk ) - { - $response .= $chunk; - $left -= strlen($chunk); - } - } - } - if ( $this->_socket === false ) - fclose ( $fp ); - - // check response - $read = strlen ( $response ); - if ( !$response || $read!=$len ) - { - $this->_error = $len - ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)" - : "received zero-sized searchd response"; - return false; - } - - // check status - if ( $status==SEARCHD_WARNING ) - { - list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) ); - $this->_warning = substr ( $response, 4, $wlen ); - return substr ( $response, 4+$wlen ); - } - if ( $status==SEARCHD_ERROR ) - { - $this->_error = "searchd error: " . substr ( $response, 4 ); - return false; - } - if ( $status==SEARCHD_RETRY ) - { - $this->_error = "temporary searchd error: " . substr ( $response, 4 ); - return false; - } - if ( $status!=SEARCHD_OK ) - { - $this->_error = "unknown status code '$status'"; - return false; - } - - // check version - if ( $ver<$client_ver ) - { - $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work", - $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff ); - } - - return $response; - } - - ///////////////////////////////////////////////////////////////////////////// - // searching - ///////////////////////////////////////////////////////////////////////////// - - /// set offset and count into result set, - /// and optionally set max-matches and cutoff limits - function SetLimits ( $offset, $limit, $max=0, $cutoff=0 ) - { - assert ( is_int($offset) ); - assert ( is_int($limit) ); - assert ( $offset>=0 ); - assert ( $limit>0 ); - assert ( $max>=0 ); - $this->_offset = $offset; - $this->_limit = $limit; - if ( $max>0 ) - $this->_maxmatches = $max; - if ( $cutoff>0 ) - $this->_cutoff = $cutoff; - } - - /// set maximum query time, in milliseconds, per-index - /// integer, 0 means "do not limit" - function SetMaxQueryTime ( $max ) - { - assert ( is_int($max) ); - assert ( $max>=0 ); - $this->_maxquerytime = $max; - } - - /// set matching mode - function SetMatchMode ( $mode ) - { - assert ( $mode==SPH_MATCH_ALL - || $mode==SPH_MATCH_ANY - || $mode==SPH_MATCH_PHRASE - || $mode==SPH_MATCH_BOOLEAN - || $mode==SPH_MATCH_EXTENDED - || $mode==SPH_MATCH_FULLSCAN - || $mode==SPH_MATCH_EXTENDED2 ); - $this->_mode = $mode; - } - - /// set ranking mode - function SetRankingMode ( $ranker ) - { - assert ( $ranker>=0 && $ranker_ranker = $ranker; - } - - /// set matches sorting mode - function SetSortMode ( $mode, $sortby="" ) - { - assert ( - $mode==SPH_SORT_RELEVANCE || - $mode==SPH_SORT_ATTR_DESC || - $mode==SPH_SORT_ATTR_ASC || - $mode==SPH_SORT_TIME_SEGMENTS || - $mode==SPH_SORT_EXTENDED || - $mode==SPH_SORT_EXPR ); - assert ( is_string($sortby) ); - assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 ); - - $this->_sort = $mode; - $this->_sortby = $sortby; - } - - /// bind per-field weights by order - /// DEPRECATED; use SetFieldWeights() instead - function SetWeights ( $weights ) - { - assert ( is_array($weights) ); - foreach ( $weights as $weight ) - assert ( is_int($weight) ); - - $this->_weights = $weights; - } - - /// bind per-field weights by name - function SetFieldWeights ( $weights ) - { - assert ( is_array($weights) ); - foreach ( $weights as $name=>$weight ) - { - assert ( is_string($name) ); - assert ( is_int($weight) ); - } - $this->_fieldweights = $weights; - } - - /// bind per-index weights by name - function SetIndexWeights ( $weights ) - { - assert ( is_array($weights) ); - foreach ( $weights as $index=>$weight ) - { - assert ( is_string($index) ); - assert ( is_int($weight) ); - } - $this->_indexweights = $weights; - } - - /// set IDs range to match - /// only match records if document ID is beetwen $min and $max (inclusive) - function SetIDRange ( $min, $max ) - { - assert ( is_numeric($min) ); - assert ( is_numeric($max) ); - assert ( $min<=$max ); - $this->_min_id = $min; - $this->_max_id = $max; - } - - /// set values set filter - /// only match records where $attribute value is in given set - function SetFilter ( $attribute, $values, $exclude=false ) - { - assert ( is_string($attribute) ); - assert ( is_array($values) ); - assert ( count($values) ); - - if ( is_array($values) && count($values) ) - { - foreach ( $values as $value ) - assert ( is_numeric($value) ); - - $this->_filters[] = array ( "type"=>SPH_FILTER_VALUES, "attr"=>$attribute, "exclude"=>$exclude, "values"=>$values ); - } - } - - /// set range filter - /// only match records if $attribute value is beetwen $min and $max (inclusive) - function SetFilterRange ( $attribute, $min, $max, $exclude=false ) - { - assert ( is_string($attribute) ); - assert ( is_numeric($min) ); - assert ( is_numeric($max) ); - assert ( $min<=$max ); - - $this->_filters[] = array ( "type"=>SPH_FILTER_RANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max ); - } - - /// set float range filter - /// only match records if $attribute value is beetwen $min and $max (inclusive) - function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false ) - { - assert ( is_string($attribute) ); - assert ( is_float($min) ); - assert ( is_float($max) ); - assert ( $min<=$max ); - - $this->_filters[] = array ( "type"=>SPH_FILTER_FLOATRANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max ); - } - - /// setup anchor point for geosphere distance calculations - /// required to use @geodist in filters and sorting - /// latitude and longitude must be in radians - function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long ) - { - assert ( is_string($attrlat) ); - assert ( is_string($attrlong) ); - assert ( is_float($lat) ); - assert ( is_float($long) ); - - $this->_anchor = array ( "attrlat"=>$attrlat, "attrlong"=>$attrlong, "lat"=>$lat, "long"=>$long ); - } - - /// set grouping attribute and function - function SetGroupBy ( $attribute, $func, $groupsort="@group desc" ) - { - assert ( is_string($attribute) ); - assert ( is_string($groupsort) ); - assert ( $func==SPH_GROUPBY_DAY - || $func==SPH_GROUPBY_WEEK - || $func==SPH_GROUPBY_MONTH - || $func==SPH_GROUPBY_YEAR - || $func==SPH_GROUPBY_ATTR - || $func==SPH_GROUPBY_ATTRPAIR ); - - $this->_groupby = $attribute; - $this->_groupfunc = $func; - $this->_groupsort = $groupsort; - } - - /// set count-distinct attribute for group-by queries - function SetGroupDistinct ( $attribute ) - { - assert ( is_string($attribute) ); - $this->_groupdistinct = $attribute; - } - - /// set distributed retries count and delay - function SetRetries ( $count, $delay=0 ) - { - assert ( is_int($count) && $count>=0 ); - assert ( is_int($delay) && $delay>=0 ); - $this->_retrycount = $count; - $this->_retrydelay = $delay; - } - - /// set result set format (hash or array; hash by default) - /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs - function SetArrayResult ( $arrayresult ) - { - assert ( is_bool($arrayresult) ); - $this->_arrayresult = $arrayresult; - } - - /// set attribute values override - /// there can be only one override per attribute - /// $values must be a hash that maps document IDs to attribute values - function SetOverride ( $attrname, $attrtype, $values ) - { - assert ( is_string ( $attrname ) ); - assert ( in_array ( $attrtype, array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT ) ) ); - assert ( is_array ( $values ) ); - - $this->_overrides[$attrname] = array ( "attr"=>$attrname, "type"=>$attrtype, "values"=>$values ); - } - - /// set select-list (attributes or expressions), SQL-like syntax - function SetSelect ( $select ) - { - assert ( is_string ( $select ) ); - $this->_select = $select; - } - - ////////////////////////////////////////////////////////////////////////////// - - /// clear all filters (for multi-queries) - function ResetFilters () - { - $this->_filters = array(); - $this->_anchor = array(); - } - - /// clear groupby settings (for multi-queries) - function ResetGroupBy () - { - $this->_groupby = ""; - $this->_groupfunc = SPH_GROUPBY_DAY; - $this->_groupsort = "@group desc"; - $this->_groupdistinct= ""; - } - - /// clear all attribute value overrides (for multi-queries) - function ResetOverrides () - { - $this->_overrides = array (); - } - - ////////////////////////////////////////////////////////////////////////////// - - /// connect to searchd server, run given search query through given indexes, - /// and return the search results - function Query ( $query, $index="*", $comment="" ) - { - assert ( empty($this->_reqs) ); - - $this->AddQuery ( $query, $index, $comment ); - $results = $this->RunQueries (); - $this->_reqs = array (); // just in case it failed too early - - if ( !is_array($results) ) - return false; // probably network error; error message should be already filled - - $this->_error = $results[0]["error"]; - $this->_warning = $results[0]["warning"]; - if ( $results[0]["status"]==SEARCHD_ERROR ) - return false; - else - return $results[0]; - } - - /// helper to pack floats in network byte order - function _PackFloat ( $f ) - { - $t1 = pack ( "f", $f ); // machine order - list(,$t2) = unpack ( "L*", $t1 ); // int in machine order - return pack ( "N", $t2 ); - } - - /// add query to multi-query batch - /// returns index into results array from RunQueries() call - function AddQuery ( $query, $index="*", $comment="" ) - { - // mbstring workaround - $this->_MBPush (); - - // build request - $req = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort ); // mode and limits - $req .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby; - $req .= pack ( "N", strlen($query) ) . $query; // query itself - $req .= pack ( "N", count($this->_weights) ); // weights - foreach ( $this->_weights as $weight ) - $req .= pack ( "N", (int)$weight ); - $req .= pack ( "N", strlen($index) ) . $index; // indexes - $req .= pack ( "N", 1 ); // id64 range marker - $req .= sphPackU64 ( $this->_min_id ) . sphPackU64 ( $this->_max_id ); // id64 range - - // filters - $req .= pack ( "N", count($this->_filters) ); - foreach ( $this->_filters as $filter ) - { - $req .= pack ( "N", strlen($filter["attr"]) ) . $filter["attr"]; - $req .= pack ( "N", $filter["type"] ); - switch ( $filter["type"] ) - { - case SPH_FILTER_VALUES: - $req .= pack ( "N", count($filter["values"]) ); - foreach ( $filter["values"] as $value ) - $req .= sphPackI64 ( $value ); - break; - - case SPH_FILTER_RANGE: - $req .= sphPackI64 ( $filter["min"] ) . sphPackI64 ( $filter["max"] ); - break; - - case SPH_FILTER_FLOATRANGE: - $req .= $this->_PackFloat ( $filter["min"] ) . $this->_PackFloat ( $filter["max"] ); - break; - - default: - assert ( 0 && "internal error: unhandled filter type" ); - } - $req .= pack ( "N", $filter["exclude"] ); - } - - // group-by clause, max-matches count, group-sort clause, cutoff count - $req .= pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby; - $req .= pack ( "N", $this->_maxmatches ); - $req .= pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort; - $req .= pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay ); - $req .= pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct; - - // anchor point - if ( empty($this->_anchor) ) - { - $req .= pack ( "N", 0 ); - } else - { - $a =& $this->_anchor; - $req .= pack ( "N", 1 ); - $req .= pack ( "N", strlen($a["attrlat"]) ) . $a["attrlat"]; - $req .= pack ( "N", strlen($a["attrlong"]) ) . $a["attrlong"]; - $req .= $this->_PackFloat ( $a["lat"] ) . $this->_PackFloat ( $a["long"] ); - } - - // per-index weights - $req .= pack ( "N", count($this->_indexweights) ); - foreach ( $this->_indexweights as $idx=>$weight ) - $req .= pack ( "N", strlen($idx) ) . $idx . pack ( "N", $weight ); - - // max query time - $req .= pack ( "N", $this->_maxquerytime ); - - // per-field weights - $req .= pack ( "N", count($this->_fieldweights) ); - foreach ( $this->_fieldweights as $field=>$weight ) - $req .= pack ( "N", strlen($field) ) . $field . pack ( "N", $weight ); - - // comment - $req .= pack ( "N", strlen($comment) ) . $comment; - - // attribute overrides - $req .= pack ( "N", count($this->_overrides) ); - foreach ( $this->_overrides as $key => $entry ) - { - $req .= pack ( "N", strlen($entry["attr"]) ) . $entry["attr"]; - $req .= pack ( "NN", $entry["type"], count($entry["values"]) ); - foreach ( $entry["values"] as $id=>$val ) - { - assert ( is_numeric($id) ); - assert ( is_numeric($val) ); - - $req .= sphPackU64 ( $id ); - switch ( $entry["type"] ) - { - case SPH_ATTR_FLOAT: $req .= $this->_PackFloat ( $val ); break; - case SPH_ATTR_BIGINT: $req .= sphPackI64 ( $val ); break; - default: $req .= pack ( "N", $val ); break; - } - } - } - - // select-list - $req .= pack ( "N", strlen($this->_select) ) . $this->_select; - - // mbstring workaround - $this->_MBPop (); - - // store request to requests array - $this->_reqs[] = $req; - return count($this->_reqs)-1; - } - - /// connect to searchd, run queries batch, and return an array of result sets - function RunQueries () - { - if ( empty($this->_reqs) ) - { - $this->_error = "no queries defined, issue AddQuery() first"; - return false; - } - - // mbstring workaround - $this->_MBPush (); - - if (!( $fp = $this->_Connect() )) - { - $this->_MBPop (); - return false; - } - - // send query, get response - $nreqs = count($this->_reqs); - $req = join ( "", $this->_reqs ); - $len = 8+strlen($req); - $req = pack ( "nnNNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, 0, $nreqs ) . $req; // add header - - if ( !( $this->_Send ( $fp, $req, $len+8 ) ) || - !( $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH ) ) ) - { - $this->_MBPop (); - return false; - } - - // query sent ok; we can reset reqs now - $this->_reqs = array (); - - // parse and return response - return $this->_ParseSearchResponse ( $response, $nreqs ); - } - - /// parse and return search query (or queries) response - function _ParseSearchResponse ( $response, $nreqs ) - { - $p = 0; // current position - $max = strlen($response); // max position for checks, to protect against broken responses - - $results = array (); - for ( $ires=0; $ires<$nreqs && $p<$max; $ires++ ) - { - $results[] = array(); - $result =& $results[$ires]; - - $result["error"] = ""; - $result["warning"] = ""; - - // extract status - list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - $result["status"] = $status; - if ( $status!=SEARCHD_OK ) - { - list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - $message = substr ( $response, $p, $len ); $p += $len; - - if ( $status==SEARCHD_WARNING ) - { - $result["warning"] = $message; - } else - { - $result["error"] = $message; - continue; - } - } - - // read schema - $fields = array (); - $attrs = array (); - - list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - while ( $nfields-->0 && $p<$max ) - { - list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - $fields[] = substr ( $response, $p, $len ); $p += $len; - } - $result["fields"] = $fields; - - list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - while ( $nattrs-->0 && $p<$max ) - { - list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - $attr = substr ( $response, $p, $len ); $p += $len; - list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - $attrs[$attr] = $type; - } - $result["attrs"] = $attrs; - - // read match count - list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - - // read matches - $idx = -1; - while ( $count-->0 && $p<$max ) - { - // index into result array - $idx++; - - // parse document id and weight - if ( $id64 ) - { - $doc = sphUnpackU64 ( substr ( $response, $p, 8 ) ); $p += 8; - list(,$weight) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - } - else - { - list ( $doc, $weight ) = array_values ( unpack ( "N*N*", - substr ( $response, $p, 8 ) ) ); - $p += 8; - $doc = sphFixUint($doc); - } - $weight = sprintf ( "%u", $weight ); - - // create match entry - if ( $this->_arrayresult ) - $result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight ); - else - $result["matches"][$doc]["weight"] = $weight; - - // parse and create attributes - $attrvals = array (); - foreach ( $attrs as $attr=>$type ) - { - // handle 64bit ints - if ( $type==SPH_ATTR_BIGINT ) - { - $attrvals[$attr] = sphUnpackI64 ( substr ( $response, $p, 8 ) ); $p += 8; - continue; - } - - // handle floats - if ( $type==SPH_ATTR_FLOAT ) - { - list(,$uval) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - list(,$fval) = unpack ( "f*", pack ( "L", $uval ) ); - $attrvals[$attr] = $fval; - continue; - } - - // handle everything else as unsigned ints - list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - if ( $type & SPH_ATTR_MULTI ) - { - $attrvals[$attr] = array (); - $nvalues = $val; - while ( $nvalues-->0 && $p<$max ) - { - list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - $attrvals[$attr][] = sphFixUint($val); - } - } else if ( $type==SPH_ATTR_STRING ) - { - $attrvals[$attr] = substr ( $response, $p, $val ); - $p += $val; - } else - { - $attrvals[$attr] = sphFixUint($val); - } - } - - if ( $this->_arrayresult ) - $result["matches"][$idx]["attrs"] = $attrvals; - else - $result["matches"][$doc]["attrs"] = $attrvals; - } - - list ( $total, $total_found, $msecs, $words ) = - array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) ); - $result["total"] = sprintf ( "%u", $total ); - $result["total_found"] = sprintf ( "%u", $total_found ); - $result["time"] = sprintf ( "%.3f", $msecs/1000 ); - $p += 16; - - while ( $words-->0 && $p<$max ) - { - list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - $word = substr ( $response, $p, $len ); $p += $len; - list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8; - $result["words"][$word] = array ( - "docs"=>sprintf ( "%u", $docs ), - "hits"=>sprintf ( "%u", $hits ) ); - } - } - - $this->_MBPop (); - return $results; - } - - ///////////////////////////////////////////////////////////////////////////// - // excerpts generation - ///////////////////////////////////////////////////////////////////////////// - - /// connect to searchd server, and generate exceprts (snippets) - /// of given documents for given query. returns false on failure, - /// an array of snippets on success - function BuildExcerpts ( $docs, $index, $words, $opts=array() ) - { - assert ( is_array($docs) ); - assert ( is_string($index) ); - assert ( is_string($words) ); - assert ( is_array($opts) ); - - $this->_MBPush (); - - if (!( $fp = $this->_Connect() )) - { - $this->_MBPop(); - return false; - } - - ///////////////// - // fixup options - ///////////////// - - if ( !isset($opts["before_match"]) ) $opts["before_match"] = ""; - if ( !isset($opts["after_match"]) ) $opts["after_match"] = ""; - if ( !isset($opts["chunk_separator"]) ) $opts["chunk_separator"] = " ... "; - if ( !isset($opts["limit"]) ) $opts["limit"] = 256; - if ( !isset($opts["limit_passages"]) ) $opts["limit_passages"] = 0; - if ( !isset($opts["limit_words"]) ) $opts["limit_words"] = 0; - if ( !isset($opts["around"]) ) $opts["around"] = 5; - if ( !isset($opts["exact_phrase"]) ) $opts["exact_phrase"] = false; - if ( !isset($opts["single_passage"]) ) $opts["single_passage"] = false; - if ( !isset($opts["use_boundaries"]) ) $opts["use_boundaries"] = false; - if ( !isset($opts["weight_order"]) ) $opts["weight_order"] = false; - if ( !isset($opts["query_mode"]) ) $opts["query_mode"] = false; - if ( !isset($opts["force_all_words"]) ) $opts["force_all_words"] = false; - if ( !isset($opts["start_passage_id"]) ) $opts["start_passage_id"] = 1; - if ( !isset($opts["load_files"]) ) $opts["load_files"] = false; - if ( !isset($opts["html_strip_mode"]) ) $opts["html_strip_mode"] = "index"; - if ( !isset($opts["allow_empty"]) ) $opts["allow_empty"] = false; - if ( !isset($opts["passage_boundary"]) ) $opts["passage_boundary"] = "none"; - if ( !isset($opts["emit_zones"]) ) $opts["emit_zones"] = false; - - ///////////////// - // build request - ///////////////// - - // v.1.2 req - $flags = 1; // remove spaces - if ( $opts["exact_phrase"] ) $flags |= 2; - if ( $opts["single_passage"] ) $flags |= 4; - if ( $opts["use_boundaries"] ) $flags |= 8; - if ( $opts["weight_order"] ) $flags |= 16; - if ( $opts["query_mode"] ) $flags |= 32; - if ( $opts["force_all_words"] ) $flags |= 64; - if ( $opts["load_files"] ) $flags |= 128; - if ( $opts["allow_empty"] ) $flags |= 256; - if ( $opts["emit_zones"] ) $flags |= 512; - $req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags - $req .= pack ( "N", strlen($index) ) . $index; // req index - $req .= pack ( "N", strlen($words) ) . $words; // req words - - // options - $req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"]; - $req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"]; - $req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"]; - $req .= pack ( "NN", (int)$opts["limit"], (int)$opts["around"] ); - $req .= pack ( "NNN", (int)$opts["limit_passages"], (int)$opts["limit_words"], (int)$opts["start_passage_id"] ); // v.1.2 - $req .= pack ( "N", strlen($opts["html_strip_mode"]) ) . $opts["html_strip_mode"]; - $req .= pack ( "N", strlen($opts["passage_boundary"]) ) . $opts["passage_boundary"]; - - // documents - $req .= pack ( "N", count($docs) ); - foreach ( $docs as $doc ) - { - assert ( is_string($doc) ); - $req .= pack ( "N", strlen($doc) ) . $doc; - } - - //////////////////////////// - // send query, get response - //////////////////////////// - - $len = strlen($req); - $req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header - if ( !( $this->_Send ( $fp, $req, $len+8 ) ) || - !( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ) ) - { - $this->_MBPop (); - return false; - } - - ////////////////// - // parse response - ////////////////// - - $pos = 0; - $res = array (); - $rlen = strlen($response); - for ( $i=0; $i $rlen ) - { - $this->_error = "incomplete reply"; - $this->_MBPop (); - return false; - } - $res[] = $len ? substr ( $response, $pos, $len ) : ""; - $pos += $len; - } - - $this->_MBPop (); - return $res; - } - - - ///////////////////////////////////////////////////////////////////////////// - // keyword generation - ///////////////////////////////////////////////////////////////////////////// - - /// connect to searchd server, and generate keyword list for a given query - /// returns false on failure, - /// an array of words on success - function BuildKeywords ( $query, $index, $hits ) - { - assert ( is_string($query) ); - assert ( is_string($index) ); - assert ( is_bool($hits) ); - - $this->_MBPush (); - - if (!( $fp = $this->_Connect() )) - { - $this->_MBPop(); - return false; - } - - ///////////////// - // build request - ///////////////// - - // v.1.0 req - $req = pack ( "N", strlen($query) ) . $query; // req query - $req .= pack ( "N", strlen($index) ) . $index; // req index - $req .= pack ( "N", (int)$hits ); - - //////////////////////////// - // send query, get response - //////////////////////////// - - $len = strlen($req); - $req = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header - if ( !( $this->_Send ( $fp, $req, $len+8 ) ) || - !( $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS ) ) ) - { - $this->_MBPop (); - return false; - } - - ////////////////// - // parse response - ////////////////// - - $pos = 0; - $res = array (); - $rlen = strlen($response); - list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) ); - $pos += 4; - for ( $i=0; $i<$nwords; $i++ ) - { - list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4; - $tokenized = $len ? substr ( $response, $pos, $len ) : ""; - $pos += $len; - - list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4; - $normalized = $len ? substr ( $response, $pos, $len ) : ""; - $pos += $len; - - $res[] = array ( "tokenized"=>$tokenized, "normalized"=>$normalized ); - - if ( $hits ) - { - list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) ); - $pos += 8; - $res [$i]["docs"] = $ndocs; - $res [$i]["hits"] = $nhits; - } - - if ( $pos > $rlen ) - { - $this->_error = "incomplete reply"; - $this->_MBPop (); - return false; - } - } - - $this->_MBPop (); - return $res; - } - - function EscapeString ( $string ) - { - $from = array ( '\\', '(',')','|','-','!','@','~','"','&', '/', '^', '$', '=' ); - $to = array ( '\\\\', '\(','\)','\|','\-','\!','\@','\~','\"', '\&', '\/', '\^', '\$', '\=' ); - - return str_replace ( $from, $to, $string ); - } - - ///////////////////////////////////////////////////////////////////////////// - // attribute updates - ///////////////////////////////////////////////////////////////////////////// - - /// batch update given attributes in given rows in given indexes - /// returns amount of updated documents (0 or more) on success, or -1 on failure - function UpdateAttributes ( $index, $attrs, $values, $mva=false ) - { - // verify everything - assert ( is_string($index) ); - assert ( is_bool($mva) ); - - assert ( is_array($attrs) ); - foreach ( $attrs as $attr ) - assert ( is_string($attr) ); - - assert ( is_array($values) ); - foreach ( $values as $id=>$entry ) - { - assert ( is_numeric($id) ); - assert ( is_array($entry) ); - assert ( count($entry)==count($attrs) ); - foreach ( $entry as $v ) - { - if ( $mva ) - { - assert ( is_array($v) ); - foreach ( $v as $vv ) - assert ( is_int($vv) ); - } else - assert ( is_int($v) ); - } - } - - // build request - $this->_MBPush (); - $req = pack ( "N", strlen($index) ) . $index; - - $req .= pack ( "N", count($attrs) ); - foreach ( $attrs as $attr ) - { - $req .= pack ( "N", strlen($attr) ) . $attr; - $req .= pack ( "N", $mva ? 1 : 0 ); - } - - $req .= pack ( "N", count($values) ); - foreach ( $values as $id=>$entry ) - { - $req .= sphPackU64 ( $id ); - foreach ( $entry as $v ) - { - $req .= pack ( "N", $mva ? count($v) : $v ); - if ( $mva ) - foreach ( $v as $vv ) - $req .= pack ( "N", $vv ); - } - } - - // connect, send query, get response - if (!( $fp = $this->_Connect() )) - { - $this->_MBPop (); - return -1; - } - - $len = strlen($req); - $req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header - if ( !$this->_Send ( $fp, $req, $len+8 ) ) - { - $this->_MBPop (); - return -1; - } - - if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) )) - { - $this->_MBPop (); - return -1; - } - - // parse response - list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) ); - $this->_MBPop (); - return $updated; - } - - ///////////////////////////////////////////////////////////////////////////// - // persistent connections - ///////////////////////////////////////////////////////////////////////////// - - function Open() - { - if ( $this->_socket !== false ) - { - $this->_error = 'already connected'; - return false; - } - if ( !$fp = $this->_Connect() ) - return false; - - // command, command version = 0, body length = 4, body = 1 - $req = pack ( "nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1 ); - if ( !$this->_Send ( $fp, $req, 12 ) ) - return false; - - $this->_socket = $fp; - return true; - } - - function Close() - { - if ( $this->_socket === false ) - { - $this->_error = 'not connected'; - return false; - } - - fclose ( $this->_socket ); - $this->_socket = false; - - return true; - } - - ////////////////////////////////////////////////////////////////////////// - // status - ////////////////////////////////////////////////////////////////////////// - - function Status () - { - $this->_MBPush (); - if (!( $fp = $this->_Connect() )) - { - $this->_MBPop(); - return false; - } - - $req = pack ( "nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1 ); // len=4, body=1 - if ( !( $this->_Send ( $fp, $req, 12 ) ) || - !( $response = $this->_GetResponse ( $fp, VER_COMMAND_STATUS ) ) ) - { - $this->_MBPop (); - return false; - } - - $res = substr ( $response, 4 ); // just ignore length, error handling, etc - $p = 0; - list ( $rows, $cols ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8; - - $res = array(); - for ( $i=0; $i<$rows; $i++ ) - for ( $j=0; $j<$cols; $j++ ) - { - list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; - $res[$i][] = substr ( $response, $p, $len ); $p += $len; - } - - $this->_MBPop (); - return $res; - } - - ////////////////////////////////////////////////////////////////////////// - // flush - ////////////////////////////////////////////////////////////////////////// - - function FlushAttributes () - { - $this->_MBPush (); - if (!( $fp = $this->_Connect() )) - { - $this->_MBPop(); - return -1; - } - - $req = pack ( "nnN", SEARCHD_COMMAND_FLUSHATTRS, VER_COMMAND_FLUSHATTRS, 0 ); // len=0 - if ( !( $this->_Send ( $fp, $req, 8 ) ) || - !( $response = $this->_GetResponse ( $fp, VER_COMMAND_FLUSHATTRS ) ) ) - { - $this->_MBPop (); - return -1; - } - - $tag = -1; - if ( strlen($response)==4 ) - list(,$tag) = unpack ( "N*", $response ); - else - $this->_error = "unexpected response length"; - - $this->_MBPop (); - return $tag; - } -} - -// -// $Id: sphinxapi.php 2758 2011-04-04 11:10:44Z kevg $ -// diff --git a/plugins/search_sphinx/init.php b/plugins/search_sphinx/init.php new file mode 100644 index 000000000..f2877e44d --- /dev/null +++ b/plugins/search_sphinx/init.php @@ -0,0 +1,60 @@ +add_hook($host::HOOK_SEARCH, $this); + + require_once __DIR__ . "/sphinxapi.php"; + } + + function hook_search($search) { + $offset = 0; + $limit = 500; + + $sphinxClient = new SphinxClient(); + + $sphinxpair = explode(":", SPHINX_SERVER, 2); + + $sphinxClient->SetServer($sphinxpair[0], (int)$sphinxpair[1]); + $sphinxClient->SetConnectTimeout(1); + + $sphinxClient->SetFieldWeights(array('title' => 70, 'content' => 30, + 'feed_title' => 20)); + + $sphinxClient->SetMatchMode(SPH_MATCH_EXTENDED2); + $sphinxClient->SetRankingMode(SPH_RANK_PROXIMITY_BM25); + $sphinxClient->SetLimits($offset, $limit, 1000); + $sphinxClient->SetArrayResult(false); + $sphinxClient->SetFilter('owner_uid', array($_SESSION['uid'])); + + $result = $sphinxClient->Query($search, SPHINX_INDEX); + + $ids = array(); + + if (is_array($result['matches'])) { + foreach (array_keys($result['matches']) as $int_id) { + $ref_id = $result['matches'][$int_id]['attrs']['ref_id']; + array_push($ids, $ref_id); + } + } + + $ids = join(",", $ids); + + if ($ids) + return array("ref_id IN ($ids)", array()); + else + return array("ref_id = -1", array()); + } + + function api_version() { + return 2; + } +} +?> diff --git a/plugins/search_sphinx/sphinxapi.php b/plugins/search_sphinx/sphinxapi.php new file mode 100644 index 000000000..90643d3c7 --- /dev/null +++ b/plugins/search_sphinx/sphinxapi.php @@ -0,0 +1,1691 @@ +=8 ) + { + $v = (int)$v; + return pack ( "NN", $v>>32, $v&0xFFFFFFFF ); + } + + // x32, int + if ( is_int($v) ) + return pack ( "NN", $v < 0 ? -1 : 0, $v ); + + // x32, bcmath + if ( function_exists("bcmul") ) + { + if ( bccomp ( $v, 0 ) == -1 ) + $v = bcadd ( "18446744073709551616", $v ); + $h = bcdiv ( $v, "4294967296", 0 ); + $l = bcmod ( $v, "4294967296" ); + return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit + } + + // x32, no-bcmath + $p = max(0, strlen($v) - 13); + $lo = abs((float)substr($v, $p)); + $hi = abs((float)substr($v, 0, $p)); + + $m = $lo + $hi*1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912 + $q = floor($m/4294967296.0); + $l = $m - ($q*4294967296.0); + $h = $hi*2328.0 + $q; // (10 ^ 13) / (1 << 32) = 2328 + + if ( $v<0 ) + { + if ( $l==0 ) + $h = 4294967296.0 - $h; + else + { + $h = 4294967295.0 - $h; + $l = 4294967296.0 - $l; + } + } + return pack ( "NN", $h, $l ); +} + +/// pack 64-bit unsigned +function sphPackU64 ( $v ) +{ + assert ( is_numeric($v) ); + + // x64 + if ( PHP_INT_SIZE>=8 ) + { + assert ( $v>=0 ); + + // x64, int + if ( is_int($v) ) + return pack ( "NN", $v>>32, $v&0xFFFFFFFF ); + + // x64, bcmath + if ( function_exists("bcmul") ) + { + $h = bcdiv ( $v, 4294967296, 0 ); + $l = bcmod ( $v, 4294967296 ); + return pack ( "NN", $h, $l ); + } + + // x64, no-bcmath + $p = max ( 0, strlen($v) - 13 ); + $lo = (int)substr ( $v, $p ); + $hi = (int)substr ( $v, 0, $p ); + + $m = $lo + $hi*1316134912; + $l = $m % 4294967296; + $h = $hi*2328 + (int)($m/4294967296); + + return pack ( "NN", $h, $l ); + } + + // x32, int + if ( is_int($v) ) + return pack ( "NN", 0, $v ); + + // x32, bcmath + if ( function_exists("bcmul") ) + { + $h = bcdiv ( $v, "4294967296", 0 ); + $l = bcmod ( $v, "4294967296" ); + return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit + } + + // x32, no-bcmath + $p = max(0, strlen($v) - 13); + $lo = (float)substr($v, $p); + $hi = (float)substr($v, 0, $p); + + $m = $lo + $hi*1316134912.0; + $q = floor($m / 4294967296.0); + $l = $m - ($q * 4294967296.0); + $h = $hi*2328.0 + $q; + + return pack ( "NN", $h, $l ); +} + +// unpack 64-bit unsigned +function sphUnpackU64 ( $v ) +{ + list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) ); + + if ( PHP_INT_SIZE>=8 ) + { + if ( $hi<0 ) $hi += (1<<32); // because php 5.2.2 to 5.2.5 is totally fucked up again + if ( $lo<0 ) $lo += (1<<32); + + // x64, int + if ( $hi<=2147483647 ) + return ($hi<<32) + $lo; + + // x64, bcmath + if ( function_exists("bcmul") ) + return bcadd ( $lo, bcmul ( $hi, "4294967296" ) ); + + // x64, no-bcmath + $C = 100000; + $h = ((int)($hi / $C) << 32) + (int)($lo / $C); + $l = (($hi % $C) << 32) + ($lo % $C); + if ( $l>$C ) + { + $h += (int)($l / $C); + $l = $l % $C; + } + + if ( $h==0 ) + return $l; + return sprintf ( "%d%05d", $h, $l ); + } + + // x32, int + if ( $hi==0 ) + { + if ( $lo>0 ) + return $lo; + return sprintf ( "%u", $lo ); + } + + $hi = sprintf ( "%u", $hi ); + $lo = sprintf ( "%u", $lo ); + + // x32, bcmath + if ( function_exists("bcmul") ) + return bcadd ( $lo, bcmul ( $hi, "4294967296" ) ); + + // x32, no-bcmath + $hi = (float)$hi; + $lo = (float)$lo; + + $q = floor($hi/10000000.0); + $r = $hi - $q*10000000.0; + $m = $lo + $r*4967296.0; + $mq = floor($m/10000000.0); + $l = $m - $mq*10000000.0; + $h = $q*4294967296.0 + $r*429.0 + $mq; + + $h = sprintf ( "%.0f", $h ); + $l = sprintf ( "%07.0f", $l ); + if ( $h=="0" ) + return sprintf( "%.0f", (float)$l ); + return $h . $l; +} + +// unpack 64-bit signed +function sphUnpackI64 ( $v ) +{ + list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) ); + + // x64 + if ( PHP_INT_SIZE>=8 ) + { + if ( $hi<0 ) $hi += (1<<32); // because php 5.2.2 to 5.2.5 is totally fucked up again + if ( $lo<0 ) $lo += (1<<32); + + return ($hi<<32) + $lo; + } + + // x32, int + if ( $hi==0 ) + { + if ( $lo>0 ) + return $lo; + return sprintf ( "%u", $lo ); + } + // x32, int + elseif ( $hi==-1 ) + { + if ( $lo<0 ) + return $lo; + return sprintf ( "%.0f", $lo - 4294967296.0 ); + } + + $neg = ""; + $c = 0; + if ( $hi<0 ) + { + $hi = ~$hi; + $lo = ~$lo; + $c = 1; + $neg = "-"; + } + + $hi = sprintf ( "%u", $hi ); + $lo = sprintf ( "%u", $lo ); + + // x32, bcmath + if ( function_exists("bcmul") ) + return $neg . bcadd ( bcadd ( $lo, bcmul ( $hi, "4294967296" ) ), $c ); + + // x32, no-bcmath + $hi = (float)$hi; + $lo = (float)$lo; + + $q = floor($hi/10000000.0); + $r = $hi - $q*10000000.0; + $m = $lo + $r*4967296.0; + $mq = floor($m/10000000.0); + $l = $m - $mq*10000000.0 + $c; + $h = $q*4294967296.0 + $r*429.0 + $mq; + if ( $l==10000000 ) + { + $l = 0; + $h += 1; + } + + $h = sprintf ( "%.0f", $h ); + $l = sprintf ( "%07.0f", $l ); + if ( $h=="0" ) + return $neg . sprintf( "%.0f", (float)$l ); + return $neg . $h . $l; +} + + +function sphFixUint ( $value ) +{ + if ( PHP_INT_SIZE>=8 ) + { + // x64 route, workaround broken unpack() in 5.2.2+ + if ( $value<0 ) $value += (1<<32); + return $value; + } + else + { + // x32 route, workaround php signed/unsigned braindamage + return sprintf ( "%u", $value ); + } +} + + +/// sphinx searchd client class +class SphinxClient +{ + var $_host; ///< searchd host (default is "localhost") + var $_port; ///< searchd port (default is 9312) + var $_offset; ///< how many records to seek from result-set start (default is 0) + var $_limit; ///< how many records to return from result-set starting at offset (default is 20) + var $_mode; ///< query matching mode (default is SPH_MATCH_ALL) + var $_weights; ///< per-field weights (default is 1 for all fields) + var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE) + var $_sortby; ///< attribute to sort by (defualt is "") + var $_min_id; ///< min ID to match (default is 0, which means no limit) + var $_max_id; ///< max ID to match (default is 0, which means no limit) + var $_filters; ///< search filters + var $_groupby; ///< group-by attribute name + var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with) + var $_groupsort; ///< group-by sorting clause (to sort groups in result set with) + var $_groupdistinct;///< group-by count-distinct attribute + var $_maxmatches; ///< max matches to retrieve + var $_cutoff; ///< cutoff to stop searching at (default is 0) + var $_retrycount; ///< distributed retries count + var $_retrydelay; ///< distributed retries delay + var $_anchor; ///< geographical anchor point + var $_indexweights; ///< per-index weights + var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25) + var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit) + var $_fieldweights; ///< per-field-name weights + var $_overrides; ///< per-query attribute values overrides + var $_select; ///< select-list (attributes or expressions, with optional aliases) + + var $_error; ///< last error message + var $_warning; ///< last warning message + var $_connerror; ///< connection error vs remote error flag + + var $_reqs; ///< requests array for multi-query + var $_mbenc; ///< stored mbstring encoding + var $_arrayresult; ///< whether $result["matches"] should be a hash or an array + var $_timeout; ///< connect timeout + + ///////////////////////////////////////////////////////////////////////////// + // common stuff + ///////////////////////////////////////////////////////////////////////////// + + /// create a new client object and fill defaults + function SphinxClient () + { + // per-client-object settings + $this->_host = "localhost"; + $this->_port = 9312; + $this->_path = false; + $this->_socket = false; + + // per-query settings + $this->_offset = 0; + $this->_limit = 20; + $this->_mode = SPH_MATCH_ALL; + $this->_weights = array (); + $this->_sort = SPH_SORT_RELEVANCE; + $this->_sortby = ""; + $this->_min_id = 0; + $this->_max_id = 0; + $this->_filters = array (); + $this->_groupby = ""; + $this->_groupfunc = SPH_GROUPBY_DAY; + $this->_groupsort = "@group desc"; + $this->_groupdistinct= ""; + $this->_maxmatches = 1000; + $this->_cutoff = 0; + $this->_retrycount = 0; + $this->_retrydelay = 0; + $this->_anchor = array (); + $this->_indexweights= array (); + $this->_ranker = SPH_RANK_PROXIMITY_BM25; + $this->_maxquerytime= 0; + $this->_fieldweights= array(); + $this->_overrides = array(); + $this->_select = "*"; + + $this->_error = ""; // per-reply fields (for single-query case) + $this->_warning = ""; + $this->_connerror = false; + + $this->_reqs = array (); // requests storage (for multi-query case) + $this->_mbenc = ""; + $this->_arrayresult = false; + $this->_timeout = 0; + } + + function __destruct() + { + if ( $this->_socket !== false ) + fclose ( $this->_socket ); + } + + /// get last error message (string) + function GetLastError () + { + return $this->_error; + } + + /// get last warning message (string) + function GetLastWarning () + { + return $this->_warning; + } + + /// get last error flag (to tell network connection errors from searchd errors or broken responses) + function IsConnectError() + { + return $this->_connerror; + } + + /// set searchd host name (string) and port (integer) + function SetServer ( $host, $port = 0 ) + { + assert ( is_string($host) ); + if ( $host[0] == '/') + { + $this->_path = 'unix://' . $host; + return; + } + if ( substr ( $host, 0, 7 )=="unix://" ) + { + $this->_path = $host; + return; + } + + assert ( is_int($port) ); + $this->_host = $host; + $this->_port = $port; + $this->_path = ''; + + } + + /// set server connection timeout (0 to remove) + function SetConnectTimeout ( $timeout ) + { + assert ( is_numeric($timeout) ); + $this->_timeout = $timeout; + } + + + function _Send ( $handle, $data, $length ) + { + if ( feof($handle) || fwrite ( $handle, $data, $length ) !== $length ) + { + $this->_error = 'connection unexpectedly closed (timed out?)'; + $this->_connerror = true; + return false; + } + return true; + } + + ///////////////////////////////////////////////////////////////////////////// + + /// enter mbstring workaround mode + function _MBPush () + { + $this->_mbenc = ""; + if ( ini_get ( "mbstring.func_overload" ) & 2 ) + { + $this->_mbenc = mb_internal_encoding(); + mb_internal_encoding ( "latin1" ); + } + } + + /// leave mbstring workaround mode + function _MBPop () + { + if ( $this->_mbenc ) + mb_internal_encoding ( $this->_mbenc ); + } + + /// connect to searchd server + function _Connect () + { + if ( $this->_socket!==false ) + { + // we are in persistent connection mode, so we have a socket + // however, need to check whether it's still alive + if ( !@feof ( $this->_socket ) ) + return $this->_socket; + + // force reopen + $this->_socket = false; + } + + $errno = 0; + $errstr = ""; + $this->_connerror = false; + + if ( $this->_path ) + { + $host = $this->_path; + $port = 0; + } + else + { + $host = $this->_host; + $port = $this->_port; + } + + if ( $this->_timeout<=0 ) + $fp = @fsockopen ( $host, $port, $errno, $errstr ); + else + $fp = @fsockopen ( $host, $port, $errno, $errstr, $this->_timeout ); + + if ( !$fp ) + { + if ( $this->_path ) + $location = $this->_path; + else + $location = "{$this->_host}:{$this->_port}"; + + $errstr = trim ( $errstr ); + $this->_error = "connection to $location failed (errno=$errno, msg=$errstr)"; + $this->_connerror = true; + return false; + } + + // send my version + // this is a subtle part. we must do it before (!) reading back from searchd. + // because otherwise under some conditions (reported on FreeBSD for instance) + // TCP stack could throttle write-write-read pattern because of Nagle. + if ( !$this->_Send ( $fp, pack ( "N", 1 ), 4 ) ) + { + fclose ( $fp ); + $this->_error = "failed to send client protocol version"; + return false; + } + + // check version + list(,$v) = unpack ( "N*", fread ( $fp, 4 ) ); + $v = (int)$v; + if ( $v<1 ) + { + fclose ( $fp ); + $this->_error = "expected searchd protocol version 1+, got version '$v'"; + return false; + } + + return $fp; + } + + /// get and check response packet from searchd server + function _GetResponse ( $fp, $client_ver ) + { + $response = ""; + $len = 0; + + $header = fread ( $fp, 8 ); + if ( strlen($header)==8 ) + { + list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) ); + $left = $len; + while ( $left>0 && !feof($fp) ) + { + $chunk = fread ( $fp, min ( 8192, $left ) ); + if ( $chunk ) + { + $response .= $chunk; + $left -= strlen($chunk); + } + } + } + if ( $this->_socket === false ) + fclose ( $fp ); + + // check response + $read = strlen ( $response ); + if ( !$response || $read!=$len ) + { + $this->_error = $len + ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)" + : "received zero-sized searchd response"; + return false; + } + + // check status + if ( $status==SEARCHD_WARNING ) + { + list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) ); + $this->_warning = substr ( $response, 4, $wlen ); + return substr ( $response, 4+$wlen ); + } + if ( $status==SEARCHD_ERROR ) + { + $this->_error = "searchd error: " . substr ( $response, 4 ); + return false; + } + if ( $status==SEARCHD_RETRY ) + { + $this->_error = "temporary searchd error: " . substr ( $response, 4 ); + return false; + } + if ( $status!=SEARCHD_OK ) + { + $this->_error = "unknown status code '$status'"; + return false; + } + + // check version + if ( $ver<$client_ver ) + { + $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work", + $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff ); + } + + return $response; + } + + ///////////////////////////////////////////////////////////////////////////// + // searching + ///////////////////////////////////////////////////////////////////////////// + + /// set offset and count into result set, + /// and optionally set max-matches and cutoff limits + function SetLimits ( $offset, $limit, $max=0, $cutoff=0 ) + { + assert ( is_int($offset) ); + assert ( is_int($limit) ); + assert ( $offset>=0 ); + assert ( $limit>0 ); + assert ( $max>=0 ); + $this->_offset = $offset; + $this->_limit = $limit; + if ( $max>0 ) + $this->_maxmatches = $max; + if ( $cutoff>0 ) + $this->_cutoff = $cutoff; + } + + /// set maximum query time, in milliseconds, per-index + /// integer, 0 means "do not limit" + function SetMaxQueryTime ( $max ) + { + assert ( is_int($max) ); + assert ( $max>=0 ); + $this->_maxquerytime = $max; + } + + /// set matching mode + function SetMatchMode ( $mode ) + { + assert ( $mode==SPH_MATCH_ALL + || $mode==SPH_MATCH_ANY + || $mode==SPH_MATCH_PHRASE + || $mode==SPH_MATCH_BOOLEAN + || $mode==SPH_MATCH_EXTENDED + || $mode==SPH_MATCH_FULLSCAN + || $mode==SPH_MATCH_EXTENDED2 ); + $this->_mode = $mode; + } + + /// set ranking mode + function SetRankingMode ( $ranker ) + { + assert ( $ranker>=0 && $ranker_ranker = $ranker; + } + + /// set matches sorting mode + function SetSortMode ( $mode, $sortby="" ) + { + assert ( + $mode==SPH_SORT_RELEVANCE || + $mode==SPH_SORT_ATTR_DESC || + $mode==SPH_SORT_ATTR_ASC || + $mode==SPH_SORT_TIME_SEGMENTS || + $mode==SPH_SORT_EXTENDED || + $mode==SPH_SORT_EXPR ); + assert ( is_string($sortby) ); + assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 ); + + $this->_sort = $mode; + $this->_sortby = $sortby; + } + + /// bind per-field weights by order + /// DEPRECATED; use SetFieldWeights() instead + function SetWeights ( $weights ) + { + assert ( is_array($weights) ); + foreach ( $weights as $weight ) + assert ( is_int($weight) ); + + $this->_weights = $weights; + } + + /// bind per-field weights by name + function SetFieldWeights ( $weights ) + { + assert ( is_array($weights) ); + foreach ( $weights as $name=>$weight ) + { + assert ( is_string($name) ); + assert ( is_int($weight) ); + } + $this->_fieldweights = $weights; + } + + /// bind per-index weights by name + function SetIndexWeights ( $weights ) + { + assert ( is_array($weights) ); + foreach ( $weights as $index=>$weight ) + { + assert ( is_string($index) ); + assert ( is_int($weight) ); + } + $this->_indexweights = $weights; + } + + /// set IDs range to match + /// only match records if document ID is beetwen $min and $max (inclusive) + function SetIDRange ( $min, $max ) + { + assert ( is_numeric($min) ); + assert ( is_numeric($max) ); + assert ( $min<=$max ); + $this->_min_id = $min; + $this->_max_id = $max; + } + + /// set values set filter + /// only match records where $attribute value is in given set + function SetFilter ( $attribute, $values, $exclude=false ) + { + assert ( is_string($attribute) ); + assert ( is_array($values) ); + assert ( count($values) ); + + if ( is_array($values) && count($values) ) + { + foreach ( $values as $value ) + assert ( is_numeric($value) ); + + $this->_filters[] = array ( "type"=>SPH_FILTER_VALUES, "attr"=>$attribute, "exclude"=>$exclude, "values"=>$values ); + } + } + + /// set range filter + /// only match records if $attribute value is beetwen $min and $max (inclusive) + function SetFilterRange ( $attribute, $min, $max, $exclude=false ) + { + assert ( is_string($attribute) ); + assert ( is_numeric($min) ); + assert ( is_numeric($max) ); + assert ( $min<=$max ); + + $this->_filters[] = array ( "type"=>SPH_FILTER_RANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max ); + } + + /// set float range filter + /// only match records if $attribute value is beetwen $min and $max (inclusive) + function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false ) + { + assert ( is_string($attribute) ); + assert ( is_float($min) ); + assert ( is_float($max) ); + assert ( $min<=$max ); + + $this->_filters[] = array ( "type"=>SPH_FILTER_FLOATRANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max ); + } + + /// setup anchor point for geosphere distance calculations + /// required to use @geodist in filters and sorting + /// latitude and longitude must be in radians + function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long ) + { + assert ( is_string($attrlat) ); + assert ( is_string($attrlong) ); + assert ( is_float($lat) ); + assert ( is_float($long) ); + + $this->_anchor = array ( "attrlat"=>$attrlat, "attrlong"=>$attrlong, "lat"=>$lat, "long"=>$long ); + } + + /// set grouping attribute and function + function SetGroupBy ( $attribute, $func, $groupsort="@group desc" ) + { + assert ( is_string($attribute) ); + assert ( is_string($groupsort) ); + assert ( $func==SPH_GROUPBY_DAY + || $func==SPH_GROUPBY_WEEK + || $func==SPH_GROUPBY_MONTH + || $func==SPH_GROUPBY_YEAR + || $func==SPH_GROUPBY_ATTR + || $func==SPH_GROUPBY_ATTRPAIR ); + + $this->_groupby = $attribute; + $this->_groupfunc = $func; + $this->_groupsort = $groupsort; + } + + /// set count-distinct attribute for group-by queries + function SetGroupDistinct ( $attribute ) + { + assert ( is_string($attribute) ); + $this->_groupdistinct = $attribute; + } + + /// set distributed retries count and delay + function SetRetries ( $count, $delay=0 ) + { + assert ( is_int($count) && $count>=0 ); + assert ( is_int($delay) && $delay>=0 ); + $this->_retrycount = $count; + $this->_retrydelay = $delay; + } + + /// set result set format (hash or array; hash by default) + /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs + function SetArrayResult ( $arrayresult ) + { + assert ( is_bool($arrayresult) ); + $this->_arrayresult = $arrayresult; + } + + /// set attribute values override + /// there can be only one override per attribute + /// $values must be a hash that maps document IDs to attribute values + function SetOverride ( $attrname, $attrtype, $values ) + { + assert ( is_string ( $attrname ) ); + assert ( in_array ( $attrtype, array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT ) ) ); + assert ( is_array ( $values ) ); + + $this->_overrides[$attrname] = array ( "attr"=>$attrname, "type"=>$attrtype, "values"=>$values ); + } + + /// set select-list (attributes or expressions), SQL-like syntax + function SetSelect ( $select ) + { + assert ( is_string ( $select ) ); + $this->_select = $select; + } + + ////////////////////////////////////////////////////////////////////////////// + + /// clear all filters (for multi-queries) + function ResetFilters () + { + $this->_filters = array(); + $this->_anchor = array(); + } + + /// clear groupby settings (for multi-queries) + function ResetGroupBy () + { + $this->_groupby = ""; + $this->_groupfunc = SPH_GROUPBY_DAY; + $this->_groupsort = "@group desc"; + $this->_groupdistinct= ""; + } + + /// clear all attribute value overrides (for multi-queries) + function ResetOverrides () + { + $this->_overrides = array (); + } + + ////////////////////////////////////////////////////////////////////////////// + + /// connect to searchd server, run given search query through given indexes, + /// and return the search results + function Query ( $query, $index="*", $comment="" ) + { + assert ( empty($this->_reqs) ); + + $this->AddQuery ( $query, $index, $comment ); + $results = $this->RunQueries (); + $this->_reqs = array (); // just in case it failed too early + + if ( !is_array($results) ) + return false; // probably network error; error message should be already filled + + $this->_error = $results[0]["error"]; + $this->_warning = $results[0]["warning"]; + if ( $results[0]["status"]==SEARCHD_ERROR ) + return false; + else + return $results[0]; + } + + /// helper to pack floats in network byte order + function _PackFloat ( $f ) + { + $t1 = pack ( "f", $f ); // machine order + list(,$t2) = unpack ( "L*", $t1 ); // int in machine order + return pack ( "N", $t2 ); + } + + /// add query to multi-query batch + /// returns index into results array from RunQueries() call + function AddQuery ( $query, $index="*", $comment="" ) + { + // mbstring workaround + $this->_MBPush (); + + // build request + $req = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort ); // mode and limits + $req .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby; + $req .= pack ( "N", strlen($query) ) . $query; // query itself + $req .= pack ( "N", count($this->_weights) ); // weights + foreach ( $this->_weights as $weight ) + $req .= pack ( "N", (int)$weight ); + $req .= pack ( "N", strlen($index) ) . $index; // indexes + $req .= pack ( "N", 1 ); // id64 range marker + $req .= sphPackU64 ( $this->_min_id ) . sphPackU64 ( $this->_max_id ); // id64 range + + // filters + $req .= pack ( "N", count($this->_filters) ); + foreach ( $this->_filters as $filter ) + { + $req .= pack ( "N", strlen($filter["attr"]) ) . $filter["attr"]; + $req .= pack ( "N", $filter["type"] ); + switch ( $filter["type"] ) + { + case SPH_FILTER_VALUES: + $req .= pack ( "N", count($filter["values"]) ); + foreach ( $filter["values"] as $value ) + $req .= sphPackI64 ( $value ); + break; + + case SPH_FILTER_RANGE: + $req .= sphPackI64 ( $filter["min"] ) . sphPackI64 ( $filter["max"] ); + break; + + case SPH_FILTER_FLOATRANGE: + $req .= $this->_PackFloat ( $filter["min"] ) . $this->_PackFloat ( $filter["max"] ); + break; + + default: + assert ( 0 && "internal error: unhandled filter type" ); + } + $req .= pack ( "N", $filter["exclude"] ); + } + + // group-by clause, max-matches count, group-sort clause, cutoff count + $req .= pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby; + $req .= pack ( "N", $this->_maxmatches ); + $req .= pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort; + $req .= pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay ); + $req .= pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct; + + // anchor point + if ( empty($this->_anchor) ) + { + $req .= pack ( "N", 0 ); + } else + { + $a =& $this->_anchor; + $req .= pack ( "N", 1 ); + $req .= pack ( "N", strlen($a["attrlat"]) ) . $a["attrlat"]; + $req .= pack ( "N", strlen($a["attrlong"]) ) . $a["attrlong"]; + $req .= $this->_PackFloat ( $a["lat"] ) . $this->_PackFloat ( $a["long"] ); + } + + // per-index weights + $req .= pack ( "N", count($this->_indexweights) ); + foreach ( $this->_indexweights as $idx=>$weight ) + $req .= pack ( "N", strlen($idx) ) . $idx . pack ( "N", $weight ); + + // max query time + $req .= pack ( "N", $this->_maxquerytime ); + + // per-field weights + $req .= pack ( "N", count($this->_fieldweights) ); + foreach ( $this->_fieldweights as $field=>$weight ) + $req .= pack ( "N", strlen($field) ) . $field . pack ( "N", $weight ); + + // comment + $req .= pack ( "N", strlen($comment) ) . $comment; + + // attribute overrides + $req .= pack ( "N", count($this->_overrides) ); + foreach ( $this->_overrides as $key => $entry ) + { + $req .= pack ( "N", strlen($entry["attr"]) ) . $entry["attr"]; + $req .= pack ( "NN", $entry["type"], count($entry["values"]) ); + foreach ( $entry["values"] as $id=>$val ) + { + assert ( is_numeric($id) ); + assert ( is_numeric($val) ); + + $req .= sphPackU64 ( $id ); + switch ( $entry["type"] ) + { + case SPH_ATTR_FLOAT: $req .= $this->_PackFloat ( $val ); break; + case SPH_ATTR_BIGINT: $req .= sphPackI64 ( $val ); break; + default: $req .= pack ( "N", $val ); break; + } + } + } + + // select-list + $req .= pack ( "N", strlen($this->_select) ) . $this->_select; + + // mbstring workaround + $this->_MBPop (); + + // store request to requests array + $this->_reqs[] = $req; + return count($this->_reqs)-1; + } + + /// connect to searchd, run queries batch, and return an array of result sets + function RunQueries () + { + if ( empty($this->_reqs) ) + { + $this->_error = "no queries defined, issue AddQuery() first"; + return false; + } + + // mbstring workaround + $this->_MBPush (); + + if (!( $fp = $this->_Connect() )) + { + $this->_MBPop (); + return false; + } + + // send query, get response + $nreqs = count($this->_reqs); + $req = join ( "", $this->_reqs ); + $len = 8+strlen($req); + $req = pack ( "nnNNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, 0, $nreqs ) . $req; // add header + + if ( !( $this->_Send ( $fp, $req, $len+8 ) ) || + !( $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH ) ) ) + { + $this->_MBPop (); + return false; + } + + // query sent ok; we can reset reqs now + $this->_reqs = array (); + + // parse and return response + return $this->_ParseSearchResponse ( $response, $nreqs ); + } + + /// parse and return search query (or queries) response + function _ParseSearchResponse ( $response, $nreqs ) + { + $p = 0; // current position + $max = strlen($response); // max position for checks, to protect against broken responses + + $results = array (); + for ( $ires=0; $ires<$nreqs && $p<$max; $ires++ ) + { + $results[] = array(); + $result =& $results[$ires]; + + $result["error"] = ""; + $result["warning"] = ""; + + // extract status + list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + $result["status"] = $status; + if ( $status!=SEARCHD_OK ) + { + list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + $message = substr ( $response, $p, $len ); $p += $len; + + if ( $status==SEARCHD_WARNING ) + { + $result["warning"] = $message; + } else + { + $result["error"] = $message; + continue; + } + } + + // read schema + $fields = array (); + $attrs = array (); + + list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + while ( $nfields-->0 && $p<$max ) + { + list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + $fields[] = substr ( $response, $p, $len ); $p += $len; + } + $result["fields"] = $fields; + + list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + while ( $nattrs-->0 && $p<$max ) + { + list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + $attr = substr ( $response, $p, $len ); $p += $len; + list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + $attrs[$attr] = $type; + } + $result["attrs"] = $attrs; + + // read match count + list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + + // read matches + $idx = -1; + while ( $count-->0 && $p<$max ) + { + // index into result array + $idx++; + + // parse document id and weight + if ( $id64 ) + { + $doc = sphUnpackU64 ( substr ( $response, $p, 8 ) ); $p += 8; + list(,$weight) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + } + else + { + list ( $doc, $weight ) = array_values ( unpack ( "N*N*", + substr ( $response, $p, 8 ) ) ); + $p += 8; + $doc = sphFixUint($doc); + } + $weight = sprintf ( "%u", $weight ); + + // create match entry + if ( $this->_arrayresult ) + $result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight ); + else + $result["matches"][$doc]["weight"] = $weight; + + // parse and create attributes + $attrvals = array (); + foreach ( $attrs as $attr=>$type ) + { + // handle 64bit ints + if ( $type==SPH_ATTR_BIGINT ) + { + $attrvals[$attr] = sphUnpackI64 ( substr ( $response, $p, 8 ) ); $p += 8; + continue; + } + + // handle floats + if ( $type==SPH_ATTR_FLOAT ) + { + list(,$uval) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + list(,$fval) = unpack ( "f*", pack ( "L", $uval ) ); + $attrvals[$attr] = $fval; + continue; + } + + // handle everything else as unsigned ints + list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + if ( $type & SPH_ATTR_MULTI ) + { + $attrvals[$attr] = array (); + $nvalues = $val; + while ( $nvalues-->0 && $p<$max ) + { + list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + $attrvals[$attr][] = sphFixUint($val); + } + } else if ( $type==SPH_ATTR_STRING ) + { + $attrvals[$attr] = substr ( $response, $p, $val ); + $p += $val; + } else + { + $attrvals[$attr] = sphFixUint($val); + } + } + + if ( $this->_arrayresult ) + $result["matches"][$idx]["attrs"] = $attrvals; + else + $result["matches"][$doc]["attrs"] = $attrvals; + } + + list ( $total, $total_found, $msecs, $words ) = + array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) ); + $result["total"] = sprintf ( "%u", $total ); + $result["total_found"] = sprintf ( "%u", $total_found ); + $result["time"] = sprintf ( "%.3f", $msecs/1000 ); + $p += 16; + + while ( $words-->0 && $p<$max ) + { + list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + $word = substr ( $response, $p, $len ); $p += $len; + list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8; + $result["words"][$word] = array ( + "docs"=>sprintf ( "%u", $docs ), + "hits"=>sprintf ( "%u", $hits ) ); + } + } + + $this->_MBPop (); + return $results; + } + + ///////////////////////////////////////////////////////////////////////////// + // excerpts generation + ///////////////////////////////////////////////////////////////////////////// + + /// connect to searchd server, and generate exceprts (snippets) + /// of given documents for given query. returns false on failure, + /// an array of snippets on success + function BuildExcerpts ( $docs, $index, $words, $opts=array() ) + { + assert ( is_array($docs) ); + assert ( is_string($index) ); + assert ( is_string($words) ); + assert ( is_array($opts) ); + + $this->_MBPush (); + + if (!( $fp = $this->_Connect() )) + { + $this->_MBPop(); + return false; + } + + ///////////////// + // fixup options + ///////////////// + + if ( !isset($opts["before_match"]) ) $opts["before_match"] = ""; + if ( !isset($opts["after_match"]) ) $opts["after_match"] = ""; + if ( !isset($opts["chunk_separator"]) ) $opts["chunk_separator"] = " ... "; + if ( !isset($opts["limit"]) ) $opts["limit"] = 256; + if ( !isset($opts["limit_passages"]) ) $opts["limit_passages"] = 0; + if ( !isset($opts["limit_words"]) ) $opts["limit_words"] = 0; + if ( !isset($opts["around"]) ) $opts["around"] = 5; + if ( !isset($opts["exact_phrase"]) ) $opts["exact_phrase"] = false; + if ( !isset($opts["single_passage"]) ) $opts["single_passage"] = false; + if ( !isset($opts["use_boundaries"]) ) $opts["use_boundaries"] = false; + if ( !isset($opts["weight_order"]) ) $opts["weight_order"] = false; + if ( !isset($opts["query_mode"]) ) $opts["query_mode"] = false; + if ( !isset($opts["force_all_words"]) ) $opts["force_all_words"] = false; + if ( !isset($opts["start_passage_id"]) ) $opts["start_passage_id"] = 1; + if ( !isset($opts["load_files"]) ) $opts["load_files"] = false; + if ( !isset($opts["html_strip_mode"]) ) $opts["html_strip_mode"] = "index"; + if ( !isset($opts["allow_empty"]) ) $opts["allow_empty"] = false; + if ( !isset($opts["passage_boundary"]) ) $opts["passage_boundary"] = "none"; + if ( !isset($opts["emit_zones"]) ) $opts["emit_zones"] = false; + + ///////////////// + // build request + ///////////////// + + // v.1.2 req + $flags = 1; // remove spaces + if ( $opts["exact_phrase"] ) $flags |= 2; + if ( $opts["single_passage"] ) $flags |= 4; + if ( $opts["use_boundaries"] ) $flags |= 8; + if ( $opts["weight_order"] ) $flags |= 16; + if ( $opts["query_mode"] ) $flags |= 32; + if ( $opts["force_all_words"] ) $flags |= 64; + if ( $opts["load_files"] ) $flags |= 128; + if ( $opts["allow_empty"] ) $flags |= 256; + if ( $opts["emit_zones"] ) $flags |= 512; + $req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags + $req .= pack ( "N", strlen($index) ) . $index; // req index + $req .= pack ( "N", strlen($words) ) . $words; // req words + + // options + $req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"]; + $req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"]; + $req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"]; + $req .= pack ( "NN", (int)$opts["limit"], (int)$opts["around"] ); + $req .= pack ( "NNN", (int)$opts["limit_passages"], (int)$opts["limit_words"], (int)$opts["start_passage_id"] ); // v.1.2 + $req .= pack ( "N", strlen($opts["html_strip_mode"]) ) . $opts["html_strip_mode"]; + $req .= pack ( "N", strlen($opts["passage_boundary"]) ) . $opts["passage_boundary"]; + + // documents + $req .= pack ( "N", count($docs) ); + foreach ( $docs as $doc ) + { + assert ( is_string($doc) ); + $req .= pack ( "N", strlen($doc) ) . $doc; + } + + //////////////////////////// + // send query, get response + //////////////////////////// + + $len = strlen($req); + $req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header + if ( !( $this->_Send ( $fp, $req, $len+8 ) ) || + !( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ) ) + { + $this->_MBPop (); + return false; + } + + ////////////////// + // parse response + ////////////////// + + $pos = 0; + $res = array (); + $rlen = strlen($response); + for ( $i=0; $i $rlen ) + { + $this->_error = "incomplete reply"; + $this->_MBPop (); + return false; + } + $res[] = $len ? substr ( $response, $pos, $len ) : ""; + $pos += $len; + } + + $this->_MBPop (); + return $res; + } + + + ///////////////////////////////////////////////////////////////////////////// + // keyword generation + ///////////////////////////////////////////////////////////////////////////// + + /// connect to searchd server, and generate keyword list for a given query + /// returns false on failure, + /// an array of words on success + function BuildKeywords ( $query, $index, $hits ) + { + assert ( is_string($query) ); + assert ( is_string($index) ); + assert ( is_bool($hits) ); + + $this->_MBPush (); + + if (!( $fp = $this->_Connect() )) + { + $this->_MBPop(); + return false; + } + + ///////////////// + // build request + ///////////////// + + // v.1.0 req + $req = pack ( "N", strlen($query) ) . $query; // req query + $req .= pack ( "N", strlen($index) ) . $index; // req index + $req .= pack ( "N", (int)$hits ); + + //////////////////////////// + // send query, get response + //////////////////////////// + + $len = strlen($req); + $req = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header + if ( !( $this->_Send ( $fp, $req, $len+8 ) ) || + !( $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS ) ) ) + { + $this->_MBPop (); + return false; + } + + ////////////////// + // parse response + ////////////////// + + $pos = 0; + $res = array (); + $rlen = strlen($response); + list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) ); + $pos += 4; + for ( $i=0; $i<$nwords; $i++ ) + { + list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4; + $tokenized = $len ? substr ( $response, $pos, $len ) : ""; + $pos += $len; + + list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4; + $normalized = $len ? substr ( $response, $pos, $len ) : ""; + $pos += $len; + + $res[] = array ( "tokenized"=>$tokenized, "normalized"=>$normalized ); + + if ( $hits ) + { + list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) ); + $pos += 8; + $res [$i]["docs"] = $ndocs; + $res [$i]["hits"] = $nhits; + } + + if ( $pos > $rlen ) + { + $this->_error = "incomplete reply"; + $this->_MBPop (); + return false; + } + } + + $this->_MBPop (); + return $res; + } + + function EscapeString ( $string ) + { + $from = array ( '\\', '(',')','|','-','!','@','~','"','&', '/', '^', '$', '=' ); + $to = array ( '\\\\', '\(','\)','\|','\-','\!','\@','\~','\"', '\&', '\/', '\^', '\$', '\=' ); + + return str_replace ( $from, $to, $string ); + } + + ///////////////////////////////////////////////////////////////////////////// + // attribute updates + ///////////////////////////////////////////////////////////////////////////// + + /// batch update given attributes in given rows in given indexes + /// returns amount of updated documents (0 or more) on success, or -1 on failure + function UpdateAttributes ( $index, $attrs, $values, $mva=false ) + { + // verify everything + assert ( is_string($index) ); + assert ( is_bool($mva) ); + + assert ( is_array($attrs) ); + foreach ( $attrs as $attr ) + assert ( is_string($attr) ); + + assert ( is_array($values) ); + foreach ( $values as $id=>$entry ) + { + assert ( is_numeric($id) ); + assert ( is_array($entry) ); + assert ( count($entry)==count($attrs) ); + foreach ( $entry as $v ) + { + if ( $mva ) + { + assert ( is_array($v) ); + foreach ( $v as $vv ) + assert ( is_int($vv) ); + } else + assert ( is_int($v) ); + } + } + + // build request + $this->_MBPush (); + $req = pack ( "N", strlen($index) ) . $index; + + $req .= pack ( "N", count($attrs) ); + foreach ( $attrs as $attr ) + { + $req .= pack ( "N", strlen($attr) ) . $attr; + $req .= pack ( "N", $mva ? 1 : 0 ); + } + + $req .= pack ( "N", count($values) ); + foreach ( $values as $id=>$entry ) + { + $req .= sphPackU64 ( $id ); + foreach ( $entry as $v ) + { + $req .= pack ( "N", $mva ? count($v) : $v ); + if ( $mva ) + foreach ( $v as $vv ) + $req .= pack ( "N", $vv ); + } + } + + // connect, send query, get response + if (!( $fp = $this->_Connect() )) + { + $this->_MBPop (); + return -1; + } + + $len = strlen($req); + $req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header + if ( !$this->_Send ( $fp, $req, $len+8 ) ) + { + $this->_MBPop (); + return -1; + } + + if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) )) + { + $this->_MBPop (); + return -1; + } + + // parse response + list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) ); + $this->_MBPop (); + return $updated; + } + + ///////////////////////////////////////////////////////////////////////////// + // persistent connections + ///////////////////////////////////////////////////////////////////////////// + + function Open() + { + if ( $this->_socket !== false ) + { + $this->_error = 'already connected'; + return false; + } + if ( !$fp = $this->_Connect() ) + return false; + + // command, command version = 0, body length = 4, body = 1 + $req = pack ( "nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1 ); + if ( !$this->_Send ( $fp, $req, 12 ) ) + return false; + + $this->_socket = $fp; + return true; + } + + function Close() + { + if ( $this->_socket === false ) + { + $this->_error = 'not connected'; + return false; + } + + fclose ( $this->_socket ); + $this->_socket = false; + + return true; + } + + ////////////////////////////////////////////////////////////////////////// + // status + ////////////////////////////////////////////////////////////////////////// + + function Status () + { + $this->_MBPush (); + if (!( $fp = $this->_Connect() )) + { + $this->_MBPop(); + return false; + } + + $req = pack ( "nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1 ); // len=4, body=1 + if ( !( $this->_Send ( $fp, $req, 12 ) ) || + !( $response = $this->_GetResponse ( $fp, VER_COMMAND_STATUS ) ) ) + { + $this->_MBPop (); + return false; + } + + $res = substr ( $response, 4 ); // just ignore length, error handling, etc + $p = 0; + list ( $rows, $cols ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8; + + $res = array(); + for ( $i=0; $i<$rows; $i++ ) + for ( $j=0; $j<$cols; $j++ ) + { + list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; + $res[$i][] = substr ( $response, $p, $len ); $p += $len; + } + + $this->_MBPop (); + return $res; + } + + ////////////////////////////////////////////////////////////////////////// + // flush + ////////////////////////////////////////////////////////////////////////// + + function FlushAttributes () + { + $this->_MBPush (); + if (!( $fp = $this->_Connect() )) + { + $this->_MBPop(); + return -1; + } + + $req = pack ( "nnN", SEARCHD_COMMAND_FLUSHATTRS, VER_COMMAND_FLUSHATTRS, 0 ); // len=0 + if ( !( $this->_Send ( $fp, $req, 8 ) ) || + !( $response = $this->_GetResponse ( $fp, VER_COMMAND_FLUSHATTRS ) ) ) + { + $this->_MBPop (); + return -1; + } + + $tag = -1; + if ( strlen($response)==4 ) + list(,$tag) = unpack ( "N*", $response ); + else + $this->_error = "unexpected response length"; + + $this->_MBPop (); + return $tag; + } +} + +// +// $Id: sphinxapi.php 2758 2011-04-04 11:10:44Z kevg $ +// -- cgit v1.2.3-54-g00ecf From 2bb11658a8b032dc345b402de25265befa54608e Mon Sep 17 00:00:00 2001 From: Dave Zaikos Date: Mon, 12 May 2014 00:59:27 -0400 Subject: Added HOOK_FORMAT_ENCLOSURES plugin hook. Runs HTML and enclosures array through a plugin hook when rendering an article's enclosures in format_article_enclosures(). Allows plugins to override handling of how enclosures are presented by either filtering the array of enclosures, or generating the HTML to add to the article content. --- classes/pluginhost.php | 1 + include/functions2.php | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'classes/pluginhost.php') diff --git a/classes/pluginhost.php b/classes/pluginhost.php index 4582c314a..bd6afa81d 100644 --- a/classes/pluginhost.php +++ b/classes/pluginhost.php @@ -40,6 +40,7 @@ class PluginHost { const HOOK_QUERY_HEADLINES = 23; const HOOK_HOUSE_KEEPING = 24; const HOOK_SEARCH = 25; + const HOOK_FORMAT_ENCLOSURES = 26; const KIND_ALL = 1; const KIND_SYSTEM = 2; diff --git a/include/functions2.php b/include/functions2.php index 959e7154d..cf1fba17c 100644 --- a/include/functions2.php +++ b/include/functions2.php @@ -1846,8 +1846,17 @@ $result = get_article_enclosures($id); $rv = ''; - if (count($result) > 0) { + foreach (PluginHost::getInstance()->get_hooks(PluginHost::HOOK_FORMAT_ENCLOSURES) as $plugin) { + $retval = $plugin->hook_format_enclosures($rv, $result, $id, $always_display_enclosures, $article_content, $hide_images); + if (is_array($retval)) { + $rv = $retval[0]; + $result = $retval[1]; + } else { + $rv = $retval; + } + } + if ($rv === '' && is_array($result)) { $entries_html = array(); $entries = array(); $entries_inline = array(); -- cgit v1.2.3-54-g00ecf From 01465325b48270e5eee5db94c6178ba6d567ddf7 Mon Sep 17 00:00:00 2001 From: wltb Date: Sat, 17 May 2014 17:51:35 +0200 Subject: Add subscribe hook, give more information to fetch_feed hook --- classes/pluginhost.php | 1 + include/functions.php | 4 ++++ include/rssfuncs.php | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'classes/pluginhost.php') diff --git a/classes/pluginhost.php b/classes/pluginhost.php index bd6afa81d..744337e0f 100644 --- a/classes/pluginhost.php +++ b/classes/pluginhost.php @@ -41,6 +41,7 @@ class PluginHost { const HOOK_HOUSE_KEEPING = 24; const HOOK_SEARCH = 25; const HOOK_FORMAT_ENCLOSURES = 26; + const HOOK_SUBSCRIBE_FEED = 27; const KIND_ALL = 1; const KIND_SYSTEM = 2; diff --git a/include/functions.php b/include/functions.php index 01d8d47f8..2e76f157b 100644 --- a/include/functions.php +++ b/include/functions.php @@ -1687,6 +1687,10 @@ return array("code" => 5, "message" => $fetch_last_error); } + foreach (PluginHost::getInstance()->get_hooks(PluginHost::HOOK_SUBSCRIBE_FEED) as $plugin) { + $contents = $plugin->hook_subscribe_feed($contents, $url, $auth_login, $auth_pass); + } + if (is_html($contents)) { $feedUrls = get_feeds_from_html($url, $contents); diff --git a/include/rssfuncs.php b/include/rssfuncs.php index 15f1df0cc..010071e57 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -268,7 +268,7 @@ $force_refetch = isset($_REQUEST["force_refetch"]); foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) { - $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed); + $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, $last_article_timestamp, $auth_login, $auth_pass); } // try cache -- cgit v1.2.3-54-g00ecf From 7eb87b80d533d0bdd538938b69064352a8587204 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 19 Aug 2014 14:24:34 +0400 Subject: add pluginhost HOOK_HEADLINES_BEFORE (refs #814) --- classes/feeds.php | 6 ++++++ classes/pluginhost.php | 1 + 2 files changed, 7 insertions(+) (limited to 'classes/pluginhost.php') diff --git a/classes/feeds.php b/classes/feeds.php index 317a90afc..5ec109614 100644 --- a/classes/feeds.php +++ b/classes/feeds.php @@ -281,6 +281,12 @@ class Feeds extends Handler_Protected { } } */ + if ($offset == 0) { + foreach (PluginHost::getInstance()->get_hooks(PluginHost::HOOK_HEADLINES_BEFORE) as $p) { + $reply['content'] .= $p->hook_headlines_before($feed, $cat_view, $qfh_ret); + } + } + if ($this->dbh->num_rows($result) > 0) { $lnum = $offset; diff --git a/classes/pluginhost.php b/classes/pluginhost.php index 744337e0f..1ad7afd60 100644 --- a/classes/pluginhost.php +++ b/classes/pluginhost.php @@ -42,6 +42,7 @@ class PluginHost { const HOOK_SEARCH = 25; const HOOK_FORMAT_ENCLOSURES = 26; const HOOK_SUBSCRIBE_FEED = 27; + const HOOK_HEADLINES_BEFORE = 28; const KIND_ALL = 1; const KIND_SYSTEM = 2; -- cgit v1.2.3-54-g00ecf