Removed BloomFilter classes
* This ends up being more complex than its worth and even more so for multi-DC support Bug: T93006 Change-Id: Iaa774fe69061e42955b11dc82d30dba93208e606
This commit is contained in:
parent
54dff6a5e5
commit
4bdbfe6c1b
9 changed files with 3 additions and 901 deletions
|
|
@ -422,6 +422,7 @@ changes to languages because of Bugzilla reports.
|
|||
thisisdeleted, viewdeleted, editlink, retrievedfrom, version-poweredby-others,
|
||||
retrievedfrom, thisisdeleted, viewsourcelink, lastmodifiedat, laggedslavemode,
|
||||
protect-summary-cascade
|
||||
* All BloomCache related code has been removed. This was largely experimental.
|
||||
|
||||
== Compatibility ==
|
||||
|
||||
|
|
|
|||
|
|
@ -170,9 +170,6 @@ $wgAutoloadLocalClasses = array(
|
|||
'Block' => __DIR__ . '/includes/Block.php',
|
||||
'BlockListPager' => __DIR__ . '/includes/specials/SpecialBlockList.php',
|
||||
'BlockLogFormatter' => __DIR__ . '/includes/logging/BlockLogFormatter.php',
|
||||
'BloomCache' => __DIR__ . '/includes/cache/bloom/BloomCache.php',
|
||||
'BloomCacheRedis' => __DIR__ . '/includes/cache/bloom/BloomCacheRedis.php',
|
||||
'BloomFilterTitleHasLogs' => __DIR__ . '/includes/cache/bloom/BloomFilters.php',
|
||||
'BmpHandler' => __DIR__ . '/includes/media/BMP.php',
|
||||
'BrokenRedirectsPage' => __DIR__ . '/includes/specials/SpecialBrokenRedirects.php',
|
||||
'BufferingStatsdDataFactory' => __DIR__ . '/includes/libs/BufferingStatsdDataFactory.php',
|
||||
|
|
@ -367,7 +364,6 @@ $wgAutoloadLocalClasses = array(
|
|||
'EmailNotification' => __DIR__ . '/includes/mail/EmailNotification.php',
|
||||
'EmaillingJob' => __DIR__ . '/includes/jobqueue/jobs/EmaillingJob.php',
|
||||
'EmptyBagOStuff' => __DIR__ . '/includes/libs/objectcache/EmptyBagOStuff.php',
|
||||
'EmptyBloomCache' => __DIR__ . '/includes/cache/bloom/BloomCache.php',
|
||||
'EncryptedPassword' => __DIR__ . '/includes/password/EncryptedPassword.php',
|
||||
'EnhancedChangesList' => __DIR__ . '/includes/changes/EnhancedChangesList.php',
|
||||
'EnotifNotifyJob' => __DIR__ . '/includes/jobqueue/jobs/EnotifNotifyJob.php',
|
||||
|
|
|
|||
|
|
@ -2147,28 +2147,6 @@ $wgObjectCaches = array(
|
|||
'hash' => array( 'class' => 'HashBagOStuff' ),
|
||||
);
|
||||
|
||||
/**
|
||||
* Map of bloom filter store names to configuration arrays.
|
||||
*
|
||||
* Example:
|
||||
* $wgBloomFilterStores['main'] = array(
|
||||
* 'cacheId' => 'main-v1',
|
||||
* 'class' => 'BloomCacheRedis',
|
||||
* 'redisServers' => array( '127.0.0.1:6379' ),
|
||||
* 'redisConfig' => array( 'connectTimeout' => 2 )
|
||||
* );
|
||||
*
|
||||
* A primary bloom filter must be created manually.
|
||||
* Example in eval.php:
|
||||
* <code>
|
||||
* BloomCache::get( 'main' )->init( 'shared', 1000000000, .001 );
|
||||
* </code>
|
||||
* The size should be as large as practical given wiki size and resources.
|
||||
*
|
||||
* @since 1.24
|
||||
*/
|
||||
$wgBloomFilterStores = array();
|
||||
|
||||
/**
|
||||
* The expiry time for the parser cache, in seconds.
|
||||
* The default is 86400 (one day).
|
||||
|
|
|
|||
324
includes/cache/bloom/BloomCache.php
vendored
324
includes/cache/bloom/BloomCache.php
vendored
|
|
@ -1,324 +0,0 @@
|
|||
<?php
|
||||
/**
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
* @file
|
||||
* @author Aaron Schulz
|
||||
*/
|
||||
|
||||
/**
|
||||
* Persistent bloom filter used to avoid expensive lookups
|
||||
*
|
||||
* @since 1.24
|
||||
*/
|
||||
abstract class BloomCache {
|
||||
/** @var string Unique ID for key namespacing */
|
||||
protected $cacheID;
|
||||
|
||||
/** @var array Map of (id => BloomCache) */
|
||||
protected static $instances = array();
|
||||
|
||||
/**
|
||||
* @param string $id
|
||||
* @return BloomCache
|
||||
*/
|
||||
final public static function get( $id ) {
|
||||
global $wgBloomFilterStores;
|
||||
|
||||
if ( !isset( self::$instances[$id] ) ) {
|
||||
if ( isset( $wgBloomFilterStores[$id] ) ) {
|
||||
$class = $wgBloomFilterStores[$id]['class'];
|
||||
self::$instances[$id] = new $class( $wgBloomFilterStores[$id] );
|
||||
} else {
|
||||
wfDebug( "No bloom filter store '$id'; using EmptyBloomCache." );
|
||||
return new EmptyBloomCache( array() );
|
||||
}
|
||||
}
|
||||
|
||||
return self::$instances[$id];
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new bloom cache instance from configuration.
|
||||
* This should only be called from within BloomCache.
|
||||
*
|
||||
* @param array $config Parameters include:
|
||||
* - cacheID : Prefix to all bloom filter names that is unique to this cache.
|
||||
* It should only consist of alphanumberic, '-', and '_' characters.
|
||||
* This ID is what avoids collisions if multiple logical caches
|
||||
* use the same storage system, so this should be set carefully.
|
||||
* @throws MWException
|
||||
*/
|
||||
public function __construct( array $config ) {
|
||||
$this->cacheID = $config['cacheId'];
|
||||
if ( !preg_match( '!^[a-zA-Z0-9-_]{1,32}$!', $this->cacheID ) ) {
|
||||
throw new MWException( "Cache ID '{$this->cacheID}' is invalid." );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a member is set in the bloom filter
|
||||
*
|
||||
* A member being set means that it *might* have been added.
|
||||
* A member not being set means it *could not* have been added.
|
||||
*
|
||||
* This abstracts over isHit() to deal with filter updates and readiness.
|
||||
* A class must exist with the name BloomFilter<type> and a static public
|
||||
* mergeAndCheck() method. The later takes the following arguments:
|
||||
* (BloomCache $bcache, $domain, $virtualKey, array $status)
|
||||
* The method should return a bool indicating whether to use the filter.
|
||||
*
|
||||
* The 'shared' bloom key must be used for any updates and will be used
|
||||
* for the membership check if the method returns true. Since the key is shared,
|
||||
* the method should never use delete(). The filter cannot be used in cases where
|
||||
* membership in the filter needs to be taken away. In such cases, code *cannot*
|
||||
* use this method - instead, it can directly use the other BloomCache methods
|
||||
* to manage custom filters with their own keys (e.g. not 'shared').
|
||||
*
|
||||
* @param string $domain
|
||||
* @param string $type
|
||||
* @param string $member
|
||||
* @return bool True if set, false if not (also returns true on error)
|
||||
*/
|
||||
final public function check( $domain, $type, $member ) {
|
||||
$ps = Profiler::instance()->scopedProfileIn( get_class( $this ) . '::' . __FUNCTION__ );
|
||||
|
||||
if ( method_exists( "BloomFilter{$type}", 'mergeAndCheck' ) ) {
|
||||
try {
|
||||
$virtualKey = "$domain:$type";
|
||||
|
||||
$status = $this->getStatus( $virtualKey );
|
||||
if ( $status == false ) {
|
||||
wfDebug( "Could not query virtual bloom filter '$virtualKey'." );
|
||||
return true;
|
||||
}
|
||||
|
||||
$useFilter = call_user_func_array(
|
||||
array( "BloomFilter{$type}", 'mergeAndCheck' ),
|
||||
array( $this, $domain, $virtualKey, $status )
|
||||
);
|
||||
|
||||
if ( $useFilter ) {
|
||||
return ( $this->isHit( 'shared', "$virtualKey:$member" ) !== false );
|
||||
}
|
||||
} catch ( Exception $e ) {
|
||||
MWExceptionHandler::logException( $e );
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Inform the bloom filter of a new member in order to keep it up to date
|
||||
*
|
||||
* @param string $domain
|
||||
* @param string $type
|
||||
* @param string|array $members
|
||||
* @return bool Success
|
||||
*/
|
||||
final public function insert( $domain, $type, $members ) {
|
||||
$ps = Profiler::instance()->scopedProfileIn( get_class( $this ) . '::' . __FUNCTION__ );
|
||||
|
||||
if ( method_exists( "BloomFilter{$type}", 'mergeAndCheck' ) ) {
|
||||
try {
|
||||
$virtualKey = "$domain:$type";
|
||||
$prefixedMembers = array();
|
||||
foreach ( (array)$members as $member ) {
|
||||
$prefixedMembers[] = "$virtualKey:$member";
|
||||
}
|
||||
|
||||
return $this->add( 'shared', $prefixedMembers );
|
||||
} catch ( Exception $e ) {
|
||||
MWExceptionHandler::logException( $e );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new bloom filter at $key (if one does not exist yet)
|
||||
*
|
||||
* @param string $key
|
||||
* @param integer $size Bit length [default: 1000000]
|
||||
* @param float $precision [default: .001]
|
||||
* @return bool Success
|
||||
*/
|
||||
final public function init( $key, $size = 1000000, $precision = .001 ) {
|
||||
$ps = Profiler::instance()->scopedProfileIn( get_class( $this ) . '::' . __FUNCTION__ );
|
||||
|
||||
return $this->doInit( "{$this->cacheID}:$key", $size, min( .1, $precision ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a member to the bloom filter at $key
|
||||
*
|
||||
* @param string $key
|
||||
* @param string|array $members
|
||||
* @return bool Success
|
||||
*/
|
||||
final public function add( $key, $members ) {
|
||||
$ps = Profiler::instance()->scopedProfileIn( get_class( $this ) . '::' . __FUNCTION__ );
|
||||
|
||||
return $this->doAdd( "{$this->cacheID}:$key", (array)$members );
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a member is set in the bloom filter.
|
||||
*
|
||||
* A member being set means that it *might* have been added.
|
||||
* A member not being set means it *could not* have been added.
|
||||
*
|
||||
* If this returns true, then the caller usually should do the
|
||||
* expensive check (whatever that may be). It can be avoided otherwise.
|
||||
*
|
||||
* @param string $key
|
||||
* @param string $member
|
||||
* @return bool|null True if set, false if not, null on error
|
||||
*/
|
||||
final public function isHit( $key, $member ) {
|
||||
$ps = Profiler::instance()->scopedProfileIn( get_class( $this ) . '::' . __FUNCTION__ );
|
||||
|
||||
return $this->doIsHit( "{$this->cacheID}:$key", $member );
|
||||
}
|
||||
|
||||
/**
|
||||
* Destroy a bloom filter at $key
|
||||
*
|
||||
* @param string $key
|
||||
* @return bool Success
|
||||
*/
|
||||
final public function delete( $key ) {
|
||||
$ps = Profiler::instance()->scopedProfileIn( get_class( $this ) . '::' . __FUNCTION__ );
|
||||
|
||||
return $this->doDelete( "{$this->cacheID}:$key" );
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the status map of the virtual bloom filter at $key
|
||||
*
|
||||
* @param string $virtualKey
|
||||
* @param array $values Map including some of (lastID, asOfTime, epoch)
|
||||
* @return bool Success
|
||||
*/
|
||||
final public function setStatus( $virtualKey, array $values ) {
|
||||
$ps = Profiler::instance()->scopedProfileIn( get_class( $this ) . '::' . __FUNCTION__ );
|
||||
|
||||
return $this->doSetStatus( "{$this->cacheID}:$virtualKey", $values );
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the status map of the virtual bloom filter at $key
|
||||
*
|
||||
* The map includes:
|
||||
* - lastID : the highest ID of the items merged in
|
||||
* - asOfTime : UNIX timestamp that the filter is up-to-date as of
|
||||
* - epoch : UNIX timestamp that filter started being populated
|
||||
* Unset fields will have a null value.
|
||||
*
|
||||
* @param string $virtualKey
|
||||
* @return array|bool False on failure
|
||||
*/
|
||||
final public function getStatus( $virtualKey ) {
|
||||
$ps = Profiler::instance()->scopedProfileIn( get_class( $this ) . '::' . __FUNCTION__ );
|
||||
|
||||
return $this->doGetStatus( "{$this->cacheID}:$virtualKey" );
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an exclusive lock on a filter for updates
|
||||
*
|
||||
* @param string $virtualKey
|
||||
* @return ScopedCallback|ScopedLock|null Returns null if acquisition failed
|
||||
*/
|
||||
public function getScopedLock( $virtualKey ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $key
|
||||
* @param integer $size Bit length
|
||||
* @param float $precision
|
||||
* @return bool Success
|
||||
*/
|
||||
abstract protected function doInit( $key, $size, $precision );
|
||||
|
||||
/**
|
||||
* @param string $key
|
||||
* @param array $members
|
||||
* @return bool Success
|
||||
*/
|
||||
abstract protected function doAdd( $key, array $members );
|
||||
|
||||
/**
|
||||
* @param string $key
|
||||
* @param string $member
|
||||
* @return bool|null
|
||||
*/
|
||||
abstract protected function doIsHit( $key, $member );
|
||||
|
||||
/**
|
||||
* @param string $key
|
||||
* @return bool Success
|
||||
*/
|
||||
abstract protected function doDelete( $key );
|
||||
|
||||
/**
|
||||
* @param string $virtualKey
|
||||
* @param array $values
|
||||
* @return bool Success
|
||||
*/
|
||||
abstract protected function doSetStatus( $virtualKey, array $values );
|
||||
|
||||
/**
|
||||
* @param string $key
|
||||
* @return array|bool
|
||||
*/
|
||||
abstract protected function doGetStatus( $key );
|
||||
}
|
||||
|
||||
class EmptyBloomCache extends BloomCache {
|
||||
public function __construct( array $config ) {
|
||||
parent::__construct( array( 'cacheId' => 'none' ) );
|
||||
}
|
||||
|
||||
protected function doInit( $key, $size, $precision ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
protected function doAdd( $key, array $members ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
protected function doIsHit( $key, $member ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
protected function doDelete( $key ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
protected function doSetStatus( $virtualKey, array $values ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
protected function doGetStatus( $virtualKey ) {
|
||||
return array( 'lastID' => null, 'asOfTime' => null, 'epoch' => null );
|
||||
}
|
||||
}
|
||||
371
includes/cache/bloom/BloomCacheRedis.php
vendored
371
includes/cache/bloom/BloomCacheRedis.php
vendored
|
|
@ -1,371 +0,0 @@
|
|||
<?php
|
||||
/**
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
* @file
|
||||
* @author Aaron Schulz
|
||||
*/
|
||||
|
||||
/**
|
||||
* Bloom filter implemented using Redis
|
||||
*
|
||||
* The Redis server must be >= 2.6 and should have volatile-lru or volatile-ttl
|
||||
* if there is any eviction policy. It should not be allkeys-* in any case. Also,
|
||||
* this can be used in a simple master/slave setup or with Redis Sentinel preferably.
|
||||
*
|
||||
* Some bits are based on https://github.com/ErikDubbelboer/redis-lua-scaling-bloom-filter
|
||||
* but are simplified to use a single filter instead of up to 32 filters.
|
||||
*
|
||||
* @since 1.24
|
||||
*/
|
||||
class BloomCacheRedis extends BloomCache {
|
||||
/** @var RedisConnectionPool */
|
||||
protected $redisPool;
|
||||
/** @var RedisLockManager */
|
||||
protected $lockMgr;
|
||||
/** @var array */
|
||||
protected $servers;
|
||||
/** @var integer Federate each filter into this many redis bitfield objects */
|
||||
protected $segments = 128;
|
||||
|
||||
/**
|
||||
* @params include:
|
||||
* - redisServers : list of servers (address:<port>) (the first is the master)
|
||||
* - redisConf : additional redis configuration
|
||||
*
|
||||
* @param array $config
|
||||
*/
|
||||
public function __construct( array $config ) {
|
||||
parent::__construct( $config );
|
||||
|
||||
$redisConf = $config['redisConfig'];
|
||||
$redisConf['serializer'] = 'none'; // manage that in this class
|
||||
$this->redisPool = RedisConnectionPool::singleton( $redisConf );
|
||||
$this->servers = $config['redisServers'];
|
||||
$this->lockMgr = new RedisLockManager( array(
|
||||
'lockServers' => array( 'srv1' => $this->servers[0] ),
|
||||
'srvsByBucket' => array( 0 => array( 'srv1' ) ),
|
||||
'redisConfig' => $config['redisConfig']
|
||||
) );
|
||||
}
|
||||
|
||||
protected function doInit( $key, $size, $precision ) {
|
||||
$conn = $this->getConnection( 'master' );
|
||||
if ( !$conn ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 80000000 items at p = .001 take up 500MB and fit into one value.
|
||||
// Do not hit the 512MB redis value limit by reducing the demands.
|
||||
$size = min( $size, 80000000 * $this->segments );
|
||||
$precision = max( round( $precision, 3 ), .001 );
|
||||
$epoch = microtime( true );
|
||||
|
||||
static $script =
|
||||
<<<LUA
|
||||
local kMetadata, kData = unpack(KEYS)
|
||||
local aEntries, aPrec, aEpoch = unpack(ARGV)
|
||||
if redis.call('EXISTS',kMetadata) == 0 or redis.call('EXISTS',kData) == 0 then
|
||||
redis.call('DEL',kMetadata)
|
||||
redis.call('HSET',kMetadata,'entries',aEntries)
|
||||
redis.call('HSET',kMetadata,'precision',aPrec)
|
||||
redis.call('HSET',kMetadata,'epoch',aEpoch)
|
||||
redis.call('SET',kData,'')
|
||||
return 1
|
||||
end
|
||||
return 0
|
||||
LUA;
|
||||
|
||||
$res = false;
|
||||
try {
|
||||
$conn->script( 'load', $script );
|
||||
$conn->multi( Redis::MULTI );
|
||||
for ( $i = 0; $i < $this->segments; ++$i ) {
|
||||
$res = $conn->luaEval( $script,
|
||||
array(
|
||||
"$key:$i:bloom-metadata", # KEYS[1]
|
||||
"$key:$i:bloom-data", # KEYS[2]
|
||||
ceil( $size / $this->segments ), # ARGV[1]
|
||||
$precision, # ARGV[2]
|
||||
$epoch # ARGV[3]
|
||||
),
|
||||
2 # number of first argument(s) that are keys
|
||||
);
|
||||
}
|
||||
$results = $conn->exec();
|
||||
$res = $results && !in_array( false, $results, true );
|
||||
} catch ( RedisException $e ) {
|
||||
$this->handleException( $conn, $e );
|
||||
}
|
||||
|
||||
return ( $res !== false );
|
||||
}
|
||||
|
||||
protected function doAdd( $key, array $members ) {
|
||||
$conn = $this->getConnection( 'master' );
|
||||
if ( !$conn ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static $script =
|
||||
<<<LUA
|
||||
local kMetadata, kData = unpack(KEYS)
|
||||
local aMember = unpack(ARGV)
|
||||
|
||||
-- Check if the filter was initialized
|
||||
if redis.call('EXISTS',kMetadata) == 0 or redis.call('EXISTS',kData) == 0 then
|
||||
return false
|
||||
end
|
||||
|
||||
-- Initial expected entries and desired precision
|
||||
local entries = 1*redis.call('HGET',kMetadata,'entries')
|
||||
local precision = 1*redis.call('HGET',kMetadata,'precision')
|
||||
local hash = redis.sha1hex(aMember)
|
||||
|
||||
-- Based on the math from: http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
|
||||
-- 0.480453013 = ln(2)^2
|
||||
local bits = math.ceil((entries * math.log(precision)) / -0.480453013)
|
||||
|
||||
-- 0.693147180 = ln(2)
|
||||
local k = math.floor(0.693147180 * bits / entries)
|
||||
|
||||
-- This uses a variation on:
|
||||
-- 'Less Hashing, Same Performance: Building a Better Bloom Filter'
|
||||
-- http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
|
||||
local h = { }
|
||||
h[0] = tonumber(string.sub(hash, 1, 8 ), 16)
|
||||
h[1] = tonumber(string.sub(hash, 9, 16), 16)
|
||||
h[2] = tonumber(string.sub(hash, 17, 24), 16)
|
||||
h[3] = tonumber(string.sub(hash, 25, 32), 16)
|
||||
|
||||
for i=1, k do
|
||||
local pos = (h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)]) % bits
|
||||
redis.call('SETBIT', kData, pos, 1)
|
||||
end
|
||||
|
||||
return 1
|
||||
LUA;
|
||||
|
||||
$res = false;
|
||||
try {
|
||||
$conn->script( 'load', $script );
|
||||
$conn->multi( Redis::PIPELINE );
|
||||
foreach ( $members as $member ) {
|
||||
$i = $this->getSegment( $member );
|
||||
$conn->luaEval( $script,
|
||||
array(
|
||||
"$key:$i:bloom-metadata", # KEYS[1],
|
||||
"$key:$i:bloom-data", # KEYS[2]
|
||||
$member # ARGV[1]
|
||||
),
|
||||
2 # number of first argument(s) that are keys
|
||||
);
|
||||
}
|
||||
$results = $conn->exec();
|
||||
$res = $results && !in_array( false, $results, true );
|
||||
} catch ( RedisException $e ) {
|
||||
$this->handleException( $conn, $e );
|
||||
}
|
||||
|
||||
if ( $res === false ) {
|
||||
wfDebug( "Could not add to the '$key' bloom filter; it may be missing." );
|
||||
}
|
||||
|
||||
return ( $res !== false );
|
||||
}
|
||||
|
||||
protected function doSetStatus( $virtualKey, array $values ) {
|
||||
$conn = $this->getConnection( 'master' );
|
||||
if ( !$conn ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$res = false;
|
||||
try {
|
||||
$res = $conn->hMSet( "$virtualKey:filter-metadata", $values );
|
||||
} catch ( RedisException $e ) {
|
||||
$this->handleException( $conn, $e );
|
||||
}
|
||||
|
||||
return ( $res !== false );
|
||||
}
|
||||
|
||||
protected function doGetStatus( $virtualKey ) {
|
||||
$conn = $this->getConnection( 'slave' );
|
||||
if ( !$conn ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$res = false;
|
||||
try {
|
||||
$res = $conn->hGetAll( "$virtualKey:filter-metadata" );
|
||||
} catch ( RedisException $e ) {
|
||||
$this->handleException( $conn, $e );
|
||||
}
|
||||
|
||||
if ( is_array( $res ) ) {
|
||||
$res['lastID'] = isset( $res['lastID'] ) ? $res['lastID'] : null;
|
||||
$res['asOfTime'] = isset( $res['asOfTime'] ) ? $res['asOfTime'] : null;
|
||||
$res['epoch'] = isset( $res['epoch'] ) ? $res['epoch'] : null;
|
||||
}
|
||||
|
||||
return $res;
|
||||
}
|
||||
|
||||
protected function doIsHit( $key, $member ) {
|
||||
$conn = $this->getConnection( 'slave' );
|
||||
if ( !$conn ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
static $script =
|
||||
<<<LUA
|
||||
local kMetadata, kData = unpack(KEYS)
|
||||
local aMember = unpack(ARGV)
|
||||
|
||||
-- Check if the filter was initialized
|
||||
if redis.call('EXISTS',kMetadata) == 0 or redis.call('EXISTS',kData) == 0 then
|
||||
return false
|
||||
end
|
||||
|
||||
-- Initial expected entries and desired precision.
|
||||
-- This determines the size of the first and subsequent filters.
|
||||
local entries = redis.call('HGET',kMetadata,'entries')
|
||||
local precision = redis.call('HGET',kMetadata,'precision')
|
||||
local hash = redis.sha1hex(aMember)
|
||||
|
||||
-- This uses a variation on:
|
||||
-- 'Less Hashing, Same Performance: Building a Better Bloom Filter'
|
||||
-- http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
|
||||
local h = { }
|
||||
h[0] = tonumber(string.sub(hash, 1, 8 ), 16)
|
||||
h[1] = tonumber(string.sub(hash, 9, 16), 16)
|
||||
h[2] = tonumber(string.sub(hash, 17, 24), 16)
|
||||
h[3] = tonumber(string.sub(hash, 25, 32), 16)
|
||||
|
||||
-- 0.480453013 = ln(2)^2
|
||||
local bits = math.ceil((entries * math.log(precision)) / -0.480453013)
|
||||
|
||||
-- 0.693147180 = ln(2)
|
||||
local k = math.floor(0.693147180 * bits / entries)
|
||||
|
||||
local found = 1
|
||||
for i=1, k do
|
||||
local pos = (h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)]) % bits
|
||||
if redis.call('GETBIT', kData, pos) == 0 then
|
||||
found = 0
|
||||
break
|
||||
end
|
||||
end
|
||||
|
||||
return found
|
||||
LUA;
|
||||
|
||||
$res = null;
|
||||
try {
|
||||
$i = $this->getSegment( $member );
|
||||
$res = $conn->luaEval( $script,
|
||||
array(
|
||||
"$key:$i:bloom-metadata", # KEYS[1],
|
||||
"$key:$i:bloom-data", # KEYS[2]
|
||||
$member # ARGV[1]
|
||||
),
|
||||
2 # number of first argument(s) that are keys
|
||||
);
|
||||
} catch ( RedisException $e ) {
|
||||
$this->handleException( $conn, $e );
|
||||
}
|
||||
|
||||
return is_int( $res ) ? (bool)$res : null;
|
||||
}
|
||||
|
||||
protected function doDelete( $key ) {
|
||||
$conn = $this->getConnection( 'master' );
|
||||
if ( !$conn ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$res = false;
|
||||
try {
|
||||
$keys = array();
|
||||
for ( $i = 0; $i < $this->segments; ++$i ) {
|
||||
$keys[] = "$key:$i:bloom-metadata";
|
||||
$keys[] = "$key:$i:bloom-data";
|
||||
}
|
||||
$res = $conn->delete( $keys );
|
||||
} catch ( RedisException $e ) {
|
||||
$this->handleException( $conn, $e );
|
||||
}
|
||||
|
||||
return ( $res !== false );
|
||||
}
|
||||
|
||||
public function getScopedLock( $virtualKey ) {
|
||||
$status = Status::newGood();
|
||||
return ScopedLock::factory( $this->lockMgr,
|
||||
array( $virtualKey ), LockManager::LOCK_EX, $status );
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $member
|
||||
* @return integer
|
||||
*/
|
||||
protected function getSegment( $member ) {
|
||||
return hexdec( substr( md5( $member ), 0, 2 ) ) % $this->segments;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $to (master/slave)
|
||||
* @return RedisConnRef|bool Returns false on failure
|
||||
*/
|
||||
protected function getConnection( $to ) {
|
||||
if ( $to === 'master' ) {
|
||||
$conn = $this->redisPool->getConnection( $this->servers[0] );
|
||||
} else {
|
||||
static $lastServer = null;
|
||||
|
||||
$conn = false;
|
||||
if ( $lastServer ) {
|
||||
$conn = $this->redisPool->getConnection( $lastServer );
|
||||
if ( $conn ) {
|
||||
return $conn; // reuse connection
|
||||
}
|
||||
}
|
||||
$servers = $this->servers;
|
||||
$attempts = min( 3, count( $servers ) );
|
||||
for ( $i = 1; $i <= $attempts; ++$i ) {
|
||||
$index = mt_rand( 0, count( $servers ) - 1 );
|
||||
$conn = $this->redisPool->getConnection( $servers[$index] );
|
||||
if ( $conn ) {
|
||||
$lastServer = $servers[$index];
|
||||
return $conn;
|
||||
}
|
||||
unset( $servers[$index] ); // skip next time
|
||||
$servers = array_values( $servers ); // reindex
|
||||
}
|
||||
}
|
||||
|
||||
return $conn;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param RedisConnRef $conn
|
||||
* @param Exception $e
|
||||
*/
|
||||
protected function handleException( RedisConnRef $conn, $e ) {
|
||||
$this->redisPool->handleError( $conn, $e );
|
||||
}
|
||||
}
|
||||
82
includes/cache/bloom/BloomFilters.php
vendored
82
includes/cache/bloom/BloomFilters.php
vendored
|
|
@ -1,82 +0,0 @@
|
|||
<?php
|
||||
/**
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
* @file
|
||||
* @author Aaron Schulz
|
||||
*/
|
||||
|
||||
/**
|
||||
* @since 1.24
|
||||
*/
|
||||
class BloomFilterTitleHasLogs {
|
||||
public static function mergeAndCheck(
|
||||
BloomCache $bcache, $domain, $virtualKey, array $status
|
||||
) {
|
||||
$ttr = 5; // try to refresh before this many seconds
|
||||
$age = microtime( true ) - $status['asOfTime']; // seconds
|
||||
$chance = min( 1, $age / $ttr );
|
||||
|
||||
$scopedLock = ( mt_rand( 1, 1e9 ) <= 1e9 * $chance )
|
||||
? $bcache->getScopedLock( $virtualKey )
|
||||
: false;
|
||||
|
||||
if ( $scopedLock ) {
|
||||
$updates = self::merge( $bcache, $domain, $virtualKey, $status );
|
||||
if ( isset( $updates['asOfTime'] ) ) {
|
||||
$age = ( microtime( true ) - $updates['asOfTime'] );
|
||||
}
|
||||
}
|
||||
|
||||
return ( $age < 30 );
|
||||
}
|
||||
|
||||
public static function merge(
|
||||
BloomCache $bcache, $domain, $virtualKey, array $status
|
||||
) {
|
||||
$limit = 1000;
|
||||
$dbr = wfGetDB( DB_SLAVE, array(), $domain );
|
||||
$res = $dbr->select( 'logging',
|
||||
array( 'log_namespace', 'log_title', 'log_id', 'log_timestamp' ),
|
||||
array( 'log_id > ' . $dbr->addQuotes( (int)$status['lastID'] ) ),
|
||||
__METHOD__,
|
||||
array( 'ORDER BY' => 'log_id', 'LIMIT' => $limit )
|
||||
);
|
||||
|
||||
$updates = array();
|
||||
if ( $res->numRows() > 0 ) {
|
||||
$members = array();
|
||||
foreach ( $res as $row ) {
|
||||
$members[] = "$virtualKey:{$row->log_namespace}:{$row->log_title}";
|
||||
}
|
||||
$lastID = $row->log_id;
|
||||
$lastTime = $row->log_timestamp;
|
||||
if ( !$bcache->add( 'shared', $members ) ) {
|
||||
return false;
|
||||
}
|
||||
$updates['lastID'] = $lastID;
|
||||
$updates['asOfTime'] = wfTimestamp( TS_UNIX, $lastTime );
|
||||
} else {
|
||||
$updates['asOfTime'] = microtime( true );
|
||||
}
|
||||
|
||||
$updates['epoch'] = $status['epoch'] ?: microtime( true );
|
||||
|
||||
$bcache->setStatus( $virtualKey, $updates );
|
||||
|
||||
return $updates;
|
||||
}
|
||||
}
|
||||
|
|
@ -547,10 +547,6 @@ class ManualLogEntry extends LogEntryBase {
|
|||
$dbw->insert( 'log_search', $rows, __METHOD__, 'IGNORE' );
|
||||
}
|
||||
|
||||
// Update any bloom filter cache
|
||||
$member = $this->getTarget()->getNamespace() . ':' . $this->getTarget()->getDBkey();
|
||||
BloomCache::get( 'main' )->insert( wfWikiId(), 'TitleHasLogs', $member );
|
||||
|
||||
return $this->id;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -541,29 +541,9 @@ class LogEventsList extends ContextSource {
|
|||
if ( $lim > 0 ) {
|
||||
$pager->mLimit = $lim;
|
||||
}
|
||||
|
||||
$knownEmptyResult = false;
|
||||
// Check if we can avoid the DB query all together
|
||||
if ( $page !== '' && !$param['useMaster'] ) {
|
||||
$title = ( $page instanceof Title ) ? $page : Title::newFromText( $page );
|
||||
if ( $title ) {
|
||||
$member = $title->getNamespace() . ':' . $title->getDBkey();
|
||||
if ( !BloomCache::get( 'main' )->check( wfWikiId(), 'TitleHasLogs', $member ) ) {
|
||||
$knownEmptyResult = true;
|
||||
}
|
||||
} else {
|
||||
$knownEmptyResult = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Fetch the log rows and build the HTML if needed
|
||||
if ( $knownEmptyResult ) {
|
||||
$logBody = '';
|
||||
$numRows = 0;
|
||||
} else {
|
||||
$logBody = $pager->getBody();
|
||||
$numRows = $pager->getNumRows();
|
||||
}
|
||||
$logBody = $pager->getBody();
|
||||
$numRows = $pager->getNumRows();
|
||||
|
||||
$s = '';
|
||||
|
||||
|
|
|
|||
|
|
@ -1,72 +0,0 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Test for BloomCacheRedis class.
|
||||
*
|
||||
* @TODO: some generic base "redis test server conf" for all testing?
|
||||
*
|
||||
* @covers BloomCacheRedis
|
||||
* @group Cache
|
||||
*/
|
||||
class BloomCacheRedisTest extends MediaWikiTestCase {
|
||||
private static $suffix;
|
||||
|
||||
protected function setUp() {
|
||||
parent::setUp();
|
||||
|
||||
self::$suffix = self::$suffix ? : mt_rand();
|
||||
|
||||
$fcache = BloomCache::get( 'main' );
|
||||
if ( $fcache instanceof BloomCacheRedis ) {
|
||||
$fcache->delete( "unit-testing-" . self::$suffix );
|
||||
} else {
|
||||
$this->markTestSkipped( 'The main bloom cache is not redis.' );
|
||||
}
|
||||
}
|
||||
|
||||
public function testBloomCache() {
|
||||
$key = "unit-testing-" . self::$suffix;
|
||||
$fcache = BloomCache::get( 'main' );
|
||||
$count = 1500;
|
||||
|
||||
$this->assertTrue( $fcache->delete( $key ), "OK delete of filter '$key'." );
|
||||
$this->assertTrue( $fcache->init( $key, $count, .001 ), "OK init of filter '$key'." );
|
||||
|
||||
$members = array();
|
||||
for ( $i = 0; $i < $count; ++$i ) {
|
||||
$members[] = "$i-value-$i";
|
||||
}
|
||||
$this->assertTrue( $fcache->add( $key, $members ), "Addition of members to '$key' OK." );
|
||||
|
||||
for ( $i = 0; $i < $count; ++$i ) {
|
||||
$this->assertTrue( $fcache->isHit( $key, "$i-value-$i" ), "Hit on member '$i-value-$i'." );
|
||||
}
|
||||
|
||||
$falsePositives = array();
|
||||
for ( $i = $count; $i < 2 * $count; ++$i ) {
|
||||
if ( $fcache->isHit( $key, "value$i" ) ) {
|
||||
$falsePositives[] = "value$i";
|
||||
}
|
||||
}
|
||||
|
||||
$eFalsePositives = array(
|
||||
'value1763',
|
||||
'value2245',
|
||||
'value2353',
|
||||
'value2791',
|
||||
'value2898',
|
||||
'value2975'
|
||||
);
|
||||
$this->assertEquals( $eFalsePositives, $falsePositives,
|
||||
"Correct number of false positives found." );
|
||||
}
|
||||
|
||||
protected function tearDown() {
|
||||
parent::tearDown();
|
||||
|
||||
$fcache = BloomCache::get( 'main' );
|
||||
if ( $fcache instanceof BloomCacheRedis ) {
|
||||
$fcache->delete( "unit-testing-" . self::$suffix );
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in a new issue