[JobQueue] Added aggregate empty/non-empty queue caching.

* The default class is JobQueueAggregatorMemc.
  This essentially has the logic that nextJobDB.php used.
* Also created a JobQueueAggregatorRedis class.
  This is much more efficient and more responsive.
* This can speed up calls to getQueuesWithJobs().
* Removed unused getDefaultQueuesWithJobs() function.

Change-Id: Ifb3c6c881decd643da1b662956ded69db4b39431
This commit is contained in:
Aaron Schulz 2013-02-05 12:00:24 -08:00
parent f4e7488007
commit d0985f4f04
10 changed files with 488 additions and 120 deletions

View file

@ -661,6 +661,9 @@ $wgAutoloadLocalClasses = array(
# includes/job
'Job' => 'includes/job/Job.php',
'JobQueue' => 'includes/job/JobQueue.php',
'JobQueueAggregator' => 'includes/job/JobQueueAggregator.php',
'JobQueueAggregatorMemc' => 'includes/job/JobQueueAggregatorMemc.php',
'JobQueueAggregatorRedis' => 'includes/job/JobQueueAggregatorRedis.php',
'JobQueueDB' => 'includes/job/JobQueueDB.php',
'JobQueueGroup' => 'includes/job/JobQueueGroup.php',
'JobQueueRedis' => 'includes/job/JobQueueRedis.php',

View file

@ -5536,6 +5536,14 @@ $wgJobTypeConf = array(
'default' => array( 'class' => 'JobQueueDB', 'order' => 'random' ),
);
/**
* Which aggregator to use for tracking which queues have jobs.
* These settings should be global to all wikis.
*/
$wgJobQueueAggregator = array(
'class' => 'JobQueueAggregatorMemc'
);
/**
* Additional functions to be performed with updateSpecialPages.
* Expensive Querypages are already updated.

View file

@ -169,9 +169,7 @@ abstract class JobQueue {
* @throws MWException
*/
final public function push( $jobs, $flags = 0 ) {
$jobs = is_array( $jobs ) ? $jobs : array( $jobs );
return $this->batchPush( $jobs, $flags );
return $this->batchPush( is_array( $jobs ) ? $jobs : array( $jobs ), $flags );
}
/**
@ -184,11 +182,15 @@ abstract class JobQueue {
* @throws MWException
*/
final public function batchPush( array $jobs, $flags = 0 ) {
if ( !count( $jobs ) ) {
return true; // nothing to do
}
foreach ( $jobs as $job ) {
if ( $job->getType() !== $this->type ) {
throw new MWException( "Got '{$job->getType()}' job; expected '{$this->type}'." );
}
}
wfProfileIn( __METHOD__ );
$ok = $this->doBatchPush( $jobs, $flags );
wfProfileOut( __METHOD__ );
@ -205,7 +207,7 @@ abstract class JobQueue {
* Pop a job off of the queue.
* This requires $wgJobClasses to be set for the given job type.
*
* @return Job|bool Returns false on failure
* @return Job|bool Returns false if there are no jobs
* @throws MWException
*/
final public function pop() {

View file

@ -0,0 +1,139 @@
<?php
/**
* Job queue aggregator code.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @author Aaron Schulz
*/
/**
* Class to handle tracking information about all queues
*
* @ingroup JobQueue
* @since 1.21
*/
abstract class JobQueueAggregator {
/** @var JobQueueAggregator */
protected static $instance = null;
/**
* @param array $params
*/
protected function __construct( array $params ) {}
/**
* @return JobQueueAggregator
*/
final public static function singleton() {
global $wgJobQueueAggregator;
if ( !isset( self::$instance ) ) {
$class = $wgJobQueueAggregator['class'];
$obj = new $class( $wgJobQueueAggregator );
if ( !( $obj instanceof JobQueueAggregator ) ) {
throw new MWException( "Class '$class' is not a JobQueueAggregator class." );
}
self::$instance = $obj;
}
return self::$instance;
}
/**
* Destroy the singleton instance
*
* @return void
*/
final public static function destroySingleton() {
self::$instance = null;
}
/**
* Mark a queue as being empty
*
* @param string $wiki
* @param string $type
* @return bool Success
*/
final public function notifyQueueEmpty( $wiki, $type ) {
wfProfileIn( __METHOD__ );
$ok = $this->doNotifyQueueEmpty( $wiki, $type );
wfProfileOut( __METHOD__ );
return $ok;
}
/**
* @see JobQueueAggregator::notifyQueueEmpty()
*/
abstract protected function doNotifyQueueEmpty( $wiki, $type );
/**
* Mark a queue as being non-empty
*
* @param string $wiki
* @param string $type
* @return bool Success
*/
final public function notifyQueueNonEmpty( $wiki, $type ) {
wfProfileIn( __METHOD__ );
$ok = $this->doNotifyQueueNonEmpty( $wiki, $type );
wfProfileOut( __METHOD__ );
return $ok;
}
/**
* @see JobQueueAggregator::notifyQueueNonEmpty()
*/
abstract protected function doNotifyQueueNonEmpty( $wiki, $type );
/**
* Get the list of all of the queues with jobs
*
* @return Array (job type => (list of wiki IDs))
*/
final public function getAllReadyWikiQueues() {
wfProfileIn( __METHOD__ );
$res = $this->doGetAllReadyWikiQueues();
wfProfileOut( __METHOD__ );
return $res;
}
/**
* @see JobQueueAggregator::getAllReadyWikiQueues()
*/
abstract protected function doGetAllReadyWikiQueues();
/**
* Get all databases that have a pending job.
* This poll all the queues and is this expensive.
*
* @return Array (job type => (list of wiki IDs))
*/
protected function findPendingWikiQueues() {
global $wgLocalDatabases;
$pendingDBs = array(); // (job type => (db list))
foreach ( $wgLocalDatabases as $db ) {
foreach ( JobQueueGroup::singleton( $db )->getQueuesWithJobs() as $type ) {
$pendingDBs[$type][] = $db;
}
}
return $pendingDBs;
}
}

View file

@ -0,0 +1,117 @@
<?php
/**
* Job queue aggregator code that uses BagOStuff.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @author Aaron Schulz
*/
/**
* Class to handle tracking information about all queues using BagOStuff
*
* @ingroup JobQueue
* @since 1.21
*/
class JobQueueAggregatorMemc extends JobQueueAggregator {
/** @var BagOStuff */
protected $cache;
protected $cacheTTL; // integer; seconds
/**
* @params include:
* - objectCache : Name of an object cache registered in $wgObjectCaches.
* This defaults to the one specified by $wgMainCacheType.
* - cacheTTL : Seconds to cache the aggregate data before regenerating.
* @param array $params
*/
protected function __construct( array $params ) {
parent::__construct( $params );
$this->cache = isset( $params['objectCache'] )
? wfGetCache( $params['objectCache'] )
: wfGetMainCache();
$this->cacheTTL = isset( $params['cacheTTL'] ) ? $params['cacheTTL'] : 180; // 3 min
}
/**
* @see JobQueueAggregator::doNotifyQueueEmpty()
*/
protected function doNotifyQueueEmpty( $wiki, $type ) {
$key = $this->getReadyQueueCacheKey();
// Delist the queue from the "ready queue" list
if ( $this->cache->add( "$key:lock", 1, 60 ) ) { // lock
$curInfo = $this->cache->get( $key );
if ( is_array( $curInfo ) && isset( $curInfo['pendingDBs'][$type] ) ) {
if ( in_array( $wiki, $curInfo['pendingDBs'][$type] ) ) {
$curInfo['pendingDBs'][$type] = array_diff(
$curInfo['pendingDBs'][$type], array( $wiki ) );
$this->cache->set( $key, $curInfo );
}
}
$this->cache->delete( "$key:lock" ); // unlock
}
return true;
}
/**
* @see JobQueueAggregator::doNotifyQueueNonEmpty()
*/
protected function doNotifyQueueNonEmpty( $wiki, $type ) {
return true; // updated periodically
}
/**
* @see JobQueueAggregator::doAllGetReadyWikiQueues()
*/
protected function doGetAllReadyWikiQueues() {
$key = $this->getReadyQueueCacheKey();
// If the cache entry wasn't present, is stale, or in .1% of cases otherwise,
// regenerate the cache. Use any available stale cache if another process is
// currently regenerating the pending DB information.
$pendingDbInfo = $this->cache->get( $key );
if ( !is_array( $pendingDbInfo )
|| ( time() - $pendingDbInfo['timestamp'] ) > $this->cacheTTL
|| mt_rand( 0, 999 ) == 0
) {
if ( $this->cache->add( "$key:rebuild", 1, 1800 ) ) { // lock
$pendingDbInfo = array(
'pendingDBs' => $this->findPendingWikiQueues(),
'timestamp' => time()
);
for ( $attempts=1; $attempts <= 25; ++$attempts ) {
if ( $this->cache->add( "$key:lock", 1, 60 ) ) { // lock
$this->cache->set( $key, $pendingDbInfo );
$this->cache->delete( "$key:lock" ); // unlock
break;
}
}
$this->cache->delete( "$key:rebuild" ); // unlock
}
}
return is_array( $pendingDbInfo )
? $pendingDbInfo['pendingDBs']
: array(); // cache is both empty and locked
}
/**
* @return string
*/
private function getReadyQueueCacheKey() {
return "jobqueue:aggregator:ready-queues:v1"; // global
}
}

View file

@ -0,0 +1,165 @@
<?php
/**
* Job queue aggregator code that uses PhpRedis.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @author Aaron Schulz
*/
/**
* Class to handle tracking information about all queues using PhpRedis
*
* @ingroup JobQueue
* @since 1.21
*/
class JobQueueAggregatorRedis extends JobQueueAggregator {
/** @var RedisConnectionPool */
protected $redisPool;
/**
* @params include:
* - redisConfig : An array of parameters to RedisConnectionPool::__construct().
* - redisServer : A hostname/port combination or the absolute path of a UNIX socket.
* If a hostname is specified but no port, the standard port number
* 6379 will be used. Required.
* @param array $params
*/
protected function __construct( array $params ) {
parent::__construct( $params );
$this->server = $params['redisServer'];
$this->redisPool = RedisConnectionPool::singleton( $params['redisConfig'] );
}
/**
* @see JobQueueAggregator::doNotifyQueueEmpty()
*/
protected function doNotifyQueueEmpty( $wiki, $type ) {
$conn = $this->getConnection();
if ( !$conn ) {
return false;
}
try {
$conn->hDel( $this->getReadyQueueKey(), $this->encQueueName( $type, $wiki ) );
return true;
} catch ( RedisException $e ) {
$this->handleException( $conn, $e );
return false;
}
}
/**
* @see JobQueueAggregator::doNotifyQueueNonEmpty()
*/
protected function doNotifyQueueNonEmpty( $wiki, $type ) {
$conn = $this->getConnection();
if ( !$conn ) {
return false;
}
try {
$conn->hSet( $this->getReadyQueueKey(), $this->encQueueName( $type, $wiki ), time() );
return true;
} catch ( RedisException $e ) {
$this->handleException( $conn, $e );
return false;
}
}
/**
* @see JobQueueAggregator::doAllGetReadyWikiQueues()
*/
protected function doGetAllReadyWikiQueues() {
$conn = $this->getConnection();
if ( !$conn ) {
return array();
}
try {
$conn->multi( Redis::PIPELINE );
$conn->exists( $this->getReadyQueueKey() );
$conn->hGetAll( $this->getReadyQueueKey() );
list( $exists, $map ) = $conn->exec();
if ( $exists ) { // cache hit
$pendingDBs = array(); // (type => list of wikis)
foreach ( $map as $key => $time ) {
list( $type, $wiki ) = $this->dencQueueName( $key );
$pendingDBs[$type][] = $wiki;
}
} else { // cache miss
$pendingDBs = $this->findPendingWikiQueues(); // (type => list of wikis)
$now = time();
$map = array();
foreach ( $pendingDBs as $type => $wikis ) {
foreach ( $wikis as $wiki ) {
$map[$this->encQueueName( $type, $wiki )] = $now;
}
}
$conn->hMSet( $this->getReadyQueueKey(), $map );
}
return $pendingDBs;
} catch ( RedisException $e ) {
$this->handleException( $conn, $e );
return array();
}
}
/**
* Get a connection to the server that handles all sub-queues for this queue
*
* @return Array (server name, Redis instance)
* @throws MWException
*/
protected function getConnection() {
return $this->redisPool->getConnection( $this->server );
}
/**
* @param RedisConnRef $conn
* @param RedisException $e
* @return void
*/
protected function handleException( RedisConnRef $conn, $e ) {
$this->redisPool->handleException( $this->server, $conn, $e );
}
/**
* @return string
*/
private function getReadyQueueKey() {
return "jobqueue:aggregator:h-ready-queues:v1"; // global
}
/**
* @param string $type
* @param string $wiki
* @return string
*/
private function encQueueName( $type, $wiki ) {
return rawurlencode( $type ) . '/' . rawurlencode( $wiki );
}
/**
* @param string $name
* @return string
*/
private function dencQueueName( $name ) {
list( $type, $wiki ) = explode( '/', $name, 2 );
return array( rawurldecode( $type ), rawurldecode( $wiki ) );
}
}

View file

@ -150,12 +150,11 @@ class JobQueueDB extends JobQueue {
}
}
$key = $this->getCacheKey( 'empty' );
$atomic = ( $flags & self::QoS_Atomic );
$key = $this->getCacheKey( 'empty' );
$ttl = self::CACHE_TTL_LONG;
$dbw->onTransactionIdle(
function() use ( $dbw, $rowSet, $rowList, $atomic, $key, $ttl, $scope
function() use ( $dbw, $rowSet, $rowList, $atomic, $key, $scope
) {
global $wgMemc;
@ -197,7 +196,7 @@ class JobQueueDB extends JobQueue {
$dbw->commit( __METHOD__ );
}
$wgMemc->set( $key, 'false', $ttl ); // queue is not empty
$wgMemc->set( $key, 'false', JobQueueDB::CACHE_TTL_LONG );
} );
}

View file

@ -39,16 +39,18 @@ class JobQueueGroup {
const TYPE_DEFAULT = 1; // integer; jobs popped by default
const TYPE_ANY = 2; // integer; any job
const USE_CACHE = 1; // integer; use process cache
const USE_CACHE = 1; // integer; use process or persistent cache
const PROC_CACHE_TTL = 15; // integer; seconds
const CACHE_VERSION = 1; // integer; cache version
/**
* @param $wiki string Wiki ID
*/
protected function __construct( $wiki ) {
$this->wiki = $wiki;
$this->cache = new ProcessCacheLRU( 1 );
$this->cache = new ProcessCacheLRU( 10 );
}
/**
@ -111,7 +113,9 @@ class JobQueueGroup {
$ok = true;
foreach ( $jobsByType as $type => $jobs ) {
if ( !$this->get( $type )->push( $jobs ) ) {
if ( $this->get( $type )->push( $jobs ) ) {
JobQueueAggregator::singleton()->notifyQueueNonEmpty( $this->wiki, $type );
} else {
$ok = false;
}
}
@ -129,35 +133,44 @@ class JobQueueGroup {
/**
* Pop a job off one of the job queues
*
* @param $queueType integer JobQueueGroup::TYPE_* constant
* @param $qtype integer|string JobQueueGroup::TYPE_DEFAULT or type string
* @param $flags integer Bitfield of JobQueueGroup::USE_* constants
* @return Job|bool Returns false on failure
*/
public function pop( $queueType = self::TYPE_DEFAULT, $flags = 0 ) {
if ( $flags & self::USE_CACHE ) {
if ( !$this->cache->has( 'queues-ready', 'list', self::PROC_CACHE_TTL ) ) {
$this->cache->set( 'queues-ready', 'list', $this->getQueuesWithJobs() );
public function pop( $qtype = self::TYPE_DEFAULT, $flags = 0 ) {
if ( is_string( $qtype ) ) { // specific job type
$job = $this->get( $qtype )->pop();
if ( !$job ) {
JobQueueAggregator::singleton()->notifyQueueEmpty( $this->wiki, $qtype );
}
$types = $this->cache->get( 'queues-ready', 'list' );
} else {
$types = $this->getQueuesWithJobs();
}
if ( $queueType == self::TYPE_DEFAULT ) {
$types = array_intersect( $types, $this->getDefaultQueueTypes() );
}
shuffle( $types ); // avoid starvation
foreach ( $types as $type ) { // for each queue...
$job = $this->get( $type )->pop();
if ( $job ) { // found
return $job;
} else { // not found
$this->cache->clear( 'queues-ready' );
return $job;
} else { // any job in the "default" jobs types
if ( $flags & self::USE_CACHE ) {
if ( !$this->cache->has( 'queues-ready', 'list', self::PROC_CACHE_TTL ) ) {
$this->cache->set( 'queues-ready', 'list', $this->getQueuesWithJobs() );
}
$types = $this->cache->get( 'queues-ready', 'list' );
} else {
$types = $this->getQueuesWithJobs();
}
}
return false; // no jobs found
if ( $qtype == self::TYPE_DEFAULT ) {
$types = array_intersect( $types, $this->getDefaultQueueTypes() );
}
shuffle( $types ); // avoid starvation
foreach ( $types as $type ) { // for each queue...
$job = $this->get( $type )->pop();
if ( $job ) { // found
return $job;
} else { // not found
JobQueueAggregator::singleton()->notifyQueueEmpty( $this->wiki, $type );
$this->cache->clear( 'queues-ready' );
}
}
return false; // no jobs found
}
}
/**
@ -204,6 +217,8 @@ class JobQueueGroup {
}
/**
* Get the list of job types that have non-empty queues
*
* @return Array List of job types that have non-empty queues
*/
public function getQueuesWithJobs() {
@ -216,19 +231,6 @@ class JobQueueGroup {
return $types;
}
/**
* @return Array List of default job types that have non-empty queues
*/
public function getDefaultQueuesWithJobs() {
$types = array();
foreach ( $this->getDefaultQueueTypes() as $type ) {
if ( !$this->get( $type )->isEmpty() ) {
$types[] = $type;
}
}
return $types;
}
/**
* Execute any due periodic queue maintenance tasks for all queues.
*

View file

@ -51,37 +51,12 @@ class nextJobDB extends Maintenance {
// Handle any required periodic queue maintenance
$this->executeReadyPeriodicTasks();
$memcKey = 'jobqueue:dbs:v3';
$pendingDbInfo = $wgMemc->get( $memcKey );
// If the cache entry wasn't present, is stale, or in .1% of cases otherwise,
// regenerate the cache. Use any available stale cache if another process is
// currently regenerating the pending DB information.
if ( !is_array( $pendingDbInfo )
|| ( time() - $pendingDbInfo['timestamp'] ) > 300 // 5 minutes
|| mt_rand( 0, 999 ) == 0
) {
if ( $wgMemc->add( "$memcKey:rebuild", 1, 1800 ) ) { // lock
$pendingDbInfo = array(
'pendingDBs' => $this->getPendingDbs(),
'timestamp' => time()
);
for ( $attempts=1; $attempts <= 25; ++$attempts ) {
if ( $wgMemc->add( "$memcKey:lock", 1, 60 ) ) { // lock
$wgMemc->set( $memcKey, $pendingDbInfo );
$wgMemc->delete( "$memcKey:lock" ); // unlock
break;
}
}
$wgMemc->delete( "$memcKey:rebuild" ); // unlock
}
}
if ( !is_array( $pendingDbInfo ) || !$pendingDbInfo['pendingDBs'] ) {
// Get all the queues with jobs in them
$pendingDBs = JobQueueAggregator::singleton()->getAllReadyWikiQueues();
if ( !count( $pendingDBs ) ) {
return; // no DBs with jobs or cache is both empty and locked
}
$pendingDBs = $pendingDbInfo['pendingDBs']; // convenience
do {
$again = false;
@ -101,20 +76,8 @@ class nextJobDB extends Maintenance {
list( $type, $db ) = $candidates[ mt_rand( 0, count( $candidates ) - 1 ) ];
if ( !$this->checkJob( $type, $db ) ) { // queue is actually empty?
$pendingDBs = $this->delistDB( $pendingDBs, $db, $type );
// Update the cache to remove the outdated information.
// Make sure that this does not race (especially with full rebuilds).
if ( $wgMemc->add( "$memcKey:lock", 1, 60 ) ) { // lock
$curInfo = $wgMemc->get( $memcKey );
if ( is_array( $curInfo ) ) {
$curInfo['pendingDBs'] =
$this->delistDB( $curInfo['pendingDBs'], $db, $type );
$wgMemc->set( $memcKey, $curInfo );
// May as well make use of this newer information
$pendingDBs = $curInfo['pendingDBs'];
}
$wgMemc->delete( "$memcKey:lock" ); // unlock
}
$pendingDBs[$type] = array_diff( $pendingDBs[$type], $db );
JobQueueAggregator::singleton()->notifyQueueEmpty( $db, $type );
$again = true;
}
} while ( $again );
@ -126,19 +89,6 @@ class nextJobDB extends Maintenance {
}
}
/**
* Remove a type/DB entry from the list of queues with jobs
*
* @param $pendingDBs array
* @param $db string
* @param $type string
* @return Array
*/
private function delistDB( array $pendingDBs, $db, $type ) {
$pendingDBs[$type] = array_diff( $pendingDBs[$type], array( $db ) );
return $pendingDBs;
}
/**
* Check if the specified database has a job of the specified type in it.
* The type may be false to indicate "all".
@ -150,23 +100,6 @@ class nextJobDB extends Maintenance {
return !JobQueueGroup::singleton( $dbName )->get( $type )->isEmpty();
}
/**
* Get all databases that have a pending job
* @return array
*/
private function getPendingDbs() {
global $wgLocalDatabases;
$pendingDBs = array(); // (job type => (db list))
foreach ( $wgLocalDatabases as $db ) {
foreach ( JobQueueGroup::singleton( $db )->getQueuesWithJobs() as $type ) {
$pendingDBs[$type][] = $db;
}
}
return $pendingDBs;
}
/**
* Do all ready periodic jobs for all databases every 5 minutes (and .1% of the time)
* @return integer

View file

@ -85,7 +85,7 @@ class RunJobs extends Maintenance {
do {
$job = ( $type === false )
? $group->pop( JobQueueGroup::TYPE_DEFAULT, JobQueueGroup::USE_CACHE )
: $group->get( $type )->pop(); // job from a single queue
: $group->pop( $type ); // job from a single queue
if ( $job ) { // found a job
// Perform the job (logging success/failure and runtime)...
$t = microtime( true );