Merge "Don't throw an exception when waiting for replication times out"

This commit is contained in:
jenkins-bot 2018-09-04 02:20:12 +00:00 committed by Gerrit Code Review
commit 2f86f8dbb9
11 changed files with 42 additions and 73 deletions

View file

@ -30,7 +30,6 @@ use MediaWiki\Session\SessionManager;
use MediaWiki\MediaWikiServices;
use MediaWiki\Shell\Shell;
use Wikimedia\ScopedCallback;
use Wikimedia\Rdbms\DBReplicationWaitError;
use Wikimedia\WrappedString;
/**
@ -2922,17 +2921,13 @@ function wfGetNull() {
* @param float|null $ifWritesSince Only wait if writes were done since this UNIX timestamp
* @param string|bool $wiki Wiki identifier accepted by wfGetLB
* @param string|bool $cluster Cluster name accepted by LBFactory. Default: false.
* @param int|null $timeout Max wait time. Default: 1 day (cli), ~10 seconds (web)
* @param int|null $timeout Max wait time. Default: 60 seconds (cli), 1 second (web)
* @return bool Success (able to connect and no timeouts reached)
* @deprecated since 1.27 Use LBFactory::waitForReplication
*/
function wfWaitForSlaves(
$ifWritesSince = null, $wiki = false, $cluster = false, $timeout = null
) {
if ( $timeout === null ) {
$timeout = wfIsCLI() ? 60 : 10;
}
if ( $cluster === '*' ) {
$cluster = false;
$wiki = false;
@ -2940,20 +2935,18 @@ function wfWaitForSlaves(
$wiki = wfWikiID();
}
try {
$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
$lbFactory->waitForReplication( [
'wiki' => $wiki,
'cluster' => $cluster,
'timeout' => $timeout,
// B/C: first argument used to be "max seconds of lag"; ignore such values
'ifWritesSince' => ( $ifWritesSince > 1e9 ) ? $ifWritesSince : null
] );
} catch ( DBReplicationWaitError $e ) {
return false;
$opts = [
'wiki' => $wiki,
'cluster' => $cluster,
// B/C: first argument used to be "max seconds of lag"; ignore such values
'ifWritesSince' => ( $ifWritesSince > 1e9 ) ? $ifWritesSince : null
];
if ( $timeout !== null ) {
$opts['timeout'] = $timeout;
}
return true;
$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
return $lbFactory->waitForReplication( $opts );
}
/**

View file

@ -34,6 +34,8 @@ require_once __DIR__ . '/../../maintenance/Maintenance.php';
* @since 1.17
*/
abstract class DatabaseUpdater {
const REPLICATION_WAIT_TIMEOUT = 300;
/**
* Array of updates to perform on the database
*
@ -484,7 +486,7 @@ abstract class DatabaseUpdater {
flush();
if ( $ret !== false ) {
$updatesDone[] = $origParams;
$lbFactory->waitForReplication();
$lbFactory->waitForReplication( [ 'timeout' => self::REPLICATION_WAIT_TIMEOUT ] );
} else {
$updatesSkipped[] = [ $func, $params, $origParams ];
}

View file

@ -924,7 +924,8 @@ class MysqlUpdater extends DatabaseUpdater {
$count = ( $count + 1 ) % 100;
if ( $count == 0 ) {
$lbFactory = $services->getDBLoadBalancerFactory();
$lbFactory->waitForReplication( [ 'wiki' => wfWikiID() ] );
$lbFactory->waitForReplication( [
'wiki' => wfWikiID(), 'timeout' => self::REPLICATION_WAIT_TIMEOUT ] );
}
$this->db->insert( 'templatelinks',
[

View file

@ -29,7 +29,6 @@ use Psr\Log\LoggerInterface;
use Wikimedia\ScopedCallback;
use Wikimedia\Rdbms\LBFactory;
use Wikimedia\Rdbms\DBError;
use Wikimedia\Rdbms\DBReplicationWaitError;
/**
* Job queue runner utility methods
@ -225,21 +224,16 @@ class JobRunner implements LoggerAwareInterface {
// other wikis in the farm (on different masters) get a chance.
$timePassed = microtime( true ) - $lastCheckTime;
if ( $timePassed >= self::LAG_CHECK_PERIOD || $timePassed < 0 ) {
try {
$lbFactory->waitForReplication( [
'ifWritesSince' => $lastCheckTime,
'timeout' => self::MAX_ALLOWED_LAG
] );
} catch ( DBReplicationWaitError $e ) {
$success = $lbFactory->waitForReplication( [
'ifWritesSince' => $lastCheckTime,
'timeout' => self::MAX_ALLOWED_LAG,
] );
if ( !$success ) {
$response['reached'] = 'replica-lag-limit';
break;
}
$lastCheckTime = microtime( true );
}
// Don't let any queue replica DBs/backups fall behind
if ( $jobsPopped > 0 && ( $jobsPopped % 100 ) == 0 ) {
$group->waitForBackups();
}
// Bail if near-OOM instead of in a job
if ( !$this->checkMemoryOK() ) {

View file

@ -19,7 +19,6 @@
* @ingroup JobQueue
*/
use MediaWiki\MediaWikiServices;
use Wikimedia\Rdbms\DBReplicationWaitError;
/**
* Job for pruning recent changes
@ -105,11 +104,9 @@ class RecentChangesUpdateJob extends Job {
$dbw->delete( 'recentchanges', [ 'rc_id' => $rcIds ], __METHOD__ );
Hooks::run( 'RecentChangesPurgeRows', [ $rows ] );
// There might be more, so try waiting for replica DBs
try {
$factory->commitAndWaitForReplication(
__METHOD__, $ticket, [ 'timeout' => 3 ]
);
} catch ( DBReplicationWaitError $e ) {
if ( !$factory->commitAndWaitForReplication(
__METHOD__, $ticket, [ 'timeout' => 3 ]
) ) {
// Another job will continue anyway
break;
}

View file

@ -21,7 +21,6 @@
* @ingroup JobQueue
*/
use MediaWiki\MediaWikiServices;
use Wikimedia\Rdbms\DBReplicationWaitError;
/**
* Job to update link tables for pages
@ -89,13 +88,11 @@ class RefreshLinksJob extends Job {
// From then on, we know that any template changes at the time the base job was
// enqueued will be reflected in backlink page parses when the leaf jobs run.
if ( !isset( $this->params['range'] ) ) {
try {
$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
$lbFactory->waitForReplication( [
$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
if ( !$lbFactory->waitForReplication( [
'wiki' => wfWikiID(),
'timeout' => self::LAG_WAIT_TIMEOUT
] );
} catch ( DBReplicationWaitError $e ) { // only try so hard
] ) ) { // only try so hard
$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
$stats->increment( 'refreshlinks.lag_wait_failed' );
}

View file

@ -23,6 +23,7 @@ namespace Wikimedia\Rdbms;
/**
* Exception class for replica DB wait timeouts
* @deprecated since 1.32
* @ingroup Database
*/
class DBReplicationWaitError extends DBExpectedError {

View file

@ -269,9 +269,9 @@ interface ILBFactory {
* @param array $opts Optional fields that include:
* - domain : wait on the load balancer DBs that handles the given domain ID
* - cluster : wait on the given external load balancer DBs
* - timeout : Max wait time. Default: ~60 seconds
* - timeout : Max wait time. Default: 60 seconds for CLI, 1 second for web.
* - ifWritesSince: Only wait if writes were done since this UNIX timestamp
* @throws DBReplicationWaitError If a timeout or error occurred waiting on a DB cluster
* @return bool True on success, false if a timeout or error occurred while waiting
*/
public function waitForReplication( array $opts = [] );
@ -301,7 +301,7 @@ interface ILBFactory {
* @param string $fname Caller name (e.g. __METHOD__)
* @param mixed $ticket Result of getEmptyTransactionTicket()
* @param array $opts Options to waitForReplication()
* @throws DBReplicationWaitError
* @return bool True if the wait was successful, false on timeout
*/
public function commitAndWaitForReplication( $fname, $ticket, array $opts = [] );

View file

@ -375,7 +375,7 @@ abstract class LBFactory implements ILBFactory {
$opts += [
'domain' => false,
'cluster' => false,
'timeout' => $this->cliMode ? 60 : 10,
'timeout' => $this->cliMode ? 60 : 1,
'ifWritesSince' => null
];
@ -432,13 +432,7 @@ abstract class LBFactory implements ILBFactory {
}
}
if ( $failed ) {
throw new DBReplicationWaitError(
null,
"Could not wait for replica DBs to catch up to " .
implode( ', ', $failed )
);
}
return !$failed;
}
public function setWaitForReplicationListener( $name, callable $callback = null ) {
@ -478,12 +472,13 @@ abstract class LBFactory implements ILBFactory {
}
$this->commitMasterChanges( $fnameEffective );
$this->waitForReplication( $opts );
$waitSucceeded = $this->waitForReplication( $opts );
// If a nested caller committed on behalf of $fname, start another empty $fname
// transaction, leaving the caller with the same empty transaction state as before.
if ( $fnameEffective !== $fname ) {
$this->beginMasterChanges( $fnameEffective );
}
return $waitSucceeded;
}
public function getChronologyProtectorTouched( $dbName ) {

View file

@ -26,7 +26,6 @@ require_once __DIR__ . '/../includes/PHPVersionCheck.php';
wfEntryPointCheck( 'cli' );
use MediaWiki\Shell\Shell;
use Wikimedia\Rdbms\DBReplicationWaitError;
/**
* @defgroup MaintenanceArchive Maintenance archives
@ -1393,17 +1392,12 @@ abstract class Maintenance {
*/
protected function commitTransaction( IDatabase $dbw, $fname ) {
$dbw->commit( $fname );
try {
$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
$lbFactory->waitForReplication(
[ 'timeout' => 30, 'ifWritesSince' => $this->lastReplicationWait ]
);
$this->lastReplicationWait = microtime( true );
return true;
} catch ( DBReplicationWaitError $e ) {
return false;
}
$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
$waitSucceeded = $lbFactory->waitForReplication(
[ 'timeout' => 30, 'ifWritesSince' => $this->lastReplicationWait ]
);
$this->lastReplicationWait = microtime( true );
return $waitSucceeded;
}
/**

View file

@ -25,7 +25,6 @@
require_once __DIR__ . '/Maintenance.php';
use MediaWiki\MediaWikiServices;
use Wikimedia\Rdbms\DBReplicationWaitError;
/**
* Maintenance script to update cached special pages.
@ -134,11 +133,7 @@ class UpdateSpecialPages extends Maintenance {
$this->output( "Reconnected\n\n" );
}
// Wait for the replica DB to catch up
try {
$lbFactory->waitForReplication();
} catch ( DBReplicationWaitError $e ) {
// ignore
}
$lbFactory->waitForReplication();
}
public function doSpecialPageCacheUpdates( $dbw ) {