Merge "Don't throw an exception when waiting for replication times out"
This commit is contained in:
commit
2f86f8dbb9
11 changed files with 42 additions and 73 deletions
|
|
@@ -30,7 +30,6 @@ use MediaWiki\Session\SessionManager;
|
|||
use MediaWiki\MediaWikiServices;
|
||||
use MediaWiki\Shell\Shell;
|
||||
use Wikimedia\ScopedCallback;
|
||||
use Wikimedia\Rdbms\DBReplicationWaitError;
|
||||
use Wikimedia\WrappedString;
|
||||
|
||||
/**
|
||||
|
|
@@ -2922,17 +2921,13 @@ function wfGetNull() {
|
|||
* @param float|null $ifWritesSince Only wait if writes were done since this UNIX timestamp
|
||||
* @param string|bool $wiki Wiki identifier accepted by wfGetLB
|
||||
* @param string|bool $cluster Cluster name accepted by LBFactory. Default: false.
|
||||
* @param int|null $timeout Max wait time. Default: 1 day (cli), ~10 seconds (web)
|
||||
* @param int|null $timeout Max wait time. Default: 60 seconds (cli), 1 second (web)
|
||||
* @return bool Success (able to connect and no timeouts reached)
|
||||
* @deprecated since 1.27 Use LBFactory::waitForReplication
|
||||
*/
|
||||
function wfWaitForSlaves(
|
||||
$ifWritesSince = null, $wiki = false, $cluster = false, $timeout = null
|
||||
) {
|
||||
if ( $timeout === null ) {
|
||||
$timeout = wfIsCLI() ? 60 : 10;
|
||||
}
|
||||
|
||||
if ( $cluster === '*' ) {
|
||||
$cluster = false;
|
||||
$wiki = false;
|
||||
|
|
@@ -2940,20 +2935,18 @@ function wfWaitForSlaves(
|
|||
$wiki = wfWikiID();
|
||||
}
|
||||
|
||||
try {
|
||||
$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
|
||||
$lbFactory->waitForReplication( [
|
||||
'wiki' => $wiki,
|
||||
'cluster' => $cluster,
|
||||
'timeout' => $timeout,
|
||||
// B/C: first argument used to be "max seconds of lag"; ignore such values
|
||||
'ifWritesSince' => ( $ifWritesSince > 1e9 ) ? $ifWritesSince : null
|
||||
] );
|
||||
} catch ( DBReplicationWaitError $e ) {
|
||||
return false;
|
||||
$opts = [
|
||||
'wiki' => $wiki,
|
||||
'cluster' => $cluster,
|
||||
// B/C: first argument used to be "max seconds of lag"; ignore such values
|
||||
'ifWritesSince' => ( $ifWritesSince > 1e9 ) ? $ifWritesSince : null
|
||||
];
|
||||
if ( $timeout !== null ) {
|
||||
$opts['timeout'] = $timeout;
|
||||
}
|
||||
|
||||
return true;
|
||||
$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
|
||||
return $lbFactory->waitForReplication( $opts );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@@ -34,6 +34,8 @@ require_once __DIR__ . '/../../maintenance/Maintenance.php';
|
|||
* @since 1.17
|
||||
*/
|
||||
abstract class DatabaseUpdater {
|
||||
const REPLICATION_WAIT_TIMEOUT = 300;
|
||||
|
||||
/**
|
||||
* Array of updates to perform on the database
|
||||
*
|
||||
|
|
@@ -484,7 +486,7 @@ abstract class DatabaseUpdater {
|
|||
flush();
|
||||
if ( $ret !== false ) {
|
||||
$updatesDone[] = $origParams;
|
||||
$lbFactory->waitForReplication();
|
||||
$lbFactory->waitForReplication( [ 'timeout' => self::REPLICATION_WAIT_TIMEOUT ] );
|
||||
} else {
|
||||
$updatesSkipped[] = [ $func, $params, $origParams ];
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -924,7 +924,8 @@ class MysqlUpdater extends DatabaseUpdater {
|
|||
$count = ( $count + 1 ) % 100;
|
||||
if ( $count == 0 ) {
|
||||
$lbFactory = $services->getDBLoadBalancerFactory();
|
||||
$lbFactory->waitForReplication( [ 'wiki' => wfWikiID() ] );
|
||||
$lbFactory->waitForReplication( [
|
||||
'wiki' => wfWikiID(), 'timeout' => self::REPLICATION_WAIT_TIMEOUT ] );
|
||||
}
|
||||
$this->db->insert( 'templatelinks',
|
||||
[
|
||||
|
|
|
|||
|
|
@@ -29,7 +29,6 @@ use Psr\Log\LoggerInterface;
|
|||
use Wikimedia\ScopedCallback;
|
||||
use Wikimedia\Rdbms\LBFactory;
|
||||
use Wikimedia\Rdbms\DBError;
|
||||
use Wikimedia\Rdbms\DBReplicationWaitError;
|
||||
|
||||
/**
|
||||
* Job queue runner utility methods
|
||||
|
|
@@ -225,21 +224,16 @@ class JobRunner implements LoggerAwareInterface {
|
|||
// other wikis in the farm (on different masters) get a chance.
|
||||
$timePassed = microtime( true ) - $lastCheckTime;
|
||||
if ( $timePassed >= self::LAG_CHECK_PERIOD || $timePassed < 0 ) {
|
||||
try {
|
||||
$lbFactory->waitForReplication( [
|
||||
'ifWritesSince' => $lastCheckTime,
|
||||
'timeout' => self::MAX_ALLOWED_LAG
|
||||
] );
|
||||
} catch ( DBReplicationWaitError $e ) {
|
||||
$success = $lbFactory->waitForReplication( [
|
||||
'ifWritesSince' => $lastCheckTime,
|
||||
'timeout' => self::MAX_ALLOWED_LAG,
|
||||
] );
|
||||
if ( !$success ) {
|
||||
$response['reached'] = 'replica-lag-limit';
|
||||
break;
|
||||
}
|
||||
$lastCheckTime = microtime( true );
|
||||
}
|
||||
// Don't let any queue replica DBs/backups fall behind
|
||||
if ( $jobsPopped > 0 && ( $jobsPopped % 100 ) == 0 ) {
|
||||
$group->waitForBackups();
|
||||
}
|
||||
|
||||
// Bail if near-OOM instead of in a job
|
||||
if ( !$this->checkMemoryOK() ) {
|
||||
|
|
|
|||
|
|
@@ -19,7 +19,6 @@
|
|||
* @ingroup JobQueue
|
||||
*/
|
||||
use MediaWiki\MediaWikiServices;
|
||||
use Wikimedia\Rdbms\DBReplicationWaitError;
|
||||
|
||||
/**
|
||||
* Job for pruning recent changes
|
||||
|
|
@@ -105,11 +104,9 @@ class RecentChangesUpdateJob extends Job {
|
|||
$dbw->delete( 'recentchanges', [ 'rc_id' => $rcIds ], __METHOD__ );
|
||||
Hooks::run( 'RecentChangesPurgeRows', [ $rows ] );
|
||||
// There might be more, so try waiting for replica DBs
|
||||
try {
|
||||
$factory->commitAndWaitForReplication(
|
||||
__METHOD__, $ticket, [ 'timeout' => 3 ]
|
||||
);
|
||||
} catch ( DBReplicationWaitError $e ) {
|
||||
if ( !$factory->commitAndWaitForReplication(
|
||||
__METHOD__, $ticket, [ 'timeout' => 3 ]
|
||||
) ) {
|
||||
// Another job will continue anyway
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -21,7 +21,6 @@
|
|||
* @ingroup JobQueue
|
||||
*/
|
||||
use MediaWiki\MediaWikiServices;
|
||||
use Wikimedia\Rdbms\DBReplicationWaitError;
|
||||
|
||||
/**
|
||||
* Job to update link tables for pages
|
||||
|
|
@@ -89,13 +88,11 @@ class RefreshLinksJob extends Job {
|
|||
// From then on, we know that any template changes at the time the base job was
|
||||
// enqueued will be reflected in backlink page parses when the leaf jobs run.
|
||||
if ( !isset( $this->params['range'] ) ) {
|
||||
try {
|
||||
$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
|
||||
$lbFactory->waitForReplication( [
|
||||
$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
|
||||
if ( !$lbFactory->waitForReplication( [
|
||||
'wiki' => wfWikiID(),
|
||||
'timeout' => self::LAG_WAIT_TIMEOUT
|
||||
] );
|
||||
} catch ( DBReplicationWaitError $e ) { // only try so hard
|
||||
] ) ) { // only try so hard
|
||||
$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
|
||||
$stats->increment( 'refreshlinks.lag_wait_failed' );
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -23,6 +23,7 @@ namespace Wikimedia\Rdbms;
|
|||
|
||||
/**
|
||||
* Exception class for replica DB wait timeouts
|
||||
* @deprecated since 1.32
|
||||
* @ingroup Database
|
||||
*/
|
||||
class DBReplicationWaitError extends DBExpectedError {
|
||||
|
|
|
|||
|
|
@@ -269,9 +269,9 @@ interface ILBFactory {
|
|||
* @param array $opts Optional fields that include:
|
||||
* - domain : wait on the load balancer DBs that handles the given domain ID
|
||||
* - cluster : wait on the given external load balancer DBs
|
||||
* - timeout : Max wait time. Default: ~60 seconds
|
||||
* - timeout : Max wait time. Default: 60 seconds for CLI, 1 second for web.
|
||||
* - ifWritesSince: Only wait if writes were done since this UNIX timestamp
|
||||
* @throws DBReplicationWaitError If a timeout or error occurred waiting on a DB cluster
|
||||
* @return bool True on success, false if a timeout or error occurred while waiting
|
||||
*/
|
||||
public function waitForReplication( array $opts = [] );
|
||||
|
||||
|
|
@@ -301,7 +301,7 @@ interface ILBFactory {
|
|||
* @param string $fname Caller name (e.g. __METHOD__)
|
||||
* @param mixed $ticket Result of getEmptyTransactionTicket()
|
||||
* @param array $opts Options to waitForReplication()
|
||||
* @throws DBReplicationWaitError
|
||||
* @return bool True if the wait was successful, false on timeout
|
||||
*/
|
||||
public function commitAndWaitForReplication( $fname, $ticket, array $opts = [] );
|
||||
|
||||
|
|
|
|||
|
|
@@ -375,7 +375,7 @@ abstract class LBFactory implements ILBFactory {
|
|||
$opts += [
|
||||
'domain' => false,
|
||||
'cluster' => false,
|
||||
'timeout' => $this->cliMode ? 60 : 10,
|
||||
'timeout' => $this->cliMode ? 60 : 1,
|
||||
'ifWritesSince' => null
|
||||
];
|
||||
|
||||
|
|
@@ -432,13 +432,7 @@ abstract class LBFactory implements ILBFactory {
|
|||
}
|
||||
}
|
||||
|
||||
if ( $failed ) {
|
||||
throw new DBReplicationWaitError(
|
||||
null,
|
||||
"Could not wait for replica DBs to catch up to " .
|
||||
implode( ', ', $failed )
|
||||
);
|
||||
}
|
||||
return !$failed;
|
||||
}
|
||||
|
||||
public function setWaitForReplicationListener( $name, callable $callback = null ) {
|
||||
|
|
@@ -478,12 +472,13 @@ abstract class LBFactory implements ILBFactory {
|
|||
}
|
||||
|
||||
$this->commitMasterChanges( $fnameEffective );
|
||||
$this->waitForReplication( $opts );
|
||||
$waitSucceeded = $this->waitForReplication( $opts );
|
||||
// If a nested caller committed on behalf of $fname, start another empty $fname
|
||||
// transaction, leaving the caller with the same empty transaction state as before.
|
||||
if ( $fnameEffective !== $fname ) {
|
||||
$this->beginMasterChanges( $fnameEffective );
|
||||
}
|
||||
return $waitSucceeded;
|
||||
}
|
||||
|
||||
public function getChronologyProtectorTouched( $dbName ) {
|
||||
|
|
|
|||
|
|
@@ -26,7 +26,6 @@ require_once __DIR__ . '/../includes/PHPVersionCheck.php';
|
|||
wfEntryPointCheck( 'cli' );
|
||||
|
||||
use MediaWiki\Shell\Shell;
|
||||
use Wikimedia\Rdbms\DBReplicationWaitError;
|
||||
|
||||
/**
|
||||
* @defgroup MaintenanceArchive Maintenance archives
|
||||
|
|
@@ -1393,17 +1392,12 @@ abstract class Maintenance {
|
|||
*/
|
||||
protected function commitTransaction( IDatabase $dbw, $fname ) {
|
||||
$dbw->commit( $fname );
|
||||
try {
|
||||
$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
|
||||
$lbFactory->waitForReplication(
|
||||
[ 'timeout' => 30, 'ifWritesSince' => $this->lastReplicationWait ]
|
||||
);
|
||||
$this->lastReplicationWait = microtime( true );
|
||||
|
||||
return true;
|
||||
} catch ( DBReplicationWaitError $e ) {
|
||||
return false;
|
||||
}
|
||||
$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
|
||||
$waitSucceeded = $lbFactory->waitForReplication(
|
||||
[ 'timeout' => 30, 'ifWritesSince' => $this->lastReplicationWait ]
|
||||
);
|
||||
$this->lastReplicationWait = microtime( true );
|
||||
return $waitSucceeded;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@@ -25,7 +25,6 @@
|
|||
require_once __DIR__ . '/Maintenance.php';
|
||||
|
||||
use MediaWiki\MediaWikiServices;
|
||||
use Wikimedia\Rdbms\DBReplicationWaitError;
|
||||
|
||||
/**
|
||||
* Maintenance script to update cached special pages.
|
||||
|
|
@@ -134,11 +133,7 @@ class UpdateSpecialPages extends Maintenance {
|
|||
$this->output( "Reconnected\n\n" );
|
||||
}
|
||||
// Wait for the replica DB to catch up
|
||||
try {
|
||||
$lbFactory->waitForReplication();
|
||||
} catch ( DBReplicationWaitError $e ) {
|
||||
// ignore
|
||||
}
|
||||
$lbFactory->waitForReplication();
|
||||
}
|
||||
|
||||
public function doSpecialPageCacheUpdates( $dbw ) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue