Make getLagFromPtHeartbeat() always use the LB cluster master entry
Before, it just used the immediate master entry, which could be another slave. In that case, it may not even exist at all. Bug: T119648 Change-Id: Iea970b81ad2c9855aafcccf0bb0662fc0b3a8d4d
This commit is contained in:
parent
90058e5ce7
commit
9c0b55d926
5 changed files with 164 additions and 23 deletions
|
|
@ -148,6 +148,9 @@ abstract class DatabaseBase implements IDatabase {
|
|||
*/
|
||||
private $mTrxWriteDuration = 0.0;
|
||||
|
||||
/** @var IDatabase|null Lazy handle to the master DB this server replicates from */
|
||||
private $lazyMasterHandle;
|
||||
|
||||
/**
|
||||
* @since 1.21
|
||||
* @var resource File handle for upgrade
|
||||
|
|
@ -328,6 +331,25 @@ abstract class DatabaseBase implements IDatabase {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a lazy-connecting DB handle to the master DB (for replication status purposes)
|
||||
*
|
||||
* @param IDatabase $conn
|
||||
* @since 1.27
|
||||
*/
|
||||
public function setLazyMasterHandle( IDatabase $conn ) {
|
||||
$this->lazyMasterHandle = $conn;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return IDatabase|null
|
||||
* @see setLazyMasterHandle()
|
||||
* @since 1.27
|
||||
*/
|
||||
public function getLazyMasterHandle() {
|
||||
return $this->lazyMasterHandle;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return TransactionProfiler
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -621,13 +621,20 @@ abstract class DatabaseMysqlBase extends Database {
|
|||
abstract protected function mysqlPing();
|
||||
|
||||
function getLag() {
|
||||
if ( $this->lagDetectionMethod === 'pt-heartbeat' ) {
|
||||
if ( $this->getLagDetectionMethod() === 'pt-heartbeat' ) {
|
||||
return $this->getLagFromPtHeartbeat();
|
||||
} else {
|
||||
return $this->getLagFromSlaveStatus();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string
|
||||
*/
|
||||
protected function getLagDetectionMethod() {
|
||||
return $this->lagDetectionMethod;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return bool|int
|
||||
*/
|
||||
|
|
@ -645,35 +652,82 @@ abstract class DatabaseMysqlBase extends Database {
|
|||
* @return bool|float
|
||||
*/
|
||||
protected function getLagFromPtHeartbeat() {
|
||||
$key = wfMemcKey( 'mysql', 'master-server-id', $this->getServer() );
|
||||
$masterId = intval( $this->srvCache->get( $key ) );
|
||||
if ( !$masterId ) {
|
||||
$res = $this->query( 'SHOW SLAVE STATUS', __METHOD__ );
|
||||
$row = $res ? $res->fetchObject() : false;
|
||||
if ( $row && strval( $row->Master_Server_Id ) !== '' ) {
|
||||
$masterId = intval( $row->Master_Server_Id );
|
||||
$this->srvCache->set( $key, $masterId, 30 );
|
||||
}
|
||||
$masterInfo = $this->getMasterServerInfo();
|
||||
if ( !$masterInfo ) {
|
||||
return false; // could not get master server ID
|
||||
}
|
||||
|
||||
if ( !$masterId ) {
|
||||
return false;
|
||||
}
|
||||
list( $time, $nowUnix ) = $this->getHeartbeatData( $masterInfo['serverId'] );
|
||||
if ( $time !== null ) {
|
||||
// @time is in ISO format like "2015-09-25T16:48:10.000510"
|
||||
$dateTime = new DateTime( $time, new DateTimeZone( 'UTC' ) );
|
||||
$timeUnix = (int)$dateTime->format( 'U' ) + $dateTime->format( 'u' ) / 1e6;
|
||||
|
||||
$res = $this->query(
|
||||
"SELECT TIMESTAMPDIFF(MICROSECOND,ts,UTC_TIMESTAMP(6)) AS Lag " .
|
||||
"FROM heartbeat.heartbeat WHERE server_id = $masterId"
|
||||
);
|
||||
$row = $res ? $res->fetchObject() : false;
|
||||
if ( $row ) {
|
||||
return max( floatval( $row->Lag ) / 1e6, 0.0 );
|
||||
return max( $nowUnix - $timeUnix, 0.0 );
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
protected function getMasterServerInfo() {
|
||||
$cache = $this->srvCache;
|
||||
$key = $cache->makeGlobalKey(
|
||||
'mysql',
|
||||
'master-info',
|
||||
// Using one key for all cluster slaves is preferable
|
||||
$this->getLBInfo( 'clusterMasterHost' ) ?: $this->getServer()
|
||||
);
|
||||
|
||||
$that = $this;
|
||||
return $cache->getWithSetCallback(
|
||||
$key,
|
||||
$cache::TTL_INDEFINITE,
|
||||
function () use ( $that, $cache, $key ) {
|
||||
// Get and leave a lock key in place for a short period
|
||||
if ( !$cache->lock( $key, 0, 10 ) ) {
|
||||
return false; // avoid master connection spike slams
|
||||
}
|
||||
|
||||
$conn = $that->getLazyMasterHandle();
|
||||
if ( !$conn ) {
|
||||
return false; // something is misconfigured
|
||||
}
|
||||
|
||||
// Connect to and query the master; catch errors to avoid outages
|
||||
try {
|
||||
$res = $conn->query( 'SELECT @@server_id AS id', __METHOD__ );
|
||||
$row = $res ? $res->fetchObject() : false;
|
||||
$id = $row ? (int)$row->id : 0;
|
||||
} catch ( DBError $e ) {
|
||||
$id = 0;
|
||||
}
|
||||
|
||||
// Cache the ID if it was retrieved
|
||||
return $id ? array( 'serverId' => $id, 'asOf' => time() ) : false;
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $masterId Server ID
|
||||
* @return array (heartbeat `ts` column value or null, UNIX timestamp)
|
||||
* @see https://www.percona.com/doc/percona-toolkit/2.1/pt-heartbeat.html
|
||||
*/
|
||||
protected function getHeartbeatData( $masterId ) {
|
||||
// Get the status row for this master; use the oldest for sanity in case the master
|
||||
// has entries listed under different server IDs (which should really not happen).
|
||||
// Note: this would use "MAX(TIMESTAMPDIFF(MICROSECOND,ts,UTC_TIMESTAMP(6)))" but the
|
||||
// precision field is not supported in MySQL <= 5.5.
|
||||
$res = $this->query(
|
||||
"SELECT ts FROM heartbeat.heartbeat WHERE server_id=" . intval( $masterId )
|
||||
);
|
||||
$row = $res ? $res->fetchObject() : false;
|
||||
|
||||
return array( $row ? $row->ts : null, microtime( true ) );
|
||||
}
|
||||
|
||||
public function getApproximateLagStatus() {
|
||||
if ( $this->lagDetectionMethod === 'pt-heartbeat' ) {
|
||||
if ( $this->getLagDetectionMethod() === 'pt-heartbeat' ) {
|
||||
// Disable caching since this is fast enough and we don't want
|
||||
// to be *too* pessimistic by having both the cache TTL and the
|
||||
// pt-heartbeat interval count as lag in getSessionLagStatus()
|
||||
|
|
|
|||
|
|
@ -816,11 +816,14 @@ class LoadBalancer {
|
|||
$server['dbname'] = $dbNameOverride;
|
||||
}
|
||||
|
||||
// Let the handle know what the cluster master is (e.g. "db1052")
|
||||
$masterName = $this->getServerName( 0 );
|
||||
$server['clusterMasterHost'] = $masterName;
|
||||
|
||||
// Log when many connections are made on requests
|
||||
if ( ++$this->connsOpened >= self::CONN_HELD_WARN_THRESHOLD ) {
|
||||
$masterAddr = $this->getServerName( 0 );
|
||||
wfDebugLog( 'DBPerformance', __METHOD__ . ": " .
|
||||
"{$this->connsOpened}+ connections made (master=$masterAddr)\n" .
|
||||
"{$this->connsOpened}+ connections made (master=$masterName)\n" .
|
||||
wfBacktrace( true ) );
|
||||
}
|
||||
|
||||
|
|
@ -834,6 +837,9 @@ class LoadBalancer {
|
|||
}
|
||||
|
||||
$db->setLBInfo( $server );
|
||||
$db->setLazyMasterHandle(
|
||||
$this->getLazyConnectionRef( DB_MASTER, array(), $db->getWikiID() )
|
||||
);
|
||||
|
||||
return $db;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -256,4 +256,59 @@ class DatabaseMysqlBaseTest extends MediaWikiTestCase {
|
|||
$this->assertTrue( $pos2->hasReached( $pos1 ) );
|
||||
$this->assertFalse( $pos1->hasReached( $pos2 ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* @dataProvider provideLagAmounts
|
||||
*/
|
||||
function testPtHeartbeat( $lag ) {
|
||||
$db = $this->getMockBuilder( 'DatabaseMysql' )
|
||||
->disableOriginalConstructor()
|
||||
->setMethods( array(
|
||||
'getLagDetectionMethod', 'getHeartbeatData', 'getMasterServerInfo' ) )
|
||||
->getMock();
|
||||
|
||||
$db->expects( $this->any() )
|
||||
->method( 'getLagDetectionMethod' )
|
||||
->will( $this->returnValue( 'pt-heartbeat' ) );
|
||||
|
||||
$db->expects( $this->any() )
|
||||
->method( 'getMasterServerInfo' )
|
||||
->will( $this->returnValue( array( 'serverId' => 172, 'asOf' => time() ) ) );
|
||||
|
||||
// Fake the current time.
|
||||
list( $nowSecFrac, $nowSec ) = explode( ' ', microtime() );
|
||||
$now = (float)$nowSec + (float)$nowSecFrac;
|
||||
// Fake the heartbeat time.
|
||||
// Workarounds for weak DateTime microseconds support.
|
||||
$ptTime = $now - $lag;
|
||||
$ptSec = (int)$ptTime;
|
||||
$ptSecFrac = ( $ptTime - $ptSec );
|
||||
$ptDateTime = new DateTime( "@$ptSec" );
|
||||
$ptTimeISO = $ptDateTime->format( 'Y-m-d\TH:i:s' );
|
||||
$ptTimeISO .= ltrim( number_format( $ptSecFrac, 6 ), '0' );
|
||||
|
||||
$db->expects( $this->any() )
|
||||
->method( 'getHeartbeatData' )
|
||||
->with( 172 )
|
||||
->will( $this->returnValue( array( $ptTimeISO, $now ) ) );
|
||||
|
||||
$db->setLBInfo( 'clusterMasterHost', 'db1052' );
|
||||
$lagEst = $db->getLag();
|
||||
|
||||
$this->assertGreaterThan( $lag - .010, $lagEst, "Correct heatbeat lag" );
|
||||
$this->assertLessThan( $lag + .010, $lagEst, "Correct heatbeat lag" );
|
||||
}
|
||||
|
||||
function provideLagAmounts() {
|
||||
return array(
|
||||
array( 0 ),
|
||||
array( 0.3 ),
|
||||
array( 6.5 ),
|
||||
array( 10.1 ),
|
||||
array( 200.2 ),
|
||||
array( 400.7 ),
|
||||
array( 600.22 ),
|
||||
array( 1000.77 ),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -103,9 +103,13 @@ class LBFactoryTest extends MediaWikiTestCase {
|
|||
|
||||
$dbw = $lb->getConnection( DB_MASTER );
|
||||
$this->assertTrue( $dbw->getLBInfo( 'master' ), 'master shows as master' );
|
||||
$this->assertEquals(
|
||||
$wgDBserver, $dbw->getLBInfo( 'clusterMasterHost' ), 'cluster master set' );
|
||||
|
||||
$dbr = $lb->getConnection( DB_SLAVE );
|
||||
$this->assertTrue( $dbr->getLBInfo( 'slave' ), 'slave shows as slave' );
|
||||
$this->assertEquals(
|
||||
$wgDBserver, $dbr->getLBInfo( 'clusterMasterHost' ), 'cluster master set' );
|
||||
|
||||
$factory->shutdown();
|
||||
$lb->closeAll();
|
||||
|
|
|
|||
Loading…
Reference in a new issue