Allow sharding in site_stats update

This is useful when many edits happen at the same time, because spreading
the updates across several rows reduces the time spent waiting for the row lock.

Bug: T306589
Change-Id: I3b869e3b85dfd66575390ef4247f2f81f19c878e
This commit is contained in:
Amir Sarabadani 2022-04-26 23:09:38 +02:00
parent e46d36c658
commit 5156ae0404
7 changed files with 81 additions and 5 deletions

View file

@ -2176,6 +2176,17 @@ config-schema:
enqueued for later.
There should be no reason to set this in a normal production environment.
@since 1.38
MultiShardSiteStats:
default: false
type: boolean
description: |-
Whether the site_stats table should have multiple rows. If set to true, each update
modifies one of ten rows, chosen at random, to reduce lock wait time on wikis
that have lots of concurrent edits.
It should only be set to true on really large wikis with a high volume of edits;
otherwise it can cause inaccuracy in the data.
@since 1.39
@warning EXPERIMENTAL!
CacheDirectory:
default: false
description: |-

View file

@ -1412,6 +1412,12 @@ class MainConfigNames {
*/
public const ForceDeferredUpdatesPreSend = 'ForceDeferredUpdatesPreSend';
/**
* Name constant for the MultiShardSiteStats setting, for use with Config::get()
* @see MainConfigSchema::MultiShardSiteStats
*/
public const MultiShardSiteStats = 'MultiShardSiteStats';
/**
* Name constant for the CacheDirectory setting, for use with Config::get()
* @see MainConfigSchema::CacheDirectory

View file

@ -3416,6 +3416,21 @@ class MainConfigSchema {
'default' => false,
];
/**
* Whether the site_stats table should have multiple rows. If set to true, each update
* modifies one of ten rows, chosen at random, to reduce lock wait time on wikis
* that have lots of concurrent edits.
* It should only be set to true on really large wikis with a high volume of edits;
* otherwise it can cause inaccuracy in the data.
*
* @since 1.39
* @warning EXPERIMENTAL!
*/
public const MultiShardSiteStats = [
'default' => false,
'type' => 'boolean',
];
// endregion -- end performance hacks
/***************************************************************************/

View file

@ -237,15 +237,26 @@ class SiteStats {
/**
* @param IDatabase $db
* @return stdClass|bool
* @return stdClass
*/
private static function doLoadFromDB( IDatabase $db ) {
return $db->selectRow(
$rows = $db->select(
'site_stats',
self::selectFields(),
[ 'ss_row_id' => 1 ],
'*',
__METHOD__
);
$finalRow = new stdClass();
foreach ( $rows as $row ) {
foreach ( self::selectFields() as $field ) {
$finalRow->$field = $finalRow->$field ?? 0;
if ( $row->$field ) {
$finalRow->$field += $row->$field;
}
}
}
return $finalRow;
}
/**

View file

@ -424,6 +424,7 @@ return [
'LinkHolderBatchSize' => 1000,
'MaximumMovedPages' => 100,
'ForceDeferredUpdatesPreSend' => false,
'MultiShardSiteStats' => false,
'CacheDirectory' => false,
'MainCacheType' => 0,
'MessageCacheType' => -1,
@ -2569,6 +2570,7 @@ return [
0 => 'integer',
1 => 'boolean',
],
'MultiShardSiteStats' => 'boolean',
'ObjectCaches' => 'object',
'MainWANCache' => [
0 => 'integer',

View file

@ -1396,6 +1396,12 @@ $wgMaximumMovedPages = null;
*/
$wgForceDeferredUpdatesPreSend = null;
/**
* Config variable stub for the MultiShardSiteStats setting, for use by phpdoc and IDEs.
* @see MediaWiki\MainConfigSchema::MultiShardSiteStats
*/
$wgMultiShardSiteStats = null;
/**
* Config variable stub for the CacheDirectory setting, for use by phpdoc and IDEs.
* @see MediaWiki\MainConfigSchema::CacheDirectory

View file

@ -37,6 +37,9 @@ class SiteStatsUpdate implements DeferrableUpdate, MergeableUpdate {
/** @var int */
protected $images = 0;
private const SHARDS_OFF = 1;
private const SHARDS_ON = 10;
/** @var string[] Map of (table column => counter type) */
private const COUNTERS = [
'ss_total_edits' => 'edits',
@ -98,6 +101,8 @@ class SiteStatsUpdate implements DeferrableUpdate, MergeableUpdate {
public function doUpdate() {
$services = MediaWikiServices::getInstance();
$stats = $services->getStatsdDataFactory();
$shards = $services->getMainConfig()->get( MainConfigNames::MultiShardSiteStats ) ?
self::SHARDS_ON : self::SHARDS_OFF;
$deltaByType = [];
foreach ( self::COUNTERS as $type ) {
@ -111,16 +116,26 @@ class SiteStatsUpdate implements DeferrableUpdate, MergeableUpdate {
( new AutoCommitUpdate(
$services->getDBLoadBalancer()->getConnectionRef( DB_PRIMARY ),
__METHOD__,
static function ( IDatabase $dbw, $fname ) use ( $deltaByType ) {
static function ( IDatabase $dbw, $fname ) use ( $deltaByType, $shards ) {
$set = [];
$initValues = [];
if ( $shards > 1 ) {
$shard = mt_rand( 1, $shards );
} else {
$shard = 1;
}
$hasNegativeDelta = false;
foreach ( self::COUNTERS as $field => $type ) {
$delta = (int)$deltaByType[$type];
$initValues[$field] = $delta;
if ( $delta > 0 ) {
$set[] = "$field=" . $dbw->buildGreatest(
[ $field => $dbw->addIdentifierQuotes( $field ) . '+' . abs( $delta ) ],
0
);
} elseif ( $delta < 0 ) {
$hasNegativeDelta = true;
$set[] = "$field=" . $dbw->buildGreatest(
[ 'new' => $dbw->addIdentifierQuotes( $field ) . '-' . abs( $delta ) ],
0
@ -129,7 +144,17 @@ class SiteStatsUpdate implements DeferrableUpdate, MergeableUpdate {
}
if ( $set ) {
$dbw->update( 'site_stats', $set, [ 'ss_row_id' => 1 ], $fname );
if ( $hasNegativeDelta ) {
$dbw->update( 'site_stats', $set, [ 'ss_row_id' => $shard ], $fname );
} else {
$dbw->upsert(
'site_stats',
array_merge( [ 'ss_row_id' => $shard ], $initValues ),
'ss_row_id',
$set,
$fname
);
}
}
}
) )->doUpdate();