rdbms: Remove synchronous pre-send waitForReplication fallback
The `$workCallback` passed from MediaWiki.php to LBFactory is an
optional callback that Rdbms may invoke whenever it expects to idle
for some time before waiting for something.
In practice, this never happens because the main reason it was
introduced was for use in ChronologyProtector, and it was removed from
there last year in commit bd7cf4dce9 (I0456f5d40a).
The code wrapping for it has remained in-place, because there was
still one theoretical caller in LBFactory for when Redis (or some
other ChronologyProtector store) is down.
I propose that if Redis is down, the fallback is that we let
replication happen normally, which we expect in most cases to suffice
by itself, since replication is generally faster than a full
server-client-server RTT. CP logs a warning when this happens, for
operational visibility.
In the past 90 days, there are exactly 2 entries in unsampled WMF
Logstash for `channel:DBReplication message:persistSessionReplicationPositions`
(not including labswiki/cloudweb which were having problems in this
timeframe, failing for related reasons).
Logs for the two matching reqId:
1. 2022-07-14 wikidata POST /w/api.php (referer: -)
> - MediumSpecificBagOStuff::doLock failed due to timeout for
> global:Wikimedia\Rdbms\ChronologyProtector:87f2d18…:v2.
> - …::persistSessionReplicationPositions: failed to save primary
positions for DB cluster(s) s8, cluster27
2. 2022-07-27 enwiki POST /w/index.php (referer: Special:CreateAccount)
> - MediumSpecificBagOStuff::doLock failed due to timeout for
> global:Wikimedia\Rdbms\ChronologyProtector:d19e340…:v2.
> - …::persistSessionReplicationPositions: failed to save primary
positions for DB cluster(s) extension2
Bug: T314434
Change-Id: Id60cf994c7a40c50b487e6281c6bd114ddf13690
This commit is contained in:
parent
2e54519207
commit
7e9650da18
2 changed files with 11 additions and 36 deletions
|
|
@ -634,11 +634,10 @@ class MediaWiki {
|
|||
|
||||
/**
|
||||
* @see MediaWiki::preOutputCommit()
|
||||
* @param callable|null $postCommitWork [default: null]
|
||||
* @since 1.26
|
||||
*/
|
||||
public function doPreOutputCommit( callable $postCommitWork = null ) {
|
||||
self::preOutputCommit( $this->context, $postCommitWork );
|
||||
private function doPreOutputCommit() {
|
||||
self::preOutputCommit( $this->context );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -649,11 +648,11 @@ class MediaWiki {
|
|||
* If there is a significant amount of content to flush, it can be done in $postCommitWork
|
||||
*
|
||||
* @param IContextSource $context
|
||||
* @param callable|null $postCommitWork [default: null]
|
||||
* @param callable|null $postCommitWork Unused as of MediaWiki 1.39
|
||||
* @since 1.27
|
||||
*/
|
||||
public static function preOutputCommit(
|
||||
IContextSource $context, callable $postCommitWork = null
|
||||
IContextSource $context, $postCommitWork = null
|
||||
) {
|
||||
$config = $context->getConfig();
|
||||
$request = $context->getRequest();
|
||||
|
|
@ -702,7 +701,7 @@ class MediaWiki {
|
|||
$cpClientId = null;
|
||||
$lbFactory->shutdown(
|
||||
$lbFactory::SHUTDOWN_NORMAL,
|
||||
$postCommitWork,
|
||||
null,
|
||||
$cpIndex,
|
||||
$cpClientId
|
||||
);
|
||||
|
|
@ -912,24 +911,15 @@ class MediaWiki {
|
|||
// should commit, print the output, do deferred updates, jobs, and profiling.
|
||||
}
|
||||
|
||||
// GUI-ify and stash the page output in MediaWiki::doPreOutputCommit()
|
||||
$buffer = null;
|
||||
$outputWork = static function () use ( $output, &$buffer ) {
|
||||
if ( $buffer === null ) {
|
||||
$buffer = $output->output( true );
|
||||
}
|
||||
|
||||
return $buffer;
|
||||
};
|
||||
|
||||
// Commit any changes in the current transaction round so that:
|
||||
// a) the transaction is not rolled back after success output was already sent
|
||||
// b) error output is not jumbled together with success output in the response
|
||||
$this->doPreOutputCommit( $outputWork );
|
||||
$this->doPreOutputCommit();
|
||||
// If needed, push a deferred update to run jobs after the output is send
|
||||
$this->schedulePostSendJobs();
|
||||
// Ask OutputPage/Skin to render the page output
|
||||
// If no exceptions occurred then send the output since it is safe now
|
||||
$this->outputResponsePayload( $outputWork() );
|
||||
$this->outputResponsePayload( $output->output( true ) );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -318,7 +318,7 @@ abstract class LBFactory implements ILBFactory {
|
|||
|
||||
$chronProt = $this->getChronologyProtector();
|
||||
if ( ( $flags & self::SHUTDOWN_NO_CHRONPROT ) != self::SHUTDOWN_NO_CHRONPROT ) {
|
||||
$this->shutdownChronologyProtector( $chronProt, $workCallback, $cpIndex );
|
||||
$this->shutdownChronologyProtector( $chronProt, $cpIndex );
|
||||
$this->replLogger->debug( __METHOD__ . ': finished ChronologyProtector shutdown' );
|
||||
}
|
||||
$cpClientId = $chronProt->getClientId();
|
||||
|
|
@ -705,32 +705,17 @@ abstract class LBFactory implements ILBFactory {
|
|||
* Get and record all of the staged DB positions into persistent memory storage
|
||||
*
|
||||
* @param ChronologyProtector $cp
|
||||
* @param callable|null $workCallback Work to do instead of waiting on syncing positions
|
||||
* @param int|null &$cpIndex DB position key write counter; incremented on update [returned]
|
||||
*/
|
||||
protected function shutdownChronologyProtector(
|
||||
ChronologyProtector $cp, $workCallback, &$cpIndex = null
|
||||
ChronologyProtector $cp, &$cpIndex = null
|
||||
) {
|
||||
// Remark all of the relevant DB primary positions
|
||||
foreach ( $this->getLBsForOwner() as $lb ) {
|
||||
$cp->stageSessionReplicationPosition( $lb );
|
||||
}
|
||||
// Write the positions to the persistent stash
|
||||
$unsavedPositions = $cp->persistSessionReplicationPositions( $cpIndex );
|
||||
if ( $unsavedPositions && $workCallback ) {
|
||||
// Invoke callback in case it did not cache the result yet
|
||||
$workCallback();
|
||||
}
|
||||
// If the positions failed to write to the stash, then wait on the local datacenter
|
||||
// replica DBs to catch up before sending an HTTP response. As long as the request that
|
||||
// caused such DB writes occurred in the primary datacenter, and clients are temporarily
|
||||
// pinned to the primary datacenter after causing DB writes, then this should suffice.
|
||||
foreach ( $this->getLBsForOwner() as $lb ) {
|
||||
$primaryName = $lb->getServerName( $lb->getWriterIndex() );
|
||||
if ( isset( $unsavedPositions[$primaryName] ) ) {
|
||||
$lb->waitForAll( $unsavedPositions[$primaryName] );
|
||||
}
|
||||
}
|
||||
$cp->persistSessionReplicationPositions( $cpIndex );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in a new issue