Split out new RefreshSecondaryDataUpdate class
Make DerivedPageDataUpdater bundle all the related DataUpdate tasks on page change with a RefreshSecondaryDataUpdate wrapper. If one of the DataUpdate tasks fails, then the entire bundle of updates can be re-run in the form of enqueueing a RefreshLinksJob instance (these jobs are idempotent). If several of the bundled tasks fail, it is easy for DeferredUpdates to know that only one RefreshLinksJob should be enqueued. The goal is to make DataUpdate tasks more reliable and resilient. Most of these deferred update failures are due to ephemeral problems like lock contention. Since the job queue is already able to reliably store and retry jobs, and the time that a regular web request can spend in post-send is more limited, it makes the most sense to just enqueue tasks as jobs if they fail post-send. Make LinkUpdate no longer defined as enqueueable as RefreshLinksJob since they are not very congruent (LinksUpdate only does some of the work that RefreshLinksJob does). Only the wrapper, with the bundle of DataUpdate instances, is congruent to RefreshLinksJob. This change does not itself implement the enqueue-on-failure logic in DeferredUpdates, but is merely a prerequisite. Bug: T206288 Change-Id: I191103c1aeff4c9fedbf524ee387dad9bdf5fab8
This commit is contained in:
parent
c7d3fcebc4
commit
a1f7fd3ada
4 changed files with 141 additions and 40 deletions
|
|
@ -1215,6 +1215,7 @@ $wgAutoloadLocalClasses = [
|
|||
'RefreshImageMetadata' => __DIR__ . '/maintenance/refreshImageMetadata.php',
|
||||
'RefreshLinks' => __DIR__ . '/maintenance/refreshLinks.php',
|
||||
'RefreshLinksJob' => __DIR__ . '/includes/jobqueue/jobs/RefreshLinksJob.php',
|
||||
'RefreshSecondaryDataUpdate' => __DIR__ . '/includes/deferred/RefreshSecondaryDataUpdate.php',
|
||||
'RegexlikeReplacer' => __DIR__ . '/includes/libs/replacers/RegexlikeReplacer.php',
|
||||
'RemexStripTagHandler' => __DIR__ . '/includes/parser/RemexStripTagHandler.php',
|
||||
'RemoveInvalidEmails' => __DIR__ . '/maintenance/removeInvalidEmails.php',
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ namespace MediaWiki\Storage;
|
|||
|
||||
use ApiStashEdit;
|
||||
use CategoryMembershipChangeJob;
|
||||
use RefreshSecondaryDataUpdate;
|
||||
use Content;
|
||||
use ContentHandler;
|
||||
use DataUpdate;
|
||||
|
|
@ -1590,14 +1591,31 @@ class DerivedPageDataUpdater implements IDBAccessObject {
|
|||
$update->setRevision( $legacyRevision );
|
||||
$update->setTriggeringUser( $triggeringUser );
|
||||
}
|
||||
if ( $options['defer'] === false ) {
|
||||
if ( $options['transactionTicket'] !== null ) {
|
||||
}
|
||||
|
||||
if ( $options['defer'] === false ) {
|
||||
foreach ( $updates as $update ) {
|
||||
if ( $update instanceof DataUpdate && $options['transactionTicket'] !== null ) {
|
||||
$update->setTransactionTicket( $options['transactionTicket'] );
|
||||
}
|
||||
$update->doUpdate();
|
||||
} else {
|
||||
DeferredUpdates::addUpdate( $update, $options['defer'] );
|
||||
}
|
||||
} else {
|
||||
$cacheTime = $this->getCanonicalParserOutput()->getCacheTime();
|
||||
// Bundle all of the data updates into a single deferred update wrapper so that
|
||||
// any failure will cause at most one refreshLinks job to be enqueued by
|
||||
// DeferredUpdates::doUpdates(). This is hard to do when there are many separate
|
||||
// updates that are not defined as being related.
|
||||
$update = new RefreshSecondaryDataUpdate(
|
||||
$this->wikiPage,
|
||||
$updates,
|
||||
$options,
|
||||
$cacheTime,
|
||||
$this->loadbalancerFactory->getLocalDomainID()
|
||||
);
|
||||
$update->setRevision( $legacyRevision );
|
||||
$update->setTriggeringUser( $triggeringUser );
|
||||
DeferredUpdates::addUpdate( $update, $options['defer'] );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ use Wikimedia\ScopedCallback;
|
|||
*
|
||||
* See docs/deferred.txt
|
||||
*/
|
||||
class LinksUpdate extends DataUpdate implements EnqueueableDataUpdate {
|
||||
class LinksUpdate extends DataUpdate {
|
||||
// @todo make members protected, but make sure extensions don't break
|
||||
|
||||
/** @var int Page ID of the article linked from */
|
||||
|
|
@ -1187,39 +1187,4 @@ class LinksUpdate extends DataUpdate implements EnqueueableDataUpdate {
|
|||
|
||||
return $this->db;
|
||||
}
|
||||
|
||||
public function getAsJobSpecification() {
|
||||
if ( $this->user ) {
|
||||
$userInfo = [
|
||||
'userId' => $this->user->getId(),
|
||||
'userName' => $this->user->getName(),
|
||||
];
|
||||
} else {
|
||||
$userInfo = false;
|
||||
}
|
||||
|
||||
if ( $this->mRevision ) {
|
||||
$triggeringRevisionId = $this->mRevision->getId();
|
||||
} else {
|
||||
$triggeringRevisionId = false;
|
||||
}
|
||||
|
||||
return [
|
||||
'wiki' => WikiMap::getWikiIdFromDbDomain( $this->getDB()->getDomainID() ),
|
||||
'job' => new JobSpecification(
|
||||
'refreshLinksPrioritized',
|
||||
[
|
||||
// Reuse the parser cache if it was saved
|
||||
'rootJobTimestamp' => $this->mParserOutput->getCacheTime(),
|
||||
'useRecursiveLinksUpdate' => $this->mRecursive,
|
||||
'triggeringUser' => $userInfo,
|
||||
'triggeringRevisionId' => $triggeringRevisionId,
|
||||
'causeAction' => $this->getCauseAction(),
|
||||
'causeAgent' => $this->getCauseAgent()
|
||||
],
|
||||
[ 'removeDuplicates' => true ],
|
||||
$this->getTitle()
|
||||
)
|
||||
];
|
||||
}
|
||||
}
|
||||
|
|
|
|||
117
includes/deferred/RefreshSecondaryDataUpdate.php
Normal file
117
includes/deferred/RefreshSecondaryDataUpdate.php
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
<?php
|
||||
/**
|
||||
* Updater for secondary data after a page edit.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
* @file
|
||||
*/
|
||||
|
||||
/**
|
||||
* Update object handling the cleanup of secondary data after a page was edited.
|
||||
*
|
||||
* This makes makes it possible for DeferredUpdates to have retry logic using a single
|
||||
* refreshLinks job if any of the bundled updates fail.
|
||||
*/
|
||||
class RefreshSecondaryDataUpdate extends DataUpdate implements EnqueueableDataUpdate {
|
||||
/** @var WikiPage */
|
||||
private $page;
|
||||
/** @var DeferrableUpdate[] */
|
||||
private $updates;
|
||||
/** @var bool */
|
||||
private $recursive;
|
||||
/** @var string */
|
||||
private $cacheTimestamp;
|
||||
/** @var string Database domain ID */
|
||||
private $domain;
|
||||
|
||||
/** @var Revision|null */
|
||||
private $revision;
|
||||
/** @var User|null */
|
||||
private $user;
|
||||
|
||||
/**
|
||||
* @param WikiPage $page Page we are updating
|
||||
* @param DeferrableUpdate[] $updates Updates from DerivedPageDataUpdater::getSecondaryUpdates()
|
||||
* @param array $options Options map (causeAction, causeAgent, recursive)
|
||||
* @param string $cacheTime Result of ParserOutput::getCacheTime() for the source output
|
||||
* @param string $domain The database domain ID of the wiki the update is for
|
||||
*/
|
||||
function __construct(
|
||||
WikiPage $page,
|
||||
array $updates,
|
||||
array $options,
|
||||
$cacheTime,
|
||||
$domain
|
||||
) {
|
||||
parent::__construct();
|
||||
|
||||
$this->page = $page;
|
||||
$this->updates = $updates;
|
||||
$this->causeAction = $options['causeAction'] ?? 'unknown';
|
||||
$this->causeAgent = $options['causeAgent'] ?? 'unknown';
|
||||
$this->recursive = !empty( $options['recursive'] );
|
||||
$this->cacheTimestamp = $cacheTime;
|
||||
$this->domain = $domain;
|
||||
}
|
||||
|
||||
public function doUpdate() {
|
||||
foreach ( $this->updates as $update ) {
|
||||
$update->doUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the revision corresponding to this LinksUpdate
|
||||
* @param Revision $revision
|
||||
*/
|
||||
public function setRevision( Revision $revision ) {
|
||||
$this->revision = $revision;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the User who triggered this LinksUpdate
|
||||
* @param User $user
|
||||
*/
|
||||
public function setTriggeringUser( User $user ) {
|
||||
$this->user = $user;
|
||||
}
|
||||
|
||||
public function getAsJobSpecification() {
|
||||
return [
|
||||
'wiki' => WikiMap::getWikiIdFromDomain( $this->domain ),
|
||||
'job' => new JobSpecification(
|
||||
'refreshLinksPrioritized',
|
||||
[
|
||||
// Reuse the parser cache if it was saved
|
||||
'rootJobTimestamp' => $this->cacheTimestamp,
|
||||
'useRecursiveLinksUpdate' => $this->recursive,
|
||||
'triggeringUser' => $this->user
|
||||
? [
|
||||
'userId' => $this->user->getId(),
|
||||
'userName' => $this->user->getName()
|
||||
]
|
||||
: false,
|
||||
'triggeringRevisionId' => $this->revision ? $this->revision->getId() : false,
|
||||
'causeAction' => $this->getCauseAction(),
|
||||
'causeAgent' => $this->getCauseAgent()
|
||||
],
|
||||
[ 'removeDuplicates' => true ],
|
||||
$this->page->getTitle()
|
||||
)
|
||||
];
|
||||
}
|
||||
}
|
||||
Loading…
Reference in a new issue