Fix use of stale backlink partition cache

If a template is created, and then used on a page, and then the template
is updated, all within an hour, then the page_touched of the page is
never updated and the user will always see the old template contents.
This is because htmlCacheUpdate jobs are fully suppressed for one hour
following template creation, due to the WAN backlink partition cache.

So:

* Revert 4f3efbf406, so that htmlCacheUpdate jobs always do
  something regardless of the state of the partition cache.
* Factor out the job queueing parts of WikiPage::onArticleCreate,
  ::onArticleDelete and ::onArticleEdit. Instead of queueing a job
  unconditionally, check for the existence of backlinks in a post-send
  deferred update. If there are none, don't queue the job.
* It's convenient to use BacklinkCache::hasLinks(), however, it suffered
  from the same stale cache problem as BacklinkCache::partition(). It's
  a short and fast query, and code review shows that none of the callers
  are particularly performance sensitive. So, do not use the WAN cache
  in BacklinkCache::hasLinks().
* Since hasLinks() and getNumLinks() no longer share a significant
  amount of code, separate them. Remove the $max parameter from
  getNumLinks(), which only existed to support hasLinks() and has no
  other usages in codesearch.
* Log a debug message when entering the post-send request stage, so that
  it's easier to confirm that no additional pre-send queries are done.
* Add a regression test, confirmed to previously fail.

Bug: T368006
Change-Id: Id5c7af6d4fcdbeb6724a9036133742c5f76624df
This commit is contained in:
Tim Starling 2024-06-25 11:22:35 +10:00
parent 7804c43be4
commit 0837da9484
5 changed files with 143 additions and 92 deletions

View file

@ -1089,6 +1089,7 @@ abstract class MediaWikiEntryPoint {
// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
@flush();
}
wfDebug( "Output buffer flushed" );
}
/**
@ -1230,7 +1231,9 @@ abstract class MediaWikiEntryPoint {
return false;
}
return $this->environment->fastCgiFinishRequest();
$success = $this->environment->fastCgiFinishRequest();
wfDebug( $success ? 'FastCGI request finished' : 'FastCGI request finish failed' );
return $success;
}
/**

View file

@ -91,6 +91,13 @@ class BacklinkCache {
*/
protected $fullResultCache = [];
/**
* Cache for hasLinks()
*
* @var bool[]
*/
private $hasLinksCache = [];
/** @var WANObjectCache */
protected $wanCache;
@ -331,61 +338,60 @@ class BacklinkCache {
}
/**
* Check if there are any backlinks
* Check if there are any backlinks. Only use the process cache, since the
* WAN cache is potentially stale (T368006).
*
* @param string $table
* @return bool
*/
public function hasLinks( $table ) {
return ( $this->getNumLinks( $table, 1 ) > 0 );
if ( isset( $this->hasLinksCache[$table] ) ) {
return $this->hasLinksCache[$table];
}
if ( isset( $this->partitionCache[$table] ) ) {
$entry = reset( $this->partitionCache[$table] );
return (bool)$entry['numRows'];
}
if ( isset( $this->fullResultCache[$table] ) ) {
return (bool)$this->fullResultCache[$table]->numRows();
}
$hasLinks = (bool)$this->queryLinks( $table, false, false, 1 )->numRows();
$this->hasLinksCache[$table] = $hasLinks;
return $hasLinks;
}
/**
* Get the approximate number of backlinks
* @param string $table
* @param int|float $max Only count up to this many backlinks, or INF for no max
* @return int
*/
public function getNumLinks( $table, $max = INF ) {
public function getNumLinks( $table ) {
if ( isset( $this->partitionCache[$table] ) ) {
$entry = reset( $this->partitionCache[$table] );
return min( $max, $entry['numRows'] );
return $entry['numRows'];
}
if ( isset( $this->fullResultCache[$table] ) ) {
return min( $max, $this->fullResultCache[$table]->numRows() );
return $this->fullResultCache[$table]->numRows();
}
$count = $this->wanCache->getWithSetCallback(
return $this->wanCache->getWithSetCallback(
$this->wanCache->makeKey(
'numbacklinks',
CacheKeyHelper::getKeyForPage( $this->page ),
$table
),
self::CACHE_EXPIRY,
function ( $oldValue, &$ttl, array &$setOpts ) use ( $table, $max ) {
function ( $oldValue, &$ttl, array &$setOpts ) use ( $table ) {
$setOpts += Database::getCacheSetOptions( $this->getDB() );
if ( is_infinite( $max ) ) {
// Use partition() since it will batch the query and skip the JOIN.
// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
$batchSize = $this->options->get( MainConfigNames::UpdateRowsPerJob );
$this->partition( $table, $batchSize );
$value = $this->partitionCache[$table][$batchSize]['numRows'];
} else {
// Fetch the full title info, since the caller will likely need it.
// Cache the row count if the result set limit made no difference.
$value = iterator_count( $this->getLinkPages( $table, false, false, $max ) );
if ( $value >= $max ) {
$ttl = WANObjectCache::TTL_UNCACHEABLE;
}
}
return $value;
// Use partition() since it will batch the query and skip the JOIN.
// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
$batchSize = $this->options->get( MainConfigNames::UpdateRowsPerJob );
$this->partition( $table, $batchSize );
return $this->partitionCache[$table][$batchSize]['numRows'];
}
);
return min( $max, $count );
}
/**
@ -437,7 +443,7 @@ class BacklinkCache {
// Merge the link count and range partitions for this chunk
$value['numRows'] += $partitions['numRows'];
$value['batches'] = array_merge( $value['batches'], $partitions['batches'] );
if ( count( $partitions['batches'] ) ) {
if ( $partitions['numRows'] ) {
[ , $lEnd ] = end( $partitions['batches'] );
$start = $lEnd + 1; // pick up after this inclusive range
}
@ -463,10 +469,18 @@ class BacklinkCache {
* @return array
*/
protected function partitionResult( $res, $batchSize, $isComplete = true ) {
$batches = [];
$numRows = $res->numRows();
$numBatches = ceil( $numRows / $batchSize );
if ( $numRows === 0 ) {
// Return a single batch that covers all IDs (T368006)
return [
'numRows' => 0,
'batches' => [ [ false, false ] ]
];
}
$numBatches = ceil( $numRows / $batchSize );
$batches = [];
for ( $i = 0; $i < $numBatches; $i++ ) {
if ( $i == 0 && $isComplete ) {
$start = false;

View file

@ -2484,23 +2484,11 @@ class WikiPage implements Stringable, Page, PageRecord {
$services->getLinkCache()->invalidateTitle( $title );
// Invalidate caches of articles which include this page
$jobs = [];
$jobs[] = HTMLCacheUpdateJob::newForBacklinks(
$title,
'templatelinks',
[ 'causeAction' => 'create-page' ]
DeferredUpdates::addCallableUpdate(
static function () use ( $title, $maybeIsRedirect ) {
self::queueBacklinksJobs( $title, true, $maybeIsRedirect, 'create-page' );
}
);
// Images
if ( $maybeIsRedirect && $title->getNamespace() === NS_FILE ) {
// Process imagelinks when the file page was created as a redirect
$jobs[] = HTMLCacheUpdateJob::newForBacklinks(
$title,
'imagelinks',
[ 'causeAction' => 'create-page' ]
);
}
$services->getJobQueueGroup()->lazyPush( $jobs );
if ( $title->getNamespace() === NS_CATEGORY ) {
// Load the Category object, which will schedule a job to create
@ -2538,21 +2526,9 @@ class WikiPage implements Stringable, Page, PageRecord {
}
// Invalidate caches of articles which include this page
$jobs = [];
$jobs[] = HTMLCacheUpdateJob::newForBacklinks(
$title,
'templatelinks',
[ 'causeAction' => 'delete-page' ]
);
// Images
if ( $title->getNamespace() === NS_FILE ) {
$jobs[] = HTMLCacheUpdateJob::newForBacklinks(
$title,
'imagelinks',
[ 'causeAction' => 'delete-page' ]
);
}
$services->getJobQueueGroup()->lazyPush( $jobs );
DeferredUpdates::addCallableUpdate( static function () use ( $title ) {
self::queueBacklinksJobs( $title, true, true, 'delete-page' );
} );
// User talk pages
if ( $title->getNamespace() === NS_USER_TALK ) {
@ -2589,35 +2565,18 @@ class WikiPage implements Stringable, Page, PageRecord {
) {
// TODO: move this into a PageEventEmitter service
$jobs = [];
if ( $slotsChanged === null || in_array( SlotRecord::MAIN, $slotsChanged ) ) {
// Invalidate caches of articles which include this page.
// Only for the main slot, because only the main slot is transcluded.
// TODO: MCR: not true for TemplateStyles! [SlotHandler]
$jobs[] = HTMLCacheUpdateJob::newForBacklinks(
$title,
'templatelinks',
[ 'causeAction' => 'edit-page' ]
);
}
// Images
if ( $maybeRedirectChanged && $title->getNamespace() === NS_FILE ) {
// Process imagelinks in case the redirect target has changed
$jobs[] = HTMLCacheUpdateJob::newForBacklinks(
$title,
'imagelinks',
[ 'causeAction' => 'edit-page' ]
);
}
// Invalidate the caches of all pages which redirect here
$jobs[] = HTMLCacheUpdateJob::newForBacklinks(
$title,
'redirect',
[ 'causeAction' => 'edit-page' ]
DeferredUpdates::addCallableUpdate(
static function () use ( $title, $slotsChanged, $maybeRedirectChanged ) {
self::queueBacklinksJobs(
$title,
$slotsChanged === null || in_array( SlotRecord::MAIN, $slotsChanged ),
$maybeRedirectChanged,
'edit-page'
);
}
);
$services = MediaWikiServices::getInstance();
$services->getJobQueueGroup()->lazyPush( $jobs );
$services = MediaWikiServices::getInstance();
$services->getLinkCache()->invalidateTitle( $title );
$hcu = MediaWikiServices::getInstance()->getHtmlCacheUpdater();
@ -2633,6 +2592,49 @@ class WikiPage implements Stringable, Page, PageRecord {
self::purgeInterwikiCheckKey( $title );
}
private static function queueBacklinksJobs(
Title $title, $mainSlotChanged, $maybeRedirectChanged, $causeAction
) {
$services = MediaWikiServices::getInstance();
$backlinkCache = $services->getBacklinkCacheFactory()->getBacklinkCache( $title );
$jobs = [];
if ( $mainSlotChanged
&& $backlinkCache->hasLinks( 'templatelinks' )
) {
// Invalidate caches of articles which include this page.
// Only for the main slot, because only the main slot is transcluded.
// TODO: MCR: not true for TemplateStyles! [SlotHandler]
$jobs[] = HTMLCacheUpdateJob::newForBacklinks(
$title,
'templatelinks',
[ 'causeAction' => $causeAction ]
);
}
// Images
if ( $maybeRedirectChanged && $title->getNamespace() === NS_FILE
&& $backlinkCache->hasLinks( 'imagelinks' )
) {
// Process imagelinks in case the redirect target has changed
$jobs[] = HTMLCacheUpdateJob::newForBacklinks(
$title,
'imagelinks',
[ 'causeAction' => $causeAction ]
);
}
// Invalidate the caches of all pages which redirect here
if ( $backlinkCache->hasLinks( 'redirect' ) ) {
$jobs[] = HTMLCacheUpdateJob::newForBacklinks(
$title,
'redirect',
[ 'causeAction' => $causeAction ]
);
}
if ( $jobs ) {
$services->getJobQueueGroup()->push( $jobs );
}
}
/**
* Purge the check key for cross-wiki cache entries referencing this page
*

View file

@ -1429,4 +1429,37 @@ class DerivedPageDataUpdaterTest extends MediaWikiIntegrationTestCase {
$this->assertIsObject( $cached );
}
/**
* Helper for testTemplateUpdate
*
* @param WikiPage $page
* @param string $content
*/
private function editAndUpdate( $page, $content ) {
$this->createRevision( $page, $content );
$this->getServiceContainer()->resetServiceForTesting( 'BacklinkCacheFactory' );
$this->runJobs();
}
/**
* Regression test for T368006
*/
public function testTemplateUpdate() {
$clock = MWTimestamp::convert( TS_UNIX, '20100101000000' );
MWTimestamp::setFakeTime( static function () use ( &$clock ) {
return $clock++;
} );
$template = $this->getPage( 'Template:TestTemplateUpdate' );
$page = $this->getPage( 'TestTemplateUpdate' );
$this->editAndUpdate( $template, '1' );
$this->editAndUpdate( $page, '{{TestTemplateUpdate}}' );
$oldTouched = $page->getTouched();
$page->clear();
$this->editAndUpdate( $template, '2' );
$newTouched = $page->getTouched();
$this->assertGreaterThan( $oldTouched, $newTouched );
}
}

View file

@ -51,7 +51,6 @@ class BacklinkCacheTest extends MediaWikiIntegrationTestCase {
public static function provideCasesForGetNumLinks() {
return [
[ 4, 'BacklinkCacheTest_1', 'pagelinks' ],
[ 1, 'BacklinkCacheTest_1', 'pagelinks', 1 ],
[ 0, 'BacklinkCacheTest_2', 'pagelinks' ],
[ 1, 'Image:test.png', 'imagelinks' ],
];
@ -61,10 +60,10 @@ class BacklinkCacheTest extends MediaWikiIntegrationTestCase {
* @dataProvider provideCasesForGetNumLinks
* @covers \MediaWiki\Cache\BacklinkCache::getNumLinks
*/
public function testGetNumLinks( int $numLinks, string $title, string $table, $max = INF ) {
public function testGetNumLinks( int $numLinks, string $title, string $table ) {
$blcFactory = $this->getServiceContainer()->getBacklinkCacheFactory();
$backlinkCache = $blcFactory->getBacklinkCache( Title::newFromText( $title ) );
$this->assertEquals( $numLinks, $backlinkCache->getNumLinks( $table, $max ) );
$this->assertEquals( $numLinks, $backlinkCache->getNumLinks( $table ) );
}
public static function provideCasesForGetLinks() {