wiki.techinc.nl/includes/poolcounter/PoolWorkArticleViewCurrent.php
David Causse ce511406e0 ParserCache: add an option to explicitly trigger links update
Triggering an opportunistic LinksUpdate on every cache miss of the
current revision might not be appropriate in some cases.
Some functions like ContentHandler::getParserOutputForIndexing might
be called after all LinksUpdates, but if these functions explicitly
disallow populating the parser cache via OPT_NO_UPDATE_CACHE, we might
enter a case where the involved jobs would trigger themselves forever.

This happens with the CirrusSearch extension, which listens
to LinksUpdate and relies on
ContentHandler::getParserOutputForIndexing to fetch the parser output.

Introduce a new option ParserOutputAccess::OPT_LINKS_UPDATE to be
more intentional on whether such cascading LinksUpdate might occur
or not on cache misses.

Change the default to not trigger a LinksUpdate on every cache miss
and enable it only when rendering the article view (Article::view).
It does not seem ideal that this behavior is owned by the ParserCache
and further refactoring might be needed to separate these concerns.

Bug: T329842
Change-Id: Ib3c3ca935f316ea880ff6c6b393fa80166e42bd3
2023-05-16 11:32:55 +02:00

184 lines
5.7 KiB
PHP

<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
use MediaWiki\Logger\Spi as LoggerSpi;
use MediaWiki\Page\PageRecord;
use MediaWiki\Page\WikiPageFactory;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\RevisionRenderer;
use Wikimedia\Rdbms\ILBFactory;
/**
* PoolWorkArticleView for the current revision of a page, using ParserCache.
*
* @internal
*/
class PoolWorkArticleViewCurrent extends PoolWorkArticleView {
	/** @var string Pool work key; also attached as context to log entries written by fallback() */
	private $workKey;
	/** @var PageRecord Page whose current revision is being rendered */
	private $page;
	/** @var ParserCache Cache read by getCachedWork()/fallback() and written by doWork() */
	private $parserCache;
	/** @var ILBFactory Provides the ChronologyProtector touch time checked in fallback() */
	private $lbFactory;
	/** @var WikiPageFactory Used by doWork() to obtain the page object for the links update */
	private $wikiPageFactory;
	/** @var bool Whether doWork() should trigger an opportunistic LinksUpdate */
	private bool $triggerLinksUpdate;

	/**
	 * @param string $workKey
	 * @param PageRecord $page
	 * @param RevisionRecord $revision Revision to render
	 * @param ParserOptions $parserOptions ParserOptions to use for the parse
	 * @param RevisionRenderer $revisionRenderer
	 * @param ParserCache $parserCache
	 * @param ILBFactory $lbFactory
	 * @param LoggerSpi $loggerSpi
	 * @param WikiPageFactory $wikiPageFactory
	 * @param bool $cacheable Whether the rendered output should be stored in the parser cache
	 * @param bool $triggerLinksUpdate Whether an opportunistic LinksUpdate should be
	 *   triggered after rendering
	 * @throws InvalidArgumentException If $revision carries a page ID that differs from $page
	 */
	public function __construct(
		string $workKey,
		PageRecord $page,
		RevisionRecord $revision,
		ParserOptions $parserOptions,
		RevisionRenderer $revisionRenderer,
		ParserCache $parserCache,
		ILBFactory $lbFactory,
		LoggerSpi $loggerSpi,
		WikiPageFactory $wikiPageFactory,
		bool $cacheable = true,
		bool $triggerLinksUpdate = false
	) {
		// TODO: Remove support for partially initialized RevisionRecord instances once
		//       Article no longer uses fake revisions.
		// A falsy page ID on the revision is tolerated; only a set-but-different ID is an error.
		$revisionPageId = $revision->getPageId();
		if ( $revisionPageId && $revisionPageId !== $page->getId() ) {
			throw new InvalidArgumentException( '$page parameter mismatches $revision parameter' );
		}

		parent::__construct( $workKey, $revision, $parserOptions, $revisionRenderer, $loggerSpi );

		$this->workKey = $workKey;
		$this->page = $page;
		$this->parserCache = $parserCache;
		$this->lbFactory = $lbFactory;
		$this->wikiPageFactory = $wikiPageFactory;
		$this->triggerLinksUpdate = $triggerLinksUpdate;
		$this->cacheable = $cacheable;
	}

	/**
	 * Render the revision, then optionally save the output to the parser cache
	 * and optionally trigger an opportunistic links update.
	 *
	 * @return Status
	 */
	public function doWork() {
		// Capture the timestamp before rendering starts, to reduce the effects of
		// race conditions for slow parses (T48014).
		$cacheTime = wfTimestampNow();
		$status = $this->renderRevision();

		/** @var ParserOutput|null $output */
		$output = $status->getValue();
		if ( !$output ) {
			// The status carries no output: nothing to cache, no links to update.
			return $status;
		}

		// Both this work item and the output itself must allow caching.
		$shouldSave = $this->cacheable && $output->isCacheable();
		if ( $shouldSave ) {
			$this->parserCache->save(
				$output,
				$this->page,
				$this->parserOptions,
				$cacheTime,
				$this->revision->getId()
			);
		}

		if ( $this->triggerLinksUpdate ) {
			$wikiPage = $this->wikiPageFactory->newFromTitle( $this->page );
			$wikiPage->triggerOpportunisticLinksUpdate( $output );
		}

		return $status;
	}

	/**
	 * Look the page up in the parser cache and log whether it was a hit or a miss.
	 *
	 * @return Status|false A good Status wrapping the cached output, or false on a miss
	 */
	public function getCachedWork() {
		$cachedOutput = $this->parserCache->get( $this->page, $this->parserOptions );

		$logger = $this->loggerSpi->getLogger( 'PoolWorkArticleView' );
		if ( $cachedOutput ) {
			$logger->debug( 'parser cache hit' );
			return Status::newGood( $cachedOutput );
		}

		$logger->debug( 'parser cache miss' );
		return false;
	}

	/**
	 * Try to serve a dirty parser cache entry instead of doing the work.
	 *
	 * @param bool $fast Fast stale request
	 * @return Status|false A good Status wrapping the dirty output and carrying
	 *   warnings, or false when no acceptable dirty output is available
	 */
	public function fallback( $fast ) {
		$staleOutput = $this->parserCache->getDirty( $this->page, $this->parserOptions );
		$logger = $this->loggerSpi->getLogger( 'dirty' );

		if ( !$staleOutput ) {
			$logger->info( 'dirty missing' );
			return false;
		}

		if ( $fast ) {
			/* Check if the stale response is from before the last write to the
			 * DB by this user. Declining to return a stale response in this
			 * case ensures that the user will see their own edit after page
			 * save.
			 *
			 * Note that the CP touch time is the timestamp of the shutdown of
			 * the save request, so there is a bias towards avoiding fast stale
			 * responses of potentially several seconds.
			 */
			$lastWriteTime = $this->lbFactory->getChronologyProtectorTouched();
			$cacheTime = MWTimestamp::convert( TS_UNIX, $staleOutput->getCacheTime() );

			$predatesLastWrite = $lastWriteTime && $cacheTime <= $lastWriteTime;
			if ( $predatesLastWrite ) {
				$logContext = [
					'workKey' => $this->workKey,
					'cacheTime' => $cacheTime,
					'lastWriteTime' => $lastWriteTime,
				];
				$logger->info(
					'declining to send dirty output since cache time ' .
						'{cacheTime} is before last write time {lastWriteTime}',
					$logContext
				);
				// Forget this ParserOutput -- we will request it again if
				// necessary in slow mode. There might be a newer entry
				// available by that time.
				return false;
			}
		}

		if ( $fast ) {
			$logMessage = 'fast dirty output';
			$reasonWarning = 'view-pool-contention';
		} else {
			$logMessage = 'dirty output';
			$reasonWarning = 'view-pool-overload';
		}
		$logger->info( $logMessage, [ 'workKey' => $this->workKey ] );

		// The Status is good, but carries warnings saying that the output is
		// dirty and why it is being served.
		$status = Status::newGood( $staleOutput );
		$status->warning( 'view-pool-dirty-output' );
		$status->warning( $reasonWarning );

		return $status;
	}
}