jobqueue: Warm parsoid parser cache using a job
This patch introduces the ParsoidCachePrewarmJob job, which is used to warm the parser cache (PC) with Parsoid output in order to speed up page reads on large wikis. Bug: T322427 Change-Id: Ib63a02d3cf5348b36f4f166ff6939f4d2e7fef6f
This commit is contained in:
parent
f985fc8363
commit
99d989087d
8 changed files with 209 additions and 45 deletions
|
|
@ -2128,6 +2128,7 @@ $wgAutoloadLocalClasses = [
|
|||
'ParserFactory' => __DIR__ . '/includes/parser/ParserFactory.php',
|
||||
'ParserOptions' => __DIR__ . '/includes/parser/ParserOptions.php',
|
||||
'ParserOutput' => __DIR__ . '/includes/parser/ParserOutput.php',
|
||||
'ParsoidCachePrewarmJob' => __DIR__ . '/includes/jobqueue/jobs/ParsoidCachePrewarmJob.php',
|
||||
'ParsoidVirtualRESTService' => __DIR__ . '/includes/libs/virtualrest/ParsoidVirtualRESTService.php',
|
||||
'Password' => __DIR__ . '/includes/password/Password.php',
|
||||
'PasswordError' => __DIR__ . '/includes/password/PasswordError.php',
|
||||
|
|
|
|||
|
|
@ -6889,6 +6889,7 @@ config-schema:
|
|||
revertedTagUpdate: RevertedTagUpdateJob
|
||||
'null': NullJob
|
||||
userEditCountInit: UserEditCountInitJob
|
||||
parsoidCachePrewarm: ParsoidCachePrewarmJob
|
||||
type: object
|
||||
description: |-
|
||||
Maps jobs to their handlers; extensions
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@ use MWOldPassword;
|
|||
use MWSaltedPassword;
|
||||
use NamespaceInfo;
|
||||
use NullJob;
|
||||
use ParsoidCachePrewarmJob;
|
||||
use PatrolLogFormatter;
|
||||
use ProtectLogFormatter;
|
||||
use PublishStashedFileJob;
|
||||
|
|
@ -11071,6 +11072,7 @@ class MainConfigSchema {
|
|||
'revertedTagUpdate' => RevertedTagUpdateJob::class,
|
||||
'null' => NullJob::class,
|
||||
'userEditCountInit' => UserEditCountInitJob::class,
|
||||
'parsoidCachePrewarm' => ParsoidCachePrewarmJob::class,
|
||||
],
|
||||
'type' => 'map',
|
||||
];
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ use MWUnknownContentModelException;
|
|||
use ParserCache;
|
||||
use ParserOptions;
|
||||
use ParserOutput;
|
||||
use ParsoidCachePrewarmJob;
|
||||
use Psr\Log\LoggerAwareInterface;
|
||||
use Psr\Log\LoggerInterface;
|
||||
use Psr\Log\NullLogger;
|
||||
|
|
@ -1862,49 +1863,12 @@ class DerivedPageDataUpdater implements IDBAccessObject, LoggerAwareInterface, P
|
|||
// If we enable cache warming with parsoid outputs, let's do it at the same
|
||||
// time we're populating the parser cache with pre-generated HTML.
|
||||
if ( $this->warmParsoidParserCache ) {
|
||||
$this->doParsoidCacheUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
public function doParsoidCacheUpdate() {
|
||||
$this->assertHasRevision( __METHOD__ );
|
||||
|
||||
$wikiPage = $this->getWikiPage(); // TODO: ParserCache should accept a RevisionRecord instead
|
||||
$rev = $this->getRevision();
|
||||
$parserOpts = $this->getCanonicalParserOptions();
|
||||
|
||||
[ $causeAction, ] = $this->getCause();
|
||||
$parserOpts->setRenderReason( $causeAction );
|
||||
|
||||
$mainSlot = $rev->getSlot( SlotRecord::MAIN );
|
||||
if ( !$this->parsoidOutputAccess->supportsContentModel( $mainSlot->getModel() ) ) {
|
||||
$this->logger->debug( __METHOD__ . ': Parsoid does not support content model ' . $mainSlot->getModel() );
|
||||
return;
|
||||
}
|
||||
|
||||
// Make sure that ParsoidOutputAccess recognizes the revision as the current one.
|
||||
Assert::precondition(
|
||||
$wikiPage->getLatest() === $rev->getId(),
|
||||
'The ID of the new revision must match the page\'s current revision ID'
|
||||
);
|
||||
|
||||
$this->logger->debug( __METHOD__ . ': generating Parsoid output' );
|
||||
|
||||
// getParserOutput() will write to ParserCache
|
||||
$status = $this->parsoidOutputAccess->getParserOutput(
|
||||
$wikiPage,
|
||||
$parserOpts,
|
||||
$rev,
|
||||
ParsoidOutputAccess::OPT_FORCE_PARSE
|
||||
| ParsoidOutputAccess::OPT_LOG_LINT_DATA
|
||||
);
|
||||
|
||||
if ( !$status->isOK() ) {
|
||||
$this->logger->error( __METHOD__ . ': Parsoid error', [
|
||||
'errors' => $status->getErrors(),
|
||||
'page' => $wikiPage->getTitle()->getPrefixedText(),
|
||||
'rev' => $rev->getId(),
|
||||
] );
|
||||
$this->jobQueueGroup->lazyPush(
|
||||
ParsoidCachePrewarmJob::newSpec(
|
||||
$this->revision->getId(),
|
||||
$wikiPage->getId()
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2055,6 +2055,7 @@ return [
|
|||
'revertedTagUpdate' => 'RevertedTagUpdateJob',
|
||||
'null' => 'NullJob',
|
||||
'userEditCountInit' => 'UserEditCountInitJob',
|
||||
'parsoidCachePrewarm' => 'ParsoidCachePrewarmJob',
|
||||
],
|
||||
'JobTypesExcludedFromDefaultQueue' => [
|
||||
0 => 'AssembleUploadChunks',
|
||||
|
|
|
|||
110
includes/jobqueue/jobs/ParsoidCachePrewarmJob.php
Normal file
110
includes/jobqueue/jobs/ParsoidCachePrewarmJob.php
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
<?php
|
||||
/**
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
* @file
|
||||
*/
|
||||
|
||||
use MediaWiki\Logger\LoggerFactory;
|
||||
use MediaWiki\MediaWikiServices;
|
||||
use MediaWiki\Parser\Parsoid\ParsoidOutputAccess;
|
||||
use MediaWiki\Revision\SlotRecord;
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
/**
 * Job that generates Parsoid output for the current revision of a page, so
 * that the result is written to the parser cache as a side effect of
 * ParsoidOutputAccess::getParserOutput().
 *
 * Job parameters:
 *  - revId: ID of the revision to render
 *  - pageId: ID of the page the revision belongs to
 *
 * @ingroup JobQueue
 */
class ParsoidCachePrewarmJob extends Job implements GenericParameterJob {
	/** @var LoggerInterface Logger for the 'ParsoidCachePrewarmJob' channel */
	private LoggerInterface $logger;

	/** @var ParsoidOutputAccess Service used to render and cache the Parsoid output */
	private ParsoidOutputAccess $parsoidOutputAccess;

	/**
	 * @param array $params Job parameters, see the class documentation ('revId' and 'pageId')
	 */
	public function __construct( array $params ) {
		parent::__construct( 'parsoidCachePrewarm', $params );

		$this->parsoidOutputAccess = MediaWikiServices::getInstance()->getParsoidOutputAccess();
		$this->logger = LoggerFactory::getInstance( 'ParsoidCachePrewarmJob' );
	}

	/**
	 * Build a specification for this job that can be pushed to a job queue.
	 *
	 * @param int $revisionId ID of the revision to render
	 * @param int $pageId ID of the page the revision belongs to
	 *
	 * @return JobSpecification
	 */
	public static function newSpec(
		int $revisionId,
		int $pageId
	): JobSpecification {
		return new JobSpecification(
			'parsoidCachePrewarm',
			[
				'revId' => $revisionId,
				'pageId' => $pageId
			]
		);
	}

	/**
	 * Render the revision with Parsoid, but only when it is still the page's
	 * latest revision and its main slot uses a content model Parsoid supports.
	 * Every reason for skipping the update is logged.
	 */
	private function doParsoidCacheUpdate() {
		$services = MediaWikiServices::getInstance();
		$pageId = $this->params['pageId'];
		$revId = $this->params['revId'];

		$page = $services->getPageStore()->getPageById( $pageId );

		if ( $page === null ) {
			// This happens when the page got deleted in the meantime.
			$this->logger->info( "Page with ID {$pageId} not found" );
			return;
		}

		if ( $page->getLatest() !== $revId ) {
			// Only the latest revision is rendered; include the IDs so that a
			// skipped job can be traced back to the page and revisions involved.
			$this->logger->info(
				'ParsoidCachePrewarmJob: The ID of the new revision does not match the page\'s current revision ID',
				[
					'page_id' => $pageId,
					'rev_id' => $revId,
					'latest_rev_id' => $page->getLatest(),
				]
			);
			return;
		}

		$rev = $services->getRevisionLookup()->getRevisionById( $revId );
		if ( !$rev ) {
			// Nothing to render. Log it rather than returning silently, so a
			// prewarm that never happened is visible in the logs.
			$this->logger->info( __METHOD__ . ': revision not found', [
				'page_id' => $pageId,
				'rev_id' => $revId,
			] );
			return;
		}

		$parserOpts = ParserOptions::newFromAnon();

		$mainSlot = $rev->getSlot( SlotRecord::MAIN );
		if ( !$this->parsoidOutputAccess->supportsContentModel( $mainSlot->getModel() ) ) {
			$this->logger->debug( __METHOD__ . ': Parsoid does not support content model ' . $mainSlot->getModel() );
			return;
		}

		$this->logger->debug( __METHOD__ . ': generating Parsoid output' );

		// getParserOutput() will write to ParserCache
		$status = $this->parsoidOutputAccess->getParserOutput(
			$page,
			$parserOpts,
			$rev,
			ParsoidOutputAccess::OPT_FORCE_PARSE
			| ParsoidOutputAccess::OPT_LOG_LINT_DATA
		);

		if ( !$status->isOK() ) {
			$this->logger->error( __METHOD__ . ': Parsoid error', [
				'errors' => $status->getErrors(),
				'page' => $page->getDBkey(),
				'rev' => $rev->getId(),
			] );
		}
	}

	/**
	 * Run the cache update.
	 *
	 * @return bool Always true; a skipped or failed update is logged, not reported as a job failure
	 */
	public function run() {
		$this->doParsoidCacheUpdate();

		return true;
	}
}
|
||||
|
|
@ -1251,7 +1251,7 @@ class DerivedPageDataUpdaterTest extends MediaWikiIntegrationTestCase {
|
|||
|
||||
/**
|
||||
* @covers \MediaWiki\Storage\DerivedPageDataUpdater::doParserCacheUpdate()
|
||||
* @covers \MediaWiki\Storage\DerivedPageDataUpdater::doParsoidCacheUpdate()
|
||||
* @covers ParsoidCachePrewarmJob::doParsoidCacheUpdate()
|
||||
*/
|
||||
public function testDoParserCacheUpdate() {
|
||||
$this->overrideConfigValue(
|
||||
|
|
@ -1304,6 +1304,9 @@ class DerivedPageDataUpdaterTest extends MediaWikiIntegrationTestCase {
|
|||
TestingAccessWrapper::newFromObject( $page )->setLastEdit( $rev );
|
||||
$updater->doParserCacheUpdate();
|
||||
|
||||
// FIXME: We need to fake the revision store when we render the $rev.
|
||||
$this->runJobs( [ 'minJobs' => 2 ], [ 'type' => 'parsoidCachePrewarm' ] );
|
||||
|
||||
// Parsoid cache should have an entry
|
||||
$parsoidCached = $parsoidCache->get( $page, $updater->getCanonicalParserOptions(), true );
|
||||
$this->assertIsObject( $parsoidCached );
|
||||
|
|
@ -1342,7 +1345,7 @@ class DerivedPageDataUpdaterTest extends MediaWikiIntegrationTestCase {
|
|||
|
||||
/**
|
||||
* @covers \MediaWiki\Storage\DerivedPageDataUpdater::doParserCacheUpdate()
|
||||
* @covers \MediaWiki\Storage\DerivedPageDataUpdater::doParsoidCacheUpdate()
|
||||
* @covers ParsoidCachePrewarmJob::doParsoidCacheUpdate()
|
||||
*/
|
||||
public function testDoParserCacheUpdateForJavaScriptContent() {
|
||||
$this->overrideConfigValue(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,82 @@
|
|||
<?php
|
||||
|
||||
use MediaWiki\Page\PageIdentityValue;
|
||||
use MediaWiki\Page\PageRecord;
|
||||
|
||||
/**
 * Integration test for ParsoidCachePrewarmJob, run both by calling run()
 * directly and by executing the job from the job queue.
 *
 * @group JobQueue
 * @group Database
 *
 * @license GPL-2.0-or-later
 */
class ParsoidCachePrewarmJobTest extends MediaWikiIntegrationTestCase {

	private const NON_JOB_QUEUE_EDIT = 'parsoid edit not executed by job queue';
	private const JOB_QUEUE_EDIT = 'parsoid edit executed by job queue';

	/**
	 * Build a page identity value object from the given page record.
	 *
	 * @param PageRecord $page
	 *
	 * @return PageIdentityValue
	 */
	private function getPageIdentity( PageRecord $page ): PageIdentityValue {
		return PageIdentityValue::localIdentity(
			$page->getId(),
			$page->getNamespace(),
			$page->getDBkey()
		);
	}

	/**
	 * Assert that Parsoid output for the given revision is in the parser
	 * cache, is an HTML document, and contains the expected text.
	 *
	 * @param PageRecord $page
	 * @param \MediaWiki\Revision\RevisionRecord|null $rev
	 * @param string $expectedText Text that must appear in the cached output
	 */
	private function assertParsoidOutputCached( PageRecord $page, $rev, string $expectedText ): void {
		$parsoidOutput = $this->getServiceContainer()->getParsoidOutputAccess()->getCachedParserOutput(
			$this->getPageIdentity( $page ),
			ParserOptions::newFromAnon(),
			$rev
		);

		// Fail with a readable message on a cache miss instead of a fatal
		// "call to a member function on null" below.
		$this->assertNotNull( $parsoidOutput, 'Parsoid output should be in the parser cache' );

		// Ensure we have the parsoid output in parser cache as an HTML document
		$html = $parsoidOutput->getText();
		$this->assertStringContainsString( '<html', $html );
		$this->assertStringContainsString( $expectedText, $html );
	}

	/**
	 * @covers ParsoidCachePrewarmJob::doParsoidCacheUpdate
	 * @covers ParsoidCachePrewarmJob::newSpec
	 * @covers ParsoidCachePrewarmJob::run
	 */
	public function testRun() {
		$page = $this->getExistingTestPage( 'ParsoidPrewarmJob' )->toPageRecord();
		$rev1 = $this->editPage( $page, self::NON_JOB_QUEUE_EDIT )->getNewRevision();

		$parsoidPrewarmJob = new ParsoidCachePrewarmJob(
			[ 'revId' => $rev1->getId(), 'pageId' => $page->getId() ]
		);

		// NOTE: calling ->run() will not run the job scheduled in the queue but will
		// instead call doParsoidCacheUpdate() directly. Will run the job and assert
		// below.
		$execStatus = $parsoidPrewarmJob->run();
		$this->assertTrue( $execStatus );

		$this->assertParsoidOutputCached( $page, $rev1, self::NON_JOB_QUEUE_EDIT );

		$rev2 = $this->editPage( $page, self::JOB_QUEUE_EDIT )->getNewRevision();
		$parsoidPrewarmJob = new ParsoidCachePrewarmJob(
			[ 'revId' => $rev2->getId(), 'pageId' => $page->getId() ]
		);

		$jobQueueGroup = $this->getServiceContainer()->getJobQueueGroup();
		$jobQueueGroup->push( $parsoidPrewarmJob );

		// At this point, we have 1 job scheduled for this job type.
		$this->assertSame( 1, $jobQueueGroup->getQueueSizes()['parsoidCachePrewarm'] );

		// doParsoidCacheUpdate() now with a job queue instead of calling directly.
		$this->runJobs( [ 'maxJobs' => 1 ], [ 'type' => 'parsoidCachePrewarm' ] );

		// At this point, we have 0 jobs scheduled for this job type.
		$this->assertSame( 0, $jobQueueGroup->getQueueSizes()['parsoidCachePrewarm'] );

		$this->assertParsoidOutputCached( $page, $rev2, self::JOB_QUEUE_EDIT );
	}

}
|
||||
Loading…
Reference in a new issue