HtmlInputTransformHelper: Fall back to ParserCache

If a render ID is given via the use-cache parameter, but the key is not
found in the parsoid stash, look at the most recent known rendering of
the revision, and use it if it matches the render ID.

This patch moves the responsibility for looking up RevisionRecords and
PageRecords into ParsoidOutputAccess. This way, callers only need to
have a PageIdentity, and optionally a revision ID.

Bug: T318395
Change-Id: I1aa5b0fd9fb1acaa2544d5a58125fa3810a0eb39
This commit is contained in:
daniel 2022-09-13 17:52:44 +02:00 committed by Daniel Kinzler
parent 4629d4a588
commit a02be0b3f8
9 changed files with 194 additions and 32 deletions

View file

@ -30,16 +30,21 @@ use MediaWiki\MainConfigNames;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Parser\Parsoid\HTMLTransform;
use MediaWiki\Parser\Parsoid\HTMLTransformFactory;
use MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter;
use MediaWiki\Parser\Parsoid\ParsoidOutputAccess;
use MediaWiki\Parser\Parsoid\ParsoidRenderID;
use MediaWiki\Rest\Handler;
use MediaWiki\Rest\HttpException;
use MediaWiki\Rest\LocalizedHttpException;
use MediaWiki\Rest\ResponseInterface;
use MediaWiki\Revision\RevisionAccessException;
use MediaWiki\Revision\RevisionRecord;
use MWUnknownContentModelException;
use ParserOptions;
use Wikimedia\Message\MessageValue;
use Wikimedia\ParamValidator\ParamValidator;
use Wikimedia\Parsoid\Core\ClientError;
use Wikimedia\Parsoid\Core\PageBundle;
use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
use Wikimedia\Parsoid\Parsoid;
@ -81,6 +86,11 @@ class HtmlInputTransformHelper {
*/
private $parsoidOutputStash;
/**
* @var ParsoidOutputAccess
*/
private $parsoidOutputAccess;
/**
* @var array
*/
@ -90,12 +100,14 @@ class HtmlInputTransformHelper {
* @param IBufferingStatsdDataFactory $statsDataFactory
* @param HTMLTransformFactory $htmlTransformFactory
* @param ParsoidOutputStash $parsoidOutputStash
* @param ParsoidOutputAccess $parsoidOutputAccess
* @param array $envOptions
*/
public function __construct(
IBufferingStatsdDataFactory $statsDataFactory,
HTMLTransformFactory $htmlTransformFactory,
ParsoidOutputStash $parsoidOutputStash,
ParsoidOutputAccess $parsoidOutputAccess,
array $envOptions = []
) {
$this->stats = $statsDataFactory;
@ -105,6 +117,7 @@ class HtmlInputTransformHelper {
'outputContentVersion' => Parsoid::defaultHTMLVersion(),
'offsetType' => 'byte',
];
$this->parsoidOutputAccess = $parsoidOutputAccess;
}
/**
@ -306,7 +319,7 @@ class HtmlInputTransformHelper {
if ( !isset( $body['original']['html'] ) && !empty( $body['original']['renderid'] ) ) {
// If the client asked for a render ID, load original data from stash
try {
$original = $this->fetchOriginalDataFromStash( $body['original']['renderid'] );
$original = $this->fetchOriginalDataFromStash( $page, $body['original']['renderid'] );
} catch ( InvalidArgumentException $ex ) {
throw new HttpException(
'Bad stash key',
@ -432,7 +445,35 @@ class HtmlInputTransformHelper {
$response->getBody()->write( $data );
}
private function fetchOriginalDataFromStash( string $key ): ?array {
private function fetchOriginalDataFromCache( PageIdentity $page, ?int $revId ): ?array {
$parserOptions = ParserOptions::newFromAnon();
try {
$parserOutput = $this->parsoidOutputAccess->getCachedParserOutput(
$page,
$parserOptions,
$revId
);
} catch ( RevisionAccessException $e ) {
// The client supplied bad revision ID, or the revision was deleted or suppressed.
throw new HttpException(
'The specified revision does not exist.',
404,
[ 'reason' => $e->getMessage() ]
);
}
if ( !$parserOutput ) {
return null;
}
$pb = PageBundleParserOutputConverter::pageBundleFromParserOutput( $parserOutput );
$renderID = $this->parsoidOutputAccess->getParsoidRenderID( $parserOutput );
return $this->makeOriginalDataArrayFromPageBundle( $pb, $renderID );
}
private function fetchOriginalDataFromStash( PageIdentity $page, string $key ): ?array {
if ( preg_match( '!^(W/)?".*"$!', $key ) ) {
$renderID = ParsoidRenderID::newFromETag( $key );
} else {
@ -442,16 +483,45 @@ class HtmlInputTransformHelper {
$pb = $this->parsoidOutputStash->get( $renderID );
if ( !$pb ) {
return null;
// Looks like the rendering is gone from stash (or the client send us a bogus key).
// Try to load it from the parser cache instead.
// On a wiki with low edit frequency, there is a good chance that it's still there.
try {
$original = $this->fetchOriginalDataFromCache( $page, $renderID->getRevisionID() );
} catch ( HttpException $e ) {
// If the revision isn't found, don't trigger a 404. Return null to trigger a 412.
return null;
}
if ( !$original || $original['html']['key'] !== $renderID->getKey() ) {
// Nothing found in the parser cache, or it's not the correct rendering.
return null;
}
return $original;
}
return $this->makeOriginalDataArrayFromPageBundle( $pb, $renderID );
}
/**
* @param PageBundle $pb
* @param ParsoidRenderID $renderID
*
* @return array
*/
private function makeOriginalDataArrayFromPageBundle(
PageBundle $pb,
ParsoidRenderID $renderID
): array {
$original = [
'contentmodel' => $pb->contentmodel,
'revid' => $renderID->getRevisionID(),
'html' => [
'version' => $pb->version,
'headers' => $pb->headers ?: [],
'body' => $pb->html
'body' => $pb->html,
'key' => $renderID->getKey(),
],
'data-parsoid' => [
'body' => $pb->parsoid,

View file

@ -309,6 +309,7 @@ abstract class ParsoidHandler extends Handler {
$services->getStatsdDataFactory(),
$services->getHTMLTransformFactory(),
$services->getParsoidOutputStash(),
$services->getParsoidOutputAccess(),
$attribs['envOptions']
);

View file

@ -1357,6 +1357,7 @@ return [
$services->getMainConfig()
),
$services->getParserCacheFactory(),
$services->getPageStore(),
$services->getRevisionLookup(),
$services->getGlobalIdGenerator(),
$services->getStatsdDataFactory(),

View file

@ -29,6 +29,7 @@ use MediaWiki\Config\ServiceOptions;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MainConfigNames;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Page\PageLookup;
use MediaWiki\Page\PageRecord;
use MediaWiki\Parser\ParserCacheFactory;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
@ -93,16 +94,19 @@ class ParsoidOutputAccess {
/** @var PageConfigFactory */
private $parsoidPageConfigFactory;
/** @var PageLookup */
private $pageLookup;
/** @var RevisionLookup */
private $revisionLookup;
/**
* @var SiteConfig
*/
/** @var SiteConfig */
private $siteConfig;
/**
* @param ServiceOptions $options
* @param ParserCacheFactory $parserCacheFactory
* @param PageLookup $pageLookup
* @param RevisionLookup $revisionLookup
* @param GlobalIdGenerator $globalIdGenerator
* @param IBufferingStatsdDataFactory $stats
@ -113,6 +117,7 @@ class ParsoidOutputAccess {
public function __construct(
ServiceOptions $options,
ParserCacheFactory $parserCacheFactory,
PageLookup $pageLookup,
RevisionLookup $revisionLookup,
GlobalIdGenerator $globalIdGenerator,
IBufferingStatsdDataFactory $stats,
@ -125,6 +130,7 @@ class ParsoidOutputAccess {
$this->revisionOutputCache = $parserCacheFactory
->getRevisionOutputCache( self::PARSOID_PARSER_CACHE_NAME );
$this->parserCache = $parserCacheFactory->getParserCache( self::PARSOID_PARSER_CACHE_NAME );
$this->pageLookup = $pageLookup;
$this->revisionLookup = $revisionLookup;
$this->globalIdGenerator = $globalIdGenerator;
$this->stats = $stats;
@ -147,32 +153,22 @@ class ParsoidOutputAccess {
}
/**
* @param PageRecord $page
* @param PageIdentity $page
* @param ParserOptions $parserOpts
* @param ?RevisionRecord $revision
* @param RevisionRecord|int|null $revision
* @param int $options See the OPT_XXX constants
*
* @return Status<ParserOutput>
*/
public function getParserOutput(
PageRecord $page,
PageIdentity $page,
ParserOptions $parserOpts,
?RevisionRecord $revision = null,
$revision = null,
int $options = 0
): Status {
$revId = $revision ? $revision->getId() : $page->getLatest();
if ( !$revision ) {
$revision = $this->revisionLookup->getRevisionById( $revId );
[ $page, $revision ] = $this->resolveRevision( $page, $revision );
$isOld = $revision->getId() !== $page->getLatest();
if ( !$revision ) {
throw new RevisionAccessException(
'Revision {revId} not found',
[ 'revId' => $revId ]
);
}
}
$isOld = $revId !== $page->getLatest();
$statsKey = $isOld ? 'ParsoidOutputAccess.Cache.revision' : 'ParsoidOutputAccess.Cache.parser';
$mainSlot = $revision->getSlot( SlotRecord::MAIN );
@ -181,7 +177,7 @@ class ParsoidOutputAccess {
}
if ( !( $options & self::OPT_FORCE_PARSE ) ) {
$parserOutput = $this->getCachedParserOutput(
$parserOutput = $this->getCachedParserOutputInternal(
$page,
$parserOpts,
$revision,
@ -281,6 +277,32 @@ class ParsoidOutputAccess {
return ParsoidRenderID::newFromKey( $renderId );
}
/**
* @param PageIdentity $page
* @param ParserOptions $parserOpts
* @param RevisionRecord|int|null $revision
*
* @return ?ParserOutput
*/
public function getCachedParserOutput(
PageIdentity $page,
ParserOptions $parserOpts,
$revision = null
): ?ParserOutput {
[ $page, $revision ] = $this->resolveRevision( $page, $revision );
$isOld = $revision->getId() !== $page->getLatest();
$statsKey = $isOld ? 'ParsoidOutputAccess.Cache.revision' : 'ParsoidOutputAccess.Cache.parser';
return $this->getCachedParserOutputInternal(
$page,
$parserOpts,
$revision,
$isOld,
$statsKey
);
}
/**
* @param PageRecord $page
* @param ParserOptions $parserOpts
@ -290,7 +312,7 @@ class ParsoidOutputAccess {
*
* @return ?ParserOutput
*/
protected function getCachedParserOutput(
protected function getCachedParserOutputInternal(
PageRecord $page,
ParserOptions $parserOpts,
?RevisionRecord $revision,
@ -352,4 +374,41 @@ class ParsoidOutputAccess {
return $status;
}
/**
* @param PageIdentity $page
* @param RevisionRecord|int|null $revision
*
* @return array [ PageRecord $page, RevisionRecord $revision ]
*/
private function resolveRevision( PageIdentity $page, $revision ): array {
if ( !$page instanceof PageRecord ) {
$name = "$page";
$page = $this->pageLookup->getPageByReference( $page );
if ( !$page ) {
throw new RevisionAccessException(
'Page {name} not found',
[ 'name' => $name ]
);
}
}
if ( $revision === null ) {
$revision = $page->getLatest();
}
if ( is_int( $revision ) ) {
$revId = $revision;
$revision = $this->revisionLookup->getRevisionById( $revId );
if ( !$revision ) {
throw new RevisionAccessException(
'Revision {revId} not found',
[ 'revId' => $revId ]
);
}
}
return [ $page, $revision ];
}
}

View file

@ -81,7 +81,8 @@ class HtmlInputTransformHelperTest extends MediaWikiIntegrationTestCase {
$helper = new HtmlInputTransformHelper(
new NullStatsdDataFactory(),
$this->newMockHTMLTransformFactory( $transformMethodOverrides ),
$this->getServiceContainer()->getParsoidOutputStash()
$this->getServiceContainer()->getParsoidOutputStash(),
$this->getServiceContainer()->getParsoidOutputAccess()
);
return $helper;
@ -786,6 +787,13 @@ class HtmlInputTransformHelperTest extends MediaWikiIntegrationTestCase {
$page = $this->getExistingTestPage();
$html = 'whatever';
// Call getParserOutput() to make sure a rendering is in the ParserCache.
// Even though we find a rendering, it should be discarded because it doesn't match
// the ETag.
$access = $this->getServiceContainer()->getParsoidOutputAccess();
$popt = ParserOptions::newFromAnon();
$access->getParserOutput( $page, $popt )->getValue();
$revid = $page->getLatest();
$eTag = "\"$revid/nope-nope-nope\"";
@ -818,7 +826,7 @@ class HtmlInputTransformHelperTest extends MediaWikiIntegrationTestCase {
$helper->getContent();
}
public function testResponseWithUseStashFallbackToParserCache() {
public function testResponseWithRenderIDFallbackToParserCache() {
$page = $this->getExistingTestPage();
$oldWikitext = $page->getContent()->serialize();
@ -830,10 +838,9 @@ class HtmlInputTransformHelperTest extends MediaWikiIntegrationTestCase {
$key = $access->getParsoidRenderID( $pout );
$html = $pout->getRawText();
$body = [ 'html' => $html ];
// Load the original data based on the ETag
$params = [ 'use-stash' => $key ];
$body = [ 'html' => $html, 'original' => [ 'renderid' => $key ] ];
$params = [];
$helper = $this->newHelper();

View file

@ -104,6 +104,7 @@ class PageHTMLHandlerTest extends MediaWikiIntegrationTestCase {
$services->getMainConfig()
),
$parserCacheFactory,
$services->getPageStore(),
$services->getRevisionLookup(),
$services->getGlobalIdGenerator(),
$services->getStatsdDataFactory(),

View file

@ -1303,7 +1303,8 @@ class ParsoidHandlerTest extends MediaWikiIntegrationTestCase {
$helper = new HtmlInputTransformHelper(
new NullStatsdDataFactory(),
$factory,
$this->getServiceContainer()->getParsoidOutputStash()
$this->getServiceContainer()->getParsoidOutputStash(),
$this->getServiceContainer()->getParsoidOutputAccess()
);
$helper->init( $page, [ 'html' => $html ], [] );

View file

@ -85,6 +85,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
[ 'ParsoidCacheConfig' => $parsoidCacheConfig ]
),
$parserCacheFactory,
$services->getPageStore(),
$services->getRevisionLookup(),
$services->getGlobalIdGenerator(),
$stats,
@ -192,7 +193,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
* in the parsoid parser cache.
*
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::getParserOutput
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::getCachedParserOutput
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::getCachedParserOutputInternal
*/
public function testLatestRevisionIsCached() {
$access = $this->getParsoidOutputAccessWithCache( 1 );
@ -204,6 +205,14 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
$status = $access->getParserOutput( $page, $parserOptions );
$this->assertContainsHtml( self::MOCKED_HTML . ' of ' . self::WIKITEXT, $status );
// Get the ParserOutput from cache
$status = $access->getCachedParserOutput( $page, $parserOptions );
$this->assertContainsHtml( self::MOCKED_HTML . ' of ' . self::WIKITEXT, $status );
// Get the ParserOutput from cache, without supplying a PageRecord
$status = $access->getCachedParserOutput( $page->getTitle(), $parserOptions );
$this->assertContainsHtml( self::MOCKED_HTML . ' of ' . self::WIKITEXT, $status );
// Get the ParserOutput again, this should not trigger a new parse.
$status = $access->getParserOutput( $page, $parserOptions );
$this->assertContainsHtml( self::MOCKED_HTML . ' of ' . self::WIKITEXT, $status );
@ -278,6 +287,14 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
$access->getParserOutput( $page, $parserOptions, $rev );
// Get the ParserOutput from cache, using revision object
$status = $access->getCachedParserOutput( $page, $parserOptions, $rev );
$this->assertContainsHtml( self::MOCKED_HTML . ' of ' . self::WIKITEXT, $status );
// Get the ParserOutput from cache, using revision ID
$status = $access->getCachedParserOutput( $page->getTitle(), $parserOptions, $rev->getId() );
$this->assertContainsHtml( self::MOCKED_HTML . ' of ' . self::WIKITEXT, $status );
// Get the ParserOutput again, this should not trigger a new parse.
$status = $access->getParserOutput( $page, $parserOptions, $rev );
$this->assertContainsHtml( self::MOCKED_HTML . ' of ' . self::WIKITEXT, $status );
@ -299,6 +316,10 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
$status1 = $access->getParserOutput( $page, $parserOptions, $rev1 );
$this->assertContainsHtml( self::MOCKED_HTML . ' of ' . self::WIKITEXT, $status1 );
// Again, using just the revision ID
$status1 = $access->getParserOutput( $page, $parserOptions, $rev1->getId() );
$this->assertContainsHtml( self::MOCKED_HTML . ' of ' . self::WIKITEXT, $status1 );
// check that getParsoidRenderID() doesn't throw
$output1 = $status1->getValue();
$this->assertNotNull( $access->getParsoidRenderID( $output1 ) );

View file

@ -116,6 +116,7 @@ class RevisionHTMLHandlerTest extends MediaWikiIntegrationTestCase {
$services->getMainConfig()
),
$parserCacheFactory,
$services->getPageStore(),
$services->getRevisionLookup(),
$idGenerator,
$services->getStatsdDataFactory(),