Hacks to avoid cold cache misses after ParsoidOutputAccess changes

* ParsoidOutputAccess used a 'parsoid' ParserCache instance and did not
  set the 'useParsoid' parser option for tier 2 ParserOutput cache key
  computations.

* ParserOutputAccess uses 'pcache' for legacy parser output and
  'parsoid-pcache' for Parsoid parser output objects based on whether
  'useParsoid' parser option is true or false.

* 'parsoid-pcache' is right now very sparsely populated since useParsoid
  is only used for testing.

* In Ic9b7cc0fcf36, where we make ParsoidOutputAccess a thin wrapper
  over ParserOutputAccess, all Parsoid parser output requests will go
  to ParserOutputAccess's 'parsoid-pcache' instance which is sparsely
  populated and hence will result in a lot of cold cache misses.

* To eliminate this scenario, this patch adds hardcoded hacks to both
  ParserOutputAccess and ParserCache to query the 'parsoid' PC instance
  on cache misses to the 'parsoid-pcache' instance. Over a 3-week
  period, as 'parsoid-pcache' fills up, there will be fewer and fewer
  access to the 'parsoid' PC instance which will also expire. At the
  end of that period, we can remove this hack.

  T347632 tracks removal of these hacks.

* Added new PHP unit test verifying that the hack work as intended.

Bug: T332931
Change-Id: I7f933fd61bf358c6ea0e0c1202231cac618f9e8d
This commit is contained in:
Subramanya Sastry 2023-09-27 19:18:52 -05:00 committed by Daniel Kinzler
parent 23b972c89b
commit 56025174a2
3 changed files with 78 additions and 2 deletions

View file

@ -23,6 +23,7 @@ use IBufferingStatsdDataFactory;
use InvalidArgumentException;
use MediaWiki\Logger\Spi as LoggerSpi;
use MediaWiki\Parser\ParserCacheFactory;
use MediaWiki\Parser\Parsoid\ParsoidOutputAccess;
use MediaWiki\Parser\RevisionOutputCache;
use MediaWiki\Revision\RevisionLookup;
use MediaWiki\Revision\RevisionRecord;
@ -232,6 +233,20 @@ class ParserOutputAccess {
$this->localCache[$classCacheKey] = $output;
}
// HACK! If the 'useParsoid' option is set, also look up content
// from the 'parsoid' cache that was used by ParsoidOutputAccess to store
// Parsoid content. This lets us fetch content from previously stored content
// without running into cold cache problems!
// (T347632 tracks removal of this hack)
if ( !$output && $parserOptions->getUseParsoid() ) {
if ( $useCache === self::CACHE_PRIMARY ) {
$fallbackParsoidCache = $this->parserCacheFactory->getParserCache(
ParsoidOutputAccess::PARSOID_PARSER_CACHE_NAME
);
$output = $fallbackParsoidCache->get( $page, $parserOptions );
}
}
if ( $output ) {
$this->statsDataFactory->increment( "ParserOutputAccess.Cache.$useCache.hit" );
} else {

View file

@ -287,6 +287,17 @@ class ParserCache {
array $usedOptions = null
): string {
$usedOptions ??= ParserOptions::allCacheVaryingOptions();
// HACK! Ignore the 'useParsoid' option when we are querying content
// from the 'parsoid' cache that was used by ParsoidOutputAccess to store
// Parsoid content (without setting 'useParsoid' option in ParerOptions).
// During this migration away from ParsoidOutputAccess to ParserOutputAccess,
// this lets us fetch content from previously stored content without running
// into cold cache problems!
// (T347632 tracks removal of this hack)
if ( $this->name === 'parsoid' ) {
$usedOptions = array_diff( $usedOptions, [ 'useParsoid' ] );
}
// idhash seem to mean 'page id' + 'rendering hash' (r3710)
$pageid = $page->getId( PageRecord::LOCAL );
$title = $this->titleFactory->newFromPageIdentity( $page );

View file

@ -70,9 +70,9 @@ class ParserOutputAccessTest extends MediaWikiIntegrationTestCase {
$this->assertNotSame( $this->getHtml( $expected ), $this->getHtml( $actual ), $msg );
}
private function getParserCache( $bag = null ) {
private function getParserCache( BagOStuff $bag = null, string $name = 'test' ): ParserCache {
$parserCache = new ParserCache(
'test',
$name,
$bag ?: new HashBagOStuff(),
'19900220000000',
$this->getServiceContainer()->getHookContainer(),
@ -800,6 +800,9 @@ class ParserOutputAccessTest extends MediaWikiIntegrationTestCase {
$caches = [
$this->getParserCache( new HashBagOStuff() ),
$this->getParserCache( new HashBagOStuff() ),
// Needed to support T332931 hacks
// Will be removed by T347632
$this->getParserCache( new HashBagOStuff() )
];
$calls = [];
$parserCacheFactory
@ -892,4 +895,51 @@ class ParserOutputAccessTest extends MediaWikiIntegrationTestCase {
$this->assertNotEquals( $notParsoid, $parsoid, "Should use different caches" );
$this->assertEquals( array_fill( 0, count( $calls ), $parsoid ), $calls );
}
public function testT332931Hacks() {
// Cache used by ParsoidOutputAccess
$parsoidParserCache = $this->getParserCache( new HashBagOStuff(), 'parsoid' );
// Caches used by ParserOutputAccess
$parserCacheFactory = $this->createMock( ParserCacheFactory::class );
$legacyParserCache = $this->getParserCache( new HashBagOStuff(), 'pcache' );
$otherParsoidParserCache = $this->getParserCache( new HashBagOStuff(), 'parsoid-pcache' );
$calls = [];
$parserCacheFactory->method( 'getParserCache' )
->willReturnCallback( static function ( $cacheName ) use ( $parsoidParserCache, $otherParsoidParserCache, $legacyParserCache ) {
if ( $cacheName === 'parsoid' ) {
return $parsoidParserCache;
} elseif ( $cacheName === 'parsoid-pcache' ) {
return $otherParsoidParserCache;
} else {
return $legacyParserCache;
}
} );
$revisionCache = $this->getRevisionOutputCache( new HashBagOStuff() );
$parserCacheFactory->method( 'getRevisionOutputCache' )->willReturn( $revisionCache );
$access = $this->getParserOutputAccessWithCacheFactory( $parserCacheFactory );
// No hit when first accessed
$parserOptions = $this->getParserOptions();
$page = $this->getNonexistingTestPage( __METHOD__ );
$this->editPage( $page, 'Hello World' );
$output = $access->getCachedParserOutput( $page, $parserOptions );
$this->assertNull( $output );
// Now store it in the ParosidOutputAccess ParserCache
$cachedOutput = new ParserOutput( 'Cached Text' );
$parsoidParserCache->save( $cachedOutput, $page, $parserOptions );
// Try again -- the hacks should not kick in
// because this does not set useParsoid
$output = $access->getCachedParserOutput( $page, $parserOptions );
$this->assertNull( $output );
// Try again with useParsoid set! -- the hacks should kick in
$parserOptions->setUseParsoid();
$output = $access->getCachedParserOutput( $page, $parserOptions );
$this->assertNotNull( $output );
}
}