* ParsoidParser hadn't registered a watcher on ParserOptions so far. Because of this, you can see that the current parser cache key (in deployed production code) doesn't have 'useParsoid=1' in it. Ex: View source on enwiki:Hospet shows that the parser cache key there is "enwiki:parsoid-pcache:idhash:2360619-0!canonical". The only reason this doesn't conflict with legacy parser output is that we use "parsoid-pcache", a different cache instance than the "pcache" used for legacy parser output. But if/when we decide to use the same parser cache instance, this could cause cache corruptions. With FlaggedRevisions, where a single "stable-pcache" parser cache instance is used, in local testing, this was causing Parsoid HTML to be saved without "useParsoid=1", and so Parsoid HTML was being returned for legacy parser cache requests. * In addition, fix the code in PageBundleParserOutputConverter to copy over internal metadata (which includes used options). This ensures that any tracked parser options aren't lost and the right parser cache key is constructed later on. * Added / updated a number of tests that verify that usedOptions is tracked correctly in the useParsoid code paths. The tests fail without the code changes in this patch. Bug: T340703 Bug: T335157 Needed-By: I0e954949768044eea6ec275a36d0d6d7ed457e8e Change-Id: I076d5d362bdfd9d4b2ca8886bf6b30c1a746aee7
145 lines
5.4 KiB
PHP
145 lines
5.4 KiB
PHP
<?php
|
|
|
|
namespace MediaWiki\Parser\Parsoid;
|
|
|
|
use MediaWiki\Parser\ParserOutputFlags;
|
|
use MediaWikiUnitTestCase;
|
|
use ParserOutput;
|
|
use Wikimedia\Parsoid\Core\PageBundle;
|
|
|
|
/**
 * Unit tests for the conversions between Parsoid PageBundle objects and
 * ParserOutput objects performed by PageBundleParserOutputConverter.
 *
 * @covers MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter
 */
class PageBundleParserOutputConverterTest extends MediaWikiUnitTestCase {

	/**
	 * A PageBundle converted to a ParserOutput must expose its HTML as the raw
	 * text and its remaining fields as extension data.
	 *
	 * @dataProvider provideParserOutputFromPageBundle
	 * @param PageBundle $pageBundle
	 */
	public function testParserOutputFromPageBundle( PageBundle $pageBundle ) {
		$output = PageBundleParserOutputConverter::parserOutputFromPageBundle( $pageBundle );

		$this->assertSame( $pageBundle->html, $output->getRawText() );
		$this->assertPageBundleStoredInOutput( $pageBundle, $output );
	}

	/**
	 * When an existing ParserOutput is passed along, its metadata (extension
	 * data, output flags, page properties and recorded options) must be present
	 * on the converted output.
	 *
	 * @dataProvider provideParserOutputFromPageBundle
	 * @param PageBundle $pageBundle
	 */
	public function testParserOutputFromPageBundleShouldPreserveMetadata( PageBundle $pageBundle ) {
		// Create a ParserOutput with some metadata properties already set.
		$original = new ParserOutput();
		$original->setExtensionData( 'test-key', 'test-data' );
		$original->setOutputFlag( ParserOutputFlags::NO_GALLERY );
		$original->setPageProperty( 'forcetoc', '' );
		$original->recordOption( 'test1' );
		$original->recordOption( 'test2' );

		// This should preserve the metadata.
		$output = PageBundleParserOutputConverter::parserOutputFromPageBundle( $pageBundle, $original );
		$this->assertSame( $pageBundle->html, $output->getRawText() );

		// Check the page bundle data
		$this->assertPageBundleStoredInOutput( $pageBundle, $output );

		// Check our additional metadata properties
		$this->assertSame( 'test-data', $output->getExtensionData( 'test-key' ) );
		$this->assertTrue( $output->getOutputFlag( ParserOutputFlags::NO_GALLERY ) );
		$this->assertSame( '', $output->getPageProperty( 'forcetoc' ) );
		$this->assertSame( [ 'test1', 'test2' ], $output->getUsedOptions() );

		// Check that $original and $output can be modified independently of each other
		$original->setText( 'new text version' );
		$this->assertNotSame( 'new text version', $output->getRawText() );
	}

	/**
	 * Data sets for the PageBundle => ParserOutput direction.
	 *
	 * @return iterable Named data sets, each holding a single PageBundle.
	 */
	public static function provideParserOutputFromPageBundle(): iterable {
		yield 'should convert PageBundle containing data-parsoid and data-mw' => [
			new PageBundle(
				'html content',
				[ 'ids' => '1.33' ],
				[ 'ids' => '1.33' ],
				'1.x',
				[ 'content-language' => 'abc' ],
				'testing'
			)
		];

		yield 'should convert PageBundle that contains no data-parsoid or data-mw' => [
			new PageBundle(
				'html content',
				[],
				[],
				'1.x',
				[ 'content-language' => null ]
			)
		];
	}

	/**
	 * A ParserOutput carrying page bundle extension data must convert back into
	 * a PageBundle with matching fields; absent values fall back to the
	 * defaults asserted below.
	 *
	 * @dataProvider providePageBundleFromParserOutput
	 * @param ParserOutput $parserOutput
	 */
	public function testPageBundleFromParserOutput( ParserOutput $parserOutput ) {
		$pageBundle = PageBundleParserOutputConverter::pageBundleFromParserOutput( $parserOutput );

		$this->assertSame( $parserOutput->getRawText(), $pageBundle->html );

		$extensionData = $parserOutput->getExtensionData(
			PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY
		);

		$this->assertSame( $extensionData['parsoid'] ?? [], $pageBundle->parsoid );
		$this->assertSame( $extensionData['mw'] ?? [], $pageBundle->mw );

		// NOTE: We default to "0.0.0" as a fix for T325137. We can go back to null
		// once PageBundle::responseData is more robust.
		$this->assertSame( $extensionData['version'] ?? '0.0.0', $pageBundle->version );

		$this->assertSame( $extensionData['headers'] ?? [], $pageBundle->headers );
		$this->assertSame( $extensionData['headers']['content-language'], $pageBundle->headers['content-language'] );
		$this->assertSame( $extensionData['contentmodel'] ?? null, $pageBundle->contentmodel );
	}

	/**
	 * Data sets for the ParserOutput => PageBundle direction.
	 *
	 * Static, like the other provider in this class, so that it does not depend
	 * on a test case instance.
	 *
	 * @return iterable Named data sets, each holding a single ParserOutput.
	 */
	public static function providePageBundleFromParserOutput(): iterable {
		yield 'should convert ParsoidOutput containing data-parsoid and data-mw' => [
			self::getParsoidOutput(
				'hello world',
				[
					'parsoid' => [ 'ids' => '1.22' ],
					'mw' => [],
					'version' => '2.x',
					'headers' => [ 'content-language' => 'xyz' ],
					// Keyed explicitly so that the contentmodel assertion compares a
					// real value instead of null on both sides.
					'contentmodel' => 'testing'
				]
			)
		];

		yield 'should convert ParsoidOutput that does not contain data-parsoid or data-mw' => [
			self::getParsoidOutput(
				'hello world',
				[
					'parsoid' => null,
					'mw' => null,
					'version' => null,
					'headers' => [ 'content-language' => null ]
				]
			)
		];
	}

	/**
	 * Assert that every non-HTML field of $pageBundle is stored in the page
	 * bundle extension data of $output.
	 *
	 * @param PageBundle $pageBundle The bundle that was converted.
	 * @param ParserOutput $output The conversion result to inspect.
	 */
	private function assertPageBundleStoredInOutput( PageBundle $pageBundle, ParserOutput $output ): void {
		$extensionData = $output->getExtensionData( PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY );

		$this->assertSame( $pageBundle->mw, $extensionData['mw'] );
		$this->assertSame( $pageBundle->parsoid, $extensionData['parsoid'] );
		$this->assertSame( $pageBundle->headers, $extensionData['headers'] );
		$this->assertSame( $pageBundle->headers['content-language'], $extensionData['headers']['content-language'] );
		$this->assertSame( $pageBundle->version, $extensionData['version'] );
		$this->assertSame( $pageBundle->contentmodel, $extensionData['contentmodel'] );
	}

	/**
	 * Build a ParserOutput holding $rawText as its text and $pageBundleData as
	 * its page bundle extension data.
	 *
	 * @param string $rawText
	 * @param array|null $pageBundleData
	 * @return ParserOutput
	 */
	private static function getParsoidOutput(
		string $rawText,
		?array $pageBundleData
	): ParserOutput {
		$parserOutput = new ParserOutput( $rawText );
		$parserOutput->setExtensionData(
			PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY,
			$pageBundleData
		);

		return $parserOutput;
	}

}
|