wiki.techinc.nl/tests/phpunit/integration/includes/parser/Parsoid/LanguageVariantConverterTest.php
daniel e7f21f6e64 HtmlOutputRendererHelper: fall back to page language
HtmlOutputRendererHelper should not crash hard if the ParserOutput has
no language set. ParserOutput may come from a variety of places, we
should be lenient about it not having a language.

However, we should try harder to actually set a language on ParserOutput
if we have one available. So this also updates
PageBundleParserOutputConverter to keep the ParserOutput's language in
sync with the language header in the PageBundle.

Bug: T349868
Bug: T353689
Bug: T359426
Change-Id: I2edf20dc3b199e22cda2f32bc858c21ca7d8f4bd
2024-03-06 17:18:16 +00:00

259 lines
8.5 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
namespace MediaWiki\Tests\Parser\Parsoid;
use Language;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Parser\ParserOutput;
use MediaWiki\Parser\Parsoid\LanguageVariantConverter;
use MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter;
use MediaWikiIntegrationTestCase;
use Wikimedia\Bcp47Code\Bcp47CodeValue;
use Wikimedia\Parsoid\Core\PageBundle;
use Wikimedia\Parsoid\Parsoid;
/**
 * Integration tests for LanguageVariantConverter.
 *
 * The same data sets are run twice: once with a PageBundle as input
 * (convertPageBundleVariant) and once with a ParserOutput built from that
 * PageBundle (convertParserOutputVariant).
 *
 * @group Database
 * @covers \MediaWiki\Parser\Parsoid\LanguageVariantConverter
 */
class LanguageVariantConverterTest extends MediaWikiIntegrationTestCase {

	/** Input HTML for the data sets that target 'en-x-piglatin'. */
	private const ENGLISH_HTML = '<p>test language conversion</p>';

	/** Fragment expected in the span-stripped output for ENGLISH_HTML. */
	private const PIG_LATIN_FRAGMENT = '>esttay anguagelay onversioncay<';

	/** Cyrillic-script input HTML for the data sets that target 'crh-Latn'. */
	private const CRH_CYRILLIC_HTML =
		'<p>Бутун инсанлар сербестлик, менлик ве укъукъларда мусавий олып дунйагъа келелер.</p>';

	/** Fragment expected in the span-stripped output for CRH_CYRILLIC_HTML. */
	private const CRH_LATIN_FRAGMENT =
		'>Butun insanlar serbestlik, menlik ve uquqlarda musaviy olıp dunyağa keleler.</';

	/**
	 * Turns on the UsePigLatinVariant setting for every test.
	 *
	 * NOTE(review): presumably required by the data sets that target
	 * 'en-x-piglatin' — confirm against the setting's documentation.
	 */
	public function setUp(): void {
		// Let the base classes run their own fixture setup first.
		parent::setUp();
		$this->overrideConfigValue( 'UsePigLatinVariant', true );
	}

	/**
	 * Data sets shared by both tests.
	 *
	 * Each data set is a list of:
	 *  0. PageBundle  the input to convert
	 *  1. string|null page language override to set on the converter, if any
	 *  2. string      target variant code
	 *  3. string|null source variant code, if any
	 *  4. string      fragment that must occur in the output HTML once
	 *                 <span> tags have been stripped
	 *  5. (optional) string|false|null expected output language; null or
	 *                 omitted means "same as the target", false disables
	 *                 the language assertions
	 *
	 * @return iterable
	 */
	public static function provideConvertPageBundleVariant(): iterable {
		yield 'No source or base, rely on page language (en)' => [
			self::newPageBundle( self::ENGLISH_HTML ),
			null,
			'en-x-piglatin',
			null,
			self::PIG_LATIN_FRAGMENT
		];
		yield 'Source variant is base language' => [
			self::newPageBundle( self::ENGLISH_HTML, [ 'content-language' => 'en' ] ),
			null,
			'en-x-piglatin',
			'en',
			self::PIG_LATIN_FRAGMENT
		];
		yield 'Source language is null' => [
			self::newPageBundle( self::CRH_CYRILLIC_HTML, [ 'content-language' => 'crh' ] ),
			null,
			'crh-Latn',
			null,
			self::CRH_LATIN_FRAGMENT
		];
		yield 'Source language is explicit' => [
			self::newPageBundle( self::CRH_CYRILLIC_HTML, [ 'content-language' => 'crh' ] ),
			null,
			'crh-Latn',
			'crh-Cyrl',
			self::CRH_LATIN_FRAGMENT
		];
		yield 'Content language is provided via HTTP header' => [
			self::newPageBundle( self::CRH_CYRILLIC_HTML, [ 'content-language' => 'crh-Cyrl' ] ),
			'crh',
			'crh-Latn',
			'crh-Cyrl',
			self::CRH_LATIN_FRAGMENT
		];
		yield 'Content language is variant' => [
			self::newPageBundle( self::CRH_CYRILLIC_HTML ),
			'crh-Cyrl',
			'crh-Latn',
			null,
			self::CRH_LATIN_FRAGMENT
		];
		yield 'No content-language, but source variant provided' => [
			self::newPageBundle( self::CRH_CYRILLIC_HTML ),
			null,
			'crh-Latn',
			'crh-Cyrl',
			self::CRH_LATIN_FRAGMENT
		];
		yield 'Source variant is a base language code' => [
			self::newPageBundle( self::CRH_CYRILLIC_HTML ),
			null,
			'crh-Latn',
			'crh',
			self::CRH_LATIN_FRAGMENT
		];
		yield 'Base language does not support variants' => [
			self::newPageBundle( '<p>Hallo Wereld</p>' ),
			'nl',
			'nl-be',
			null,
			'>Hallo Wereld<',
			false // The output language is currently not indicated. Should be expected to be 'nl' in the future.
		];
	}

	/**
	 * @dataProvider provideConvertPageBundleVariant
	 *
	 * @param PageBundle $pageBundle Input to convert
	 * @param string|null $contentLanguage Page language override, if any
	 * @param string $target Target variant code
	 * @param string|null $source Source variant code, if any
	 * @param string $expected Fragment expected in the span-stripped output HTML
	 * @param string|false|null $expectedLanguage Expected output language;
	 *   null means $target, false skips the language assertions
	 */
	public function testConvertPageBundleVariant(
		PageBundle $pageBundle,
		$contentLanguage,
		$target,
		$source,
		$expected,
		$expectedLanguage = null
	) {
		// Resolve the default before $target is turned into a Language object.
		$expectedLanguage = $expectedLanguage ?? $target;

		[ $converter, $targetLanguage, $sourceLanguage ] =
			$this->prepareConversion( $contentLanguage, $target, $source );

		$outputPageBundle = $converter->convertPageBundleVariant(
			$pageBundle, $targetLanguage, $sourceLanguage
		);
		$html = $outputPageBundle->toHtml();

		$this->assertStringContainsString( $expected, self::stripSpans( $html ) );

		if ( $expectedLanguage !== false ) {
			$this->assertContentLanguageMeta( $expectedLanguage, $html );
			$this->assertContentLanguageHeader(
				$expectedLanguage,
				$outputPageBundle->headers['content-language'] ?? null
			);
		}

		$this->assertEquals( Parsoid::defaultHTMLVersion(), $outputPageBundle->version );
	}

	/**
	 * Same data sets as provideConvertPageBundleVariant(), with the input
	 * PageBundle converted to a ParserOutput.
	 *
	 * @return iterable
	 */
	public static function provideConvertParserOutputVariant(): iterable {
		foreach ( self::provideConvertPageBundleVariant() as $name => $case ) {
			$case[0] = PageBundleParserOutputConverter::parserOutputFromPageBundle( $case[0] );
			yield $name => $case;
		}
	}

	/**
	 * @dataProvider provideConvertParserOutputVariant
	 *
	 * @param ParserOutput $parserOutput Input to convert
	 * @param string|null $contentLanguage Page language override, if any
	 * @param string $target Target variant code
	 * @param string|null $source Source variant code, if any
	 * @param string $expected Fragment expected in the span-stripped output HTML
	 * @param string|false|null $expectedLanguage Expected output language;
	 *   null means $target, false skips the language assertions
	 */
	public function testConvertParserOutputVariant(
		ParserOutput $parserOutput,
		$contentLanguage,
		$target,
		$source,
		$expected,
		$expectedLanguage = null
	) {
		// Resolve the default before $target is turned into a Language object.
		$expectedLanguage = $expectedLanguage ?? $target;

		[ $converter, $targetLanguage, $sourceLanguage ] =
			$this->prepareConversion( $contentLanguage, $target, $source );

		// Set some misc metadata in $parserOutput so we can verify it was
		// preserved.
		$parserOutput->setExtensionData( 'my-key', 'my-data' );

		$modifiedParserOutput = $converter->convertParserOutputVariant(
			$parserOutput, $targetLanguage, $sourceLanguage
		);

		$this->assertSame( 'my-data', $modifiedParserOutput->getExtensionData( 'my-key' ) );

		$html = $modifiedParserOutput->getRawText();
		$this->assertStringContainsString( $expected, self::stripSpans( $html ) );

		if ( $expectedLanguage !== false ) {
			$this->assertContentLanguageMeta( $expectedLanguage, $html );
		}

		$extensionData = $modifiedParserOutput
			->getExtensionData( PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY );
		$this->assertEquals( Parsoid::defaultHTMLVersion(), $extensionData['version'] );

		if ( $expectedLanguage !== false ) {
			$this->assertContentLanguageHeader(
				$expectedLanguage,
				$extensionData['headers']['content-language'] ?? null
			);
			$this->assertSame( $expectedLanguage, (string)$modifiedParserOutput->getLanguage() );
		}
	}

	/**
	 * Builds an input PageBundle with placeholder data-parsoid / data-mw
	 * payloads and the default Parsoid HTML version.
	 *
	 * @param string $html
	 * @param array $headers
	 * @return PageBundle
	 */
	private static function newPageBundle( string $html, array $headers = [] ): PageBundle {
		return new PageBundle(
			$html,
			[ 'parsoid-data' ],
			[ 'mw-data' ],
			Parsoid::defaultHTMLVersion(),
			$headers
		);
	}

	/**
	 * Creates a converter for an existing test page and resolves the
	 * language codes of a data set into Language objects.
	 *
	 * @param string|null $contentLanguage Applied as page language override when truthy
	 * @param string $target
	 * @param string|null $source Passed through untouched when falsy
	 * @return array [ LanguageVariantConverter, Language, Language|null ]
	 */
	private function prepareConversion( $contentLanguage, $target, $source ): array {
		$page = $this->getExistingTestPage();
		$converter = $this->getLanguageVariantConverter( $page );

		if ( $contentLanguage ) {
			$converter->setPageLanguageOverride( $this->getLanguageBcp47( $contentLanguage ) );
		}

		$targetLanguage = $this->getLanguageBcp47( $target );
		$sourceLanguage = $source ? $this->getLanguageBcp47( $source ) : $source;

		return [ $converter, $targetLanguage, $sourceLanguage ];
	}

	/**
	 * Removes opening and closing <span> tags, keeping their content, so
	 * expected text can be matched regardless of span markup in the output.
	 *
	 * @param string $html
	 * @return string
	 */
	private static function stripSpans( string $html ): string {
		return preg_replace( ':</?span[^>]*>:', '', $html );
	}

	/**
	 * Asserts that $html contains a content-language <meta> tag naming
	 * $expectedLanguage (case-insensitive).
	 *
	 * @param string $expectedLanguage
	 * @param string $html
	 */
	private function assertContentLanguageMeta( string $expectedLanguage, string $html ): void {
		// Quote the code so it is always matched literally.
		$quoted = preg_quote( $expectedLanguage, '@' );
		$this->assertMatchesRegularExpression(
			"@<meta http-equiv=\"content-language\" content=\"($quoted)\"/>@i",
			$html
		);
	}

	/**
	 * Asserts that a content-language header value is present and starts
	 * with $expectedLanguage (case-insensitive).
	 *
	 * @param string $expectedLanguage
	 * @param mixed $header Header value, or null if the header is missing
	 */
	private function assertContentLanguageHeader( string $expectedLanguage, $header ): void {
		// Fail with a readable message rather than a type error when the header is absent.
		$this->assertIsString( $header, 'content-language header should be set' );
		$quoted = preg_quote( $expectedLanguage, '@' );
		$this->assertMatchesRegularExpression( "@^$quoted@i", $header );
	}

	/**
	 * Resolves a BCP 47 language code to a Language object via the
	 * LanguageFactory service.
	 *
	 * @param string $bcp47Code
	 * @return Language
	 */
	private function getLanguageBcp47( $bcp47Code ): Language {
		$languageFactory = $this->getServiceContainer()->getLanguageFactory();
		return $languageFactory->getLanguage( new Bcp47CodeValue( $bcp47Code ) );
	}

	/**
	 * Builds the converter under test for the given page, wired up with
	 * services from the test service container.
	 *
	 * @param PageIdentity $pageIdentity
	 * @return LanguageVariantConverter
	 */
	private function getLanguageVariantConverter( PageIdentity $pageIdentity ): LanguageVariantConverter {
		$services = $this->getServiceContainer();

		return new LanguageVariantConverter(
			$pageIdentity,
			$services->getParsoidPageConfigFactory(),
			$services->getService( '_Parsoid' ),
			$services->getParsoidSiteConfig(),
			$services->getTitleFactory(),
			$services->getLanguageConverterFactory(),
			$services->getLanguageFactory()
		);
	}
}