LanguageVariantConverter: Use content language code from HTTP header

Use the content language from the header, and give that the highest
priority when identifying the page language.

Bug: T317019
Change-Id: Ibb0671f1b873ef83a4d53824a9c4c17726e68635
This commit is contained in:
Abijeet 2022-10-07 16:08:50 +05:30
parent c0845f10e3
commit 715080cfd5
4 changed files with 82 additions and 11 deletions

View file

@ -1085,6 +1085,11 @@ abstract class ParsoidHandler extends Handler {
->getHTMLTransformFactory()
->getLanguageVariantConverter( $pageIdentity );
$languageVariantConverter->setPageConfig( $pageConfig );
$httpContentLanguage = $attribs['pagelanguage' ] ?? null;
if ( $httpContentLanguage ) {
$languageVariantConverter->setPageContentLanguage( $httpContentLanguage );
}
try {
$out = $languageVariantConverter->convertPageBundleVariant( $pb, $target, $source );
} catch ( InvalidArgumentException $e ) {

View file

@ -44,6 +44,9 @@ class LanguageVariantConverter {
/** @var TitleFactory */
private $titleFactory;
/** @var string */
private $pageContentLanguage;
public function __construct(
PageIdentity $pageIdentity,
PageConfigFactory $pageConfigFactory,
@ -73,6 +76,16 @@ class LanguageVariantConverter {
$this->pageConfig = $pageConfig;
}
/**
 * Record a content language that should override the page language
 * otherwise derived from the page bundle, the page config or the title.
 *
 * NOTE(review): getPageLanguageCode() tests this value for truthiness, so
 * an empty string behaves as if no override had been set.
 *
 * @param string $language Language code to treat as the page content language
 * @return void
 */
public function setPageContentLanguage( string $language ) {
	$this->pageContentLanguage = $language;
}
/**
* Perform variant conversion on a PageBundle object.
*
@ -157,7 +170,19 @@ class LanguageVariantConverter {
}
/**
 * Determine the language code of the page being converted.
 *
 * Sources are consulted in priority order and the first non-empty one wins:
 *  1. the override set via setPageContentLanguage(),
 *  2. the 'content-language' header carried by the page bundle,
 *  3. the page config, when one has been set,
 *  4. the page language of the title.
 *
 * @param PageBundle $pageBundle Bundle whose headers may carry a content language
 * @return string
 */
private function getPageLanguageCode( PageBundle $pageBundle ): string {
	// An explicitly supplied content language beats every other source.
	if ( $this->pageContentLanguage ) {
		return $this->pageContentLanguage;
	}

	// `??` keeps this safe when the header (or the headers array) is absent.
	$pageBundleLanguage = $pageBundle->headers[ 'content-language' ] ?? null;
	if ( $pageBundleLanguage ) {
		return $pageBundleLanguage;
	}

	if ( $this->pageConfig ) {
		return $this->pageConfig->getPageLanguage();
	}

	// Last resort: whatever language the title itself reports.
	return $this->pageTitle->getPageLanguage()->getCode();
}
}

View file

@ -31,6 +31,7 @@ class LanguageVariantConverterTest extends MediaWikiIntegrationTestCase {
),
'en-x-piglatin',
null,
null,
'>esttay anguagelay onversioncay<'
];
yield 'Source variant is base language' => [
@ -43,6 +44,7 @@ class LanguageVariantConverterTest extends MediaWikiIntegrationTestCase {
),
'en-x-piglatin',
'en',
null,
'>esttay anguagelay onversioncay<'
];
yield 'Source language is null' => [
@ -55,6 +57,7 @@ class LanguageVariantConverterTest extends MediaWikiIntegrationTestCase {
),
'sr-el',
null,
null,
'>Ovo je testna stranica<'
];
yield 'Source language is explicit' => [
@ -67,6 +70,20 @@ class LanguageVariantConverterTest extends MediaWikiIntegrationTestCase {
),
'sr-el',
'sr-ec',
null,
'>Ovo je testna stranica<'
];
yield 'Content language is provided via HTTP header' => [
new PageBundle(
'<p>Ово је тестна страница</p>',
[ 'parsoid-data' ],
[ 'mw-data' ],
Parsoid::defaultHTMLVersion(),
[ 'content-language' => 'sr-ec' ]
),
'sr-el',
'sr-ec',
'sr',
'>Ovo je testna stranica<'
];
}
@ -74,9 +91,12 @@ class LanguageVariantConverterTest extends MediaWikiIntegrationTestCase {
/**
* @dataProvider provideConvertPageBundleVariant
*/
public function testConvertPageBundleVariant( PageBundle $pageBundle, $target, $source, $expected ) {
public function testConvertPageBundleVariant( PageBundle $pageBundle, $target, $source, $contentLanguage, $expected ) {
$page = $this->getExistingTestPage();
$languageVariantConverter = $this->getLanguageVariantConverter( $page );
if ( $contentLanguage ) {
$languageVariantConverter->setPageContentLanguage( $contentLanguage );
}
$outputPageBundle = $languageVariantConverter->convertPageBundleVariant( $pageBundle, $target, $source );
@ -97,9 +117,12 @@ class LanguageVariantConverterTest extends MediaWikiIntegrationTestCase {
/**
* @dataProvider provideConvertParserOutputVariant
*/
public function testConvertParserOutputVariant( ParserOutput $parserOutput, $target, $source, $expected ) {
public function testConvertParserOutputVariant( ParserOutput $parserOutput, $target, $source, $contentLanguage, $expected ) {
$page = $this->getExistingTestPage();
$languageVariantConverter = $this->getLanguageVariantConverter( $page );
if ( $contentLanguage ) {
$languageVariantConverter->setPageContentLanguage( $contentLanguage );
}
$modifiedParserOutput = $languageVariantConverter
->convertParserOutputVariant( $parserOutput, $target, $source );

View file

@ -43,6 +43,7 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
$shouldPageConfigFactoryBeUsed,
$isLanguageConversionEnabled,
$pageBundleLanguageCode,
null,
$titleLanguageCode,
$targetLanguageCode,
$sourceLanguageCode,
@ -66,7 +67,8 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
public function testSourceLanguage(
?string $pageBundleLanguageCode,
string $titleLanguageCode,
?string $sourceLanguageCode
?string $sourceLanguageCode,
?string $contentLanguage
) {
// Decide what should be called and what should not be
$shouldParsoidBeUsed = true;
@ -89,6 +91,7 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
$shouldPageConfigFactoryBeUsed,
$isLanguageConversionEnabled,
$pageBundleLanguageCode,
$contentLanguage,
$titleLanguageCode,
$targetLanguageCode,
$sourceLanguageCode,
@ -99,9 +102,13 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
}
/**
 * Data sets for testSourceLanguage().
 *
 * Every row has four entries, in the order of the test's parameters:
 * [ pageBundleLanguageCode, titleLanguageCode, sourceLanguageCode, contentLanguage ].
 * Data set names must be unique, and each row must supply all four values.
 *
 * @return iterable
 */
public function provideSourceLanguage() {
	yield 'Content language is used when available' =>
		[ 'sr-el', 'sr-ec', null, 'sr' ];
	yield 'PageBundle language is used when content language is not available' =>
		[ 'en', 'en-gb', null, null ];
	yield 'Title page language is used if PageBundle and content language are not available' =>
		[ null, 'en-ca', null, null ];
	yield 'Source language is used if given' =>
		[ null, 'en-ca', 'en-gb', null ];
}
/** @dataProvider provideSiteConfiguration */
@ -130,6 +137,7 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
$shouldPageConfigFactoryBeUsed,
$isLanguageConversionEnabled,
$pageBundleLanguageCode,
null,
$titleLanguageCode,
$targetLanguageCode,
$sourceLanguageCode,
@ -157,6 +165,7 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
* @param bool $shouldPageConfigFactoryBeUsed
* @param bool $isLanguageConversionEnabled
* @param string|null $pageBundleLanguageCode
* @param string|null $contentLanguage
* @param string $titleLanguageCode
* @param string $targetLanguageCode
* @param string|null $sourceLanguageCode
@ -169,13 +178,16 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
bool $shouldPageConfigFactoryBeUsed,
bool $isLanguageConversionEnabled,
?string $pageBundleLanguageCode,
?string $contentLanguage,
string $titleLanguageCode,
string $targetLanguageCode,
?string $sourceLanguageCode,
array $parsoidSettings
): LanguageVariantConverter {
// If PageBundle language code is set, use that else, fallback to title page language
$pageLanguageCode = $pageBundleLanguageCode ?? $titleLanguageCode;
// If Content language is set, use language from there,
// If PageBundle language code is set, use that
// Else, fallback to title page language
$pageLanguageCode = $contentLanguage ?? $pageBundleLanguageCode ?? $titleLanguageCode;
$shouldSiteConfigBeUsed = true;
$parsoidSettings = [];
@ -204,7 +216,7 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
]
);
return new LanguageVariantConverter(
$languageVariantConverter = new LanguageVariantConverter(
$pageIdentityValue,
$pageConfigFactoryMock,
$parsoidMock,
@ -212,6 +224,12 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
$siteConfigMock,
$titleFactoryMock
);
if ( $contentLanguage ) {
$languageVariantConverter->setPageContentLanguage( $contentLanguage );
}
return $languageVariantConverter;
}
// Mock methods follow