2022-10-04 11:54:37 +00:00
|
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
|
|
namespace MediaWiki\Parser\Parsoid;
|
|
|
|
|
|
|
|
|
|
|
|
use MediaWiki\MainConfigNames;
|
|
|
|
|
|
use MediaWiki\MainConfigSchema;
|
|
|
|
|
|
use MediaWiki\Page\PageIdentity;
|
|
|
|
|
|
use MediaWikiIntegrationTestCase;
|
|
|
|
|
|
use ParserOutput;
|
|
|
|
|
|
use Wikimedia\Parsoid\Core\PageBundle;
|
|
|
|
|
|
use Wikimedia\Parsoid\Parsoid;
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Integration tests for LanguageVariantConverter, exercising variant conversion
 * of both Parsoid PageBundle objects and ParserOutput objects.
 *
 * @group Database
 * @covers MediaWiki\Parser\Parsoid\LanguageVariantConverter
 */
class LanguageVariantConverterTest extends MediaWikiIntegrationTestCase {

	/**
	 * Data sets for testConvertPageBundleVariant().
	 *
	 * Each case is a positional list:
	 *  0. PageBundle  the input bundle to convert
	 *  1. string|null page language override handed to the converter (skipped when falsy)
	 *  2. string      target variant code
	 *  3. string|null source variant code
	 *  4. string      substring that must appear in the converted HTML
	 *  5. (optional) string|false expected content language, written as a regex
	 *     alternation (e.g. 'sr-el|sr-Latn'); false disables the language
	 *     assertions; when omitted the target variant is expected.
	 *
	 * The provider is static because it does not use any instance state.
	 *
	 * @return iterable
	 */
	public static function provideConvertPageBundleVariant() {
		yield 'No source or base, rely on page language (en)' => [
			new PageBundle(
				'<p>test language conversion</p>',
				[ 'parsoid-data' ],
				[ 'mw-data' ],
				Parsoid::defaultHTMLVersion(),
				[]
			),
			null,
			'en-x-piglatin',
			null,
			'>esttay anguagelay onversioncay<'
		];
		yield 'Source variant is base language' => [
			new PageBundle(
				'<p>test language conversion</p>',
				[ 'parsoid-data' ],
				[ 'mw-data' ],
				Parsoid::defaultHTMLVersion(),
				[ 'content-language' => 'en' ]
			),
			null,
			'en-x-piglatin',
			'en',
			'>esttay anguagelay onversioncay<'
		];
		yield 'Source language is null' => [
			new PageBundle(
				'<p>Ово је тестна страница</p>',
				[ 'parsoid-data' ],
				[ 'mw-data' ],
				Parsoid::defaultHTMLVersion(),
				[ 'content-language' => 'sr' ]
			),
			null,
			'sr-el',
			null,
			'>Ovo je testna stranica<',
			'sr-el|sr-Latn' // sr-el is accepted for backwards compatibility for now
		];
		yield 'Source language is explicit' => [
			new PageBundle(
				'<p>Ово је тестна страница</p>',
				[ 'parsoid-data' ],
				[ 'mw-data' ],
				Parsoid::defaultHTMLVersion(),
				[ 'content-language' => 'sr' ]
			),
			null,
			'sr-el',
			'sr-ec',
			'>Ovo je testna stranica<',
			'sr-el|sr-Latn' // sr-el is accepted for backwards compatibility for now
		];
		yield 'Content language is provided via HTTP header' => [
			new PageBundle(
				'<p>Ово је тестна страница</p>',
				[ 'parsoid-data' ],
				[ 'mw-data' ],
				Parsoid::defaultHTMLVersion(),
				[ 'content-language' => 'sr-ec' ]
			),
			'sr',
			'sr-el',
			'sr-ec',
			'>Ovo je testna stranica<',
			'sr-el|sr-Latn' // sr-el is accepted for backwards compatibility for now
		];
		yield 'Content language is variant' => [
			new PageBundle(
				'<p>Ово је тестна страница</p>',
				[ 'parsoid-data' ],
				[ 'mw-data' ],
				Parsoid::defaultHTMLVersion(),
				[]
			),
			'sr-ec',
			'sr-el',
			null,
			'>Ovo je testna stranica<',
			'sr-el|sr-Latn' // sr-el is accepted for backwards compatibility for now
		];
		yield 'No content-language, but source variant provided' => [
			new PageBundle(
				'<p>Ово је тестна страница</p>',
				[ 'parsoid-data' ],
				[ 'mw-data' ],
				Parsoid::defaultHTMLVersion(),
				[]
			),
			null,
			'sr-el',
			'sr-ec',
			'>Ovo je testna stranica<',
			'sr-el|sr-Latn' // sr-el is accepted for backwards compatibility for now
		];
		yield 'Source variant is a base language code' => [
			new PageBundle(
				'<p>Ово је тестна страница</p>',
				[ 'parsoid-data' ],
				[ 'mw-data' ],
				Parsoid::defaultHTMLVersion(),
				[]
			),
			null,
			'sr-el',
			'sr',
			'>Ovo je testna stranica<',
			'sr-el|sr-Latn' // sr-el is accepted for backwards compatibility for now
		];
		yield 'Base language does not support variants' => [
			new PageBundle(
				'<p>Hallo Wereld</p>',
				[ 'parsoid-data' ],
				[ 'mw-data' ],
				Parsoid::defaultHTMLVersion(),
				[]
			),
			'nl',
			'nl-be',
			null,
			'>Hallo Wereld<',
			false // The output language is currently not indicated. Should be expected to be 'nl' in the future.
		];
		yield 'Variant conversion with fallback to core LanguageConverter' => [
			new PageBundle(
				'<p>Siltemeniñ astın sız:</p>',
				[ 'parsoid-data' ],
				[ 'mw-data' ],
				Parsoid::defaultHTMLVersion(),
				[]
			),
			null,
			'kk-cyrl',
			'kk-latn',
			'<p>Сілтеменің астын сыз:</p>',
			'kk-cyrl|kk-Cyrl'
		];
	}

	/**
	 * Converts a PageBundle and checks the resulting HTML, the content-language
	 * meta tag and header, and the bundle's HTML version.
	 *
	 * @dataProvider provideConvertPageBundleVariant
	 *
	 * @param PageBundle $pageBundle Input bundle
	 * @param string|null $contentLanguage Page language override; not applied when falsy
	 * @param string $target Target variant code
	 * @param string|null $source Source variant code
	 * @param string $expected Substring expected in the converted HTML
	 * @param string|false|null $expectedLanguage Regex alternation of acceptable
	 *   language codes; null means "same as $target", false skips the language checks
	 */
	public function testConvertPageBundleVariant(
		PageBundle $pageBundle,
		$contentLanguage,
		$target,
		$source,
		$expected,
		$expectedLanguage = null
	) {
		if ( $expectedLanguage === null ) {
			$expectedLanguage = $target;
		}

		$page = $this->getExistingTestPage();
		$languageVariantConverter = $this->getLanguageVariantConverter( $page );
		if ( $contentLanguage ) {
			$languageVariantConverter->setPageLanguageOverride( $contentLanguage );
		}

		$outputPageBundle = $languageVariantConverter->convertPageBundleVariant( $pageBundle, $target, $source );

		$html = $outputPageBundle->toHtml();
		$this->assertStringContainsString( $expected, $html );

		if ( $expectedLanguage !== false ) {
			$this->assertMatchesRegularExpression(
				"@<meta http-equiv=\"content-language\" content=\"($expectedLanguage)\"/>@",
				$html
			);
			// The alternation must be grouped, otherwise "^" only anchors the
			// first alternative and the others could match anywhere in the header.
			$this->assertMatchesRegularExpression(
				"@^($expectedLanguage)@",
				$outputPageBundle->headers['content-language']
			);
		}
		$this->assertEquals( Parsoid::defaultHTMLVersion(), $outputPageBundle->version );
	}

	/**
	 * Data sets for testConvertParserOutputVariant(): the same cases as
	 * provideConvertPageBundleVariant(), with the PageBundle in position 0
	 * replaced by the ParserOutput built from it.
	 *
	 * @return iterable
	 */
	public static function provideConvertParserOutputVariant() {
		foreach ( self::provideConvertPageBundleVariant() as $name => $case ) {
			$case[0] = PageBundleParserOutputConverter::parserOutputFromPageBundle( $case[0] );
			yield $name => $case;
		}
	}

	/**
	 * Converts a ParserOutput and checks the resulting raw text, the
	 * content-language meta tag, and the version and content-language header
	 * stored in the page bundle extension data.
	 *
	 * @dataProvider provideConvertParserOutputVariant
	 *
	 * @param ParserOutput $parserOutput Input parser output
	 * @param string|null $contentLanguage Page language override; not applied when falsy
	 * @param string $target Target variant code
	 * @param string|null $source Source variant code
	 * @param string $expected Substring expected in the converted HTML
	 * @param string|false|null $expectedLanguage Regex alternation of acceptable
	 *   language codes; null means "same as $target", false skips the language checks
	 */
	public function testConvertParserOutputVariant(
		ParserOutput $parserOutput,
		$contentLanguage,
		$target,
		$source,
		$expected,
		$expectedLanguage = null
	) {
		if ( $expectedLanguage === null ) {
			$expectedLanguage = $target;
		}

		$page = $this->getExistingTestPage();
		$languageVariantConverter = $this->getLanguageVariantConverter( $page );
		if ( $contentLanguage ) {
			$languageVariantConverter->setPageLanguageOverride( $contentLanguage );
		}

		$modifiedParserOutput = $languageVariantConverter
			->convertParserOutputVariant( $parserOutput, $target, $source );

		$html = $modifiedParserOutput->getRawText();
		$this->assertStringContainsString( $expected, $html );
		if ( $expectedLanguage !== false ) {
			$this->assertMatchesRegularExpression(
				"@<meta http-equiv=\"content-language\" content=\"($expectedLanguage)\"/>@",
				$html
			);
		}

		$extensionData = $modifiedParserOutput
			->getExtensionData( PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY );
		$this->assertEquals( Parsoid::defaultHTMLVersion(), $extensionData['version'] );

		if ( $expectedLanguage !== false ) {
			// Group the alternation so that "^" anchors every acceptable code,
			// not just the first one.
			$this->assertMatchesRegularExpression(
				"@^($expectedLanguage)@",
				$extensionData['headers']['content-language']
			);
		}
	}

	/**
	 * Builds the LanguageVariantConverter under test for the given page, wired
	 * with services from the test service container and the default
	 * ParsoidSettings value from the main config schema.
	 *
	 * @param PageIdentity $pageIdentity Page the converter operates on
	 * @return LanguageVariantConverter
	 */
	private function getLanguageVariantConverter( PageIdentity $pageIdentity ): LanguageVariantConverter {
		return new LanguageVariantConverter(
			$pageIdentity,
			$this->getServiceContainer()->getParsoidPageConfigFactory(),
			$this->getServiceContainer()->getService( '_Parsoid' ),
			MainConfigSchema::getDefaultValue( MainConfigNames::ParsoidSettings ),
			$this->getServiceContainer()->getParsoidSiteConfig(),
			$this->getServiceContainer()->getTitleFactory(),
			$this->getServiceContainer()->getLanguageConverterFactory(),
			$this->getServiceContainer()->getLanguageFactory()
		);
	}
}
|