Merge "Re-apply: Introduce LanguageVariantConverter"

This commit is contained in:
jenkins-bot 2022-10-06 11:25:39 +00:00 committed by Gerrit Code Review
commit ca5814e21f
7 changed files with 681 additions and 21 deletions

View file

@ -1060,9 +1060,9 @@ abstract class ParsoidHandler extends Handler {
PageConfig $pageConfig, array $attribs, array $revision
) {
$opts = $attribs['opts'];
$source = $opts['updates']['variant']['source'] ?? null;
$target = $opts['updates']['variant']['target'] ??
$attribs['envOptions']['htmlVariantLanguage'];
$source = $opts['updates']['variant']['source'] ?? null;
if ( !$target ) {
throw new HttpException(
@ -1070,15 +1070,7 @@ abstract class ParsoidHandler extends Handler {
);
}
if ( !$this->siteConfig->langConverterEnabledForLanguage(
$pageConfig->getPageLanguage()
) ) {
throw new HttpException(
'LanguageConversion is not enabled on this article.', 400
);
}
$parsoid = $this->newParsoid();
$pageIdentity = $this->tryToCreatePageIdentity( $attribs );
$pb = new PageBundle(
$revision['html']['body'],
@ -1088,15 +1080,20 @@ abstract class ParsoidHandler extends Handler {
$revision['html']['headers'] ?? null,
$revision['contentmodel'] ?? null
);
$out = $parsoid->pb2pb(
$pageConfig, 'variant', $pb,
[
'variant' => [
'source' => $source,
'target' => $target,
]
]
);
$languageVariantConverter = MediaWikiServices::getInstance()
->getHTMLTransformFactory()
->getLanguageVariantConverter( $pageIdentity );
$languageVariantConverter->setPageConfig( $pageConfig );
try {
$out = $languageVariantConverter->convertPageBundleVariant( $pb, $target, $source );
} catch ( InvalidArgumentException $e ) {
throw new HttpException(
'Unsupported language conversion',
400,
[ 'reason' => $e->getMessage() ]
);
}
$response = $this->getResponseFactory()->createJson( $out->responseData() );
ParsoidFormatHelper::setContentType(

View file

@ -740,7 +740,9 @@ return [
$services->getService( '_Parsoid' ),
$services->getMainConfig()->get( MainConfigNames::ParsoidSettings ),
$services->getParsoidPageConfigFactory(),
$services->getContentHandlerFactory()
$services->getContentHandlerFactory(),
$services->getParsoidSiteConfig(),
$services->getTitleFactory()
);
},

View file

@ -5,6 +5,8 @@ namespace MediaWiki\Parser\Parsoid;
use MediaWiki\Content\IContentHandlerFactory;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
use TitleFactory;
use Wikimedia\Parsoid\Config\SiteConfig;
use Wikimedia\Parsoid\Parsoid;
/**
@ -25,22 +27,34 @@ class HTMLTransformFactory {
/** @var IContentHandlerFactory */
private $contentHandlerFactory;
/** @var SiteConfig */
private $siteConfig;
/** @var TitleFactory */
private $titleFactory;
/**
 * Store the collaborators this factory hands to the objects it creates.
 *
 * @param Parsoid $parsoid
 * @param array $parsoidSettings
 * @param PageConfigFactory $configFactory
 * @param IContentHandlerFactory $contentHandlerFactory
 * @param SiteConfig $siteConfig Passed on to LanguageVariantConverter instances
 * @param TitleFactory $titleFactory Passed on to LanguageVariantConverter instances
 */
public function __construct(
	Parsoid $parsoid,
	array $parsoidSettings,
	PageConfigFactory $configFactory,
	IContentHandlerFactory $contentHandlerFactory,
	SiteConfig $siteConfig,
	TitleFactory $titleFactory
) {
	$this->parsoid = $parsoid;
	$this->parsoidSettings = $parsoidSettings;
	$this->configFactory = $configFactory;
	$this->contentHandlerFactory = $contentHandlerFactory;
	$this->siteConfig = $siteConfig;
	$this->titleFactory = $titleFactory;
}
/**
@ -63,4 +77,22 @@ class HTMLTransformFactory {
);
}
/**
 * Build a LanguageVariantConverter bound to the given page.
 *
 * The converter is handed this factory's page config factory, Parsoid
 * instance, Parsoid settings, site config and title factory.
 *
 * @param PageIdentity $page
 *
 * @return LanguageVariantConverter
 */
public function getLanguageVariantConverter( PageIdentity $page ): LanguageVariantConverter {
	$converter = new LanguageVariantConverter(
		$page,
		$this->configFactory,
		$this->parsoid,
		$this->parsoidSettings,
		$this->siteConfig,
		$this->titleFactory
	);

	return $converter;
}
}

View file

@ -0,0 +1,163 @@
<?php
namespace MediaWiki\Parser\Parsoid;
use InvalidArgumentException;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
use MediaWiki\Rest\HttpException;
use MediaWiki\Revision\RevisionAccessException;
use ParserOutput;
use Title;
use TitleFactory;
use Wikimedia\Parsoid\Config\PageConfig;
use Wikimedia\Parsoid\Config\SiteConfig;
use Wikimedia\Parsoid\Core\PageBundle;
use Wikimedia\Parsoid\Parsoid;
/**
 * Performs language variant conversion on PageBundle and ParserOutput
 * objects by delegating to Parsoid's 'variant' page bundle transform.
 *
 * @since 1.40
 * @unstable should be marked stable before 1.40 release
 */
class LanguageVariantConverter {
	/** @var PageConfigFactory */
	private $pageConfigFactory;

	/** @var PageConfig|null Supplied via setPageConfig() or created on first use */
	private $pageConfig;

	/** @var PageIdentity */
	private $pageIdentity;

	/** @var Title */
	private $pageTitle;

	/** @var Parsoid */
	private $parsoid;

	/** @var array */
	private $parsoidSettings;

	/** @var SiteConfig */
	private $siteConfig;

	/** @var TitleFactory */
	private $titleFactory;

	/**
	 * @param PageIdentity $pageIdentity Page whose content is being converted
	 * @param PageConfigFactory $pageConfigFactory Used to create a PageConfig when none was set
	 * @param Parsoid $parsoid
	 * @param array $parsoidSettings Forwarded to the PageConfigFactory
	 * @param SiteConfig $siteConfig Consulted to check whether conversion is enabled
	 * @param TitleFactory $titleFactory Used to obtain a Title for the page identity
	 */
	public function __construct(
		PageIdentity $pageIdentity,
		PageConfigFactory $pageConfigFactory,
		Parsoid $parsoid,
		array $parsoidSettings,
		SiteConfig $siteConfig,
		TitleFactory $titleFactory
	) {
		$this->pageConfigFactory = $pageConfigFactory;
		$this->pageIdentity = $pageIdentity;
		$this->parsoid = $parsoid;
		$this->parsoidSettings = $parsoidSettings;
		$this->siteConfig = $siteConfig;
		$this->titleFactory = $titleFactory;
		// @phan-suppress-next-line PhanPossiblyNullTypeMismatchProperty
		$this->pageTitle = $this->titleFactory->castFromPageIdentity( $this->pageIdentity );
	}

	/**
	 * Set the PageConfig object to be used during language variant conversion.
	 * If not provided, the object will be created.
	 *
	 * @param PageConfig $pageConfig
	 * @return void
	 */
	public function setPageConfig( PageConfig $pageConfig ) {
		$this->pageConfig = $pageConfig;
	}

	/**
	 * Perform variant conversion on a PageBundle object.
	 *
	 * The page language is taken from the bundle's 'content-language' header
	 * when present, otherwise from the page language of the title.
	 *
	 * @param PageBundle $pageBundle
	 * @param string $targetVariantCode
	 * @param string|null $sourceVariantCode
	 *
	 * @return PageBundle
	 * @throws InvalidArgumentException If language conversion is not enabled
	 *   for the page language
	 * @throws HttpException If a PageConfig has to be created and access to
	 *   the revision is denied
	 */
	public function convertPageBundleVariant(
		PageBundle $pageBundle,
		string $targetVariantCode,
		?string $sourceVariantCode = null
	): PageBundle {
		$pageLanguageCode = $this->getPageLanguageCode( $pageBundle );

		if ( !$this->siteConfig->langConverterEnabledForLanguage( $pageLanguageCode ) ) {
			throw new InvalidArgumentException( "LanguageConversion is not supported for $pageLanguageCode." );
		}

		$pageConfig = $this->getPageConfig( $pageLanguageCode, $sourceVariantCode );

		return $this->parsoid->pb2pb(
			$pageConfig, 'variant', $pageBundle,
			[
				'variant' => [
					'source' => $sourceVariantCode,
					'target' => $targetVariantCode,
				]
			]
		);
	}

	/**
	 * Perform variant conversion on a ParserOutput object.
	 *
	 * The ParserOutput is turned into a PageBundle, converted with
	 * convertPageBundleVariant(), and turned back into a ParserOutput.
	 *
	 * @param ParserOutput $parserOutput
	 * @param string $targetVariantCode
	 * @param string|null $sourceVariantCode
	 *
	 * @return ParserOutput
	 * @throws InvalidArgumentException If language conversion is not enabled
	 *   for the page language
	 * @throws HttpException If a PageConfig has to be created and access to
	 *   the revision is denied
	 */
	public function convertParserOutputVariant(
		ParserOutput $parserOutput,
		string $targetVariantCode,
		?string $sourceVariantCode = null
	): ParserOutput {
		$pageBundle = PageBundleParserOutputConverter::pageBundleFromParserOutput( $parserOutput );
		$modifiedPageBundle = $this->convertPageBundleVariant( $pageBundle, $targetVariantCode, $sourceVariantCode );

		return PageBundleParserOutputConverter::parserOutputFromPageBundle( $modifiedPageBundle );
	}

	/**
	 * Return the PageConfig to convert with, creating and caching one if
	 * none has been set.
	 *
	 * A PageConfig supplied through setPageConfig() (or created by an earlier
	 * call) is returned untouched: the language and source variant arguments
	 * only take effect when a new PageConfig is created here.
	 *
	 * @param string $pageLanguageCode
	 * @param string|null $sourceVariantCode
	 *
	 * @return PageConfig
	 * @throws HttpException If the factory reports a RevisionAccessException
	 */
	private function getPageConfig( string $pageLanguageCode, ?string $sourceVariantCode ): PageConfig {
		if ( $this->pageConfig ) {
			return $this->pageConfig;
		}

		try {
			$this->pageConfig = $this->pageConfigFactory->create(
				$this->pageIdentity,
				null,
				null,
				null,
				$pageLanguageCode,
				$this->parsoidSettings
			);

			if ( $sourceVariantCode ) {
				$this->pageConfig->setVariant( $sourceVariantCode );
			}
		} catch ( RevisionAccessException $exception ) {
			// TODO: Throw a different exception, this class should not know
			//       about HTTP status codes.
			throw new HttpException( 'The specified revision is deleted or suppressed.', 404 );
		}

		return $this->pageConfig;
	}

	/**
	 * Determine the page language code: the bundle's 'content-language'
	 * header wins, the title's page language is the fallback.
	 *
	 * @param PageBundle $pageBundle
	 *
	 * @return string
	 */
	private function getPageLanguageCode( PageBundle $pageBundle ): string {
		$languageCode = $pageBundle->headers[ 'content-language' ] ?? null;

		return $languageCode ?? $this->pageTitle->getPageLanguage()->getCode();
	}
}

View file

@ -0,0 +1,127 @@
<?php
namespace MediaWiki\Parser\Parsoid;
use MediaWiki\MainConfigNames;
use MediaWiki\MainConfigSchema;
use MediaWiki\Page\PageIdentity;
use MediaWikiIntegrationTestCase;
use ParserOutput;
use Wikimedia\Parsoid\Core\PageBundle;
use Wikimedia\Parsoid\Parsoid;
/**
 * Integration tests running LanguageVariantConverter against the real
 * Parsoid services from the service container.
 *
 * @group Database
 * @covers MediaWiki\Parser\Parsoid\LanguageVariantConverter
 */
class LanguageVariantConverterTest extends MediaWikiIntegrationTestCase {
	public function setUp(): void {
		// Let the base test case run its own set-up before touching config.
		parent::setUp();

		// enable Pig Latin variant conversion
		$this->overrideConfigValue( 'UsePigLatinVariant', true );
	}

	/**
	 * Each case is: input PageBundle, target variant, source variant (or
	 * null), and a fragment expected in the converted HTML.
	 */
	public function provideConvertPageBundleVariant() {
		yield 'No source or base, rely on page language (en)' => [
			new PageBundle(
				'<p>test language conversion</p>',
				[ 'parsoid-data' ],
				[ 'mw-data' ],
				Parsoid::defaultHTMLVersion(),
				[]
			),
			'en-x-piglatin',
			null,
			'>esttay anguagelay onversioncay<'
		];
		yield 'Source variant is base language' => [
			new PageBundle(
				'<p>test language conversion</p>',
				[ 'parsoid-data' ],
				[ 'mw-data' ],
				Parsoid::defaultHTMLVersion(),
				[ 'content-language' => 'en' ]
			),
			'en-x-piglatin',
			'en',
			'>esttay anguagelay onversioncay<'
		];
		yield 'Source language is null' => [
			new PageBundle(
				'<p>Ово је тестна страница</p>',
				[ 'parsoid-data' ],
				[ 'mw-data' ],
				Parsoid::defaultHTMLVersion(),
				[ 'content-language' => 'sr' ]
			),
			'sr-el',
			null,
			'>Ovo je testna stranica<'
		];
		yield 'Source language is explicit' => [
			new PageBundle(
				'<p>Ово је тестна страница</p>',
				[ 'parsoid-data' ],
				[ 'mw-data' ],
				Parsoid::defaultHTMLVersion(),
				[ 'content-language' => 'sr' ]
			),
			'sr-el',
			'sr-ec',
			'>Ovo je testna stranica<'
		];
	}

	/**
	 * Converting a PageBundle yields the expected text and reports the
	 * target variant in both the HTML meta tag and the bundle headers.
	 *
	 * @dataProvider provideConvertPageBundleVariant
	 */
	public function testConvertPageBundleVariant( PageBundle $pageBundle, $target, $source, $expected ) {
		$page = $this->getExistingTestPage();
		$languageVariantConverter = $this->getLanguageVariantConverter( $page );

		$outputPageBundle = $languageVariantConverter->convertPageBundleVariant( $pageBundle, $target, $source );

		$html = $outputPageBundle->toHtml();
		$this->assertStringContainsString( $expected, $html );
		$this->assertStringContainsString( "<meta http-equiv=\"content-language\" content=\"$target\"/>", $html );
		$this->assertEquals( $target, $outputPageBundle->headers['content-language'] );
		$this->assertEquals( Parsoid::defaultHTMLVersion(), $outputPageBundle->version );
	}

	/**
	 * Same cases as provideConvertPageBundleVariant(), with the PageBundle
	 * wrapped into a ParserOutput.
	 */
	public function provideConvertParserOutputVariant() {
		foreach ( $this->provideConvertPageBundleVariant() as $name => $case ) {
			$case[0] = PageBundleParserOutputConverter::parserOutputFromPageBundle( $case[0] );
			yield $name => $case;
		}
	}

	/**
	 * Converting a ParserOutput yields the expected text and reports the
	 * target variant in both the HTML meta tag and the stored page bundle
	 * extension data.
	 *
	 * @dataProvider provideConvertParserOutputVariant
	 */
	public function testConvertParserOutputVariant( ParserOutput $parserOutput, $target, $source, $expected ) {
		$page = $this->getExistingTestPage();
		$languageVariantConverter = $this->getLanguageVariantConverter( $page );

		$modifiedParserOutput = $languageVariantConverter
			->convertParserOutputVariant( $parserOutput, $target, $source );

		$html = $modifiedParserOutput->getRawText();
		$this->assertStringContainsString( $expected, $html );
		$this->assertStringContainsString( "<meta http-equiv=\"content-language\" content=\"$target\"/>", $html );

		$extensionData = $modifiedParserOutput
			->getExtensionData( PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY );
		$this->assertEquals( $target, $extensionData['headers']['content-language'] );
		$this->assertEquals( Parsoid::defaultHTMLVersion(), $extensionData['version'] );
	}

	/**
	 * Build a converter for the page from the service container's Parsoid
	 * services and the default ParsoidSettings.
	 *
	 * @param PageIdentity $pageIdentity
	 *
	 * @return LanguageVariantConverter
	 */
	private function getLanguageVariantConverter( PageIdentity $pageIdentity ): LanguageVariantConverter {
		return new LanguageVariantConverter(
			$pageIdentity,
			$this->getServiceContainer()->getParsoidPageConfigFactory(),
			$this->getServiceContainer()->getService( '_Parsoid' ),
			MainConfigSchema::getDefaultValue( MainConfigNames::ParsoidSettings ),
			$this->getServiceContainer()->getParsoidSiteConfig(),
			$this->getServiceContainer()->getTitleFactory()
		);
	}
}

View file

@ -0,0 +1,339 @@
<?php
namespace MediaWiki\Parser\Parsoid;
use InvalidArgumentException;
use Language;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Page\PageIdentityValue;
use MediaWiki\Parser\Parsoid\Config\PageConfig;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
use MediaWiki\Parser\Parsoid\Config\SiteConfig;
use MediaWikiUnitTestCase;
use PHPUnit\Framework\MockObject\MockObject;
use Title;
use TitleFactory;
use Wikimedia\Parsoid\Core\PageBundle;
use Wikimedia\Parsoid\Parsoid;
/**
 * Mock-based unit tests checking which collaborators
 * LanguageVariantConverter calls and with which arguments.
 *
 * @covers MediaWiki\Parser\Parsoid\LanguageVariantConverter
 */
class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {

	/** @dataProvider provideSetConfig */
	public function testSetConfig( bool $shouldPageConfigFactoryBeUsed ) {
		// Decide what should be called and what should not be
		$shouldParsoidBeUsed = true;
		$isLanguageConversionEnabled = true;

		// Set expected language codes
		$pageBundleLanguageCode = 'zh';
		$titleLanguageCode = 'zh-hans';
		$targetLanguageCode = 'zh-hans';
		$sourceLanguageCode = null;

		// Create mocks
		$parsoidSettings = [];
		$pageConfigMock = $this->getPageConfigMock();
		$pageBundleMock = $this->getPageBundleMock( $pageBundleLanguageCode );
		$languageVariantConverter = $this->getLanguageVariantConverter(
			$shouldParsoidBeUsed,
			$shouldPageConfigFactoryBeUsed,
			$isLanguageConversionEnabled,
			$pageBundleLanguageCode,
			$titleLanguageCode,
			$targetLanguageCode,
			$sourceLanguageCode,
			$parsoidSettings
		);

		if ( !$shouldPageConfigFactoryBeUsed ) {
			$languageVariantConverter->setPageConfig( $pageConfigMock );
		}

		$languageVariantConverter->convertPageBundleVariant( $pageBundleMock, $targetLanguageCode );
	}

	public function provideSetConfig() {
		yield 'PageConfigFactory should not be used if PageConfig is set' => [ false ];
		yield 'PageConfigFactory should be used if PageConfig is not set' => [ true ];
	}

	/** @dataProvider provideSourceLanguage */
	public function testSourceLanguage(
		?string $pageBundleLanguageCode,
		string $titleLanguageCode,
		?string $sourceLanguageCode
	) {
		// Decide what should be called and what should not be
		$shouldParsoidBeUsed = true;
		$shouldPageConfigFactoryBeUsed = true;
		$isLanguageConversionEnabled = true;

		// Set expected language codes
		$targetLanguageCode = 'zh-hans';
		$parsoidSettings = [];

		// Create mocks
		if ( $pageBundleLanguageCode ) {
			$pageBundleMock = $this->getPageBundleMock( $pageBundleLanguageCode );
		} else {
			$pageBundleMock = $this->getPageBundleMockWithoutLanguage();
		}

		$languageVariantConverter = $this->getLanguageVariantConverter(
			$shouldParsoidBeUsed,
			$shouldPageConfigFactoryBeUsed,
			$isLanguageConversionEnabled,
			$pageBundleLanguageCode,
			$titleLanguageCode,
			$targetLanguageCode,
			$sourceLanguageCode,
			$parsoidSettings
		);

		$languageVariantConverter->convertPageBundleVariant( $pageBundleMock, $targetLanguageCode, $sourceLanguageCode );
	}

	public function provideSourceLanguage() {
		yield 'PageBundle language is used when available' => [ 'en', 'en-gb', null ];
		yield 'Title page language is used if PageBundle language is not available' => [ null, 'en-ca', null ];
		yield 'Source language is used if given' => [ null, 'en-ca', 'en-gb' ];
	}

	/** @dataProvider provideSiteConfiguration */
	public function testSiteConfiguration(
		bool $isLanguageConversionEnabled,
		bool $shouldParsoidBeUsed,
		bool $shouldPageConfigFactoryBeUsed
	) {
		// Set expected language codes
		$pageBundleLanguageCode = 'zh';
		$titleLanguageCode = 'zh-hans';
		$targetLanguageCode = 'zh-hans';
		$sourceLanguageCode = null;

		// Create mocks
		$parsoidSettings = [];

		if ( !$isLanguageConversionEnabled ) {
			$this->expectException( InvalidArgumentException::class );
			$this->expectExceptionMessage( 'LanguageConversion is not supported' );
		}

		$pageBundleMock = $this->getPageBundleMock( $pageBundleLanguageCode );
		$languageVariantConverter = $this->getLanguageVariantConverter(
			$shouldParsoidBeUsed,
			$shouldPageConfigFactoryBeUsed,
			$isLanguageConversionEnabled,
			$pageBundleLanguageCode,
			$titleLanguageCode,
			$targetLanguageCode,
			$sourceLanguageCode,
			$parsoidSettings
		);

		$languageVariantConverter->convertPageBundleVariant( $pageBundleMock, $targetLanguageCode );
	}

	public function provideSiteConfiguration() {
		$isLanguageConversionEnabled = false;
		$shouldParsoidBeUsed = false;
		$shouldPageConfigFactoryBeUsed = false;
		yield 'If language conversion is disabled, parsoid and page config factory should not be used' =>
			[ $isLanguageConversionEnabled, $shouldParsoidBeUsed, $shouldPageConfigFactoryBeUsed ];

		$isLanguageConversionEnabled = true;
		$shouldParsoidBeUsed = true;
		$shouldPageConfigFactoryBeUsed = true;
		yield 'If language conversion is enabled, parsoid and page config factory should be used' =>
			[ $isLanguageConversionEnabled, $shouldParsoidBeUsed, $shouldPageConfigFactoryBeUsed ];
	}

	/**
	 * Build a LanguageVariantConverter wired entirely with mocks whose
	 * expectations encode the given scenario.
	 *
	 * @param bool $shouldParsoidBeUsed Whether Parsoid::pb2pb() must be called once (or never)
	 * @param bool $shouldPageConfigFactoryBeUsed Whether PageConfigFactory::create() must be
	 *   called once (or never)
	 * @param bool $isLanguageConversionEnabled Value returned by the SiteConfig mock
	 * @param string|null $pageBundleLanguageCode
	 * @param string $titleLanguageCode
	 * @param string $targetLanguageCode
	 * @param string|null $sourceLanguageCode
	 * @param array $parsoidSettings Given to the converter and expected as the last
	 *   argument of PageConfigFactory::create()
	 *
	 * @return LanguageVariantConverter
	 */
	private function getLanguageVariantConverter(
		bool $shouldParsoidBeUsed,
		bool $shouldPageConfigFactoryBeUsed,
		bool $isLanguageConversionEnabled,
		?string $pageBundleLanguageCode,
		string $titleLanguageCode,
		string $targetLanguageCode,
		?string $sourceLanguageCode,
		array $parsoidSettings
	): LanguageVariantConverter {
		// If PageBundle language code is set, use that else, fallback to title page language
		$pageLanguageCode = $pageBundleLanguageCode ?? $titleLanguageCode;
		$shouldSiteConfigBeUsed = true;

		$pageIdentityValue = new PageIdentityValue( 1, NS_MAIN, 'hello_world', PageIdentity::LOCAL );

		// Create the necessary mocks
		$pageConfigMock = $this->getPageConfigMock();
		$pageConfigFactoryMock = $this->getPageConfigFactoryMock(
			$shouldPageConfigFactoryBeUsed,
			// Expected arguments to PageConfigFactory mock
			[ $pageIdentityValue, null, null, null, $pageLanguageCode, $parsoidSettings ],
			$pageConfigMock
		);
		$pageBundleMock = $this->getPageBundleMock( $pageBundleLanguageCode );
		$siteConfigMock = $this->getSiteConfigMock(
			$shouldSiteConfigBeUsed, $pageLanguageCode, $isLanguageConversionEnabled
		);
		$titleFactoryMock = $this->getTitleFactoryMock( $pageIdentityValue, $titleLanguageCode );
		$parsoidMock = $this->getParsoidMock(
			$shouldParsoidBeUsed,
			[
				$pageConfigMock,
				'variant',
				$pageBundleMock,
				[ 'variant' => [ 'source' => $sourceLanguageCode, 'target' => $targetLanguageCode ] ]
			]
		);

		return new LanguageVariantConverter(
			$pageIdentityValue,
			$pageConfigFactoryMock,
			$parsoidMock,
			$parsoidSettings,
			$siteConfigMock,
			$titleFactoryMock
		);
	}

	// Mock methods follow

	/**
	 * PageConfigFactory mock expecting create() exactly once with the given
	 * arguments, or never.
	 *
	 * @param bool $shouldBeCalled
	 * @param array $arguments
	 * @param PageConfig $pageConfig Returned by create() when it is expected
	 *
	 * @return MockObject|PageConfigFactory
	 */
	private function getPageConfigFactoryMock( bool $shouldBeCalled, array $arguments, PageConfig $pageConfig ) {
		$mock = $this->createMock( PageConfigFactory::class );

		if ( $shouldBeCalled ) {
			$mock->expects( $this->once() )
				->method( 'create' )
				->with( ...$arguments )
				->willReturn( $pageConfig );
		} else {
			$mock->expects( $this->never() )
				->method( 'create' );
		}

		return $mock;
	}

	/**
	 * Parsoid mock expecting pb2pb() exactly once with the given arguments,
	 * or never.
	 *
	 * @param bool $shouldBeCalled
	 * @param array $arguments
	 *
	 * @return MockObject|Parsoid
	 */
	private function getParsoidMock( bool $shouldBeCalled, array $arguments ) {
		$mock = $this->createMock( Parsoid::class );

		if ( $shouldBeCalled ) {
			$mock->expects( $this->once() )
				->method( 'pb2pb' )
				->with( ...$arguments );
		} else {
			$mock->expects( $this->never() )
				->method( 'pb2pb' );
		}

		return $mock;
	}

	/**
	 * SiteConfig mock expecting langConverterEnabledForLanguage() exactly
	 * once for the given language, or never.
	 *
	 * @param bool $shouldBeCalled
	 * @param string $baseLanguageCode
	 * @param bool $isLanguageConversionEnabled
	 *
	 * @return MockObject|SiteConfig
	 */
	private function getSiteConfigMock(
		bool $shouldBeCalled,
		string $baseLanguageCode,
		bool $isLanguageConversionEnabled
	) {
		$mock = $this->createMock( SiteConfig::class );

		if ( $shouldBeCalled ) {
			$mock->expects( $this->once() )
				->method( 'langConverterEnabledForLanguage' )
				->with( $baseLanguageCode )
				->willReturn( $isLanguageConversionEnabled );
		} else {
			$mock->expects( $this->never() )
				->method( 'langConverterEnabledForLanguage' );
		}

		return $mock;
	}

	/**
	 * TitleFactory mock whose castFromPageIdentity() must be called once
	 * with the page identity and returns a Title mock reporting the given
	 * page language code.
	 *
	 * @param PageIdentity $pageIdentity
	 * @param string $languageCode
	 *
	 * @return MockObject|TitleFactory
	 */
	private function getTitleFactoryMock( PageIdentity $pageIdentity, string $languageCode ) {
		$languageMock = $this->createMock( Language::class );
		$languageMock->method( 'getCode' )
			->willReturn( $languageCode );

		$titleMock = $this->createMock( Title::class );
		$titleMock->method( 'getPageLanguage' )
			->willReturn( $languageMock );

		$mock = $this->createMock( TitleFactory::class );
		$mock->expects( $this->once() )
			->method( 'castFromPageIdentity' )
			->with( $pageIdentity )
			->willReturn( $titleMock );

		return $mock;
	}

	/**
	 * PageBundle mock whose 'content-language' header is null.
	 *
	 * @return MockObject|PageBundle
	 */
	private function getPageBundleMockWithoutLanguage() {
		return $this->getPageBundleMock( null );
	}

	/**
	 * PageBundle mock carrying the given 'content-language' header.
	 *
	 * @param string|null $languageCode
	 *
	 * @return MockObject|PageBundle
	 */
	private function getPageBundleMock( ?string $languageCode ) {
		$mock = $this->createMock( PageBundle::class );
		$mock->headers = [
			'content-language' => $languageCode
		];

		return $mock;
	}

	/**
	 * PageConfig mock on which only setVariant() may be called.
	 *
	 * @return MockObject|PageConfig
	 */
	private function getPageConfigMock() {
		$mock = $this->createNoOpMock( PageConfig::class, [ 'setVariant' ] );

		return $mock;
	}
}