LanguageVariantConverter: Add fallback to core LanguageConverter

If variant conversion is not supported by Parsoid, fall back to using
the old LanguageConverter.

We still call Parsoid to perform variant conversion in order to add
metadata that is missing when the core language converter is used.

Bug: T318401
Change-Id: I0499c853b4e301f135339fc137054bd760ee237d
Depends-On: Ie94aaa11963ec1e9e99136af469a05fa4005710d
This commit is contained in:
Abijeet 2022-11-03 16:22:25 +05:30
parent 043befec82
commit 5c113a833a
6 changed files with 131 additions and 5 deletions

View file

@ -748,6 +748,7 @@ return [
$services->getContentHandlerFactory(),
$services->getParsoidSiteConfig(),
$services->getTitleFactory(),
$services->getLanguageConverterFactory(),
$services->getLanguageFactory()
);
},

View file

@ -3,6 +3,7 @@
namespace MediaWiki\Parser\Parsoid;
use MediaWiki\Content\IContentHandlerFactory;
use MediaWiki\Languages\LanguageConverterFactory;
use MediaWiki\Languages\LanguageFactory;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
@ -34,6 +35,9 @@ class HtmlTransformFactory {
/** @var TitleFactory */
private $titleFactory;
/** @var LanguageConverterFactory */
private $languageConverterFactory;
/** @var LanguageFactory */
private $languageFactory;
@ -44,6 +48,7 @@ class HtmlTransformFactory {
* @param IContentHandlerFactory $contentHandlerFactory
* @param SiteConfig $siteConfig
* @param TitleFactory $titleFactory
* @param LanguageConverterFactory $languageConverterFactory
* @param LanguageFactory $languageFactory
*/
public function __construct(
@ -53,6 +58,7 @@ class HtmlTransformFactory {
IContentHandlerFactory $contentHandlerFactory,
SiteConfig $siteConfig,
TitleFactory $titleFactory,
LanguageConverterFactory $languageConverterFactory,
LanguageFactory $languageFactory
) {
$this->parsoid = $parsoid;
@ -61,6 +67,7 @@ class HtmlTransformFactory {
$this->contentHandlerFactory = $contentHandlerFactory;
$this->siteConfig = $siteConfig;
$this->titleFactory = $titleFactory;
$this->languageConverterFactory = $languageConverterFactory;
$this->languageFactory = $languageFactory;
}
@ -99,7 +106,8 @@ class HtmlTransformFactory {
$this->parsoidSettings,
$this->siteConfig,
$this->titleFactory,
$this->languageFactory,
$this->languageConverterFactory,
$this->languageFactory
);
}

View file

@ -2,6 +2,7 @@
namespace MediaWiki\Parser\Parsoid;
use MediaWiki\Languages\LanguageConverterFactory;
use MediaWiki\Languages\LanguageFactory;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
@ -43,11 +44,19 @@ class LanguageVariantConverter {
/** @var TitleFactory */
private $titleFactory;
private LanguageFactory $languageFactory;
/** @var LanguageConverterFactory */
private $languageConverterFactory;
/** @var LanguageFactory */
private $languageFactory;
/** @var string */
private $pageLanguageOverride;
/** @var bool */
private $isFallbackLanguageConverterEnabled = true;
public function __construct(
PageIdentity $pageIdentity,
PageConfigFactory $pageConfigFactory,
@ -55,6 +64,7 @@ class LanguageVariantConverter {
array $parsoidSettings,
SiteConfig $siteConfig,
TitleFactory $titleFactory,
LanguageConverterFactory $languageConverterFactory,
LanguageFactory $languageFactory
) {
$this->pageConfigFactory = $pageConfigFactory;
@ -63,10 +73,10 @@ class LanguageVariantConverter {
$this->parsoidSettings = $parsoidSettings;
$this->siteConfig = $siteConfig;
$this->titleFactory = $titleFactory;
$this->languageFactory = $languageFactory;
// @phan-suppress-next-line PhanPossiblyNullTypeMismatchProperty
$this->pageTitle = $this->titleFactory->castFromPageIdentity( $this->pageIdentity );
$this->languageConverterFactory = $languageConverterFactory;
$this->languageFactory = $languageFactory;
}
/**
@ -116,6 +126,40 @@ class LanguageVariantConverter {
$pageConfig = $this->getPageConfig( $pageLanguageCode, $sourceVariantCode );
if ( !$this->parsoid->implementsLanguageConversion( $pageConfig, $targetVariantCode ) ) {
if ( $this->isFallbackLanguageConverterEnabled ) {
$baseLanguage = $this->languageFactory->getParentLanguage( $targetVariantCode );
$languageConverter = $this->languageConverterFactory->getLanguageConverter( $baseLanguage );
$convertedHtml = $languageConverter->convertTo( $pageBundle->html, $targetVariantCode );
$targetVariantPageBundle = new PageBundle(
$convertedHtml,
[],
[],
$pageBundle->version,
[ 'content-language' => $targetVariantCode ]
);
// Hack: Even though variant conversion for the language is not supported by Parsoid, we pass the
// HTML to parsoid for variant conversion in order to add metadata that is missing when we use the
// core LanguageConverter directly.
$targetVariantPageBundle = $this->parsoid->pb2pb(
$pageConfig, 'variant', $targetVariantPageBundle,
[
'variant' => [
'source' => $sourceVariantCode,
'target' => $targetVariantCode
]
]
);
return $targetVariantPageBundle;
}
// Fallback variant conversion is not enabled, return the page bundle as is.
return $pageBundle;
}
$modifiedPageBundle = $this->parsoid->pb2pb(
$pageConfig, 'variant', $pageBundle,
[
@ -149,6 +193,14 @@ class LanguageVariantConverter {
return PageBundleParserOutputConverter::parserOutputFromPageBundle( $modifiedPageBundle );
}
/**
 * Turn off the fallback to the core LanguageConverter.
 *
 * While the fallback is enabled (the default), a target variant that Parsoid
 * does not implement is converted with the core LanguageConverter. After this
 * call, such a page bundle is returned as is instead.
 *
 * @return void
 */
public function disableFallbackLanguageConverter(): void {
	$this->isFallbackLanguageConverterEnabled = false;
}
private function getPageConfig( string $pageLanguageCode, ?string $sourceVariantCode ): PageConfig {
if ( $this->pageConfig ) {
return $this->pageConfig;

View file

@ -6,16 +6,18 @@ describe( 'Page Source', () => {
const page = utils.title( 'PageSource_' );
const pageWithSpaces = page.replace( '_', ' ' );
const variantPage = utils.title( 'PageSourceVariant' );
const fallbackVariantPage = 'MediaWiki:Tog-underline/kk-latn';
const redirectPage = utils.title( 'Redirect ' );
const redirectedPage = redirectPage.replace( 'Redirect', 'Redirected' );
const client = new REST();
const anon = action.getAnon();
let mindy;
const baseEditText = "''Edit 1'' and '''Edit 2'''";
before( async () => {
const mindy = await action.mindy();
mindy = await action.mindy();
await anon.edit( page, { text: baseEditText } );
// Setup page with redirects
@ -194,6 +196,17 @@ describe( 'Page Source', () => {
assert.match( headers[ 'content-language' ], /en-x-piglatin/i );
assert.match( headers.etag, /en-x-piglatin/i );
} );
it( 'Should perform fallback variant conversion', async () => {
	// Write Latin-script text to the kk-latn page, then request it as kk-cyrl.
	await mindy.edit( fallbackVariantPage, { text: 'Siltemeniñ astın sız:' } );

	const requestHeaders = { 'accept-language': 'kk-cyrl' };
	const response = await client.get(
		`/page/${encodeURIComponent( fallbackVariantPage )}/html`,
		null,
		requestHeaders
	);

	// The body must contain the converted text.
	assert.match( response.text, /Сілтеменің астын сыз:/ );

	// The response headers must reflect the negotiated variant.
	const responseHeaders = response.headers;
	assert.match( responseHeaders.vary, /\bAccept-Language\b/i );
	assert.match( responseHeaders[ 'content-language' ], /kk-cyrl/i );
	assert.match( responseHeaders.etag, /kk-cyrl/i );
} );
} );
describe( 'GET /page/{title}/with_html', () => {
@ -283,5 +296,22 @@ describe( 'Page Source', () => {
assert.match( headers[ 'content-language' ], /en-x-piglatin/i );
}
} );
it( 'Should perform fallback variant conversion', async () => {
	// Write Latin-script text to the kk-latn page, then request it as kk-cyrl.
	await mindy.edit( fallbackVariantPage, { text: 'Siltemeniñ astın sız:' } );

	// Request the with_html endpoint that this describe block covers; the /html
	// endpoint already has its own fallback variant conversion test.
	const { headers, text } = await client.get( `/page/${encodeURIComponent( fallbackVariantPage )}/with_html`, null, {
		'accept-language': 'kk-cyrl'
	} );

	assert.match( text, /Сілтеменің астын сыз:/ );
	assert.match( headers.vary, /\bAccept-Language\b/i );
	assert.match( headers.etag, /kk-cyrl/i );

	// Since with_html returns JSON, content language is not set,
	// but if it's set, we expect it to be set correctly.
	const contentLanguageHeader = headers[ 'content-language' ];
	if ( contentLanguageHeader ) {
		assert.match( contentLanguageHeader, /kk-cyrl/i );
	}
} );
} );
} );

View file

@ -140,6 +140,20 @@ class LanguageVariantConverterTest extends MediaWikiIntegrationTestCase {
'>Hallo Wereld<',
false // The output language is currently not indicated. Should be expected to be 'nl' in the future.
];
yield 'Variant conversion with fallback to core LanguageConverter' => [
new PageBundle(
'<p>Siltemeniñ astın sız:</p>',
[ 'parsoid-data' ],
[ 'mw-data' ],
Parsoid::defaultHTMLVersion(),
[]
),
null,
'kk-cyrl',
'kk-latn',
'<p>Сілтеменің астын сыз:</p>',
'kk-cyrl|kk-Cyrl'
];
}
/**
@ -229,6 +243,7 @@ class LanguageVariantConverterTest extends MediaWikiIntegrationTestCase {
MainConfigSchema::getDefaultValue( MainConfigNames::ParsoidSettings ),
$this->getServiceContainer()->getParsoidSiteConfig(),
$this->getServiceContainer()->getTitleFactory(),
$this->getServiceContainer()->getLanguageConverterFactory(),
$this->getServiceContainer()->getLanguageFactory()
);
}

View file

@ -3,6 +3,8 @@
namespace MediaWiki\Parser\Parsoid;
use Language;
use LanguageConverter;
use MediaWiki\Languages\LanguageConverterFactory;
use MediaWiki\Languages\LanguageFactory;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Page\PageIdentityValue;
@ -259,6 +261,7 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
$parsoidSettings,
$siteConfigMock,
$titleFactoryMock,
$this->getLanguageConverterFactoryMock(),
$languageFactoryMock
);
@ -311,6 +314,9 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
->method( 'pb2pb' );
}
$mock->method( 'implementsLanguageConversion' )
->willReturn( true );
return $mock;
}
@ -397,6 +403,7 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
$mock->headers = [
'content-language' => $languageCode
];
$mock->html = 'test message';
return $mock;
}
@ -420,4 +427,17 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
return $languageMock;
}
/**
 * Build a LanguageConverterFactory mock for use as a constructor dependency.
 *
 * The factory hands out a LanguageConverter mock whose convertTo() returns
 * the text it was given, so no actual variant conversion takes place.
 *
 * @return LanguageConverterFactory mock object
 */
private function getLanguageConverterFactoryMock() {
	// Converter stub: convertTo() echoes back its first argument.
	$converterStub = $this->createMock( LanguageConverter::class );
	$converterStub->method( 'convertTo' )
		->willReturnCallback( static fn ( $text, $code ) => $text );

	// Factory stub: always returns the converter stub above.
	$factoryStub = $this->createMock( LanguageConverterFactory::class );
	$factoryStub->method( 'getLanguageConverter' )
		->willReturn( $converterStub );

	return $factoryStub;
}
}