LanguageVariantConverter: Add fallback to core LanguageConverter
If variant conversion is not supported by Parsoid, fall back to using the old LanguageConverter. We still call Parsoid to perform variant conversion in order to add metadata that is missing when the core language converter is used. Bug: T318401 Change-Id: I0499c853b4e301f135339fc137054bd760ee237d Depends-On: Ie94aaa11963ec1e9e99136af469a05fa4005710d
This commit is contained in:
parent
043befec82
commit
5c113a833a
6 changed files with 131 additions and 5 deletions
|
|
@ -748,6 +748,7 @@ return [
|
|||
$services->getContentHandlerFactory(),
|
||||
$services->getParsoidSiteConfig(),
|
||||
$services->getTitleFactory(),
|
||||
$services->getLanguageConverterFactory(),
|
||||
$services->getLanguageFactory()
|
||||
);
|
||||
},
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
namespace MediaWiki\Parser\Parsoid;
|
||||
|
||||
use MediaWiki\Content\IContentHandlerFactory;
|
||||
use MediaWiki\Languages\LanguageConverterFactory;
|
||||
use MediaWiki\Languages\LanguageFactory;
|
||||
use MediaWiki\Page\PageIdentity;
|
||||
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
|
||||
|
|
@ -34,6 +35,9 @@ class HtmlTransformFactory {
|
|||
/** @var TitleFactory */
|
||||
private $titleFactory;
|
||||
|
||||
/** @var LanguageConverterFactory */
|
||||
private $languageConverterFactory;
|
||||
|
||||
/** @var LanguageFactory */
|
||||
private $languageFactory;
|
||||
|
||||
|
|
@ -44,6 +48,7 @@ class HtmlTransformFactory {
|
|||
* @param IContentHandlerFactory $contentHandlerFactory
|
||||
* @param SiteConfig $siteConfig
|
||||
* @param TitleFactory $titleFactory
|
||||
* @param LanguageConverterFactory $languageConverterFactory
|
||||
* @param LanguageFactory $languageFactory
|
||||
*/
|
||||
public function __construct(
|
||||
|
|
@ -53,6 +58,7 @@ class HtmlTransformFactory {
|
|||
IContentHandlerFactory $contentHandlerFactory,
|
||||
SiteConfig $siteConfig,
|
||||
TitleFactory $titleFactory,
|
||||
LanguageConverterFactory $languageConverterFactory,
|
||||
LanguageFactory $languageFactory
|
||||
) {
|
||||
$this->parsoid = $parsoid;
|
||||
|
|
@ -61,6 +67,7 @@ class HtmlTransformFactory {
|
|||
$this->contentHandlerFactory = $contentHandlerFactory;
|
||||
$this->siteConfig = $siteConfig;
|
||||
$this->titleFactory = $titleFactory;
|
||||
$this->languageConverterFactory = $languageConverterFactory;
|
||||
$this->languageFactory = $languageFactory;
|
||||
}
|
||||
|
||||
|
|
@ -99,7 +106,8 @@ class HtmlTransformFactory {
|
|||
$this->parsoidSettings,
|
||||
$this->siteConfig,
|
||||
$this->titleFactory,
|
||||
$this->languageFactory,
|
||||
$this->languageConverterFactory,
|
||||
$this->languageFactory
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
namespace MediaWiki\Parser\Parsoid;
|
||||
|
||||
use MediaWiki\Languages\LanguageConverterFactory;
|
||||
use MediaWiki\Languages\LanguageFactory;
|
||||
use MediaWiki\Page\PageIdentity;
|
||||
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
|
||||
|
|
@ -43,11 +44,19 @@ class LanguageVariantConverter {
|
|||
|
||||
/** @var TitleFactory */
|
||||
private $titleFactory;
|
||||
private LanguageFactory $languageFactory;
|
||||
|
||||
/** @var LanguageConverterFactory */
|
||||
private $languageConverterFactory;
|
||||
|
||||
/** @var LanguageFactory */
|
||||
private $languageFactory;
|
||||
|
||||
/** @var string */
|
||||
private $pageLanguageOverride;
|
||||
|
||||
/** @var bool */
|
||||
private $isFallbackLanguageConverterEnabled = true;
|
||||
|
||||
public function __construct(
|
||||
PageIdentity $pageIdentity,
|
||||
PageConfigFactory $pageConfigFactory,
|
||||
|
|
@ -55,6 +64,7 @@ class LanguageVariantConverter {
|
|||
array $parsoidSettings,
|
||||
SiteConfig $siteConfig,
|
||||
TitleFactory $titleFactory,
|
||||
LanguageConverterFactory $languageConverterFactory,
|
||||
LanguageFactory $languageFactory
|
||||
) {
|
||||
$this->pageConfigFactory = $pageConfigFactory;
|
||||
|
|
@ -63,10 +73,10 @@ class LanguageVariantConverter {
|
|||
$this->parsoidSettings = $parsoidSettings;
|
||||
$this->siteConfig = $siteConfig;
|
||||
$this->titleFactory = $titleFactory;
|
||||
$this->languageFactory = $languageFactory;
|
||||
|
||||
// @phan-suppress-next-line PhanPossiblyNullTypeMismatchProperty
|
||||
$this->pageTitle = $this->titleFactory->castFromPageIdentity( $this->pageIdentity );
|
||||
$this->languageConverterFactory = $languageConverterFactory;
|
||||
$this->languageFactory = $languageFactory;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -116,6 +126,40 @@ class LanguageVariantConverter {
|
|||
|
||||
$pageConfig = $this->getPageConfig( $pageLanguageCode, $sourceVariantCode );
|
||||
|
||||
if ( !$this->parsoid->implementsLanguageConversion( $pageConfig, $targetVariantCode ) ) {
|
||||
if ( $this->isFallbackLanguageConverterEnabled ) {
|
||||
$baseLanguage = $this->languageFactory->getParentLanguage( $targetVariantCode );
|
||||
$languageConverter = $this->languageConverterFactory->getLanguageConverter( $baseLanguage );
|
||||
|
||||
$convertedHtml = $languageConverter->convertTo( $pageBundle->html, $targetVariantCode );
|
||||
$targetVariantPageBundle = new PageBundle(
|
||||
$convertedHtml,
|
||||
[],
|
||||
[],
|
||||
$pageBundle->version,
|
||||
[ 'content-language' => $targetVariantCode ]
|
||||
);
|
||||
|
||||
// Hack: Even though variant conversion for the language is not supported by Parsoid, we pass the
|
||||
// HTML to parsoid for variant conversion in order to add metadata that is missing when we use the
|
||||
// core LanguageConverter directly.
|
||||
$targetVariantPageBundle = $this->parsoid->pb2pb(
|
||||
$pageConfig, 'variant', $targetVariantPageBundle,
|
||||
[
|
||||
'variant' => [
|
||||
'source' => $sourceVariantCode,
|
||||
'target' => $targetVariantCode
|
||||
]
|
||||
]
|
||||
);
|
||||
|
||||
return $targetVariantPageBundle;
|
||||
}
|
||||
|
||||
// Fallback variant conversion is not enabled, return the page bundle as is.
|
||||
return $pageBundle;
|
||||
}
|
||||
|
||||
$modifiedPageBundle = $this->parsoid->pb2pb(
|
||||
$pageConfig, 'variant', $pageBundle,
|
||||
[
|
||||
|
|
@ -149,6 +193,14 @@ class LanguageVariantConverter {
|
|||
return PageBundleParserOutputConverter::parserOutputFromPageBundle( $modifiedPageBundle );
|
||||
}
|
||||
|
||||
/**
 * Turn off the fallback to the core LanguageConverter.
 *
 * The fallback is enabled by default. Once it has been disabled, a page
 * bundle whose target variant Parsoid cannot convert is handed back
 * unmodified instead of being run through the core converter.
 */
public function disableFallbackLanguageConverter(): void {
	$this->isFallbackLanguageConverterEnabled = false;
}
|
||||
|
||||
private function getPageConfig( string $pageLanguageCode, ?string $sourceVariantCode ): PageConfig {
|
||||
if ( $this->pageConfig ) {
|
||||
return $this->pageConfig;
|
||||
|
|
|
|||
|
|
@ -6,16 +6,18 @@ describe( 'Page Source', () => {
|
|||
const page = utils.title( 'PageSource_' );
|
||||
const pageWithSpaces = page.replace( '_', ' ' );
|
||||
const variantPage = utils.title( 'PageSourceVariant' );
|
||||
const fallbackVariantPage = 'MediaWiki:Tog-underline/kk-latn';
|
||||
|
||||
const redirectPage = utils.title( 'Redirect ' );
|
||||
const redirectedPage = redirectPage.replace( 'Redirect', 'Redirected' );
|
||||
|
||||
const client = new REST();
|
||||
const anon = action.getAnon();
|
||||
let mindy;
|
||||
const baseEditText = "''Edit 1'' and '''Edit 2'''";
|
||||
|
||||
before( async () => {
|
||||
const mindy = await action.mindy();
|
||||
mindy = await action.mindy();
|
||||
await anon.edit( page, { text: baseEditText } );
|
||||
|
||||
// Setup page with redirects
|
||||
|
|
@ -194,6 +196,17 @@ describe( 'Page Source', () => {
|
|||
assert.match( headers[ 'content-language' ], /en-x-piglatin/i );
|
||||
assert.match( headers.etag, /en-x-piglatin/i );
|
||||
} );
|
||||
it( 'Should perform fallback variant conversion', async () => {
	// Store Latin-script text on the kk-latn page, then ask for the kk-cyrl variant.
	const latinSource = 'Siltemeniñ astın sız:';
	await mindy.edit( fallbackVariantPage, { text: latinSource } );

	const requestPath = `/page/${encodeURIComponent( fallbackVariantPage )}/html`;
	const response = await client.get( requestPath, null, { 'accept-language': 'kk-cyrl' } );
	const responseHeaders = response.headers;

	// The returned HTML must contain the Cyrillic rendering of the stored text.
	assert.match( response.text, /Сілтеменің астын сыз:/ );

	// The response must vary on Accept-Language and name the target variant
	// in both the Content-Language header and the ETag.
	assert.match( responseHeaders.vary, /\bAccept-Language\b/i );
	assert.match( responseHeaders[ 'content-language' ], /kk-cyrl/i );
	assert.match( responseHeaders.etag, /kk-cyrl/i );
} );
|
||||
} );
|
||||
|
||||
describe( 'GET /page/{title}/with_html', () => {
|
||||
|
|
@ -283,5 +296,22 @@ describe( 'Page Source', () => {
|
|||
assert.match( headers[ 'content-language' ], /en-x-piglatin/i );
|
||||
}
|
||||
} );
|
||||
it( 'Should perform fallback variant conversion', async () => {
	await mindy.edit( fallbackVariantPage, { text: 'Siltemeniñ astın sız:' } );

	// Request the with_html endpoint that this suite covers. Requesting /html
	// here would only repeat the test in the /html suite and leave the
	// fallback conversion untested for with_html.
	const { headers, body } = await client.get( `/page/${encodeURIComponent( fallbackVariantPage )}/with_html`, null, {
		'accept-language': 'kk-cyrl'
	} );

	// NOTE(review): assumes the JSON response exposes the rendered HTML in
	// its `html` field; confirm against the endpoint's response schema.
	assert.match( body.html, /Сілтеменің астын сыз:/ );
	assert.match( headers.vary, /\bAccept-Language\b/i );
	assert.match( headers.etag, /kk-cyrl/i );

	// Since with_html returns JSON, content language is not set,
	// but if it's set, we expect it to be set correctly.
	const contentLanguageHeader = headers[ 'content-language' ];
	if ( contentLanguageHeader ) {
		assert.match( contentLanguageHeader, /kk-cyrl/i );
	}
} );
|
||||
} );
|
||||
} );
|
||||
|
|
|
|||
|
|
@ -140,6 +140,20 @@ class LanguageVariantConverterTest extends MediaWikiIntegrationTestCase {
|
|||
'>Hallo Wereld<',
|
||||
false // The output language is currently not indicated. Should be expected to be 'nl' in the future.
|
||||
];
|
||||
yield 'Variant conversion with fallback to core LanguageConverter' => [
|
||||
new PageBundle(
|
||||
'<p>Siltemeniñ astın sız:</p>',
|
||||
[ 'parsoid-data' ],
|
||||
[ 'mw-data' ],
|
||||
Parsoid::defaultHTMLVersion(),
|
||||
[]
|
||||
),
|
||||
null,
|
||||
'kk-cyrl',
|
||||
'kk-latn',
|
||||
'<p>Сілтеменің астын сыз:</p>',
|
||||
'kk-cyrl|kk-Cyrl'
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -229,6 +243,7 @@ class LanguageVariantConverterTest extends MediaWikiIntegrationTestCase {
|
|||
MainConfigSchema::getDefaultValue( MainConfigNames::ParsoidSettings ),
|
||||
$this->getServiceContainer()->getParsoidSiteConfig(),
|
||||
$this->getServiceContainer()->getTitleFactory(),
|
||||
$this->getServiceContainer()->getLanguageConverterFactory(),
|
||||
$this->getServiceContainer()->getLanguageFactory()
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
namespace MediaWiki\Parser\Parsoid;
|
||||
|
||||
use Language;
|
||||
use LanguageConverter;
|
||||
use MediaWiki\Languages\LanguageConverterFactory;
|
||||
use MediaWiki\Languages\LanguageFactory;
|
||||
use MediaWiki\Page\PageIdentity;
|
||||
use MediaWiki\Page\PageIdentityValue;
|
||||
|
|
@ -259,6 +261,7 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
|
|||
$parsoidSettings,
|
||||
$siteConfigMock,
|
||||
$titleFactoryMock,
|
||||
$this->getLanguageConverterFactoryMock(),
|
||||
$languageFactoryMock
|
||||
);
|
||||
|
||||
|
|
@ -311,6 +314,9 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
|
|||
->method( 'pb2pb' );
|
||||
}
|
||||
|
||||
$mock->method( 'implementsLanguageConversion' )
|
||||
->willReturn( true );
|
||||
|
||||
return $mock;
|
||||
}
|
||||
|
||||
|
|
@ -397,6 +403,7 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
|
|||
$mock->headers = [
|
||||
'content-language' => $languageCode
|
||||
];
|
||||
$mock->html = 'test message';
|
||||
return $mock;
|
||||
}
|
||||
|
||||
|
|
@ -420,4 +427,17 @@ class LanguageVariantConverterUnitTest extends MediaWikiUnitTestCase {
|
|||
|
||||
return $languageMock;
|
||||
}
|
||||
|
||||
/**
 * Create a LanguageConverterFactory mock for use in these tests.
 *
 * Every call to getLanguageConverter() on the mock yields the same
 * LanguageConverter mock, whose convertTo() returns the input text
 * unchanged regardless of the requested variant code.
 *
 * @return LanguageConverterFactory
 */
private function getLanguageConverterFactoryMock() {
	// Identity converter: convertTo( $text, $code ) gives back $text.
	$converterMock = $this->createMock( LanguageConverter::class );
	$converterMock->method( 'convertTo' )->willReturnArgument( 0 );

	$factoryMock = $this->createMock( LanguageConverterFactory::class );
	$factoryMock->method( 'getLanguageConverter' )->willReturn( $converterMock );

	return $factoryMock;
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue