Accept BCP 47 codes in LanguageConverter rules

Facilitate a gradual migration away from non-standard MediaWiki language
codes.  This will ensure that (a) rules can be written with standard
BCP 47 codes, and (b) rules written with existing non-standard codes
will continue to work once those codes are added to
LanguageCode::$deprecatedLanguageCodeMapping.

Change-Id: I3ba96faafaf40bd47fb5919621f7035f0431a698
This commit is contained in:
C. Scott Ananian 2018-07-13 15:40:20 -04:00
parent 50d87ad2f8
commit f7bb180fef
3 changed files with 38 additions and 10 deletions

View file

@ -153,25 +153,27 @@ class ConverterRule {
$to = trim( $v[1] );
$v = trim( $v[0] );
$u = explode( '=>', $v, 2 );
$vv = $this->mConverter->validateVariant( $v );
// if $to is empty (which is also used as $from in bidtable),
// strtr() could return a wrong result.
if ( count( $u ) == 1 && $to !== '' && in_array( $v, $variants ) ) {
$bidtable[$v] = $to;
if ( count( $u ) == 1 && $to !== '' && $vv ) {
$bidtable[$vv] = $to;
} elseif ( count( $u ) == 2 ) {
$from = trim( $u[0] );
$v = trim( $u[1] );
$vv = $this->mConverter->validateVariant( $v );
// if $from is empty, strtr() could return a wrong result.
if ( array_key_exists( $v, $unidtable )
&& !is_array( $unidtable[$v] )
if ( array_key_exists( $vv, $unidtable )
&& !is_array( $unidtable[$vv] )
&& $from !== ''
&& in_array( $v, $variants ) ) {
$unidtable[$v] = [ $from => $to ];
} elseif ( $from !== '' && in_array( $v, $variants ) ) {
$unidtable[$v][$from] = $to;
&& $vv ) {
$unidtable[$vv] = [ $from => $to ];
} elseif ( $from !== '' && $vv ) {
$unidtable[$vv][$from] = $to;
}
}
// syntax error, pass
if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
if ( !isset( $this->mConverter->mVariantNames[$vv] ) ) {
$bidtable = [];
$unidtable = [];
break;

View file

@ -1175,8 +1175,21 @@ class LanguageConverter {
// [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
// [2] => ''
// ]
$pat = '/;\s*(?=';
$expandedVariants = [];
foreach ( $this->mVariants as $variant ) {
$expandedVariants[ $variant ] = 1;
// Accept standard BCP 47 names for variants as well.
$expandedVariants[ LanguageCode::bcp47( $variant ) ] = 1;
}
// Accept old deprecated names for variants
foreach ( LanguageCode::getDeprecatedCodeMapping() as $old => $new ) {
if ( isset( $expandedVariants[ $new ] ) ) {
$expandedVariants[ $old ] = 1;
}
}
$pat = '/;\s*(?=';
foreach ( $expandedVariants as $variant => $ignore ) {
// zh-hans:xxx;zh-hant:yyy
$pat .= $variant . '\s*:|';
// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz

View file

@ -22434,6 +22434,19 @@ language=zh variant=zh-tw
<p><span typeof="mw:LanguageVariant" data-parsoid='{"tSp":[6]}' data-mw-variant='{"twoway":[{"l":"zh","t":"China"},{"l":"zh-tw","t":"Taiwan"}]}'></span>, not China</p>
!! end
!! test
Explicit definition of language variant alternatives (BCP 47 codes)
!! options
language=zh variant=zh-tw
!! wikitext
-{zh:China;zh-Hant-TW:Taiwan}-, not China
!! html/php
<p>Taiwan, not China
</p>
!! html/parsoid
<p><span typeof="mw:LanguageVariant" data-parsoid='{"tSp":[6]}' data-mw-variant='{"twoway":[{"l":"zh","t":"China"},{"l":"zh-Hant-TW","t":"Taiwan"}]}'></span>, not China</p>
!! end
!! test
Filter syntax for language variants
!! options