Break up $wgDummyLanguageCodes

$wgDummyLanguageCodes is a set and mapping of different language codes:

* Renamed language codes: ['als' => 'gsw', 'bat-smg' => 'sgs',
                           'be-x-old' => 'be-tarask', 'fiu-vro' => 'vro',
                           'roa-rup' => 'rup', 'zh-classical' => 'lzh',
                           'zh-min-nan' => 'nan', 'zh-yue' => 'yue'].
  The old language codes are deprecated because they are invalid but
  should be supported for compatibility reasons for a while.
* Language codes of macro languages, which get mapped to the main
  language: ['bh' => 'bho', 'no' => 'nb'].
* Language variants which get mapped to main language:
  ['simple' => 'en'].
* Internal language codes of the private-use-area which get mapped to
  themselves: ['qqq' => 'qqq', 'qqx' => 'qqx']

This is a very strange conglomeration which should be differentiated,
so it was split up in the following ways:

* Renamed language codes are available from
  LanguageCode::getDeprecatedCodeMapping().
* Language codes of macro languages and the variants that are mapped to
  the main language are available as $wgExtraLanguageCodes and are set
  in DefaultSettings.php.
* Internal language codes are set in $wgDummyLanguageCodes in Setup.php.

Change-Id: If73c74ee87d8235381449cab7dcd9f46b0f23590
This commit is contained in:
This, that and the other 2017-01-13 01:17:41 +11:00 committed by Kunal Mehta
parent c62e736f18
commit 48ab87d0a3
5 changed files with 121 additions and 20 deletions

View file

@ -688,6 +688,7 @@ $wgAutoloadLocalClasses = [
'LanguageBe_tarask' => __DIR__ . '/languages/classes/LanguageBe_tarask.php',
'LanguageBg' => __DIR__ . '/languages/classes/LanguageBg.php',
'LanguageBs' => __DIR__ . '/languages/classes/LanguageBs.php',
'LanguageCode' => __DIR__ . '/languages/LanguageCode.php',
'LanguageConverter' => __DIR__ . '/languages/LanguageConverter.php',
'LanguageCu' => __DIR__ . '/languages/classes/LanguageCu.php',
'LanguageDsb' => __DIR__ . '/languages/classes/LanguageDsb.php',

View file

@ -2808,8 +2808,9 @@ $wgUsePrivateIPs = false;
* MediaWiki out of the box. Not all languages listed there have translations,
* see languages/messages/ for the list of languages with some localisation.
*
* Warning: Don't use language codes listed in $wgDummyLanguageCodes like "no"
* for Norwegian (use "nb" instead), or things will break unexpectedly.
* Warning: Don't use any of MediaWiki's deprecated language codes listed in
* LanguageCode::getDeprecatedCodeMapping or $wgDummyLanguageCodes, like "no"
* for Norwegian (use "nb" instead). If you do, things will break unexpectedly.
*
* This defines the default interface language for all users, but users can
* change it in their preferences.
@ -2868,25 +2869,22 @@ $wgExtraInterlanguageLinkPrefixes = [];
$wgExtraLanguageNames = [];
/**
* List of language codes that don't correspond to an actual language.
* These codes are mostly left-offs from renames, or other legacy things.
* This array makes them not appear as a selectable language on the installer,
* and excludes them when running the transstat.php script.
* List of mappings from one language code to another.
* This array makes the codes not appear as a selectable language on the
* installer, and excludes them when running the transstat.php script.
*
* In Setup.php, the variable $wgDummyLanguageCodes is created by combining
* these codes with a list of "deprecated" codes, which are mostly leftovers
* from renames or other legacy things, and the internal codes 'qqq' and 'qqx'.
* If a mapping in $wgExtraLanguageCodes collides with a built-in mapping, the
* value in $wgExtraLanguageCodes will be used.
*
* @since 1.29
*/
$wgDummyLanguageCodes = [
'als' => 'gsw',
'bat-smg' => 'sgs',
'be-x-old' => 'be-tarask',
'bh' => 'bho',
'fiu-vro' => 'vro',
'no' => 'nb',
'qqq' => 'qqq', # Used for message documentation.
'qqx' => 'qqx', # Used for viewing message keys.
'roa-rup' => 'rup',
'simple' => 'en',
'zh-classical' => 'lzh',
'zh-min-nan' => 'nan',
'zh-yue' => 'yue',
$wgExtraLanguageCodes = [
'bh' => 'bho', // Bihari language family
'no' => 'nb', // Norwegian language family
'simple' => 'en', // Simple English
];
/**

View file

@ -403,6 +403,14 @@ if ( is_array( $wgExtraNamespaces ) ) {
$wgCanonicalNamespaceNames = $wgCanonicalNamespaceNames + $wgExtraNamespaces;
}
// Build $wgDummyLanguageCodes from the internal, extra and deprecated language
// codes, unless it is already set (e.g. overridden in the config).
// The array union operator (+) keeps the left-hand value when a key exists on
// both sides, so the internal codes win over $wgExtraLanguageCodes, which in
// turn wins over LanguageCode::getDeprecatedCodeMapping().
if ( !isset( $wgDummyLanguageCodes ) ) {
$wgDummyLanguageCodes = [
'qqq' => 'qqq', // Used for message documentation
'qqx' => 'qqx', // Used for viewing message keys
] + $wgExtraLanguageCodes + LanguageCode::getDeprecatedCodeMapping();
}
// These are now the same, always
// To determine the user language, use $wgLang->getCode()
$wgContLanguageCode = $wgLanguageCode;

View file

@ -0,0 +1,54 @@
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Language
*/
/**
 * Helper methods for working with language codes.
 * @todo Move some of the code-related static methods out of Language into this class
 *
 * @since 1.29
 * @ingroup Language
 */
class LanguageCode {
	/**
	 * Get the language codes that earlier MediaWiki versions used and that are
	 * now deprecated, each paired with the current code that replaces it.
	 *
	 * Setup.php combines this map with $wgExtraLanguageCodes and the internal
	 * pseudo-codes 'qqq' and 'qqx' (used by MediaWiki's localisation system)
	 * to build $wgDummyLanguageCodes.
	 *
	 * @return string[] Map of deprecated code => current code
	 */
	public static function getDeprecatedCodeMapping() {
		$deprecatedToCurrent = [
			// 'als' is itself a valid ISO 639 code (Tosk Albanian); MediaWiki
			// formerly used it for Alsatian, which belongs under 'gsw'.
			'als' => 'gsw',
			'bat-smg' => 'sgs',
			'be-x-old' => 'be-tarask',
			'fiu-vro' => 'vro',
			'roa-rup' => 'rup',
			'zh-classical' => 'lzh',
			'zh-min-nan' => 'nan',
			'zh-yue' => 'yue',
		];

		return $deprecatedToCurrent;
	}
}

View file

@ -0,0 +1,40 @@
<?php
/**
* @covers LanguageCode
*
* @group Language
*
* @license GPL-2.0+
* @author Thiemo Mättig
*/
class LanguageCodeTest extends PHPUnit_Framework_TestCase {

	/**
	 * The class can be instantiated directly.
	 */
	public function testConstructor() {
		$this->assertInstanceOf( LanguageCode::class, new LanguageCode() );
	}

	/**
	 * The deprecated-code map is a string => string array without empty
	 * entries, and it contains neither MediaWiki's internal codes nor codes
	 * that are still valid.
	 */
	public function testGetDeprecatedCodeMapping() {
		$mapping = LanguageCode::getDeprecatedCodeMapping();

		// Shape: an array whose keys and values are all non-empty strings
		$this->assertInternalType( 'array', $mapping );
		$this->assertContainsOnly( 'string', array_keys( $mapping ) );
		$this->assertArrayNotHasKey( '', $mapping );
		$this->assertContainsOnly( 'string', $mapping );
		$this->assertNotContains( '', $mapping );

		// Codes special to MediaWiki should never appear in a map of "deprecated" codes,
		// neither as a key nor as a value
		$internalCodes = [
			'qqq' => 'documentation',
			'qqx' => 'debug code',
		];
		foreach ( $internalCodes as $code => $description ) {
			$this->assertArrayNotHasKey( $code, $mapping, $description );
			$this->assertNotContains( $code, $mapping, $description );
		}

		// Valid language codes that are currently not "deprecated"
		$validCodes = [
			'bh' => 'family of Bihari languages',
			'no' => 'family of Norwegian languages',
			'simple' => '',
		];
		foreach ( $validCodes as $code => $description ) {
			$this->assertArrayNotHasKey( $code, $mapping, $description );
		}
	}
}