Generalize non-digit-grouping of four-digit numbers

In some languages it's conventional not to insert a thousands
separator in numbers that are four digits long (1000-9999).
Rather than copy-paste the custom code to do this between 13 files,
introduce another option and have the base Language class handle it.

This also fixes an issue in several languages where this logic
previously would not work for negative or fractional numbers.

To implement this, a new option is added to MessagesXx.php files,
`$minimumGroupingDigits = 2;`, with the meaning as defined in
<http://unicode.org/reports/tr35/tr35-numbers.html>. It is a little
roundabout, but it could allow us to migrate the number formatting
(currently all custom code) to some generic library easily.

Bug: T177846
Change-Id: Iedd8de5648cf2de1c94044918626de2f96365d48
This commit is contained in:
Bartosz Dziewoński 2017-10-10 20:51:50 +02:00
parent 08324f14b4
commit eb6bb6b7b9
31 changed files with 41 additions and 406 deletions

View file

@ -705,7 +705,6 @@ $wgAutoloadLocalClasses = [
'LanguageAr' => __DIR__ . '/languages/classes/LanguageAr.php',
'LanguageAz' => __DIR__ . '/languages/classes/LanguageAz.php',
'LanguageBe_tarask' => __DIR__ . '/languages/classes/LanguageBe_tarask.php',
'LanguageBg' => __DIR__ . '/languages/classes/LanguageBg.php',
'LanguageBs' => __DIR__ . '/languages/classes/LanguageBs.php',
'LanguageCode' => __DIR__ . '/languages/LanguageCode.php',
'LanguageConverter' => __DIR__ . '/languages/LanguageConverter.php',
@ -713,8 +712,6 @@ $wgAutoloadLocalClasses = [
'LanguageCu' => __DIR__ . '/languages/classes/LanguageCu.php',
'LanguageDsb' => __DIR__ . '/languages/classes/LanguageDsb.php',
'LanguageEn' => __DIR__ . '/languages/classes/LanguageEn.php',
'LanguageEs' => __DIR__ . '/languages/classes/LanguageEs.php',
'LanguageEt' => __DIR__ . '/languages/classes/LanguageEt.php',
'LanguageFi' => __DIR__ . '/languages/classes/LanguageFi.php',
'LanguageGa' => __DIR__ . '/languages/classes/LanguageGa.php',
'LanguageGan' => __DIR__ . '/languages/classes/LanguageGan.php',
@ -729,21 +726,17 @@ $wgAutoloadLocalClasses = [
'LanguageKm' => __DIR__ . '/languages/classes/LanguageKm.php',
'LanguageKsh' => __DIR__ . '/languages/classes/LanguageKsh.php',
'LanguageKu' => __DIR__ . '/languages/classes/LanguageKu.php',
'LanguageKu_ku' => __DIR__ . '/languages/classes/LanguageKu_ku.php',
'LanguageLa' => __DIR__ . '/languages/classes/LanguageLa.php',
'LanguageMl' => __DIR__ . '/languages/classes/LanguageMl.php',
'LanguageMy' => __DIR__ . '/languages/classes/LanguageMy.php',
'LanguageOs' => __DIR__ . '/languages/classes/LanguageOs.php',
'LanguagePl' => __DIR__ . '/languages/classes/LanguagePl.php',
'LanguageQqx' => __DIR__ . '/languages/classes/LanguageQqx.php',
'LanguageRu' => __DIR__ . '/languages/classes/LanguageRu.php',
'LanguageShi' => __DIR__ . '/languages/classes/LanguageShi.php',
'LanguageSl' => __DIR__ . '/languages/classes/LanguageSl.php',
'LanguageSr' => __DIR__ . '/languages/classes/LanguageSr.php',
'LanguageTg' => __DIR__ . '/languages/classes/LanguageTg.php',
'LanguageTr' => __DIR__ . '/languages/classes/LanguageTr.php',
'LanguageTyv' => __DIR__ . '/languages/classes/LanguageTyv.php',
'LanguageUk' => __DIR__ . '/languages/classes/LanguageUk.php',
'LanguageUz' => __DIR__ . '/languages/classes/LanguageUz.php',
'LanguageWa' => __DIR__ . '/languages/classes/LanguageWa.php',
'LanguageYue' => __DIR__ . '/languages/classes/LanguageYue.php',

View file

@ -109,7 +109,8 @@ class LocalisationCache {
static public $allKeys = [
'fallback', 'namespaceNames', 'bookstoreList',
'magicWords', 'messages', 'rtl', 'capitalizeAllNouns', 'digitTransformTable',
'separatorTransformTable', 'fallback8bitEncoding', 'linkPrefixExtension',
'separatorTransformTable', 'minimumGroupingDigits',
'fallback8bitEncoding', 'linkPrefixExtension',
'linkTrail', 'linkPrefixCharset', 'namespaceAliases',
'dateFormats', 'datePreferences', 'datePreferenceMigrationMap',
'defaultDateFormat', 'extraUserToggles', 'specialPageAliases',

View file

@ -3313,12 +3313,25 @@ class Language {
*/
function commafy( $number ) {
$digitGroupingPattern = $this->digitGroupingPattern();
$minimumGroupingDigits = $this->minimumGroupingDigits();
if ( $number === null ) {
return '';
}
if ( !$digitGroupingPattern || $digitGroupingPattern === "###,###,###" ) {
// default grouping is at thousands, use the same for ###,###,### pattern too.
// Default grouping is at thousands, use the same for ###,###,### pattern too.
// In some languages it's conventional not to insert a thousands separator
// in numbers that are four digits long (1000-9999).
if ( $minimumGroupingDigits ) {
// Number of '#' characters after last comma in the grouping pattern.
// The pattern is hardcoded here, but this would vary for different patterns.
$primaryGroupingSize = 3;
// Maximum length of a number to suppress digit grouping for.
$maximumLength = $minimumGroupingDigits + $primaryGroupingSize - 1;
if ( preg_match( '/^\-?\d{1,' . $maximumLength . '}(\.\d+)?$/', $number ) ) {
return $number;
}
}
return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $number ) ) );
} else {
// Ref: http://cldr.unicode.org/translation/number-patterns
@ -3381,6 +3394,13 @@ class Language {
return self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' );
}
/**
 * Fetch the 'minimumGroupingDigits' localisation item for this language.
 *
 * commafy() reads this value to decide how long a number has to be
 * before digit grouping is applied to it.
 *
 * @return int|null The configured value; presumably null when the
 *  language does not define one (TODO confirm against the data cache).
 */
function minimumGroupingDigits() {
	$configured = self::$dataCache->getItem( $this->mCode, 'minimumGroupingDigits' );

	return $configured;
}
/**
* Take a list of strings and build a locale-friendly comma-separated
* list, using the local comma-separator message.

View file

@ -53,20 +53,4 @@ class LanguageBe_tarask extends Language {
return $s;
}
/**
 * Group the digits of a number with commas, except when the integer part
 * has four digits or fewer.
 *
 * Examples: "1,234,567" and "12,345" are grouped, but "1234" is returned
 * as is. An optional leading minus sign and an optional fractional part
 * (a dot followed by zero or more digits) do not affect the decision.
 *
 * @param string $_ Number to format
 *
 * @return string The number, with grouping commas inserted where applicable
 */
function commafy( $_ ) {
	$isShortNumber = preg_match( '/^-?\d{1,4}(\.\d*)?$/', $_ );
	if ( !$isShortNumber ) {
		// Work on the reversed string so groups of three are counted from the
		// units digit; the negative lookahead keeps commas out of the
		// fractional part.
		$reversed = strrev( $_ );
		$grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );

		return strrev( (string)$grouped );
	}

	return $_;
}
}

View file

@ -1,45 +0,0 @@
<?php
/**
* Bulgarian (Български) specific code.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Language
*/
/**
* Bulgarian (Български)
*
* @ingroup Language
*/
class LanguageBg extends Language {
	/**
	 * ISO number formatting: 123 456 789,99.
	 * Avoid triple grouping for numbers whose whole part has up to 4 digits.
	 *
	 * The check accepts an optional leading minus sign and an optional
	 * fractional part, so that "-1234" and "1234.56" are left ungrouped
	 * just like "1234".
	 *
	 * @param string $_ Number to format, using "." as the decimal mark
	 *
	 * @return string The number, with "," inserted between groups of three
	 *  integer digits when the whole part is longer than four digits
	 */
	function commafy( $_ ) {
		// Numbers with a whole part of at most four digits are never grouped.
		if ( preg_match( '/^-?\d{1,4}(\.\d+)?$/', $_ ) ) {
			return $_;
		}

		// Work on the reversed string so groups of three are counted from the
		// units digit; the negative lookahead keeps commas out of the
		// fractional part.
		return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $_ ) ) );
	}
}

View file

@ -1,42 +0,0 @@
<?php
/**
* Spanish (español) specific code.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Language
*/
/**
* Spanish (español)
*
* @ingroup Language
*/
class LanguageEs extends Language {
	/**
	 * Group the digits of a number with commas, unless its integer part has
	 * four digits or fewer (an optional minus sign and fractional part are
	 * allowed), in which case the number is returned unchanged.
	 *
	 * @param string $_ Number to format
	 * @return string Formatted number
	 */
	function commafy( $_ ) {
		$isShortNumber = preg_match( '/^-?\d{1,4}(\.\d+)?$/', $_ );
		if ( $isShortNumber ) {
			return $_;
		}

		// Count groups of three from the units digit by working on the
		// reversed string; the negative lookahead skips the fractional part.
		$reversed = strrev( $_ );
		$grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );

		return strrev( (string)$grouped );
	}
}

View file

@ -1,42 +0,0 @@
<?php
/**
* Estonian (eesti) specific code.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Language
*/
/**
* Estonian (eesti)
*
* @ingroup Language
*/
class LanguageEt extends Language {
	/**
	 * Insert grouping commas into a number, but leave numbers whose integer
	 * part lies between -9999 and 9999 (with or without a fractional part)
	 * exactly as they were passed in.
	 *
	 * @param string $_ Number to format
	 * @return string Formatted number
	 */
	function commafy( $_ ) {
		$needsGrouping = !preg_match( '/^\-?\d{1,4}(\.\d+)?$/', $_ );

		// The grouping regex runs on the reversed string so that groups of
		// three are counted from the units digit; its negative lookahead
		// keeps commas out of the fractional part.
		return $needsGrouping
			? strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $_ ) ) )
			: $_;
	}
}

View file

@ -78,19 +78,4 @@ class LanguageHy extends Language {
}
return $word;
}
/**
 * Armenian numeric format is "12 345,67" but "1234,56"
 *
 * Numbers whose whole part has at most four digits are returned unchanged.
 * The check accepts an optional leading minus sign and an optional
 * fractional part, so that "1234.56" and "-1234" stay ungrouped as well.
 *
 * @param string $_ Number to format, using "." as the decimal mark
 *
 * @return string The number, with "," inserted between groups of three
 *  integer digits when the whole part is longer than four digits
 */
function commafy( $_ ) {
	// Numbers with a whole part of at most four digits are never grouped.
	if ( preg_match( '/^-?\d{1,4}(\.\d+)?$/', $_ ) ) {
		return $_;
	}

	// Work on the reversed string so groups of three are counted from the
	// units digit; the negative lookahead keeps commas out of the
	// fractional part.
	return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $_ ) ) );
}
}

View file

@ -75,19 +75,4 @@ class LanguageKaa extends Language {
return parent::lcfirst( $string );
}
/**
 * Avoid grouping numbers whose whole part is between -9999 and 9999.
 *
 * The check accepts an optional leading minus sign and an optional
 * fractional part, so "-1234" and "1234.56" are left ungrouped in the
 * same way as "1234".
 *
 * @param string $_ Number to format, using "." as the decimal mark
 *
 * @return string The number, with "," inserted between groups of three
 *  integer digits when the whole part is longer than four digits
 */
function commafy( $_ ) {
	// Numbers with a whole part of at most four digits are never grouped.
	if ( preg_match( '/^-?\d{1,4}(\.\d+)?$/', $_ ) ) {
		return $_;
	}

	// Work on the reversed string so groups of three are counted from the
	// units digit; the negative lookahead keeps commas out of the
	// fractional part.
	return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $_ ) ) );
}
}

View file

@ -773,19 +773,4 @@ class LanguageKk_cyrl extends Language {
return $lastLetter;
}
/**
 * Avoid grouping numbers whose whole part is between -9999 and 9999.
 *
 * The check accepts an optional leading minus sign and an optional
 * fractional part, so "-1234" and "1234.56" are left ungrouped in the
 * same way as "1234".
 *
 * @param string $_ Number to format, using "." as the decimal mark
 *
 * @return string The number, with "," inserted between groups of three
 *  integer digits when the whole part is longer than four digits
 */
function commafy( $_ ) {
	// Numbers with a whole part of at most four digits are never grouped.
	if ( preg_match( '/^-?\d{1,4}(\.\d+)?$/', $_ ) ) {
		return $_;
	}

	// Work on the reversed string so groups of three are counted from the
	// units digit; the negative lookahead keeps commas out of the
	// fractional part.
	return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $_ ) ) );
}
}

View file

@ -163,21 +163,6 @@ class LanguageKsh extends Language {
return $word;
}
/**
 * Avoid grouping numbers whose whole part is between -9999 and 9999.
 *
 * The check accepts an optional leading minus sign and an optional
 * fractional part, so "-1234" and "1234.56" are left ungrouped in the
 * same way as "1234".
 *
 * @param string $_ Number to format, using "." as the decimal mark
 *
 * @return string The number, with "," inserted between groups of three
 *  integer digits when the whole part is longer than four digits
 */
public function commafy( $_ ) {
	// Numbers with a whole part of at most four digits are never grouped.
	if ( preg_match( '/^-?\d{1,4}(\.\d+)?$/', $_ ) ) {
		return $_;
	}

	// Work on the reversed string so groups of three are counted from the
	// units digit; the negative lookahead keeps commas out of the
	// fractional part.
	return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $_ ) ) );
}
/**
* Handle cases of (1, other, 0) or (1, other)
*

View file

@ -227,7 +227,7 @@ class KuConverter extends LanguageConverter {
*
* @ingroup Language
*/
class LanguageKu extends LanguageKu_ku {
class LanguageKu extends Language {
function __construct() {
parent::__construct();

View file

@ -1,45 +0,0 @@
<?php
/**
* Kurdish specific code.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Language
*/
/**
* Kurdish
*
* @ingroup Language
*/
// phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
class LanguageKu_ku extends Language {
	/**
	 * Avoid grouping numbers whose whole part is between -9999 and 9999.
	 *
	 * The check accepts an optional leading minus sign and an optional
	 * fractional part, so "-1234" and "1234.56" are left ungrouped in the
	 * same way as "1234".
	 *
	 * @param string $_ Number to format, using "." as the decimal mark
	 *
	 * @return string The number, with "," inserted between groups of three
	 *  integer digits when the whole part is longer than four digits
	 */
	function commafy( $_ ) {
		// Numbers with a whole part of at most four digits are never grouped.
		if ( preg_match( '/^-?\d{1,4}(\.\d+)?$/', $_ ) ) {
			return $_;
		}

		// Work on the reversed string so groups of three are counted from the
		// units digit; the negative lookahead keeps commas out of the
		// fractional part.
		return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $_ ) ) );
	}
}

View file

@ -1,41 +0,0 @@
<?php
/**
* Polish (polski) specific code.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Language
*/
/**
* Polish (polski)
*
* @ingroup Language
*/
class LanguagePl extends Language {
	/**
	 * Group the digits of a number with commas. Numbers whose integer part
	 * has at most four digits (optionally negative, optionally with a
	 * fractional part) are handed back unchanged.
	 *
	 * @param string $_ Number to format
	 * @return string Formatted number
	 */
	function commafy( $_ ) {
		if ( preg_match( '/^\-?\d{1,4}(\.\d+)?$/', $_ ) ) {
			return $_;
		}

		// Reverse first so that groups of three are counted from the units
		// digit; the negative lookahead keeps the fractional part intact.
		$backwards = strrev( $_ );
		$withCommas = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $backwards );

		return strrev( (string)$withCommas );
	}
}

View file

@ -1,50 +0,0 @@
<?php
/**
* Russian (русский язык) specific code.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Language
*/
/**
* Russian (русский язык)
*
* You can contact:
* Alexander Sigachov (alexander.sigachov at Googgle Mail)
* Amir E. Aharoni (amir.aharoni@mail.huji.ac.il)
*
* @ingroup Language
*/
class LanguageRu extends Language {
	/**
	 * Group digits with commas, except in numbers whose integer part has
	 * four digits or fewer: "1,234,567" and "12,345" are grouped, "1234" is not.
	 * See manual of style at https://ru.wikipedia.org/wiki/Википедия:Оформление_статей
	 *
	 * An optional leading minus sign and an optional fractional part (a dot
	 * followed by zero or more digits) do not affect the decision.
	 *
	 * @param string $_ Number to format
	 *
	 * @return string Formatted number
	 */
	function commafy( $_ ) {
		$isShortNumber = preg_match( '/^-?\d{1,4}(\.\d*)?$/', $_ );
		if ( !$isShortNumber ) {
			// Reverse first so that groups of three are counted from the
			// units digit; the negative lookahead keeps commas out of the
			// fractional part.
			$reversed = strrev( $_ );
			$grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );

			return strrev( (string)$grouped );
		}

		return $_;
	}
}

View file

@ -1,48 +0,0 @@
<?php
/**
* Ukrainian (українська мова) specific code.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Language
*/
/**
* Ukrainian (українська мова)
*
* @ingroup Language
*/
class LanguageUk extends Language {
	/**
	 * Ukrainian numeric format is "12 345,67" but "1234,56": numbers whose
	 * integer part has at most four digits (optionally negative, optionally
	 * with a fractional part) are returned without any grouping commas.
	 *
	 * @param string $_ Number to format
	 *
	 * @return string Formatted number
	 */
	function commafy( $_ ) {
		$isShortNumber = preg_match( '/^\-?\d{1,4}(\.\d+)?$/', $_ );
		if ( $isShortNumber ) {
			return $_;
		}

		// Reverse first so that groups of three are counted from the units
		// digit; the negative lookahead keeps commas out of the fractional part.
		$reversed = strrev( $_ );
		$grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );

		return strrev( (string)$grouped );
	}
}

View file

@ -236,6 +236,7 @@ $separatorTransformTable = [
',' => "\xc2\xa0", # nbsp
'.' => ','
];
$minimumGroupingDigits = 2;
$linkTrail = '/^([абвгґджзеёжзійклмнопрстуўфхцчшыьэюяćčłńśšŭźža-z]+)(.*)$/sDu';

View file

@ -218,3 +218,4 @@ $bookstoreList = [
$linkTrail = '/^([a-zабвгдежзийклмнопрстуфхцчшщъыьэюя]+)(.*)$/sDu';
$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ];
$minimumGroupingDigits = 2;

View file

@ -299,4 +299,6 @@ $dateFormats = [
];
$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ];
$minimumGroupingDigits = 2;
$linkTrail = '/^([a-záéíóúñ]+)(.*)$/sDu';

View file

@ -259,6 +259,8 @@ $magicWords = [
];
$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ];
$minimumGroupingDigits = 2;
$linkTrail = '/^([äöõšüža-z]+)(.*)$/sDu';
$datePreferences = [

View file

@ -12,6 +12,7 @@ $separatorTransformTable = [
',' => "\xc2\xa0", # nbsp
'.' => ','
];
$minimumGroupingDigits = 2;
$fallback8bitEncoding = 'UTF-8';

View file

@ -14,6 +14,7 @@ $separatorTransformTable = [
',' => "\xc2\xa0",
'.' => ',',
];
$minimumGroupingDigits = 2;
$fallback8bitEncoding = 'windows-1254';

View file

@ -22,6 +22,7 @@ $separatorTransformTable = [
',' => "\xc2\xa0",
'.' => ',',
];
$minimumGroupingDigits = 2;
$fallback8bitEncoding = 'windows-1251';

View file

@ -95,6 +95,8 @@ $namespaceAliases = [
];
$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ];
$minimumGroupingDigits = 2;
$linkTrail = '/^([äöüėëijßəğåůæœça-z]+)(.*)$/sDu';
// Remove German aliases

View file

@ -51,6 +51,7 @@ $namespaceAliases = [
];
$separatorTransformTable = [ ',' => '.', '.' => ',' ];
$minimumGroupingDigits = 2;
$specialPageAliases = [
'Allmessages' => [ 'Hemû_Peyam' ],

View file

@ -119,10 +119,12 @@ $dateFormats = [
];
$fallback8bitEncoding = 'iso-8859-2';
$separatorTransformTable = [
',' => "\xc2\xa0", // T4749
'.' => ','
];
$minimumGroupingDigits = 2;
$linkTrail = '/^([a-zęóąśłżźćńĘÓĄŚŁŻŹĆŃ]+)(.*)$/sDu';

View file

@ -420,6 +420,7 @@ $separatorTransformTable = [
',' => "\xc2\xa0", # nbsp
'.' => ','
];
$minimumGroupingDigits = 2;
$fallback8bitEncoding = 'windows-1251';
$linkPrefixExtension = false;

View file

@ -58,6 +58,7 @@ $separatorTransformTable = [
',' => "\xc2\xa0", # nbsp
'.' => ','
];
$minimumGroupingDigits = 2;
$fallback8bitEncoding = 'windows-1251';
$linkPrefixExtension = true;

View file

@ -76,7 +76,7 @@ class LanguagePlTest extends LanguageClassesTestCase {
}
/**
* @covers LanguagePl::commafy()
* @covers Language::commafy()
* @dataProvider provideCommafyData
*/
public function testCommafy( $number, $numbersWithCommas ) {

View file

@ -6,9 +6,6 @@
* @file
*/
/**
* @covers LanguageRu
*/
class LanguageRuTest extends LanguageClassesTestCase {
/**
* @dataProvider providePlural

View file

@ -6,9 +6,6 @@
* @file
*/
/**
* @covers LanguageUk
*/
class LanguageUkTest extends LanguageClassesTestCase {
/**
* @dataProvider providePlural