2016-07-27 14:43:01 +00:00
|
|
|
|
<?php
|
|
|
|
|
|
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */
|
|
|
|
|
|
|
2021-08-24 19:12:39 +00:00
|
|
|
|
use MediaWiki\Languages\LanguageFactory;
|
|
|
|
|
|
|
2016-07-27 14:43:01 +00:00
|
|
|
|
/**
 * Collation that orders text with numbers "naturally", so that 'Foo 1' < 'Foo 2' < 'Foo 12'.
 *
 * Note that this only works in terms of sequences of digits, and the behavior for decimal fractions
 * or pretty-formatted numbers may be unexpected.
 *
 * Digits will be based on the wiki's content language settings. If
 * you change the content language of a wiki you will need to run
 * updateCollation.php --force. Only English (ASCII 0-9) and the
 * localized version will be counted. Localized digits from other languages
 * or weird unicode digit equivalents (e.g. 4, 𝟜, ⓸ , ⁴, etc) will not count.
 *
 * @since 1.28
 */
|
|
|
|
|
|
class NumericUppercaseCollation extends UppercaseCollation {

	/**
	 * @var Language How to convert digits (usually the content language)
	 */
	private $digitTransformLang;

	/**
	 * @param LanguageFactory $languageFactory
	 * @param string|Language $digitTransformLang How to convert digits.
	 *  For example, if given language "my" then ၇ is treated like 7.
	 *  It is expected that usually this is given the content language.
	 */
	public function __construct(
		LanguageFactory $languageFactory,
		$digitTransformLang
	) {
		// Accept either a ready Language object or something the factory can resolve.
		$this->digitTransformLang = $digitTransformLang instanceof Language
			? $digitTransformLang
			: $languageFactory->getLanguage( $digitTransformLang );
		parent::__construct( $languageFactory );
	}

	/**
	 * Build a sort key in which runs of digits compare by numeric magnitude.
	 *
	 * The parent's sort key is taken first, localized digits are mapped to
	 * ASCII digits, and every run of ASCII digits is then rewritten with a
	 * length prefix.
	 *
	 * @param string $string
	 * @return string
	 */
	public function getSortKey( $string ) {
		$sortkey = parent::getSortKey( $string );
		$sortkey = $this->convertDigits( $sortkey );

		// For each sequence of digits, insert the digit '0' and then the length of the sequence
		// (encoded in two bytes) before it. That's all folks, it sorts correctly now! The '0' ensures
		// correct position (where digits would normally sort), then the length will be compared putting
		// shorter numbers before longer ones; if identical, then the characters will be compared, which
		// generates the correct results for numbers of equal length.
		$sortkey = preg_replace_callback( '/\d+/', static function ( $matches ) {
			// Strip any leading zeros, so that e.g. "007" and "7" produce the same
			// segment. A run consisting only of zeros is left with length 0.
			$number = ltrim( $matches[0], '0' );
			$len = strlen( $number );

			// Two bytes (big-endian) encode lengths from 0 to 65535. One byte would
			// allow only for 255, which doesn't feel future-proof. intdiv() keeps the
			// high byte an integer instead of handing a float to chr().
			$prefix = chr( intdiv( $len, 256 ) ) . chr( $len % 256 );

			return '0' . $prefix . $number;
		}, $sortkey );

		return $sortkey;
	}

	/**
	 * Convert localized digits to english digits.
	 *
	 * based on Language::parseFormattedNumber but without commas.
	 *
	 * @param string $string sortkey to unlocalize digits of
	 * @return string Sortkey with all localized digits replaced with ASCII digits.
	 */
	private function convertDigits( $string ) {
		$table = $this->digitTransformLang->digitTransformTable();
		if ( $table ) {
			// Drop empty entries before flipping the table.
			$table = array_filter( $table );
			// Flip so that the table's values become the search strings and its
			// keys become the replacements.
			$flipped = array_flip( $table );
			// Some languages seem to also have commas in this table.
			// Keep only entries whose replacement is numeric.
			$flipped = array_filter( $flipped, 'is_numeric' );
			$string = strtr( $string, $flipped );
		}

		return $string;
	}

	/**
	 * Return the heading under which the given string is grouped.
	 *
	 * Strings that start with a digit (after localized digits are converted)
	 * share the 'category-header-numerals' heading; everything else is left
	 * to the parent collation.
	 *
	 * @param string $string
	 * @return string
	 */
	public function getFirstLetter( $string ) {
		$convertedString = $this->convertDigits( $string );

		if ( preg_match( '/^\d/', $convertedString ) ) {
			return wfMessage( 'category-header-numerals' )
				->numParams( 0, 9 )
				->text();
		}

		// Pass the original, unconverted string to the parent.
		return parent::getFirstLetter( $string );
	}
}
|