Adding support for numeric collation when using UCA collations

To use, add '-u-kn' to the end of a collation name and set it as
the value for $wgCategoryCollation.

Bug: T8948
Change-Id: Ica7908daf80624fa2648127114d01665e96234c0
This commit is contained in:
Kaldari 2016-07-14 20:47:52 -07:00
parent 58ea3e0902
commit 52c1b00dc0
4 changed files with 32 additions and 2 deletions

View file

@ -53,6 +53,8 @@ abstract class Collation {
return new IdentityCollation;
case 'uca-default':
return new IcuCollation( 'root' );
case 'uca-default-u-kn':
return new IcuCollation( 'root-u-kn' );
case 'xx-uca-ckb':
return new CollationCkb;
case 'xx-uca-et':

View file

@ -36,6 +36,9 @@ class IcuCollation extends Collation {
/** @var Language */
protected $digitTransformLanguage;
/** @var boolean */
private $useNumericCollation = false;
/** @var array */
private $firstLetterData;
@ -197,6 +200,15 @@ class IcuCollation extends Collation {
$this->primaryCollator = Collator::create( $locale );
$this->primaryCollator->setStrength( Collator::PRIMARY );
// If the special suffix for numeric collation is present, turn on numeric collation.
if ( substr( $locale, -5, 5 ) === '-u-kn' ) {
$this->useNumericCollation = true;
// Strip off the special suffix so it doesn't trip up fetchFirstLetterData().
$this->locale = substr( $this->locale, 0, -5 );
$this->mainCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
$this->primaryCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
}
}
public function getSortKey( $string ) {
@ -213,8 +225,9 @@ class IcuCollation extends Collation {
return '';
}
// Check for CJK
$firstChar = mb_substr( $string, 0, 1, 'UTF-8' );
// If the first character is a CJK character, just return that character.
if ( ord( $firstChar ) > 0x7f && self::isCjk( UtfNormal\Utils::utf8ToCodepoint( $firstChar ) ) ) {
return $firstChar;
}
@ -232,7 +245,19 @@ class IcuCollation extends Collation {
// Before the first letter
return '';
}
return $this->getLetterByIndex( $min );
$sortLetter = $this->getLetterByIndex( $min );
if ( $this->useNumericCollation ) {
// If the sort letter is a number, return '09' (or localized equivalent).
// ASCII value of 0 is 48. ASCII value of 9 is 57.
// Note that this also applies to non-Arabic numerals since they are
// mapped to Arabic numeral sort letters. For example, ২ sorts as 2.
if ( ord( $sortLetter ) >= 48 && ord( $sortLetter ) <= 57 ) {
$sortLetter = wfMessage( 'category-header-numerals' )->numParams( 0, 9 )->text();
}
}
return $sortLetter;
}
/**
@ -408,6 +433,7 @@ class IcuCollation extends Collation {
}
/**
* Test if a code point is a CJK (Chinese, Japanese, Korean) character
* @since 1.16.3
*/
public static function isCjk( $codepoint ) {

View file

@ -137,6 +137,7 @@
"noindex-category": "Noindexed pages",
"broken-file-category": "Pages with broken file links",
"categoryviewer-pagedlinks": "($1) ($2)",
"category-header-numerals": "$1$2",
"about": "About",
"article": "Content page",
"newwindow": "(opens in new window)",

View file

@ -318,6 +318,7 @@
"noindex-category": "Name of the [[mw:Special:MyLanguage/Help:Tracking categories|tracking category]] where pages with the <nowiki>__NOINDEX__</nowiki> behavior switch are listed.\n\nFor description of this behavior switch see [[mw:Special:MyLanguage/Help:Magic_words#Behavior_switches|MediaWiki]].\n\nSee also:\n* {{msg-mw|Noindex-category-desc}}",
"broken-file-category": "Name of [[mw:Special:MyLanguage/Help:Tracking categories|tracking category]] where pages that embed files that do not exist (\"broken images\") are listed.\n\nSee also:\n* {{msg-mw|Broken-file-category-desc}}",
"categoryviewer-pagedlinks": "{{Optional}}\nThe pagination links in category viewer. Parameters:\n* $1 - the previous link, uses {{msg-mw|Prevn}}\n* $2 - the next link, uses {{msg-mw|Nextn}}",
"category-header-numerals": "{{Optional}}\nA header for all pages whose titles start with a number. This is used on category pages. This should only be translated if your language uses a different method to indicate a range of numbers (other than a dash).\n* $1 - 0 (or localized equivalent)\n* $2 9 (or localized equivalent)",
"about": "{{Identical|About}}",
"article": "A 'content page' is a page that forms part of the purpose of the wiki. It includes the main page and pages in the main namespace and any other namespaces that are included when the wiki is customised. For example on Wikimedia Commons 'content pages' include pages in the file and category namespaces. On Wikinews 'content pages' include pages in the Portal namespace. For technical definition of 'content namespaces' see [[mw:Manual:Using_custom_namespaces#Content_namespaces|MediaWiki]].\n\nPossible alternatives to the word 'content' are 'subject matter' or 'wiki subject' or 'wiki purpose'.\n\n{{Identical|Content page}}",
"newwindow": "Below the edit form, next to \"{{msg-mw|Edithelp}}\".",