IcuCollation::$tailoringFirstLetters: implement letter removal

This is necessary for Swedish, where 'Þ' ("thorn") - considered a
separate letter by default in the first-letters-root.ser file - is
sorted as 'th'. This caused unexpected output on category pages: words
starting with 'th'..'u' were placed under a heading with the thorn.

There were three obvious ways to do this:
* somehow include information that this letter is to be removed in the
  string itself, as in 'sv' => array( "Å", "Ä", "Ö", "-Þ" ) - could
  potentially clash with valid uses
* create a separate array other than $tailoringFirstLetters to store
  this information - would cause the data to be fragmented all over
  the file
* include information about letters to be removed in a separate key
  "linked" to the regular one, as in '-sv' => array( "Þ" ) - I see no
  obvious downsides, so this is what I ended up doing

Bug: 45446
Change-Id: I57e07a2027c391c5baa767a68f4409b9de7b4618
This commit is contained in:
MatmaRex 2013-03-11 22:24:09 +01:00
parent 919bfc1057
commit ae38b340dc

View file

@ -181,7 +181,10 @@ class IcuCollation extends Collation {
/**
* Additional characters (or character groups) to be considered separate
* letters for given languages, compared to the data stored in the
* letters for given languages, or to be removed from the list of such
* letters (denoted by keys starting with '-').
*
* These are additions to (or subtractions from) the data stored in the
* first-letters-root.ser file (which among others includes full basic Latin,
* Cyrillic and Greek alphabets).
*
@ -262,6 +265,7 @@ class IcuCollation extends Collation {
'sq' => array( "Ç", "Dh", "Ë", "Gj", "Ll", "Nj", "Rr", "Sh", "Th", "Xh", "Zh" ),
'sr' => array(),
'sv' => array( "Å", "Ä", "Ö" ),
'-sv' => array( "Þ" ), // sorted as "th" in Swedish, causing unexpected output - bug 45446
'tk' => array( "Ç", "Ä", "Ž", "Ň", "Ö", "Ş", "Ü", "Ý" ),
'tl' => array( "Ñ", "Ng" ),
'tr' => array( "Ç", "Ğ", "İ", "Ö", "Ş", "Ü" ),
@ -352,7 +356,12 @@ class IcuCollation extends Collation {
if ( isset ( self::$tailoringFirstLetters[$this->locale] ) ) {
$letters = wfGetPrecompiledData( "first-letters-root.ser" );
// Append additional characters
$letters = array_merge( $letters, self::$tailoringFirstLetters[$this->locale] );
// Remove unnecessary ones, if any
if ( isset( self::$tailoringFirstLetters[ '-' . $this->locale ] ) ) {
$letters = array_diff( $letters, self::$tailoringFirstLetters[ '-' . $this->locale ] );
}
} else {
$letters = wfGetPrecompiledData( "first-letters-{$this->locale}.ser" );
if ( $letters === false ) {