Fix bug in prefixing scheme
As Bawolff pointed out at [[mw:User talk:Simetrical/Collation]], the prefixing scheme I was using meant that a page named "Z" with a sort key of "F" would sort after a page named "A" with a sort key of "FF", even though "F" should sort before "FF": the first one's raw sort key would compute to "FZ", and the second's to "FFA". I've fixed this by separating the prefix from the unprefixed part with a null byte, so the raw sort keys become "F\0Z" and "FF\0A" and the unprefixed part only acts as a tiebreaker between identical prefixes (cl_sortkey is eventually going to be totally binary anyway, so we may as well start now).
This commit is contained in:
parent
98a7f4d756
commit
2ffa5e4876
5 changed files with 28 additions and 16 deletions
|
|
@ -312,7 +312,7 @@ class CategoryViewer {
|
|||
$count = 0;
|
||||
foreach ( $res as $row ) {
|
||||
$title = Title::newFromRow( $row );
|
||||
$rawSortkey = $row->cl_sortkey_prefix . $title->getCategorySortkey();
|
||||
$rawSortkey = $title->getCategorySortkey( $row->cl_sortkey_prefix );
|
||||
|
||||
if ( ++$count > $this->limit ) {
|
||||
# We've reached the one extra which shows that there
|
||||
|
|
|
|||
|
|
@ -457,7 +457,7 @@ class LinksUpdate {
|
|||
# order or such.
|
||||
$prefix = $sortkey;
|
||||
$sortkey = $wgContLang->convertToSortkey(
|
||||
$prefix . $this->mTitle->getCategorySortkey() );
|
||||
$this->mTitle->getCategorySortkey( $prefix ) );
|
||||
}
|
||||
|
||||
$arr[] = array(
|
||||
|
|
|
|||
|
|
@ -4139,20 +4139,29 @@ class Title {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns what the default sort key for categories would be, if
|
||||
* {{defaultsort:}} isn't used. This is the same as getText() for
|
||||
* categories, and for everything if $wgCategoryPrefixedDefaultSortkey is
|
||||
* false; otherwise it's the same as getPrefixedText().
|
||||
* Returns the raw sort key to be used for categories, with the specified
|
||||
* prefix. This will be fed to Language::convertToSortkey() to get a
|
||||
* binary sortkey that can be used for actual sorting.
|
||||
*
|
||||
* @param $prefix string The prefix to be used, specified using
|
||||
* {{defaultsort:}} or like [[Category:Foo|prefix]]. Empty for no
|
||||
* prefix.
|
||||
* @return string
|
||||
*/
|
||||
public function getCategorySortkey() {
|
||||
public function getCategorySortkey( $prefix = '' ) {
|
||||
global $wgCategoryPrefixedDefaultSortkey;
|
||||
if ( $this->getNamespace() == NS_CATEGORY
|
||||
|| !$wgCategoryPrefixedDefaultSortkey ) {
|
||||
return $this->getText();
|
||||
$unprefixed = $this->getText();
|
||||
} else {
|
||||
return $this->getPrefixedText();
|
||||
$unprefixed = $this->getPrefixedText();
|
||||
}
|
||||
if ( $prefix !== '' ) {
|
||||
# Separate with a null byte, so the unprefixed part is only used as
|
||||
# a tiebreaker when two pages have the exact same prefix -- null
|
||||
# sorts before everything else (hopefully).
|
||||
return "$prefix\0$unprefixed";
|
||||
}
|
||||
return $unprefixed;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2938,10 +2938,10 @@ class Language {
|
|||
/**
|
||||
* Given a string, convert it to a (hopefully short) key that can be used
|
||||
* for efficient sorting. A binary sort according to the sortkeys
|
||||
* corresponds to a logical sort of the corresponding strings. Applying
|
||||
* this to cl_sortkey_prefix concatenated with the page title (possibly
|
||||
* with namespace prefix, depending on $wgCategoryPrefixedDefaultSortkey)
|
||||
* gives you cl_sortkey.
|
||||
* corresponds to a logical sort of the corresponding strings. Current
|
||||
* code expects that a null character should sort before all others, but
|
||||
* has no other particular expectations (and that one can be changed if
|
||||
* necessary).
|
||||
*
|
||||
* @param string $string UTF-8 string
|
||||
* @return string Binary sortkey
|
||||
|
|
@ -2988,6 +2988,9 @@ class Language {
|
|||
* @return string UTF-8 string corresponding to the first letter of input
|
||||
*/
|
||||
public function firstLetterForLists( $string ) {
	# Returns the upper-cased first character of $string, for use as a
	# list heading. A single leading null byte is skipped first, so that
	# it is never itself returned as the "first letter".
	#
	# Guard against the empty string before reading offset 0: indexing an
	# empty string raises an "Uninitialized string offset" notice/warning.
	if ( $string !== '' && $string[0] === "\0" ) {
		$string = substr( $string, 1 );
	}
	# mb_substr() yields '' for empty input, so '' is returned in that case.
	return strtoupper( mb_substr( $string, 0, 1 ) );
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -57,11 +57,10 @@ TEXT;
|
|||
$dbw->begin();
|
||||
foreach ( $res as $row ) {
|
||||
$title = Title::newFromRow( $row );
|
||||
$rawSortkey = $title->getCategorySortkey();
|
||||
if ( $row->cl_collation == 0 ) {
|
||||
# This is an old-style row, so the sortkey needs to be
|
||||
# converted.
|
||||
if ( $row->cl_sortkey == $rawSortkey ) {
|
||||
if ( $row->cl_sortkey == $title->getCategorySortkey() ) {
|
||||
$prefix = '';
|
||||
} else {
|
||||
# Custom sortkey, use it as a prefix
|
||||
|
|
@ -82,7 +81,8 @@ TEXT;
|
|||
$dbw->update(
|
||||
'categorylinks',
|
||||
array(
|
||||
'cl_sortkey' => $wgContLang->convertToSortkey( $prefix . $rawSortkey ),
|
||||
'cl_sortkey' => $wgContLang->convertToSortkey(
|
||||
$title->getCategorySortkey( $prefix ) ),
|
||||
'cl_sortkey_prefix' => $prefix,
|
||||
'cl_collation' => $wgCollationVersion,
|
||||
'cl_type' => $type,
|
||||
|
|
|
|||
Loading…
Reference in a new issue