2013-03-24 03:09:43 +00:00
|
|
|
|
<?php
|
2013-10-24 10:54:02 +00:00
|
|
|
|
|
|
|
|
|
|
/**
 * Tests for sort key prefix behaviour of ICU collators and for
 * first-letter extraction by the collations produced by the
 * collation factory.
 *
 * @covers Collation
 * @covers IcuCollation
 * @covers IdentityCollation
 * @covers UppercaseCollation
 */
class CollationTest extends MediaWikiLangTestCase {

	/**
	 * Build the primary-strength sort keys shared by the prefix tests.
	 *
	 * @param string $lang Language code for collator
	 * @param string $base
	 * @param string $extended
	 * @return string[] Two-element list: the sort key of $base with any
	 *  trailing NUL bytes stripped, followed by the unmodified sort key
	 *  of $extended.
	 */
	private function getSortKeyPair( $lang, $base, $extended ) {
		$collator = Collator::create( $lang );
		$collator->setStrength( Collator::PRIMARY );

		// Remove sortkey terminator, otherwise the base key could never
		// match the beginning of a longer key.
		$baseBin = rtrim( $collator->getSortKey( $base ), "\0" );
		$extendedBin = $collator->getSortKey( $extended );

		return [ $baseBin, $extendedBin ];
	}

	/**
	 * Test to make sure, that if you
	 * have "X" and "XY", the binary
	 * sortkey also has "X" being a
	 * prefix of "XY". Our collation
	 * code makes this assumption.
	 *
	 * @param string $lang Language code for collator
	 * @param string $base
	 * @param string $extended String containing base as a prefix.
	 *
	 * @dataProvider prefixDataProvider
	 */
	public function testIsPrefix( $lang, $base, $extended ) {
		[ $baseBin, $extendedBin ] = $this->getSortKeyPair( $lang, $base, $extended );

		$this->assertStringStartsWith( $baseBin, $extendedBin, "$base is not a prefix of $extended" );
	}

	/**
	 * Data sets for testIsPrefix: [ language code, base, extended ].
	 *
	 * @return array[]
	 */
	public static function prefixDataProvider() {
		return [
			[ 'en', 'A', 'AA' ],
			[ 'en', 'A', 'AAA' ],
			[ 'en', 'Д', 'ДЂ' ],
			[ 'en', 'Д', 'ДA' ],
			// 'Ʒ' should expand to 'Z ' (note space).
			[ 'fi', 'Z', 'Ʒ' ],
			// 'Þ' should expand to 'th'
			[ 'sv', 't', 'Þ' ],
			// Javanese is a limited use alphabet, so should have 3 bytes
			// per character, so do some tests with it.
			[ 'en', 'ꦲ', 'ꦲꦤ' ],
			[ 'en', 'ꦲ', 'ꦲД' ],
			[ 'en', 'A', 'Aꦲ' ],
		];
	}

	/**
	 * Opposite of testIsPrefix: the sort key of $base must not be a
	 * prefix of the sort key of $extended.
	 *
	 * @param string $lang Language code for collator
	 * @param string $base
	 * @param string $extended String whose sort key must not start with
	 *  the sort key of $base.
	 *
	 * @dataProvider notPrefixDataProvider
	 */
	public function testNotIsPrefix( $lang, $base, $extended ) {
		[ $baseBin, $extendedBin ] = $this->getSortKeyPair( $lang, $base, $extended );

		$this->assertStringStartsNotWith( $baseBin, $extendedBin, "$base is a prefix of $extended" );
	}

	/**
	 * Data sets for testNotIsPrefix: [ language code, base, extended ].
	 *
	 * @return array[]
	 */
	public static function notPrefixDataProvider() {
		return [
			[ 'en', 'A', 'B' ],
			[ 'en', 'AC', 'ABC' ],
			[ 'en', 'Z', 'Ʒ' ],
			[ 'en', 'A', 'ꦲ' ],
		];
	}

	/**
	 * Test correct first letter is fetched.
	 *
	 * @param string $collation Collation name (aka uca-en)
	 * @param string $string String to get first letter of
	 * @param string $firstLetter Expected first letter.
	 *
	 * @dataProvider firstLetterProvider
	 */
	public function testGetFirstLetter( $collation, $string, $firstLetter ) {
		$col = $this->getServiceContainer()->getCollationFactory()->makeCollation( $collation );
		$this->assertEquals( $firstLetter, $col->getFirstLetter( $string ) );
	}

	/**
	 * Data sets for testGetFirstLetter:
	 * [ collation name, input string, expected first letter ].
	 *
	 * Declared static to match the other data providers in this class.
	 *
	 * @return array[]
	 */
	public static function firstLetterProvider() {
		return [
			[ 'uppercase', 'Abc', 'A' ],
			[ 'uppercase', 'abc', 'A' ],
			[ 'identity', 'abc', 'a' ],
			[ 'uca-en', 'abc', 'A' ],
			[ 'uca-en', ' ', ' ' ],
			[ 'uca-en', 'Êveryone', 'E' ],
			[ 'uca-vi', 'Êveryone', 'Ê' ],
			// Make sure thorn is not a first letter.
			[ 'uca-sv', 'The', 'T' ],
			[ 'uca-sv', 'Å', 'Å' ],
			[ 'uca-hu', 'dzsdo', 'Dzs' ],
			[ 'uca-hu', 'dzdso', 'Dz' ],
			[ 'uca-hu', 'CSD', 'Cs' ],
			[ 'uca-root', 'CSD', 'C' ],
			[ 'uca-fi', 'Ǥ', 'G' ],
			[ 'uca-fi', 'Ŧ', 'T' ],
			[ 'uca-fi', 'Ʒ', 'Z' ],
			[ 'uca-fi', 'Ŋ', 'N' ],
			[ 'uppercase-ba', 'в', 'В' ],
		];
	}
}
|