wiki.techinc.nl/tests/phpunit/includes/languages/LanguageCrhTest.php
Timo Tijhof 7c39f76452 Move Language subclasses to includes/
Depending on which namespace we want these classes to have after
T166010 they could either stay in includes/languages/ (plural) in
their own MediaWiki\Languages\-namespace dedicated to Language
subclasses, or they could go into a subdirectory like
`includes/language/languages/` if we want to keep them in the same
top-level namespace as other Language classes and services, but in
a more nested namespace.

For now, I've made the smaller change and kept the Language subclasses
in their own directory directly under includes/, not nested further.

Bug: T225756
Change-Id: I01015424707b442853879fd50c97f00215e5c2fa
2021-08-04 23:44:46 +01:00

142 lines
6.2 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
/**
* @group Language
* @covers CrhConverter
* @covers MediaWiki\Languages\Data\CrhExceptions
*/
class LanguageCrhTest extends LanguageClassesTestCase {

	/**
	 * Check single words plus minimal contextual transforms.
	 *
	 * Four inputs are converted to all variants for every word pair:
	 * the Latin form, the Cyrillic form, `<cyrillic> <latin>` and
	 * `<latin> <cyrillic>`. In each case the `crh` entry is expected to
	 * echo the input, while `crh-cyrl` / `crh-latn` are expected to be
	 * entirely in the respective script.
	 *
	 * @dataProvider provideAutoConvertToAllVariantsByWord
	 * @covers Language::autoConvertToAllVariants
	 *
	 * @param string $cyrl Cyrillic spelling of the word(s)
	 * @param string $lat Latin spelling of the same word(s)
	 */
	public function testAutoConvertToAllVariantsByWord( $cyrl, $lat ) {
		$cyrlTwice = $cyrl . ' ' . $cyrl;
		$latTwice = $lat . ' ' . $lat;

		// Each row: input text, expected Cyrillic output, expected Latin output.
		// The order matters: the first failing assertion aborts the test.
		$scenarios = [
			[ $lat, $cyrl, $lat ],
			[ $cyrl, $cyrl, $lat ],
			[ $cyrl . ' ' . $lat, $cyrlTwice, $latTwice ],
			[ $lat . ' ' . $cyrl, $cyrlTwice, $latTwice ],
		];

		foreach ( $scenarios as $scenario ) {
			$input = $scenario[0];
			$expected = [
				'crh' => $input,
				'crh-cyrl' => $scenario[1],
				'crh-latn' => $scenario[2],
			];
			$this->assertEquals(
				$expected,
				$this->getLang()->autoConvertToAllVariants( $input )
			);
		}
	}

	/**
	 * Word pairs for testAutoConvertToAllVariantsByWord().
	 *
	 * @return array[] List of `[ <cyrillic>, <latin> ]` pairs
	 */
	public static function provideAutoConvertToAllVariantsByWord() {
		return [
			// general words, covering more of the alphabet
			[ 'рузгярнынъ', 'ruzgârnıñ' ], [ 'Париж', 'Parij' ], [ 'чёкюч', 'çöküç' ],
			[ 'элифбени', 'elifbeni' ], [ 'полициясы', 'politsiyası' ], [ 'хусусында', 'hususında' ],
			[ 'акъшамларны', 'aqşamlarnı' ], [ 'опькеленюв', 'öpkelenüv' ],
			[ 'кулюмсиреди', 'külümsiredi' ], [ 'айтмайджагъым', 'aytmaycağım' ],
			[ 'козьяшсыз', 'közyaşsız' ],

			// exception words
			[ 'инструменталь', 'instrumental' ], [ 'гургуль', 'gürgül' ], [ 'тюшюнмемек', 'tüşünmemek' ],

			// specific problem words
			[ 'куню', 'künü' ], [ 'сюргюнлиги', 'sürgünligi' ], [ 'озю', 'özü' ], [ 'этти', 'etti' ],
			[ 'эсас', 'esas' ], [ 'дёрт', 'dört' ], [ 'кельди', 'keldi' ], [ 'км²', 'km²' ],
			[ 'юзь', 'yüz' ], [ 'АКъШ', 'AQŞ' ], [ 'ШСДжБнен', 'ŞSCBnen' ], [ 'июль', 'iyül' ],
			[ 'ишгъаль', 'işğal' ], [ 'ишгъальджилерине', 'işğalcilerine' ], [ 'район', 'rayon' ],
			[ 'районынынъ', 'rayonınıñ' ], [ 'Ногъай', 'Noğay' ], [ 'Юрьтю', 'Yürtü' ],
			[ 'ватандан', 'vatandan' ], [ 'ком-кок', 'köm-kök' ], [ 'АКЪКЪЫ', 'AQQI' ],
			[ 'ДАГЪГЪА', 'DAĞĞA' ], [ '13-юнджи', '13-ünci' ], [ 'ДЖУРЬМЕК', 'CÜRMEK' ],
			[ 'джумлеси', 'cümlesi' ], [ 'ильи', 'ilyi' ], [ 'Ильи', 'İlyi' ], [ 'бруцел', 'brutsel' ],
			[ 'коцюб', 'kotsüb' ], [ 'плацен', 'platsen' ], [ 'эпицентр', 'epitsentr' ],

			// -tsin- words
			[ 'кетсин', 'ketsin' ], [ 'кирлетсин', 'kirletsin' ], [ 'этсин', 'etsin' ],
			[ 'етсин', 'yetsin' ], [ 'этсинлерми', 'etsinlermi' ], [ 'принцини', 'printsini' ],
			[ 'медицина', 'meditsina' ], [ 'Щетсин', 'Şçetsin' ], [ 'Щекоцины', 'Şçekotsinı' ],

			// regex pattern words
			[ 'коюнден', 'köyünden' ], [ 'аньге', 'ange' ],

			// multi part words
			[ 'эки юз', 'eki yüz' ],

			// affix patterns
			[ 'койнинъ', 'köyniñ' ], [ 'Авджыкойде', 'Avcıköyde' ], [ 'экваториаль', 'ekvatorial' ],
			[ 'Джанкой', 'Canköy' ], [ 'усть', 'üst' ], [ 'роль', 'rol' ], [ 'буюк', 'büyük' ],
			[ 'джонк', 'cönk' ],

			// Roman numerals vs Initials, part 1 - Roman numeral initials without spaces
			[ 'А.Б.Дж.Д.М. Къадырова XII', 'A.B.C.D.M. Qadırova XII' ],

			// Roman numerals vs Initials, part 2 - Roman numeral initials with spaces
			[ 'Г. Х. Ы. В. X. Л. Меметов III', 'G. H. I. V. X. L. Memetov III' ],

			// ALL CAPS, made up acronyms
			[ 'НЪАБ', 'ÑAB' ], [ 'КЪЫДЖ', 'QIC' ], [ 'ГЪУК', 'ĞUK' ], [ 'ДЖОТ', 'COT' ], [ 'ДЖА', 'CA' ],
		];
	}

	/**
	 * Check whole strings whose conversion depends on context (such as
	 * Roman numerals), involves many-to-one mappings, or is otherwise
	 * asymmetric (such as smart quotes).
	 *
	 * @dataProvider provideAutoConvertToAllVariantsByString
	 * @covers Language::autoConvertToAllVariants
	 *
	 * @param string[] $result Expected output, keyed by variant code
	 * @param string $value Text handed to the converter
	 */
	public function testAutoConvertToAllVariantsByString( $result, $value ) {
		$converted = $this->getLang()->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $converted );
	}

	/**
	 * Datasets for testAutoConvertToAllVariantsByString().
	 *
	 * @return array[] List of `[ <expected variants map>, <input text> ]`
	 */
	public static function provideAutoConvertToAllVariantsByString() {
		// Roman numerals and quotes, esp. single-letter Roman numerals at the end of a string
		$romanNumeralsAndQuotes = [
			[
				'crh' => 'VI,VII IX “dört” «дёрт» XI XII I V X L C D M',
				'crh-cyrl' => 'VI,VII IX «дёрт» «дёрт» XI XII I V X L C D M',
				'crh-latn' => 'VI,VII IX “dört” "dört" XI XII I V X L C D M',
			],
			'VI,VII IX “dört” «дёрт» XI XII I V X L C D M'
		];

		// Many-to-one mappings: many Cyrillic to one Latin
		$manyCyrillicToOneLatin = [
			[
				'crh' => 'шофер шофёр şoför корбекул корьбекул корьбекуль körbekül',
				'crh-cyrl' => 'шофер шофёр шофёр корбекул корьбекул корьбекуль корьбекуль',
				'crh-latn' => 'şoför şoför şoför körbekül körbekül körbekül körbekül',
			],
			'шофер шофёр şoför корбекул корьбекул корьбекуль körbekül'
		];

		// Many-to-one mappings: many Latin to one Cyrillic
		$manyLatinToOneCyrillic = [
			[
				'crh' => 'fevqülade fevqulade февкъульаде beyude beyüde бейуде',
				'crh-cyrl' => 'февкъульаде февкъульаде февкъульаде бейуде бейуде бейуде',
				'crh-latn' => 'fevqülade fevqulade fevqulade beyude beyüde beyüde',
			],
			'fevqülade fevqulade февкъульаде beyude beyüde бейуде'
		];

		return [
			$romanNumeralsAndQuotes,
			$manyCyrillicToOneLatin,
			$manyLatinToOneCyrillic,
		];
	}
}