wiki.techinc.nl/tests/phpunit/includes/languages/LanguageCrhTest.php
Umherirrender fd516a98e1 Fix whitespaces after comma
Change-Id: Ide6de0a53661e6f650099d7b1f274a02699441df
2022-12-15 01:24:14 +01:00

142 lines
6.2 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
/**
 * Conversion tests for the Crimean Tatar (crh) language variants:
 * the unconverted 'crh' text plus its 'crh-cyrl' and 'crh-latn' renderings.
 *
 * @group Language
 * @covers CrhConverter
 * @covers MediaWiki\Languages\Data\CrhExceptions
 */
class LanguageCrhTest extends LanguageClassesTestCase {
	/**
	 * Check a single Cyrillic/Latin word pair, alone and in minimal context.
	 *
	 * Four inputs are converted to all variants, in this order: the Latin
	 * form, the Cyrillic form, `<cyrillic> <latin>` and `<latin> <cyrillic>`.
	 * For every input the 'crh' entry must echo the input unchanged, while
	 * the 'crh-cyrl' and 'crh-latn' entries must be fully in one script.
	 *
	 * @dataProvider provideAutoConvertToAllVariantsByWord
	 * @covers Language::autoConvertToAllVariants
	 *
	 * @param string $cyrl Cyrillic spelling
	 * @param string $lat Latin spelling of the same text
	 */
	public function testAutoConvertToAllVariantsByWord( $cyrl, $lat ) {
		$doubledCyrl = $cyrl . ' ' . $cyrl;
		$doubledLat = $lat . ' ' . $lat;

		// Each row: [ input, expected 'crh-cyrl', expected 'crh-latn' ].
		// A plain list is used (rather than input-keyed entries) so that no
		// row can silently overwrite another.
		$scenarios = [
			[ $lat, $cyrl, $lat ],
			[ $cyrl, $cyrl, $lat ],
			[ $cyrl . ' ' . $lat, $doubledCyrl, $doubledLat ],
			[ $lat . ' ' . $cyrl, $doubledCyrl, $doubledLat ],
		];

		foreach ( $scenarios as [ $input, $wantCyrl, $wantLat ] ) {
			$expected = [
				'crh' => $input,
				'crh-cyrl' => $wantCyrl,
				'crh-latn' => $wantLat,
			];
			$this->assertEquals( $expected, $this->getLang()->autoConvertToAllVariants( $input ) );
		}
	}

	/**
	 * Word pairs for testAutoConvertToAllVariantsByWord().
	 *
	 * @return array[] List of [ Cyrillic form, Latin form ] pairs
	 */
	public static function provideAutoConvertToAllVariantsByWord() {
		return [
			// ordinary vocabulary, chosen to exercise a broad part of the alphabet
			[ 'рузгярнынъ', 'ruzgârnıñ' ], [ 'Париж', 'Parij' ], [ 'чёкюч', 'çöküç' ],
			[ 'элифбени', 'elifbeni' ], [ 'полициясы', 'politsiyası' ], [ 'хусусында', 'hususında' ],
			[ 'акъшамларны', 'aqşamlarnı' ], [ 'опькеленюв', 'öpkelenüv' ],
			[ 'кулюмсиреди', 'külümsiredi' ], [ 'айтмайджагъым', 'aytmaycağım' ],
			[ 'козьяшсыз', 'közyaşsız' ],

			// words handled as exceptions
			[ 'инструменталь', 'instrumental' ], [ 'гургуль', 'gürgül' ], [ 'тюшюнмемек', 'tüşünmemek' ],

			// words that were specifically problematic
			[ 'куню', 'künü' ], [ 'сюргюнлиги', 'sürgünligi' ], [ 'озю', 'özü' ], [ 'этти', 'etti' ],
			[ 'эсас', 'esas' ], [ 'дёрт', 'dört' ], [ 'кельди', 'keldi' ], [ 'км²', 'km²' ],
			[ 'юзь', 'yüz' ], [ 'АКъШ', 'AQŞ' ], [ 'ШСДжБнен', 'ŞSCBnen' ], [ 'июль', 'iyül' ],
			[ 'ишгъаль', 'işğal' ], [ 'ишгъальджилерине', 'işğalcilerine' ], [ 'район', 'rayon' ],
			[ 'районынынъ', 'rayonınıñ' ], [ 'Ногъай', 'Noğay' ], [ 'Юрьтю', 'Yürtü' ],
			[ 'ватандан', 'vatandan' ], [ 'ком-кок', 'köm-kök' ], [ 'АКЪКЪЫ', 'AQQI' ],
			[ 'ДАГЪГЪА', 'DAĞĞA' ], [ '13-юнджи', '13-ünci' ], [ 'ДЖУРЬМЕК', 'CÜRMEK' ],
			[ 'джумлеси', 'cümlesi' ], [ 'ильи', 'ilyi' ], [ 'Ильи', 'İlyi' ], [ 'бруцел', 'brutsel' ],
			[ 'коцюб', 'kotsüb' ], [ 'плацен', 'platsen' ], [ 'эпицентр', 'epitsentr' ],

			// words containing -tsin-
			[ 'кетсин', 'ketsin' ], [ 'кирлетсин', 'kirletsin' ], [ 'этсин', 'etsin' ],
			[ 'етсин', 'yetsin' ], [ 'этсинлерми', 'etsinlermi' ], [ 'принцини', 'printsini' ],
			[ 'медицина', 'meditsina' ], [ 'Щетсин', 'Şçetsin' ], [ 'Щекоцины', 'Şçekotsinı' ],

			// words matched by regex patterns
			[ 'коюнден', 'köyünden' ], [ 'аньге', 'ange' ],

			// input made of more than one word
			[ 'эки юз', 'eki yüz' ],

			// affix patterns
			[ 'койнинъ', 'köyniñ' ], [ 'Авджыкойде', 'Avcıköyde' ], [ 'экваториаль', 'ekvatorial' ],
			[ 'Джанкой', 'Canköy' ], [ 'усть', 'üst' ], [ 'роль', 'rol' ], [ 'буюк', 'büyük' ],
			[ 'джонк', 'cönk' ],

			// Roman numerals vs initials (1): initials written without spaces
			[ 'А.Б.Дж.Д.М. Къадырова XII', 'A.B.C.D.M. Qadırova XII' ],

			// Roman numerals vs initials (2): initials separated by spaces
			[ 'Г. Х. Ы. В. X. Л. Меметов III', 'G. H. I. V. X. L. Memetov III' ],

			// invented ALL-CAPS acronyms
			[ 'НЪАБ', 'ÑAB' ], [ 'КЪЫДЖ', 'QIC' ], [ 'ГЪУК', 'ĞUK' ], [ 'ДЖОТ', 'COT' ], [ 'ДЖА', 'CA' ],
		];
	}

	/**
	 * Check whole strings whose conversion is not a symmetric word-for-word
	 * mapping: inputs needing context (such as Roman numerals), many-to-one
	 * mappings, and other asymmetric results (such as smart quotes).
	 *
	 * @dataProvider provideAutoConvertToAllVariantsByString
	 * @covers Language::autoConvertToAllVariants
	 *
	 * @param string[] $result Expected text per variant code
	 * @param string $value Text handed to the converter
	 */
	public function testAutoConvertToAllVariantsByString( $result, $value ) {
		$converted = $this->getLang()->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $converted );
	}

	/**
	 * Cases for testAutoConvertToAllVariantsByString().
	 *
	 * @return array[] List of [ expected per-variant texts, input text ]
	 */
	public static function provideAutoConvertToAllVariantsByString() {
		return [
			// Roman numerals and quotation marks, in particular single-letter
			// Roman numerals at the very end of the string
			[
				[
					'crh' => 'VI,VII IX “dört” «дёрт» XI XII I V X L C D M',
					'crh-cyrl' => 'VI,VII IX «дёрт» «дёрт» XI XII I V X L C D M',
					'crh-latn' => 'VI,VII IX “dört” "dört" XI XII I V X L C D M',
				],
				'VI,VII IX “dört” «дёрт» XI XII I V X L C D M'
			],

			// Many-to-one: several Cyrillic spellings collapse to one Latin spelling
			[
				[
					'crh' => 'шофер шофёр şoför корбекул корьбекул корьбекуль körbekül',
					'crh-cyrl' => 'шофер шофёр шофёр корбекул корьбекул корьбекуль корьбекуль',
					'crh-latn' => 'şoför şoför şoför körbekül körbekül körbekül körbekül',
				],
				'шофер шофёр şoför корбекул корьбекул корьбекуль körbekül'
			],

			// Many-to-one: several Latin spellings collapse to one Cyrillic spelling
			[
				[
					'crh' => 'fevqülade fevqulade февкъульаде beyude beyüde бейуде',
					'crh-cyrl' => 'февкъульаде февкъульаде февкъульаде бейуде бейуде бейуде',
					'crh-latn' => 'fevqülade fevqulade fevqulade beyude beyüde beyüde',
				],
				'fevqülade fevqulade февкъульаде beyude beyüde бейуде'
			],
		];
	}
}