languages: Move Converter and tests to respective files

Bug: T226833, T243760
Change-Id: I6fc7f267098d663fbefd0e78457726c343c9b3e4
This commit is contained in:
Peter Ovchyn 2020-01-23 20:39:23 +02:00
parent 61e0908fa2
commit 863a1d5bdd
29 changed files with 1408 additions and 564 deletions

View file

@ -330,7 +330,7 @@ $wgAutoloadLocalClasses = [
'CreateCommonPasswordCdb' => __DIR__ . '/maintenance/createCommonPasswordCdb.php',
'CreateFileOp' => __DIR__ . '/includes/libs/filebackend/fileop/CreateFileOp.php',
'CreditsAction' => __DIR__ . '/includes/actions/CreditsAction.php',
'CrhConverter' => __DIR__ . '/languages/classes/LanguageCrh.php',
'CrhConverter' => __DIR__ . '/languages/converters/CrhConverter.php',
'CryptHKDF' => __DIR__ . '/includes/libs/CryptHKDF.php',
'CssContent' => __DIR__ . '/includes/content/CssContent.php',
'CssContentHandler' => __DIR__ . '/includes/content/CssContentHandler.php',
@ -451,7 +451,7 @@ $wgAutoloadLocalClasses = [
'EmaillingJob' => __DIR__ . '/includes/jobqueue/jobs/EmaillingJob.php',
'EmptyBagOStuff' => __DIR__ . '/includes/libs/objectcache/EmptyBagOStuff.php',
'EmptyUserGroup' => __DIR__ . '/maintenance/emptyUserGroup.php',
'EnConverter' => __DIR__ . '/languages/classes/LanguageEn.php',
'EnConverter' => __DIR__ . '/languages/converters/EnConverter.php',
'EncryptedPassword' => __DIR__ . '/includes/password/EncryptedPassword.php',
'EnhancedChangesList' => __DIR__ . '/includes/changes/EnhancedChangesList.php',
'EnotifNotifyJob' => __DIR__ . '/includes/jobqueue/jobs/EnotifNotifyJob.php',
@ -556,7 +556,7 @@ $wgAutoloadLocalClasses = [
'FormlessAction' => __DIR__ . '/includes/actions/FormlessAction.php',
'GIFHandler' => __DIR__ . '/includes/media/GIFHandler.php',
'GIFMetadataExtractor' => __DIR__ . '/includes/media/GIFMetadataExtractor.php',
'GanConverter' => __DIR__ . '/languages/classes/LanguageGan.php',
'GanConverter' => __DIR__ . '/languages/converters/GanConverter.php',
'GenderCache' => __DIR__ . '/includes/cache/GenderCache.php',
'GenerateCollationData' => __DIR__ . '/maintenance/language/generateCollationData.php',
'GenerateJsonI18n' => __DIR__ . '/maintenance/generateJsonI18n.php',
@ -693,7 +693,7 @@ $wgAutoloadLocalClasses = [
'InvalidPassword' => __DIR__ . '/includes/password/InvalidPassword.php',
'InvalidateUserSesssions' => __DIR__ . '/maintenance/invalidateUserSessions.php',
'IteratorDecorator' => __DIR__ . '/includes/libs/iterators/IteratorDecorator.php',
'IuConverter' => __DIR__ . '/languages/classes/LanguageIu.php',
'IuConverter' => __DIR__ . '/languages/converters/IuConverter.php',
'JSCompilerContext' => __DIR__ . '/includes/libs/jsminplus.php',
'JSMinPlus' => __DIR__ . '/includes/libs/jsminplus.php',
'JSNode' => __DIR__ . '/includes/libs/jsminplus.php',
@ -722,8 +722,8 @@ $wgAutoloadLocalClasses = [
'JpegMetadataExtractor' => __DIR__ . '/includes/media/JpegMetadataExtractor.php',
'JsonContent' => __DIR__ . '/includes/content/JsonContent.php',
'JsonContentHandler' => __DIR__ . '/includes/content/JsonContentHandler.php',
'KkConverter' => __DIR__ . '/languages/classes/LanguageKk.php',
'KuConverter' => __DIR__ . '/languages/classes/LanguageKu.php',
'KkConverter' => __DIR__ . '/languages/converters/KkConverter.php',
'KuConverter' => __DIR__ . '/languages/converters/KuConverter.php',
'LCStore' => __DIR__ . '/includes/cache/localisation/LCStore.php',
'LCStoreCDB' => __DIR__ . '/includes/cache/localisation/LCStoreCDB.php',
'LCStoreDB' => __DIR__ . '/includes/cache/localisation/LCStoreDB.php',
@ -1296,7 +1296,7 @@ $wgAutoloadLocalClasses = [
'SectionProfiler' => __DIR__ . '/includes/profiler/SectionProfiler.php',
'SerializedValueContainer' => __DIR__ . '/includes/libs/objectcache/serialized/SerializedValueContainer.php',
'SevenZipStream' => __DIR__ . '/maintenance/includes/SevenZipStream.php',
'ShiConverter' => __DIR__ . '/languages/classes/LanguageShi.php',
'ShiConverter' => __DIR__ . '/languages/converters/ShiConverter.php',
'ShowJobs' => __DIR__ . '/maintenance/showJobs.php',
'ShowSiteStats' => __DIR__ . '/maintenance/showSiteStats.php',
'Site' => __DIR__ . '/includes/site/Site.php',
@ -1458,7 +1458,7 @@ $wgAutoloadLocalClasses = [
'SqliteUpdater' => __DIR__ . '/includes/installer/SqliteUpdater.php',
'SquidPurgeClient' => __DIR__ . '/includes/clientpool/SquidPurgeClient.php',
'SquidPurgeClientPool' => __DIR__ . '/includes/clientpool/SquidPurgeClientPool.php',
'SrConverter' => __DIR__ . '/languages/classes/LanguageSr.php',
'SrConverter' => __DIR__ . '/languages/converters/SrConverter.php',
'StatsOutput' => __DIR__ . '/maintenance/language/StatOutputs.php',
'StatsdAwareInterface' => __DIR__ . '/includes/libs/stats/StatsdAwareInterface.php',
'Status' => __DIR__ . '/includes/Status.php',
@ -1494,7 +1494,7 @@ $wgAutoloadLocalClasses = [
'TextPassDumper' => __DIR__ . '/maintenance/includes/TextPassDumper.php',
'TextSlotDiffRenderer' => __DIR__ . '/includes/diff/TextSlotDiffRenderer.php',
'TextStatsOutput' => __DIR__ . '/maintenance/language/StatOutputs.php',
'TgConverter' => __DIR__ . '/languages/classes/LanguageTg.php',
'TgConverter' => __DIR__ . '/languages/converters/TgConverter.php',
'ThrottledError' => __DIR__ . '/includes/exception/ThrottledError.php',
'ThumbnailImage' => __DIR__ . '/includes/media/ThumbnailImage.php',
'ThumbnailRenderJob' => __DIR__ . '/includes/jobqueue/jobs/ThumbnailRenderJob.php',
@ -1584,7 +1584,7 @@ $wgAutoloadLocalClasses = [
'UserRightsProxy' => __DIR__ . '/includes/user/UserRightsProxy.php',
'UserrightsPage' => __DIR__ . '/includes/specials/SpecialUserrights.php',
'UsersPager' => __DIR__ . '/includes/specials/pagers/UsersPager.php',
'UzConverter' => __DIR__ . '/languages/classes/LanguageUz.php',
'UzConverter' => __DIR__ . '/languages/converters/UzConverter.php',
'VFormHTMLForm' => __DIR__ . '/includes/htmlform/VFormHTMLForm.php',
'ValidateRegistrationFile' => __DIR__ . '/maintenance/validateRegistrationFile.php',
'VersionChecker' => __DIR__ . '/includes/registration/VersionChecker.php',
@ -1715,7 +1715,7 @@ $wgAutoloadLocalClasses = [
'XmlJsCode' => __DIR__ . '/includes/XmlJsCode.php',
'XmlSelect' => __DIR__ . '/includes/XmlSelect.php',
'XmlTypeCheck' => __DIR__ . '/includes/libs/mime/XmlTypeCheck.php',
'ZhConverter' => __DIR__ . '/languages/classes/LanguageZh.php',
'ZhConverter' => __DIR__ . '/languages/converters/ZhConverter.php',
'ZipDirectoryReader' => __DIR__ . '/includes/utils/ZipDirectoryReader.php',
'ZipDirectoryReaderError' => __DIR__ . '/includes/utils/ZipDirectoryReaderError.php',
'concatenatedgziphistoryblob' => __DIR__ . '/includes/historyblob/ConcatenatedGzipHistoryBlob.php',

View file

@ -20,62 +20,6 @@
* @file
*/
/**
 * Converter for English whose only non-trivial variant is Pig Latin
 * ('en-x-piglatin').
 *
 * @ingroup Language
 */
class EnConverter extends LanguageConverter {

	/**
	 * @param Language $langobj Language object handed on to the parent constructor
	 */
	public function __construct( Language $langobj ) {
		$variantCodes = [ 'en', 'en-x-piglatin' ];
		parent::__construct( $langobj, 'en', $variantCodes );
	}

	/**
	 * Required by the base class. Both variants get an empty replacement
	 * table because the actual conversion is done in translate().
	 */
	protected function loadDefaultTables() {
		$tables = [];
		foreach ( [ 'en', 'en-x-piglatin' ] as $code ) {
			$tables[$code] = new ReplacementArray();
		}
		$this->mTables = $tables;
	}

	/**
	 * Rewrites text as Pig Latin when the target variant is 'en-x-piglatin';
	 * any other variant gets the text back untouched.
	 *
	 * A word here is one ASCII letter of either case followed by at least one
	 * lowercase ASCII letter or apostrophe; everything else is left alone.
	 *
	 * @param string $text
	 * @param string $toVariant
	 * @return string
	 */
	public function translate( $text, $toVariant ) {
		if ( $toVariant === 'en-x-piglatin' ) {
			return preg_replace_callback( '/[A-Za-z][a-z\']+/', function ( $found ) {
				$token = $found[0];
				if ( !preg_match( '/^[aeiou]/i', $token ) ) {
					// Move the leading consonant cluster (or "qu"/"squ") to the end.
					return preg_replace_callback(
						'/^(s?qu|[^aeiou][^aeiouy]*)(.*)$/i',
						function ( $parts ) {
							$onset = $parts[1];
							$remainder = $parts[2];
							// Keep an initial capital on the new first letter.
							$startsUpper = strtoupper( $onset[0] ) === $onset[0];
							return $startsUpper
								? ucfirst( $remainder ) . lcfirst( $onset ) . 'ay'
								: $remainder . $onset . 'ay';
						},
						$token
					);
				}
				// Vowel-initial words only receive the "way" suffix.
				return $token . 'way';
			}, $text );
		}

		return $text;
	}
}
/**
* English
*

View file

@ -20,59 +20,6 @@
* @file
*/
/**
 * Converter for Gan Chinese with an unconverted variant ('gan') plus
 * 'gan-hans' and 'gan-hant', both backed by the ZhConversion tables.
 *
 * @ingroup Language
 */
class GanConverter extends LanguageConverter {

	/**
	 * @param Language $langobj Language object handed on to the parent constructor
	 */
	public function __construct( Language $langobj ) {
		// Both separators are emptied before the parent constructor runs.
		$this->mDescCodeSep = '';
		$this->mDescVarSep = '';

		parent::__construct(
			$langobj,
			'gan',
			[ 'gan', 'gan-hans', 'gan-hant' ],
			[
				'gan' => [ 'gan-hans', 'gan-hant' ],
				'gan-hans' => [ 'gan' ],
				'gan-hant' => [ 'gan' ],
			],
			[],
			[
				'gan' => 'disable',
			]
		);

		// Display names for the variants, layered over whatever the parent set up.
		$this->mVariantNames = array_merge( $this->mVariantNames, [
			'gan' => '原文',
			'gan-hans' => '简体',
			'gan-hant' => '繁體',
		] );
	}

	/**
	 * Builds the per-variant replacement tables: the two script variants use
	 * the ZhConversion data, the base variant gets an empty table.
	 */
	protected function loadDefaultTables() {
		$simplified = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hans );
		$traditional = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hant );

		$this->mTables = [
			'gan-hans' => $simplified,
			'gan-hant' => $traditional,
			'gan' => new ReplacementArray()
		];
	}

	/**
	 * Runs the category key through autoConvert() with 'gan' as the target.
	 *
	 * @param string $key
	 * @return string
	 */
	public function convertCategoryKey( $key ) {
		$converted = $this->autoConvert( $key, 'gan' );
		return $converted;
	}
}
/**
* Gan Chinese
*
@ -93,5 +40,4 @@ class LanguageGan extends LanguageZh {
// LanguageZh::normalizeForSearch
return parent::normalizeForSearch( $string, $autoVariant );
}
}

View file

@ -21,344 +21,6 @@
* @ingroup Language
*/
// Letter inventories of the Kazakh alphabets. KkConverter concatenates these into
// regular-expression character classes, so each value is a plain run of letters.
define( 'KK_C_UC', 'АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯ' ); # Kazakh Cyrillic uppercase
define( 'KK_C_LC', 'аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя' ); # Kazakh Cyrillic lowercase
define( 'KK_L_UC', 'AÄBCÇDEÉFGĞHIİÏJKLMNÑOÖPQRSŞTUÜVWXYÝZ' ); # Kazakh Latin uppercase
define( 'KK_L_LC', 'aäbcçdeéfgğhıiïjklmnñoöpqrsştuüvwxyýz' ); # Kazakh Latin lowercase
// Not defined at present (kept for reference only):
// define( 'KK_A', 'ٴابپتجحدرزسشعفقكلمنڭەوۇۋۆىيچھ' ); # Kazakh Arabic
// Prefix that KkConverter::regsConverter() puts in front of letter runs in qualifying words.
define( 'H_HAMZA', 'ٴ' ); # U+0674 ARABIC LETTER HIGH HAMZA
// Not defined at present (kept for reference only):
// define( 'ZWNJ', '' ); # U+200C ZERO WIDTH NON-JOINER
/**
* Kazakh (Қазақша) converter routines
*
* Converts text between the Cyrillic (kk-cyrl, kk-kz), Latin (kk-latn, kk-tr)
* and Arabic (kk-arab, kk-cn) variants. translate() first delegates to the
* parent class and then applies the regular-expression maps built by loadRegs().
*
* @ingroup Language
*/
class KkConverter extends LanguageConverter {
/**
* Pattern => replacement maps filled in by loadRegs() and applied, in array
* order, by regsConverter(): Cyrillic to Latin, Latin to Cyrillic, and
* Cyrillic (plus punctuation and digits) to Arabic.
*/
protected $mCyrl2Latn, $mLatn2Cyrl, $mCyLa2Arab;
/**
* Registers the seven Kazakh variants and their fallbacks with the parent
* class, then builds the regex maps.
*
* @param Language $langobj
*/
public function __construct( Language $langobj ) {
$variants = [ 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn' ];
$variantfallbacks = [
'kk' => 'kk-cyrl',
'kk-cyrl' => 'kk',
'kk-latn' => 'kk',
'kk-arab' => 'kk',
'kk-kz' => 'kk-cyrl',
'kk-tr' => 'kk-latn',
'kk-cn' => 'kk-arab'
];
parent::__construct( $langobj, 'kk',
$variants, $variantfallbacks, [] );
// No point delaying this since they're in code.
// Waiting until loadDefaultTables() means they never get loaded
// when the tables themselves are loaded from cache.
$this->loadRegs();
}
/**
* Sets mTables to one ReplacementArray per variant. Every source array is
* currently an empty placeholder, so all tables start out empty.
*/
protected function loadDefaultTables() {
// require __DIR__."/../../includes/KkConversion.php";
// Placeholder for future implementing. Remove variables declarations
// after generating KkConversion.php
$kk2Cyrl = [];
$kk2Latn = [];
$kk2Arab = [];
$kk2KZ = [];
$kk2TR = [];
$kk2CN = [];
$this->mTables = [
'kk-cyrl' => new ReplacementArray( $kk2Cyrl ),
'kk-latn' => new ReplacementArray( $kk2Latn ),
'kk-arab' => new ReplacementArray( $kk2Arab ),
'kk-kz' => new ReplacementArray( array_merge( $kk2Cyrl, $kk2KZ ) ),
'kk-tr' => new ReplacementArray( array_merge( $kk2Latn, $kk2TR ) ),
'kk-cn' => new ReplacementArray( array_merge( $kk2Arab, $kk2CN ) ),
'kk' => new ReplacementArray()
];
}
/**
* Merges each script-level table into its regional counterpart:
* kk-cyrl into kk-kz, kk-latn into kk-tr and kk-arab into kk-cn.
*/
protected function postLoadTables() {
$this->mTables['kk-kz']->merge( $this->mTables['kk-cyrl'] );
$this->mTables['kk-tr']->merge( $this->mTables['kk-latn'] );
$this->mTables['kk-cn']->merge( $this->mTables['kk-arab'] );
}
/**
* Fills the three regex maps used by regsConverter(). Each key is a PCRE
* pattern and each value its replacement string. regsConverter() walks a map
* from first entry to last, so the context-sensitive and multi-letter rules
* listed at the top of each map run before the single-letter rules below them.
*/
private function loadRegs() {
$this->mCyrl2Latn = [
# # Punctuation
'/№/u' => 'No.',
# # Е after vowels
'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])Е/u' => '$1YE',
'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1ye',
# # leading ЁЮЯЩ
'/^Ё([' . KK_C_UC . ']|$)/u' => 'YO$1', '/^Ё([' . KK_C_LC . ']|$)/u' => 'Yo$1',
'/^Ю([' . KK_C_UC . ']|$)/u' => 'YU$1', '/^Ю([' . KK_C_LC . ']|$)/u' => 'Yu$1',
'/^Я([' . KK_C_UC . ']|$)/u' => 'YA$1', '/^Я([' . KK_C_LC . ']|$)/u' => 'Ya$1',
'/^Щ([' . KK_C_UC . ']|$)/u' => 'ŞÇ$1', '/^Щ([' . KK_C_LC . ']|$)/u' => 'Şç$1',
# # other ЁЮЯ
'/Ё/u' => 'YO', '/ё/u' => 'yo',
'/Ю/u' => 'YU', '/ю/u' => 'yu',
'/Я/u' => 'YA', '/я/u' => 'ya',
'/Щ/u' => 'ŞÇ', '/щ/u' => 'şç',
# # soft and hard signs
'/[ъЪ]/u' => 'ʺ', '/[ьЬ]/u' => 'ʹ',
# # other characters
'/А/u' => 'A', '/а/u' => 'a', '/Ә/u' => 'Ä', '/ә/u' => 'ä',
'/Б/u' => 'B', '/б/u' => 'b', '/В/u' => 'V', '/в/u' => 'v',
'/Г/u' => 'G', '/г/u' => 'g', '/Ғ/u' => 'Ğ', '/ғ/u' => 'ğ',
'/Д/u' => 'D', '/д/u' => 'd', '/Е/u' => 'E', '/е/u' => 'e',
'/Ж/u' => 'J', '/ж/u' => 'j', '/З/u' => 'Z', '/з/u' => 'z',
'/И/u' => 'Ï', '/и/u' => 'ï', '/Й/u' => 'Ý', '/й/u' => 'ý',
'/К/u' => 'K', '/к/u' => 'k', '/Қ/u' => 'Q', '/қ/u' => 'q',
'/Л/u' => 'L', '/л/u' => 'l', '/М/u' => 'M', '/м/u' => 'm',
'/Н/u' => 'N', '/н/u' => 'n', '/Ң/u' => 'Ñ', '/ң/u' => 'ñ',
'/О/u' => 'O', '/о/u' => 'o', '/Ө/u' => 'Ö', '/ө/u' => 'ö',
'/П/u' => 'P', '/п/u' => 'p', '/Р/u' => 'R', '/р/u' => 'r',
'/С/u' => 'S', '/с/u' => 's', '/Т/u' => 'T', '/т/u' => 't',
'/У/u' => 'W', '/у/u' => 'w', '/Ұ/u' => 'U', '/ұ/u' => 'u',
'/Ү/u' => 'Ü', '/ү/u' => 'ü', '/Ф/u' => 'F', '/ф/u' => 'f',
'/Х/u' => 'X', '/х/u' => 'x', '/Һ/u' => 'H', '/һ/u' => 'h',
'/Ц/u' => 'C', '/ц/u' => 'c', '/Ч/u' => 'Ç', '/ч/u' => 'ç',
'/Ш/u' => 'Ş', '/ш/u' => 'ş', '/Ы/u' => 'I', '/ы/u' => 'ı',
'/І/u' => 'İ', '/і/u' => 'i', '/Э/u' => 'É', '/э/u' => 'é',
];
$this->mLatn2Cyrl = [
# # Punctuation
'/#|No\./' => '№',
# # Şç
'/ŞÇʹ/u' => 'ЩЬ', '/Şçʹ/u' => 'Щь',
'/Ş[Çç]/u' => 'Щ', '/şç/u' => 'щ',
# # soft and hard signs
'/([' . KK_L_UC . '])ʺ([' . KK_L_UC . '])/u' => '$1Ъ$2',
'/ʺ([' . KK_L_LC . '])/u' => 'ъ$1',
'/([' . KK_L_UC . '])ʹ([' . KK_L_UC . '])/u' => '$1Ь$2',
'/ʹ([' . KK_L_LC . '])/u' => 'ь$1',
'/ʺ/u' => 'ъ',
'/ʹ/u' => 'ь',
# # Ye Yo Yu Ya.
'/Y[Ee]/u' => 'Е', '/ye/u' => 'е',
'/Y[Oo]/u' => 'Ё', '/yo/u' => 'ё',
'/Y[UWuw]/u' => 'Ю', '/y[uw]/u' => 'ю',
'/Y[Aa]/u' => 'Я', '/ya/u' => 'я',
# # other characters
'/A/u' => 'А', '/a/u' => 'а', '/Ä/u' => 'Ә', '/ä/u' => 'ә',
'/B/u' => 'Б', '/b/u' => 'б', '/C/u' => 'Ц', '/c/u' => 'ц',
'/Ç/u' => 'Ч', '/ç/u' => 'ч', '/D/u' => 'Д', '/d/u' => 'д',
'/E/u' => 'Е', '/e/u' => 'е', '/É/u' => 'Э', '/é/u' => 'э',
'/F/u' => 'Ф', '/f/u' => 'ф', '/G/u' => 'Г', '/g/u' => 'г',
'/Ğ/u' => 'Ғ', '/ğ/u' => 'ғ', '/H/u' => 'Һ', '/h/u' => 'һ',
'/I/u' => 'Ы', '/ı/u' => 'ы', '/İ/u' => 'І', '/i/u' => 'і',
'/Ï/u' => 'И', '/ï/u' => 'и', '/J/u' => 'Ж', '/j/u' => 'ж',
'/K/u' => 'К', '/k/u' => 'к', '/L/u' => 'Л', '/l/u' => 'л',
'/M/u' => 'М', '/m/u' => 'м', '/N/u' => 'Н', '/n/u' => 'н',
'/Ñ/u' => 'Ң', '/ñ/u' => 'ң', '/O/u' => 'О', '/o/u' => 'о',
'/Ö/u' => 'Ө', '/ö/u' => 'ө', '/P/u' => 'П', '/p/u' => 'п',
'/Q/u' => 'Қ', '/q/u' => 'қ', '/R/u' => 'Р', '/r/u' => 'р',
'/S/u' => 'С', '/s/u' => 'с', '/Ş/u' => 'Ш', '/ş/u' => 'ш',
'/T/u' => 'Т', '/t/u' => 'т', '/U/u' => 'Ұ', '/u/u' => 'ұ',
'/Ü/u' => 'Ү', '/ü/u' => 'ү', '/V/u' => 'В', '/v/u' => 'в',
'/W/u' => 'У', '/w/u' => 'у', '/Ý/u' => 'Й', '/ý/u' => 'й',
'/X/u' => 'Х', '/x/u' => 'х', '/Z/u' => 'З', '/z/u' => 'з',
];
$this->mCyLa2Arab = [
# # Punctuation -> Arabic
'/#|№|No\./u' => '؀', # U+0600
'/\,/' => '،', # U+060C
'/;/' => '؛', # U+061B
'/\?/' => '؟', # U+061F
'/%/' => '٪', # U+066A
'/\*/' => '٭', # U+066D
# # Digits -> Arabic
'/0/' => '۰', # U+06F0
'/1/' => '۱', # U+06F1
'/2/' => '۲', # U+06F2
'/3/' => '۳', # U+06F3
'/4/' => '۴', # U+06F4
'/5/' => '۵', # U+06F5
'/6/' => '۶', # U+06F6
'/7/' => '۷', # U+06F7
'/8/' => '۸', # U+06F8
'/9/' => '۹', # U+06F9
# # Cyrillic -> Arabic
'/Аллаһ/ui' => 'ﷲ',
'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1يە',
'/[еэ]/ui' => 'ە', '/[ъь]/ui' => '',
'/[аә]/ui' => 'ا', '/[оө]/ui' => 'و', '/[ұү]/ui' => 'ۇ', '/[ыі]/ui' => 'ى',
'/[и]/ui' => 'ىي', '/ё/ui' => 'يو', '/ю/ui' => 'يۋ', '/я/ui' => 'يا', '/[й]/ui' => 'ي',
'/ц/ui' => 'تس', '/щ/ui' => 'شش',
'/һ/ui' => 'ح', '/ч/ui' => 'تش',
# '/һ/ui' => 'ھ', '/ч/ui' => 'چ',
'/б/ui' => 'ب', '/в/ui' => 'ۆ', '/г/ui' => 'گ', '/ғ/ui' => 'ع',
'/д/ui' => 'د', '/ж/ui' => 'ج', '/з/ui' => 'ز', '/к/ui' => 'ك',
'/қ/ui' => 'ق', '/л/ui' => 'ل', '/м/ui' => 'م', '/н/ui' => 'ن',
'/ң/ui' => 'ڭ', '/п/ui' => 'پ', '/р/ui' => 'ر', '/с/ui' => 'س',
'/т/ui' => 'ت', '/у/ui' => 'ۋ', '/ф/ui' => 'ف', '/х/ui' => 'ح',
'/ш/ui' => 'ش',
# # Latin -> Arabic // commented for now...
/*'/Allah/ui' => 'ﷲ',
'/[eé]/ui' => 'ە', '/[yý]/ui' => 'ي', '/[ʺʹ]/ui' => '',
'/[aä]/ui' => 'ا', '/[oö]/ui' => 'و', '/[uü]/ui' => 'ۇ',
'/[ï]/ui' => 'ىي', '/[ıIiİ]/u' => 'ى',
'/c/ui' => 'تس',
'/ç/ui' => 'تش', '/h/ui' => 'ح',
#'/ç/ui' => 'چ', '/h/ui' => 'ھ',
'/b/ui' => 'ب','/d/ui' => 'د',
'/f/ui' => 'ف', '/g/ui' => 'گ', '/ğ/ui' => 'ع',
'/j/ui' => 'ج', '/k/ui' => 'ك', '/l/ui' => 'ل', '/m/ui' => 'م',
'/n/ui' => 'ن', '/ñ/ui' => 'ڭ', '/p/ui' => 'پ', '/q/ui' => 'ق',
'/r/ui' => 'ر', '/s/ui' => 'س', '/ş/ui' => 'ش', '/t/ui' => 'ت',
'/v/ui' => 'ۆ', '/w/ui' => 'ۋ', '/x/ui' => 'ح', '/z/ui' => 'ز',*/
];
}
/**
* A function wrapper:
* - if there is no selected variant, leave the link
* names as they were
* - do not try to find variants for usernames
*
* @param string &$link
* @param Title &$nt
* @param bool $ignoreOtherCond
*/
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
// check for user namespace
if ( is_object( $nt ) ) {
$ns = $nt->getNamespace();
if ( $ns == NS_USER || $ns == NS_USER_TALK ) {
return;
}
}
// Remember the incoming link so it can be restored below.
$oldlink = $link;
parent::findVariantLink( $link, $nt, $ignoreOtherCond );
// When the preferred variant is the main language code, undo any change
// the parent made to $link.
if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
$link = $oldlink;
}
}
/**
* It translates text into variant
*
* The parent translation runs first. The result is then cut at every "$N"
* placeholder that is followed by one or more characters outside the source
* alphabet; the pieces between those cut points go through regsConverter(),
* while the cut points themselves are copied to the output unchanged.
* Variants other than the six script/regional ones return right after the
* parent translation.
*
* @param string $text
* @param string $toVariant
*
* @return string
*/
public function translate( $text, $toVariant ) {
$text = parent::translate( $text, $toVariant );
// $letters lists the characters of the script being converted FROM.
switch ( $toVariant ) {
case 'kk-cyrl':
case 'kk-kz':
$letters = KK_L_UC . KK_L_LC . 'ʺʹ#0123456789';
break;
case 'kk-latn':
case 'kk-tr':
$letters = KK_C_UC . KK_C_LC . '№0123456789';
break;
case 'kk-arab':
case 'kk-cn':
$letters = KK_C_UC . KK_C_LC . /*KK_L_UC.KK_L_LC.'ʺʹ'.*/',;\?%\*№0123456789';
break;
default:
return $text;
}
// disable conversion variables like $1, $2...
$varsfix = '\$[0-9]';
// PREG_SPLIT_OFFSET_CAPTURE yields [ piece, byte offset ] pairs, which the
// loop below uses to recover the separator text between pieces.
$matches = preg_split(
'/' . $varsfix . '[^' . $letters . ']+/u',
$text,
-1,
PREG_SPLIT_OFFSET_CAPTURE
);
$mstart = 0;
$ret = '';
foreach ( $matches as $m ) {
// Copy the separator that precedes this piece verbatim...
$ret .= substr( $text, $mstart, $m[1] - $mstart );
// ...then append the converted piece.
$ret .= $this->regsConverter( $m[0], $toVariant );
$mstart = $m[1] + strlen( $m[0] );
}
return $ret;
}
/**
* Applies the regex map for the requested variant to a piece of text.
* For the Arabic variants, qualifying words are first prefixed with
* H_HAMZA (see the inline comments). Empty input and unknown variants
* are returned unchanged.
*
* @param string $text
* @param string $toVariant
* @return mixed|string
*/
private function regsConverter( $text, $toVariant ) {
if ( $text == '' ) {
return $text;
}
switch ( $toVariant ) {
case 'kk-arab':
case 'kk-cn':
$letters = KK_C_LC . KK_C_UC; /*.KK_L_LC.KK_L_UC*/
$front = 'әөүіӘӨҮІ'; /*.'äöüiÄÖÜİ'*/
$excludes = 'еэгғкқЕЭГҒКҚ'; /*.'eégğkqEÉGĞKQ'*/
// split text to words
// NOTE(review): inside a character class PCRE reads \b as a backspace,
// not a word boundary; confirm that is what was intended here.
$matches = preg_split( '/[\b\s\-\.:]+/', $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
$mstart = 0;
$ret = '';
foreach ( $matches as $m ) {
// Copy the separator that precedes this word verbatim.
$ret .= substr( $text, $mstart, $m[1] - $mstart );
// Does the word contain one of the $front vowels? Words that also
// contain any of the $excludes letters (е, э, г, ғ, к, қ) are skipped;
// they are written without the hamza.
if ( preg_match( '/[' . $front . ']/u', $m[0] ) &&
!preg_match( '/[' . $excludes . ']/u', $m[0] )
) {
// Put H_HAMZA in front of every run of Cyrillic letters in the word.
$ret .= preg_replace( '/[' . $letters . ']+/u', H_HAMZA . '$0', $m[0] );
} else {
$ret .= $m[0];
}
$mstart = $m[1] + strlen( $m[0] );
}
// From here on $text aliases the hamza-marked string built above.
$text =& $ret;
foreach ( $this->mCyLa2Arab as $pat => $rep ) {
$text = preg_replace( $pat, $rep, $text );
}
return $text;
case 'kk-latn':
case 'kk-tr':
foreach ( $this->mCyrl2Latn as $pat => $rep ) {
$text = preg_replace( $pat, $rep, $text );
}
return $text;
case 'kk-cyrl':
case 'kk-kz':
foreach ( $this->mLatn2Cyrl as $pat => $rep ) {
$text = preg_replace( $pat, $rep, $text );
}
return $text;
default:
return $text;
}
}
/**
* Runs the category key through autoConvert() with 'kk' as the target.
*
* @param string $key
* @return string
*/
public function convertCategoryKey( $key ) {
return $this->autoConvert( $key, 'kk' );
}
}
/**
* class that handles Cyrillic, Latin and Arabic scripts for Kazakh
* right now it only distinguishes kk_cyrl, kk_latn, kk_arab and kk_kz, kk_tr, kk_cn.

View file

@ -21,109 +21,6 @@
* @ingroup Language
*/
/**
 * Converter for Chinese: an unconverted variant ('zh'), the two script
 * variants ('zh-hans', 'zh-hant') and six regional variants built on them.
 *
 * @ingroup Language
 */
class ZhConverter extends LanguageConverter {

	/**
	 * @param Language $langobj Language object handed on to the parent constructor
	 */
	public function __construct( Language $langobj ) {
		// Both separators are emptied before the parent constructor runs.
		$this->mDescCodeSep = '';
		$this->mDescVarSep = '';

		$variantCodes = [
			'zh',
			'zh-hans',
			'zh-hant',
			'zh-cn',
			'zh-hk',
			'zh-mo',
			'zh-my',
			'zh-sg',
			'zh-tw'
		];

		$fallbackChains = [
			'zh' => [ 'zh-hans', 'zh-hant', 'zh-cn', 'zh-tw', 'zh-hk', 'zh-sg', 'zh-mo', 'zh-my' ],
			'zh-hans' => [ 'zh-cn', 'zh-sg', 'zh-my' ],
			'zh-hant' => [ 'zh-tw', 'zh-hk', 'zh-mo' ],
			'zh-cn' => [ 'zh-hans', 'zh-sg', 'zh-my' ],
			'zh-sg' => [ 'zh-hans', 'zh-cn', 'zh-my' ],
			'zh-my' => [ 'zh-hans', 'zh-sg', 'zh-cn' ],
			'zh-tw' => [ 'zh-hant', 'zh-hk', 'zh-mo' ],
			'zh-hk' => [ 'zh-hant', 'zh-mo', 'zh-tw' ],
			'zh-mo' => [ 'zh-hant', 'zh-hk', 'zh-tw' ],
		];

		$manualLevels = [
			'zh' => 'disable',
			'zh-hans' => 'unidirectional',
			'zh-hant' => 'unidirectional',
		];

		parent::__construct( $langobj, 'zh', $variantCodes, $fallbackChains, [], $manualLevels );

		// Display names for the variants, layered over whatever the parent set up.
		$this->mVariantNames = array_merge( $this->mVariantNames, [
			'zh' => '原文',
			'zh-hans' => '简体',
			'zh-hant' => '繁體',
			'zh-cn' => '大陆',
			'zh-tw' => '台灣',
			'zh-hk' => '香港',
			'zh-mo' => '澳門',
			'zh-sg' => '新加坡',
			'zh-my' => '大马',
		] );
	}

	/**
	 * Builds one replacement table per variant from the ZhConversion data.
	 * zh-mo reuses the zh2HK data, zh-my and zh-sg reuse zh2CN, and the base
	 * variant gets an empty table.
	 */
	protected function loadDefaultTables() {
		$this->mTables = [
			'zh-hans' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hans ),
			'zh-hant' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hant ),
			'zh-cn' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ),
			'zh-hk' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2HK ),
			'zh-mo' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2HK ),
			'zh-my' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ),
			'zh-sg' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ),
			'zh-tw' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2TW ),
			'zh' => new ReplacementArray()
		];
	}

	/**
	 * Completes every regional table with the entries of its script table.
	 * The array union keeps the regional entry whenever both tables define
	 * the same key.
	 */
	protected function postLoadTables() {
		// regional variant => script variant it is based on
		$scriptOf = [
			'zh-cn' => 'zh-hans',
			'zh-hk' => 'zh-hant',
			'zh-mo' => 'zh-hant',
			'zh-my' => 'zh-hans',
			'zh-sg' => 'zh-hans',
			'zh-tw' => 'zh-hant',
		];

		foreach ( $scriptOf as $regional => $script ) {
			$combined = $this->mTables[$regional]->getArray() + $this->mTables[$script]->getArray();
			$this->mTables[$regional]->setArray( $combined );
		}
	}

	/**
	 * Runs the category key through autoConvert() with 'zh' as the target.
	 *
	 * @param string $key
	 * @return string
	 */
	public function convertCategoryKey( $key ) {
		$converted = $this->autoConvert( $key, 'zh' );
		return $converted;
	}
}
/**
* class that handles both Traditional and Simplified Chinese
* right now it only distinguishes zh_hans, zh_hant, zh_cn, zh_tw, zh_sg and zh_hk.
@ -174,7 +71,7 @@ class LanguageZh extends LanguageZh_hans {
public function convertForSearchResult( $termsArray ) {
	// Join the terms with '|' so they can be converted in a single pass
	// (presumably $termsArray is a list of strings; verify against callers).
	$terms = implode( '|', $termsArray );
	$terms = self::convertDoubleWidth( $terms );
	// Expand to all variants exactly once, via the getConverter() accessor.
	// Also expanding through the $this->mConverter property would feed the
	// already-expanded, '|'-joined list through the converter a second time.
	$terms = implode( '|', $this->getConverter()->autoConvertToAllVariants( $terms ) );
	// Split back into individual terms and drop duplicates produced by
	// variants that convert to the same text.
	return array_unique( explode( '|', $terms ) );
}

View file

@ -0,0 +1,74 @@
<?php
/**
* English specific code.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
/**
 * Converter for English whose only non-trivial variant is Pig Latin
 * ('en-x-piglatin').
 *
 * @ingroup Language
 */
class EnConverter extends LanguageConverter {

	/**
	 * @param \Language $langobj Language object handed on to the parent constructor
	 */
	public function __construct( \Language $langobj ) {
		parent::__construct(
			$langobj,
			'en',
			[ 'en', 'en-x-piglatin' ]
		);
	}

	/**
	 * Required by the base class. Each variant receives its own empty
	 * replacement table; the real work happens in translate().
	 */
	protected function loadDefaultTables() {
		$plain = new ReplacementArray();
		$pigLatin = new ReplacementArray();

		$this->mTables = [
			'en' => $plain,
			'en-x-piglatin' => $pigLatin,
		];
	}

	/**
	 * Produces Pig Latin for the 'en-x-piglatin' variant and returns the text
	 * as-is for every other variant. Handy for exercising the variant
	 * machinery without switching the wiki to another language.
	 *
	 * Only words made of one ASCII letter (either case) followed by one or
	 * more lowercase ASCII letters or apostrophes are touched.
	 *
	 * @param string $text
	 * @param string $toVariant
	 * @return string
	 */
	public function translate( $text, $toVariant ) {
		if ( $toVariant !== 'en-x-piglatin' ) {
			return $text;
		}

		// Moves the leading consonant cluster (or "qu"/"squ") behind the rest
		// of the word, carrying an initial capital over to the new first letter.
		$rotateOnset = function ( $m ) {
			$cluster = $m[1];
			$tail = $m[2];
			if ( strtoupper( $cluster[0] ) !== $cluster[0] ) {
				return $tail . $cluster . 'ay';
			}
			return ucfirst( $tail ) . lcfirst( $cluster ) . 'ay';
		};

		// Vowel-initial words just gain "way"; all others get their onset rotated.
		$convertWord = function ( $matches ) use ( $rotateOnset ) {
			$word = $matches[0];
			$startsWithVowel = preg_match( '/^[aeiou]/i', $word );
			if ( $startsWithVowel ) {
				return $word . 'way';
			}
			return preg_replace_callback(
				'/^(s?qu|[^aeiou][^aeiouy]*)(.*)$/i',
				$rotateOnset,
				$word
			);
		};

		return preg_replace_callback( '/[A-Za-z][a-z\']+/', $convertWord, $text );
	}
}

View file

@ -0,0 +1,74 @@
<?php
/**
* Gan Chinese specific code.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
/**
 * Converter for Gan Chinese. Offers the unconverted 'gan' variant plus
 * 'gan-hans' and 'gan-hant', which draw on the ZhConversion tables.
 *
 * @ingroup Language
 */
class GanConverter extends LanguageConverter {

	/**
	 * @param Language $langobj Language object handed on to the parent constructor
	 */
	public function __construct( Language $langobj ) {
		// Both separators are emptied before the parent constructor runs.
		$this->mDescCodeSep = '';
		$this->mDescVarSep = '';

		$codes = [ 'gan', 'gan-hans', 'gan-hant' ];
		$fallbacks = [
			'gan' => [ 'gan-hans', 'gan-hant' ],
			'gan-hans' => [ 'gan' ],
			'gan-hant' => [ 'gan' ],
		];
		$manualLevels = [ 'gan' => 'disable' ];

		parent::__construct( $langobj, 'gan', $codes, $fallbacks, [], $manualLevels );

		// Display names for the variants, layered over whatever the parent set up.
		$labels = [
			'gan' => '原文',
			'gan-hans' => '简体',
			'gan-hant' => '繁體',
		];
		$this->mVariantNames = array_merge( $this->mVariantNames, $labels );
	}

	/**
	 * Sets up the replacement tables: ZhConversion data for the two script
	 * variants and an empty table for the base variant.
	 */
	protected function loadDefaultTables() {
		$this->mTables = [
			'gan-hans' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hans ),
			'gan-hant' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hant ),
			'gan' => new ReplacementArray(),
		];
	}

	/**
	 * Runs the category key through autoConvert() with 'gan' as the target.
	 *
	 * @param string $key
	 * @return string
	 */
	public function convertCategoryKey( $key ) {
		$targetVariant = 'gan';
		return $this->autoConvert( $key, $targetVariant );
	}
}

View file

@ -0,0 +1,361 @@
<?php
/**
* Kazakh (Қазақша) specific code.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Language
*/
// Letter inventories of the Kazakh alphabets. KkConverter concatenates these into
// regular-expression character classes, so each value is a plain run of letters.
define( 'KK_C_UC', 'АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯ' ); # Kazakh Cyrillic uppercase
define( 'KK_C_LC', 'аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя' ); # Kazakh Cyrillic lowercase
define( 'KK_L_UC', 'AÄBCÇDEÉFGĞHIİÏJKLMNÑOÖPQRSŞTUÜVWXYÝZ' ); # Kazakh Latin uppercase
define( 'KK_L_LC', 'aäbcçdeéfgğhıiïjklmnñoöpqrsştuüvwxyýz' ); # Kazakh Latin lowercase
// Not defined at present (kept for reference only):
// define( 'KK_A', 'ٴابپتجحدرزسشعفقكلمنڭەوۇۋۆىيچھ' ); # Kazakh Arabic
// NOTE(review): presumably used as the hamza prefix for Arabic-script output; confirm in KkConverter.
define( 'H_HAMZA', 'ٴ' ); # U+0674 ARABIC LETTER HIGH HAMZA
// Not defined at present (kept for reference only):
// define( 'ZWNJ', '' ); # U+200C ZERO WIDTH NON-JOINER
/**
* Kazakh (Қазақша) converter routines
*
* @ingroup Language
*/
class KkConverter extends LanguageConverter {
protected $mCyrl2Latn, $mLatn2Cyrl, $mCyLa2Arab;
/**
* @param Language $langobj
*/
public function __construct( Language $langobj ) {
$variants = [ 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn' ];
$variantfallbacks = [
'kk' => 'kk-cyrl',
'kk-cyrl' => 'kk',
'kk-latn' => 'kk',
'kk-arab' => 'kk',
'kk-kz' => 'kk-cyrl',
'kk-tr' => 'kk-latn',
'kk-cn' => 'kk-arab'
];
parent::__construct( $langobj, 'kk',
$variants, $variantfallbacks, [] );
// No point delaying this since they're in code.
// Waiting until loadDefaultTables() means they never get loaded
// when the tables themselves are loaded from cache.
$this->loadRegs();
}
protected function loadDefaultTables() {
// require __DIR__."/../../includes/KkConversion.php";
// Placeholder for future implementing. Remove variables declarations
// after generating KkConversion.php
$kk2Cyrl = [];
$kk2Latn = [];
$kk2Arab = [];
$kk2KZ = [];
$kk2TR = [];
$kk2CN = [];
$this->mTables = [
'kk-cyrl' => new ReplacementArray( $kk2Cyrl ),
'kk-latn' => new ReplacementArray( $kk2Latn ),
'kk-arab' => new ReplacementArray( $kk2Arab ),
'kk-kz' => new ReplacementArray( array_merge( $kk2Cyrl, $kk2KZ ) ),
'kk-tr' => new ReplacementArray( array_merge( $kk2Latn, $kk2TR ) ),
'kk-cn' => new ReplacementArray( array_merge( $kk2Arab, $kk2CN ) ),
'kk' => new ReplacementArray()
];
}
protected function postLoadTables() {
$this->mTables['kk-kz']->merge( $this->mTables['kk-cyrl'] );
$this->mTables['kk-tr']->merge( $this->mTables['kk-latn'] );
$this->mTables['kk-cn']->merge( $this->mTables['kk-arab'] );
}
/**
 * Build the three regex => replacement tables consumed by regsConverter():
 * $mCyrl2Latn (Cyrillic to Latin), $mLatn2Cyrl (Latin to Cyrillic) and
 * $mCyLa2Arab (Cyrillic to Arabic; its Latin part is commented out).
 *
 * regsConverter() applies the entries of a table one after another in
 * array order, so the multi-letter and context-dependent rules have to
 * stay ahead of the single-letter rules that would otherwise consume
 * their input first.
 */
private function loadRegs() {
// Cyrillic -> Latin
$this->mCyrl2Latn = [
# # Punctuation
'/№/u' => 'No.',
# # Е after vowels
'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])Е/u' => '$1YE',
'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1ye',
# # leading ЁЮЯЩ
'/^Ё([' . KK_C_UC . ']|$)/u' => 'YO$1', '/^Ё([' . KK_C_LC . ']|$)/u' => 'Yo$1',
'/^Ю([' . KK_C_UC . ']|$)/u' => 'YU$1', '/^Ю([' . KK_C_LC . ']|$)/u' => 'Yu$1',
'/^Я([' . KK_C_UC . ']|$)/u' => 'YA$1', '/^Я([' . KK_C_LC . ']|$)/u' => 'Ya$1',
'/^Щ([' . KK_C_UC . ']|$)/u' => 'ŞÇ$1', '/^Щ([' . KK_C_LC . ']|$)/u' => 'Şç$1',
# # other ЁЮЯ
'/Ё/u' => 'YO', '/ё/u' => 'yo',
'/Ю/u' => 'YU', '/ю/u' => 'yu',
'/Я/u' => 'YA', '/я/u' => 'ya',
'/Щ/u' => 'ŞÇ', '/щ/u' => 'şç',
# # soft and hard signs
'/[ъЪ]/u' => 'ʺ', '/[ьЬ]/u' => 'ʹ',
# # other characters
'/А/u' => 'A', '/а/u' => 'a', '/Ә/u' => 'Ä', '/ә/u' => 'ä',
'/Б/u' => 'B', '/б/u' => 'b', '/В/u' => 'V', '/в/u' => 'v',
'/Г/u' => 'G', '/г/u' => 'g', '/Ғ/u' => 'Ğ', '/ғ/u' => 'ğ',
'/Д/u' => 'D', '/д/u' => 'd', '/Е/u' => 'E', '/е/u' => 'e',
'/Ж/u' => 'J', '/ж/u' => 'j', '/З/u' => 'Z', '/з/u' => 'z',
'/И/u' => 'Ï', '/и/u' => 'ï', '/Й/u' => 'Ý', '/й/u' => 'ý',
'/К/u' => 'K', '/к/u' => 'k', '/Қ/u' => 'Q', '/қ/u' => 'q',
'/Л/u' => 'L', '/л/u' => 'l', '/М/u' => 'M', '/м/u' => 'm',
'/Н/u' => 'N', '/н/u' => 'n', '/Ң/u' => 'Ñ', '/ң/u' => 'ñ',
'/О/u' => 'O', '/о/u' => 'o', '/Ө/u' => 'Ö', '/ө/u' => 'ö',
'/П/u' => 'P', '/п/u' => 'p', '/Р/u' => 'R', '/р/u' => 'r',
'/С/u' => 'S', '/с/u' => 's', '/Т/u' => 'T', '/т/u' => 't',
'/У/u' => 'W', '/у/u' => 'w', '/Ұ/u' => 'U', '/ұ/u' => 'u',
'/Ү/u' => 'Ü', '/ү/u' => 'ü', '/Ф/u' => 'F', '/ф/u' => 'f',
'/Х/u' => 'X', '/х/u' => 'x', '/Һ/u' => 'H', '/һ/u' => 'h',
'/Ц/u' => 'C', '/ц/u' => 'c', '/Ч/u' => 'Ç', '/ч/u' => 'ç',
'/Ш/u' => 'Ş', '/ш/u' => 'ş', '/Ы/u' => 'I', '/ы/u' => 'ı',
'/І/u' => 'İ', '/і/u' => 'i', '/Э/u' => 'É', '/э/u' => 'é',
];
// Latin -> Cyrillic
$this->mLatn2Cyrl = [
# # Punctuation
'/#|No\./' => '№',
# # Şç
'/ŞÇʹ/u' => 'ЩЬ', '/Şçʹ/u' => 'Щь',
'/Ş[Çç]/u' => 'Щ', '/şç/u' => 'щ',
# # soft and hard signs
'/([' . KK_L_UC . '])ʺ([' . KK_L_UC . '])/u' => '$1Ъ$2',
'/ʺ([' . KK_L_LC . '])/u' => 'ъ$1',
'/([' . KK_L_UC . '])ʹ([' . KK_L_UC . '])/u' => '$1Ь$2',
'/ʹ([' . KK_L_LC . '])/u' => 'ь$1',
'/ʺ/u' => 'ъ',
'/ʹ/u' => 'ь',
# # Ye Yo Yu Ya.
'/Y[Ee]/u' => 'Е', '/ye/u' => 'е',
'/Y[Oo]/u' => 'Ё', '/yo/u' => 'ё',
'/Y[UWuw]/u' => 'Ю', '/y[uw]/u' => 'ю',
'/Y[Aa]/u' => 'Я', '/ya/u' => 'я',
# # other characters
'/A/u' => 'А', '/a/u' => 'а', '/Ä/u' => 'Ә', '/ä/u' => 'ә',
'/B/u' => 'Б', '/b/u' => 'б', '/C/u' => 'Ц', '/c/u' => 'ц',
'/Ç/u' => 'Ч', '/ç/u' => 'ч', '/D/u' => 'Д', '/d/u' => 'д',
'/E/u' => 'Е', '/e/u' => 'е', '/É/u' => 'Э', '/é/u' => 'э',
'/F/u' => 'Ф', '/f/u' => 'ф', '/G/u' => 'Г', '/g/u' => 'г',
'/Ğ/u' => 'Ғ', '/ğ/u' => 'ғ', '/H/u' => 'Һ', '/h/u' => 'һ',
'/I/u' => 'Ы', '/ı/u' => 'ы', '/İ/u' => 'І', '/i/u' => 'і',
'/Ï/u' => 'И', '/ï/u' => 'и', '/J/u' => 'Ж', '/j/u' => 'ж',
'/K/u' => 'К', '/k/u' => 'к', '/L/u' => 'Л', '/l/u' => 'л',
'/M/u' => 'М', '/m/u' => 'м', '/N/u' => 'Н', '/n/u' => 'н',
'/Ñ/u' => 'Ң', '/ñ/u' => 'ң', '/O/u' => 'О', '/o/u' => 'о',
'/Ö/u' => 'Ө', '/ö/u' => 'ө', '/P/u' => 'П', '/p/u' => 'п',
'/Q/u' => 'Қ', '/q/u' => 'қ', '/R/u' => 'Р', '/r/u' => 'р',
'/S/u' => 'С', '/s/u' => 'с', '/Ş/u' => 'Ш', '/ş/u' => 'ш',
'/T/u' => 'Т', '/t/u' => 'т', '/U/u' => 'Ұ', '/u/u' => 'ұ',
'/Ü/u' => 'Ү', '/ü/u' => 'ү', '/V/u' => 'В', '/v/u' => 'в',
'/W/u' => 'У', '/w/u' => 'у', '/Ý/u' => 'Й', '/ý/u' => 'й',
'/X/u' => 'Х', '/x/u' => 'х', '/Z/u' => 'З', '/z/u' => 'з',
];
// Cyrillic -> Arabic (the Latin -> Arabic rules at the end are disabled)
$this->mCyLa2Arab = [
# # Punctuation -> Arabic
'/#|№|No\./u' => '؀', # U+0600
'/\,/' => '،', # U+060C
'/;/' => '؛', # U+061B
'/\?/' => '؟', # U+061F
'/%/' => '٪', # U+066A
'/\*/' => '٭', # U+066D
# # Digits -> Arabic
'/0/' => '۰', # U+06F0
'/1/' => '۱', # U+06F1
'/2/' => '۲', # U+06F2
'/3/' => '۳', # U+06F3
'/4/' => '۴', # U+06F4
'/5/' => '۵', # U+06F5
'/6/' => '۶', # U+06F6
'/7/' => '۷', # U+06F7
'/8/' => '۸', # U+06F8
'/9/' => '۹', # U+06F9
# # Cyrillic -> Arabic
'/Аллаһ/ui' => 'ﷲ',
'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1يە',
'/[еэ]/ui' => 'ە', '/[ъь]/ui' => '',
'/[аә]/ui' => 'ا', '/[оө]/ui' => 'و', '/[ұү]/ui' => 'ۇ', '/[ыі]/ui' => 'ى',
'/[и]/ui' => 'ىي', '/ё/ui' => 'يو', '/ю/ui' => 'يۋ', '/я/ui' => 'يا', '/[й]/ui' => 'ي',
'/ц/ui' => 'تس', '/щ/ui' => 'شش',
'/һ/ui' => 'ح', '/ч/ui' => 'تش',
# '/һ/ui' => 'ھ', '/ч/ui' => 'چ',
'/б/ui' => 'ب', '/в/ui' => 'ۆ', '/г/ui' => 'گ', '/ғ/ui' => 'ع',
'/д/ui' => 'د', '/ж/ui' => 'ج', '/з/ui' => 'ز', '/к/ui' => 'ك',
'/қ/ui' => 'ق', '/л/ui' => 'ل', '/м/ui' => 'م', '/н/ui' => 'ن',
'/ң/ui' => 'ڭ', '/п/ui' => 'پ', '/р/ui' => 'ر', '/с/ui' => 'س',
'/т/ui' => 'ت', '/у/ui' => 'ۋ', '/ф/ui' => 'ف', '/х/ui' => 'ح',
'/ш/ui' => 'ش',
# # Latin -> Arabic // commented for now...
/*'/Allah/ui' => 'ﷲ',
'/[eé]/ui' => 'ە', '/[yý]/ui' => 'ي', '/[ʺʹ]/ui' => '',
'/[aä]/ui' => 'ا', '/[oö]/ui' => 'و', '/[uü]/ui' => 'ۇ',
'/[ï]/ui' => 'ىي', '/[ıIiİ]/u' => 'ى',
'/c/ui' => 'تس',
'/ç/ui' => 'تش', '/h/ui' => 'ح',
#'/ç/ui' => 'چ', '/h/ui' => 'ھ',
'/b/ui' => 'ب','/d/ui' => 'د',
'/f/ui' => 'ف', '/g/ui' => 'گ', '/ğ/ui' => 'ع',
'/j/ui' => 'ج', '/k/ui' => 'ك', '/l/ui' => 'ل', '/m/ui' => 'م',
'/n/ui' => 'ن', '/ñ/ui' => 'ڭ', '/p/ui' => 'پ', '/q/ui' => 'ق',
'/r/ui' => 'ر', '/s/ui' => 'س', '/ş/ui' => 'ش', '/t/ui' => 'ت',
'/v/ui' => 'ۆ', '/w/ui' => 'ۋ', '/x/ui' => 'ح', '/z/ui' => 'ز',*/
];
}
/**
 * Wrapper around the parent's findVariantLink():
 * - titles in the user and user talk namespaces are never looked up
 * - when the preferred variant is the main language code, the link
 *   text is restored to what it was before the lookup
 *
 * @param string &$link
 * @param Title &$nt
 * @param bool $ignoreOtherCond
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	// Do not try to find variants for usernames.
	if ( is_object( $nt ) && in_array( $nt->getNamespace(), [ NS_USER, NS_USER_TALK ] ) ) {
		return;
	}

	$untouchedLink = $link;
	parent::findVariantLink( $link, $nt, $ignoreOtherCond );

	// No variant selected: keep the link name as it was.
	if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
		$link = $untouchedLink;
	}
}
/**
 * Translate text into the given variant: first through the parent's
 * table-based translate(), then piece by piece through regsConverter().
 *
 * @param string $text
 * @param string $toVariant
 *
 * @return string
 */
public function translate( $text, $toVariant ) {
	$text = parent::translate( $text, $toVariant );

	$digits = '0123456789';
	// Characters of the source script(s) that the target variant converts from.
	switch ( $toVariant ) {
		case 'kk-cyrl':
		case 'kk-kz':
			$letters = KK_L_UC . KK_L_LC . 'ʺʹ#' . $digits;
			break;
		case 'kk-latn':
		case 'kk-tr':
			$letters = KK_C_UC . KK_C_LC . '№' . $digits;
			break;
		case 'kk-arab':
		case 'kk-cn':
			// Latin letters are currently not part of the Arabic source set.
			$letters = KK_C_UC . KK_C_LC . ',;\?%\*№' . $digits;
			break;
		default:
			return $text;
	}

	// Split on conversion variables such as $1, $2... followed by a run of
	// characters outside the source set; the delimiters are copied through
	// unchanged so that those variables stay intact.
	$pieces = preg_split(
		'/\$[0-9][^' . $letters . ']+/u',
		$text,
		-1,
		PREG_SPLIT_OFFSET_CAPTURE
	);

	$cursor = 0;
	$converted = '';
	foreach ( $pieces as list( $chunk, $offset ) ) {
		// Text between the previous piece and this one (the delimiter).
		$converted .= substr( $text, $cursor, $offset - $cursor );
		$converted .= $this->regsConverter( $chunk, $toVariant );
		$cursor = $offset + strlen( $chunk );
	}

	return $converted;
}
/**
 * Run the regex table that belongs to the target variant over the text.
 * For the Arabic variants, words with front vowels are first prefixed
 * with a high hamza.
 *
 * @param string $text
 * @param string $toVariant
 * @return mixed|string
 */
private function regsConverter( $text, $toVariant ) {
	if ( $text == '' ) {
		return $text;
	}

	switch ( $toVariant ) {
		case 'kk-arab':
		case 'kk-cn':
			$cyrillic = KK_C_LC . KK_C_UC;
			$frontVowels = 'әөүіӘӨҮІ';
			$noHamza = 'еэгғкқЕЭГҒКҚ';

			// Split the text into words.
			$words = preg_split( '/[\b\s\-\.:]+/', $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
			$cursor = 0;
			$marked = '';
			foreach ( $words as list( $word, $offset ) ) {
				$marked .= substr( $text, $cursor, $offset - $cursor );
				// A word gets the hamza when it contains a front vowel, unless
				// it also contains one of е, э, г, ғ, к, қ: those words are
				// written without hamza.
				$wantsHamza = preg_match( '/[' . $frontVowels . ']/u', $word ) &&
					!preg_match( '/[' . $noHamza . ']/u', $word );
				$marked .= $wantsHamza
					? preg_replace( '/[' . $cyrillic . ']+/u', H_HAMZA . '$0', $word )
					: $word;
				$cursor = $offset + strlen( $word );
			}
			$text = $marked;
			$rules = $this->mCyLa2Arab;
			break;
		case 'kk-latn':
		case 'kk-tr':
			$rules = $this->mCyrl2Latn;
			break;
		case 'kk-cyrl':
		case 'kk-kz':
			$rules = $this->mLatn2Cyrl;
			break;
		default:
			return $text;
	}

	// The rules are order-sensitive, so apply them strictly one by one.
	foreach ( $rules as $pattern => $replacement ) {
		$text = preg_replace( $pattern, $replacement, $text );
	}

	return $text;
}
/**
 * Convert a category key by running autoConvert() with the 'kk' code.
 *
 * @param string $key
 * @return string
 */
public function convertCategoryKey( $key ) {
	$targetCode = 'kk';

	return $this->autoConvert( $key, $targetCode );
}
}

View file

@ -0,0 +1,125 @@
<?php
/**
* Chinese specific code.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Language
*/
/**
 * Chinese converter: declares the zh variants with their fallbacks and
 * loads the replacement tables from ZhConversion.
 *
 * @ingroup Language
 */
class ZhConverter extends LanguageConverter {
	/**
	 * @param Language $langobj
	 */
	public function __construct( Language $langobj ) {
		// NOTE(review): both separators are empty strings as written here;
		// confirm that this is intended and that no characters were lost.
		$this->mDescCodeSep = '';
		$this->mDescVarSep = '';

		$variantCodes = [
			'zh',
			'zh-hans',
			'zh-hant',
			'zh-cn',
			'zh-hk',
			'zh-mo',
			'zh-my',
			'zh-sg',
			'zh-tw'
		];

		// variant code => ordered list of variants to fall back to
		$fallbacks = [
			'zh' => [ 'zh-hans', 'zh-hant', 'zh-cn', 'zh-tw', 'zh-hk', 'zh-sg', 'zh-mo', 'zh-my' ],
			'zh-hans' => [ 'zh-cn', 'zh-sg', 'zh-my' ],
			'zh-hant' => [ 'zh-tw', 'zh-hk', 'zh-mo' ],
			'zh-cn' => [ 'zh-hans', 'zh-sg', 'zh-my' ],
			'zh-sg' => [ 'zh-hans', 'zh-cn', 'zh-my' ],
			'zh-my' => [ 'zh-hans', 'zh-sg', 'zh-cn' ],
			'zh-tw' => [ 'zh-hant', 'zh-hk', 'zh-mo' ],
			'zh-hk' => [ 'zh-hant', 'zh-mo', 'zh-tw' ],
			'zh-mo' => [ 'zh-hant', 'zh-hk', 'zh-tw' ],
		];

		$modes = [
			'zh' => 'disable',
			'zh-hans' => 'unidirectional',
			'zh-hant' => 'unidirectional',
		];

		parent::__construct( $langobj, 'zh', $variantCodes, $fallbacks, [], $modes );

		// Display names for the variants, layered over whatever the parent set up.
		$this->mVariantNames = array_merge( $this->mVariantNames, [
			'zh' => '原文',
			'zh-hans' => '简体',
			'zh-hant' => '繁體',
			'zh-cn' => '大陆',
			'zh-tw' => '台灣',
			'zh-hk' => '香港',
			'zh-mo' => '澳門',
			'zh-sg' => '新加坡',
			'zh-my' => '大马',
		] );
	}

	/**
	 * Fill $this->mTables from the static ZhConversion arrays. zh-mo starts
	 * from the zh-hk array, zh-my and zh-sg start from the zh-cn array.
	 */
	protected function loadDefaultTables() {
		$this->mTables = [
			'zh-hans' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hans ),
			'zh-hant' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hant ),
			'zh-cn' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ),
			'zh-hk' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2HK ),
			'zh-mo' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2HK ),
			'zh-my' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ),
			'zh-sg' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ),
			'zh-tw' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2TW ),
			'zh' => new ReplacementArray()
		];
	}

	/**
	 * Extend every regional table with the entries of its script table.
	 * The array union keeps the regional entry wherever both define a key.
	 */
	protected function postLoadTables() {
		$scriptOf = [
			'zh-cn' => 'zh-hans',
			'zh-hk' => 'zh-hant',
			'zh-mo' => 'zh-hant',
			'zh-my' => 'zh-hans',
			'zh-sg' => 'zh-hans',
			'zh-tw' => 'zh-hant',
		];
		foreach ( $scriptOf as $regional => $script ) {
			$table = $this->mTables[$regional];
			$table->setArray( $table->getArray() + $this->mTables[$script]->getArray() );
		}
	}

	/**
	 * Convert a category key by running autoConvert() with the 'zh' code.
	 *
	 * @param string $key
	 * @return string
	 */
	public function convertCategoryKey( $key ) {
		$targetCode = 'zh';

		return $this->autoConvert( $key, $targetCode );
	}
}

View file

@ -180,6 +180,7 @@ $wgAutoloadClasses += [
# tests/phpunit/languages
'LanguageClassesTestCase' => "$testDir/phpunit/languages/LanguageClassesTestCase.php",
'LanguageConverterTestTrait' => "$testDir/phpunit/languages/LanguageConverterTestTrait.php",
# tests/phpunit/includes/libs
'GenericArrayObjectTest' => "$testDir/phpunit/includes/libs/GenericArrayObjectTest.php",

View file

@ -0,0 +1,33 @@
<?php
use MediaWiki\Languages\LanguageConverterFactory;
use MediaWiki\MediaWikiServices;
trait LanguageConverterTestTrait {
	/** @var string Pattern that captures the part of the test class name before "ConverterTest" */
	private $codeRegex = '/^(.+)ConverterTest$/';

	/**
	 * Derive the language code from the name of the test class, e.g.
	 * "KkConverterTest" gives "kk". Falls back to 'en' when the class
	 * name does not match the pattern.
	 *
	 * @return string
	 */
	protected function code(): string {
		$matched = preg_match( $this->codeRegex, get_class( $this ), $parts );

		return $matched ? mb_strtolower( $parts[1] ) : 'en';
	}

	/**
	 * Create and return the LanguageConverter to be tested.
	 *
	 * @return ILanguageConverter
	 */
	protected function getLanguageConverter(): ILanguageConverter {
		$languageFactory = MediaWikiServices::getInstance()->getLanguageFactory();
		$language = $languageFactory->getLanguage( $this->code() );

		// The factory is given a callback that always hands back this language.
		$converterFactory = new LanguageConverterFactory( false, function () use ( $language ) {
			return $language;
		} );

		return $converterFactory->getLanguageConverter( $language );
	}
}

View file

@ -69,7 +69,6 @@ class LanguageSrTest extends LanguageClassesTestCase {
foreach ( $langs as $code => $l ) {
$p = $langFactory->getParentLanguage( $code );
$this->assertTrue( $p === null, 'no parent for bogus language' );
$this->assertFalse( $l instanceof LanguageSr, "$code is not sr" );
$this->assertFalse( $this->getLang()->hasVariant( $code ), "$code is not a sr variant" );
foreach ( [ 'sr', 'sr-ec', 'sr-EC', 'sr-Cyrl', 'sr-cyrl', 'sr-bogus' ] as $v ) {
if ( $v !== $code ) {

View file

@ -0,0 +1,141 @@
<?php
/**
 * @group Language
 * @covers CrhConverter
 */
class CrhConverterTest extends MediaWikiTestCase {
	use LanguageConverterTestTrait;

	/**
	 * Check a single word in isolation (Latin input, then Cyrillic input)
	 * and with minimal context, using the strings "<cyrillic> <latin>" and
	 * "<latin> <cyrillic>", each converted to all variants.
	 *
	 * @dataProvider provideAutoConvertToAllVariantsByWord
	 * @covers CrhConverter::autoConvertToAllVariants
	 */
	public function testAutoConvertToAllVariantsByWord( $cyrl, $lat ) {
		// [ expected crh-cyrl, expected crh-latn ]
		$single = [ $cyrl, $lat ];
		$doubled = [ $cyrl . ' ' . $cyrl, $lat . ' ' . $lat ];

		// [ input, expected conversions ]
		$scenarios = [
			[ $lat, $single ],
			[ $cyrl, $single ],
			[ $cyrl . ' ' . $lat, $doubled ],
			[ $lat . ' ' . $cyrl, $doubled ],
		];

		foreach ( $scenarios as list( $input, $converted ) ) {
			$expected = [
				'crh' => $input,
				'crh-cyrl' => $converted[0],
				'crh-latn' => $converted[1],
			];
			$this->assertEquals(
				$expected,
				$this->getLanguageConverter()->autoConvertToAllVariants( $input )
			);
		}
	}

	/**
	 * @return array[] Pairs of [ Cyrillic spelling, Latin spelling ]
	 */
	public static function provideAutoConvertToAllVariantsByWord() {
		return [
			// general words, covering more of the alphabet
			[ 'рузгярнынъ', 'ruzgârnıñ' ], [ 'Париж', 'Parij' ], [ 'чёкюч', 'çöküç' ],
			[ 'элифбени', 'elifbeni' ], [ 'полициясы', 'politsiyası' ], [ 'хусусында', 'hususında' ],
			[ 'акъшамларны', 'aqşamlarnı' ], [ 'опькеленюв', 'öpkelenüv' ],
			[ 'кулюмсиреди', 'külümsiredi' ], [ 'айтмайджагъым', 'aytmaycağım' ],
			[ 'козьяшсыз', 'közyaşsız' ],
			// exception words
			[ 'инструменталь', 'instrumental' ], [ 'гургуль', 'gürgül' ], [ 'тюшюнмемек', 'tüşünmemek' ],
			// specific problem words
			[ 'куню', 'künü' ], [ 'сюргюнлиги', 'sürgünligi' ], [ 'озю', 'özü' ], [ 'этти', 'etti' ],
			[ 'эсас', 'esas' ], [ 'дёрт', 'dört' ], [ 'кельди', 'keldi' ], [ 'км²', 'km²' ],
			[ 'юзь', 'yüz' ], [ 'АКъШ', 'AQŞ' ], [ 'ШСДжБнен', 'ŞSCBnen' ], [ 'июль', 'iyül' ],
			[ 'ишгъаль', 'işğal' ], [ 'ишгъальджилерине', 'işğalcilerine' ], [ 'район', 'rayon' ],
			[ 'районынынъ', 'rayonınıñ' ], [ 'Ногъай', 'Noğay' ], [ 'Юрьтю', 'Yürtü' ],
			[ 'ватандан', 'vatandan' ], [ 'ком-кок', 'köm-kök' ], [ 'АКЪКЪЫ', 'AQQI' ],
			[ 'ДАГЪГЪА', 'DAĞĞA' ], [ '13-юнджи', '13-ünci' ], [ 'ДЖУРЬМЕК', 'CÜRMEK' ],
			[ 'джумлеси', 'cümlesi' ], [ 'ильи', 'ilyi' ], [ 'Ильи', 'İlyi' ], [ 'бруцел', 'brutsel' ],
			[ 'коцюб', 'kotsüb' ], [ 'плацен', 'platsen' ], [ 'эпицентр', 'epitsentr' ],
			// -tsin- words
			[ 'кетсин', 'ketsin' ], [ 'кирлетсин', 'kirletsin' ], [ 'этсин', 'etsin' ],
			[ 'етсин', 'yetsin' ], [ 'этсинлерми', 'etsinlermi' ], [ 'принцини', 'printsini' ],
			[ 'медицина', 'meditsina' ], [ 'Щетсин', 'Şçetsin' ], [ 'Щекоцины', 'Şçekotsinı' ],
			// regex pattern words
			[ 'коюнден', 'köyünden' ], [ 'аньге', 'ange' ],
			// multi part words
			[ 'эки юз', 'eki yüz' ],
			// affix patterns
			[ 'койнинъ', 'köyniñ' ], [ 'Авджыкойде', 'Avcıköyde' ], [ 'экваториаль', 'ekvatorial' ],
			[ 'Джанкой', 'Canköy' ], [ 'усть', 'üst' ], [ 'роль', 'rol' ], [ 'буюк', 'büyük' ],
			[ 'джонк', 'cönk' ],
			// Roman numerals vs Initials, part 1 - Roman numeral initials without spaces
			[ 'А.Б.Дж.Д.М. Къадырова XII', 'A.B.C.D.M. Qadırova XII' ],
			// Roman numerals vs Initials, part 2 - Roman numeral initials with spaces
			[ 'Г. Х. Ы. В. X. Л. Меметов III', 'G. H. I. V. X. L. Memetov III' ],
			// ALL CAPS, made up acronyms
			[ 'НЪАБ', 'ÑAB' ], [ 'КЪЫДЖ', 'QIC' ], [ 'ГЪУК', 'ĞUK' ], [ 'ДЖОТ', 'COT' ], [ 'ДЖА', 'CA' ],
		];
	}

	/**
	 * Cases that need some context (like Roman numerals), that involve
	 * many-to-one mappings, or that give other asymmetric results (like
	 * smart quotes).
	 *
	 * @dataProvider provideAutoConvertToAllVariantsByString
	 * @covers CrhConverter::autoConvertToAllVariants
	 */
	public function testAutoConvertToAllVariantsByString( $result, $value ) {
		$converter = $this->getLanguageConverter();
		$actual = $converter->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $actual );
	}

	/**
	 * @return array[] Pairs of [ expected conversions by variant, input string ]
	 */
	public static function provideAutoConvertToAllVariantsByString() {
		return [
			[ // Roman numerals and quotes, esp. single-letter Roman numerals at the end of a string
				[
					'crh' => 'VI,VII IX “dört” «дёрт» XI XII I V X L C D M',
					'crh-cyrl' => 'VI,VII IX «дёрт» «дёрт» XI XII I V X L C D M',
					'crh-latn' => 'VI,VII IX “dört” "dört" XI XII I V X L C D M',
				],
				'VI,VII IX “dört” «дёрт» XI XII I V X L C D M'
			],
			[ // Many-to-one mappings: many Cyrillic to one Latin
				[
					'crh' => 'шофер шофёр şoför корбекул корьбекул корьбекуль körbekül',
					'crh-cyrl' => 'шофер шофёр шофёр корбекул корьбекул корьбекуль корьбекуль',
					'crh-latn' => 'şoför şoför şoför körbekül körbekül körbekül körbekül',
				],
				'шофер шофёр şoför корбекул корьбекул корьбекуль körbekül'
			],
			[ // Many-to-one mappings: many Latin to one Cyrillic
				[
					'crh' => 'fevqülade fevqulade февкъульаде beyude beyüde бейуде',
					'crh-cyrl' => 'февкъульаде февкъульаде февкъульаде бейуде бейуде бейуде',
					'crh-latn' => 'fevqülade fevqulade fevqulade beyude beyüde beyüde',
				],
				'fevqülade fevqulade февкъульаде beyude beyüde бейуде'
			],
		];
	}
}

View file

@ -0,0 +1,41 @@
<?php
/**
 * @group Language
 * @covers GanConverter
 */
class GanConverterTest extends MediaWikiTestCase {
	use LanguageConverterTestTrait;

	/**
	 * Convert one input string to all variants and compare the whole result.
	 *
	 * @dataProvider provideAutoConvertToAllVariants
	 * @covers GanConverter::autoConvertToAllVariants
	 */
	public function testAutoConvertToAllVariants( $result, $value ) {
		$converter = $this->getLanguageConverter();
		$actual = $converter->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $actual );
	}

	/**
	 * @return array[] Pairs of [ expected conversions by variant, input string ]
	 */
	public static function provideAutoConvertToAllVariants() {
		return [
			// zh2Hans
			[
				[
					'gan' => '㑯',
					'gan-hans' => '㑔',
					'gan-hant' => '㑯',
				],
				'㑯'
			],
			// zh2Hant
			[
				[
					'gan' => '㐷',
					'gan-hans' => '㐷',
					'gan-hant' => '傌',
				],
				'㐷'
			],
		];
	}
}

View file

@ -0,0 +1,41 @@
<?php
/**
 * @group Language
 * @covers IuConverter
 */
class IuConverterTest extends MediaWikiTestCase {
	use LanguageConverterTestTrait;

	/**
	 * Convert one input string to all variants and compare the whole result.
	 *
	 * @dataProvider provideAutoConvertToAllVariants
	 * @covers IuConverter::autoConvertToAllVariants
	 */
	public function testAutoConvertToAllVariants( $result, $value ) {
		$converter = $this->getLanguageConverter();
		$actual = $converter->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $actual );
	}

	/**
	 * @return array[] Pairs of [ expected conversions by variant, input string ]
	 */
	public static function provideAutoConvertToAllVariants() {
		return [
			// ike-cans
			[
				[
					'ike-cans' => 'ᐴ',
					'ike-latn' => 'PUU',
					'iu' => 'PUU',
				],
				'PUU'
			],
			// ike-latn
			[
				[
					'ike-cans' => 'ᐴ',
					'ike-latn' => 'puu',
					'iu' => 'ᐴ',
				],
				'ᐴ'
			],
		];
	}
}

View file

@ -0,0 +1,47 @@
<?php
/**
 * @group Language
 * @covers KkConverter
 */
class KkConverterTest extends MediaWikiTestCase {
	use LanguageConverterTestTrait;

	/**
	 * Convert one input string to all variants and compare the whole result.
	 *
	 * @dataProvider provideAutoConvertToAllVariants
	 * @covers KkConverter::autoConvertToAllVariants
	 */
	public function testAutoConvertToAllVariants( $result, $value ) {
		$this->assertEquals( $result, $this->getLanguageConverter()->autoConvertToAllVariants( $value ) );
	}

	/**
	 * @return array[] Pairs of [ expected conversions by variant, input string ]
	 */
	public static function provideAutoConvertToAllVariants() {
		return [
			[
				[
					'kk' => 'Адамдарға ақыл-парасат, ар-ождан берілген',
					'kk-cyrl' => 'Адамдарға ақыл-парасат, ар-ождан берілген',
					'kk-latn' => 'Adamdarğa aqıl-parasat, ar-ojdan berilgen',
					'kk-arab' => 'ادامدارعا اقىل-پاراسات، ار-وجدان بەرىلگەن',
					'kk-kz' => 'Адамдарға ақыл-парасат, ар-ождан берілген',
					'kk-tr' => 'Adamdarğa aqıl-parasat, ar-ojdan berilgen',
					'kk-cn' => 'ادامدارعا اقىل-پاراسات، ار-وجدان بەرىلگەن'
				],
				'Адамдарға ақыл-парасат, ар-ождан берілген'
			],
		];
	}

	/**
	 * The link text passed by reference must come back unchanged.
	 *
	 * @covers KkConverter::findVariantLink
	 * @covers LanguageConverter::findVariantLink
	 */
	public function testFindVariantLinks() {
		$old = "sample_link";
		$newLink = $old;
		$title = Title::newFromText( "Same page for link" );
		$this->getLanguageConverter()->findVariantLink( $newLink, $title );
		// The failure message used to read "inks should'n be changed".
		$this->assertSame( $old, $newLink, "Links shouldn't be changed" );
	}
}

View file

@ -0,0 +1,47 @@
<?php
/**
 * @group Language
 * @covers KuConverter
 */
class KuConverterTest extends MediaWikiTestCase {
	use LanguageConverterTestTrait;

	/**
	 * Convert one input string to all variants and compare the whole result.
	 *
	 * @dataProvider provideAutoConvertToAllVariants
	 * @covers KuConverter::autoConvertToAllVariants
	 */
	public function testAutoConvertToAllVariants( $result, $value ) {
		$converter = $this->getLanguageConverter();
		$actual = $converter->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $actual );
	}

	/**
	 * @return array[] Pairs of [ expected conversions by variant, input string ]
	 */
	public static function provideAutoConvertToAllVariants() {
		return [
			[
				[
					'ku' => '١',
					'ku-arab' => '١',
					'ku-latn' => '1',
				],
				'١'
			],
			[
				[
					'ku' => 'Wîkîpediya ensîklopediyeke azad bi rengê wîkî ye.',
					'ku-arab' => 'ویکیپەدیائە نسیکلۆپەدیەکەئا زاد ب رەنگێ ویکی یە.',
					'ku-latn' => 'Wîkîpediya ensîklopediyeke azad bi rengê wîkî ye.',
				],
				'Wîkîpediya ensîklopediyeke azad bi rengê wîkî ye.'
			],
			[
				[
					'ku' => 'ویکیپەدیا ەنسیکلۆپەدیەکەئا زاد ب رەنگێ ویکی یە.',
					'ku-arab' => 'ویکیپەدیا ەنسیکلۆپەدیەکەئا زاد ب رەنگێ ویکی یە.',
					'ku-latn' => 'wîkîpedîa ensîklopedîekea zad b rengê wîkî îe.',
				],
				'ویکیپەدیا ەنسیکلۆپەدیەکەئا زاد ب رەنگێ ویکی یە.'
			],
		];
	}
}

View file

@ -0,0 +1,40 @@
<?php
/**
 * @group Language
 * @covers ShiConverter
 */
class ShiConverterTest extends MediaWikiTestCase {
	use LanguageConverterTestTrait;

	/**
	 * Convert one input string to all variants and compare the whole result.
	 *
	 * @dataProvider provideAutoConvertToAllVariants
	 * @covers ShiConverter::autoConvertToAllVariants
	 */
	public function testAutoConvertToAllVariants( $result, $value ) {
		$converter = $this->getLanguageConverter();
		$actual = $converter->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $actual );
	}

	/**
	 * @return array[] Pairs of [ expected conversions by variant, input string ]
	 */
	public static function provideAutoConvertToAllVariants() {
		return [
			[
				[
					'shi' => 'AƔ',
					'shi-tfng' => 'ⴰⵖ',
					'shi-latn' => 'AƔ',
				],
				'AƔ'
			],
			[
				[
					'shi' => 'ⴰⵖ',
					'shi-tfng' => 'ⴰⵖ',
					'shi-latn' => 'aɣ',
				],
				'ⴰⵖ'
			],
		];
	}
}

View file

@ -0,0 +1,206 @@
<?php
/**
 * @group Language
 * @covers SrConverter
 */
class SrConverterTest extends MediaWikiTestCase {
	use LanguageConverterTestTrait;

	/**
	 * @covers SrConverter::hasVariants
	 */
	public function testHasVariants() {
		$converter = $this->getLanguageConverter();

		$this->assertTrue( $converter->hasVariants(), 'sr has variants' );
	}

	/**
	 * @covers SrConverter::hasVariant
	 */
	public function testHasVariantBogus() {
		foreach ( [ 'sr-ec', 'sr-el' ] as $variant ) {
			$this->assertTrue(
				$this->getLanguageConverter()->hasVariant( $variant ),
				"no variant for $variant language"
			);
		}
	}

	/**
	 * @covers SrConverter::convertTo
	 */
	public function testEasyConversions() {
		$this->assertCyrillic( 'шђчћжШЂЧЋЖ', 'Cyrillic guessing characters' );
		$this->assertLatin( 'šđč枊ĐČĆŽ', 'Latin guessing characters' );
	}

	/**
	 * @covers SrConverter::convertTo
	 */
	public function testMixedConversions() {
		$this->assertCyrillic( 'шђчћжШЂЧЋЖ - šđčćž', 'Mostly Cyrillic characters' );
		$this->assertLatin( 'šđč枊ĐČĆŽ - шђчћж', 'Mostly Latin characters' );
	}

	/**
	 * @covers SrConverter::convertTo
	 */
	public function testSameAmountOfLatinAndCyrillicGetConverted() {
		$balanced = '4 Latin: šđčć | 4 Cyrillic: шђчћ';

		foreach ( [ 'sr-ec', 'sr-el' ] as $variant ) {
			$this->assertConverted( $balanced, $variant );
		}
	}

	/**
	 * @author Nikola Smolenski
	 * @covers SrConverter::convertTo
	 */
	public function testConversionToCyrillic() {
		// [ expected Cyrillic output, input ]
		$expectations = [
			// A simple conversion of Latin to Cyrillic
			[ 'абвг', 'abvg' ],
			// Same as above, but assert that -{}-s must be removed and not converted
			[ 'ljабnjвгdž', '-{lj}-ab-{nj}-vg-{dž}-' ],
			// A simple conversion of Cyrillic to Cyrillic
			[ 'абвг', 'абвг' ],
			// Same as above, but assert that -{}-s must be removed and not converted
			[ 'ljабnjвгdž', '-{lj}-аб-{nj}-вг-{dž}-' ],
			// This text has some Latin, but is recognized as Cyrillic, so it should not be converted
			[ 'abvgшђжчћ', 'abvgшђжчћ' ],
			// Same as above, but assert that -{}-s must be removed
			[ 'љabvgњшђжчћџ', '-{љ}-abvg-{њ}-шђжчћ-{џ}-' ],
			// This text has some Cyrillic, but is recognized as Latin, so it should be converted
			[ 'абвгшђжчћ', 'абвгšđžčć' ],
			// Same as above, but assert that -{}-s must be removed and not converted
			[ 'ljабвгnjшђжчћdž', '-{lj}-абвг-{nj}-šđžčć-{dž}-' ],
			// Roman numerals are not converted
			[ 'а I б II в III г IV шђжчћ', 'a I b II v III g IV šđžčć' ],
		];

		foreach ( $expectations as list( $expected, $input ) ) {
			$this->assertEquals( $expected, $this->convertToCyrillic( $input ) );
		}
	}

	/**
	 * @covers SrConverter::convertTo
	 */
	public function testConversionToLatin() {
		// [ expected Latin output, input ]
		$expectations = [
			// A simple conversion of Latin to Latin
			[ 'abcd', 'abcd' ],
			// A simple conversion of Cyrillic to Latin
			[ 'abcd', 'абцд' ],
			// This text has some Latin, but is recognized as Cyrillic, so it should be converted
			[ 'abcdšđžčć', 'abcdшђжчћ' ],
			// This text has some Cyrillic, but is recognized as Latin, so it should not be converted
			[ 'абцдšđžčć', 'абцдšđžčć' ],
		];

		foreach ( $expectations as list( $expected, $input ) ) {
			$this->assertEquals( $expected, $this->convertToLatin( $input ) );
		}
	}

	# #### HELPERS #####################################################

	/**
	 * Assert that the text stays the same after conversion to a variant.
	 * @param string $text Text to convert
	 * @param string $variant Language variant 'sr-ec' or 'sr-el'
	 * @param string $msg Optional message
	 */
	protected function assertUnConverted( $text, $variant, $msg = '' ) {
		$converted = $this->convertTo( $text, $variant );

		$this->assertEquals( $text, $converted, $msg );
	}

	/**
	 * Assert that the text is different once converted to a variant.
	 * @param string $text Text to convert
	 * @param string $variant Language variant 'sr-ec' or 'sr-el'
	 * @param string $msg Optional message
	 */
	protected function assertConverted( $text, $variant, $msg = '' ) {
		$converted = $this->convertTo( $text, $variant );

		$this->assertNotEquals( $text, $converted, $msg );
	}

	/**
	 * Verify that the given Cyrillic text is left alone by the Cyrillic
	 * variant and is changed by the Latin variant.
	 * @param string $text Text to convert
	 * @param string $msg Optional message
	 */
	protected function assertCyrillic( $text, $msg = '' ) {
		$this->assertUnConverted( $text, 'sr-ec', $msg );
		$this->assertConverted( $text, 'sr-el', $msg );
	}

	/**
	 * Verify that the given Latin text is left alone by the Latin
	 * variant and is changed by the Cyrillic variant.
	 * @param string $text Text to convert
	 * @param string $msg Optional message
	 */
	protected function assertLatin( $text, $msg = '' ) {
		$this->assertUnConverted( $text, 'sr-el', $msg );
		$this->assertConverted( $text, 'sr-ec', $msg );
	}

	/** Wrapper for the converter's convertTo() method */
	protected function convertTo( $text, $variant ) {
		$converter = $this->getLanguageConverter();

		return $converter->convertTo( $text, $variant );
	}

	/** Convert to the 'sr-ec' variant */
	protected function convertToCyrillic( $text ) {
		return $this->convertTo( $text, 'sr-ec' );
	}

	/** Convert to the 'sr-el' variant */
	protected function convertToLatin( $text ) {
		return $this->convertTo( $text, 'sr-el' );
	}
}

View file

@ -0,0 +1,36 @@
<?php
/**
 * @group Language
 * @covers TgConverter
 */
class TgConverterTest extends MediaWikiTestCase {
	use LanguageConverterTestTrait;

	/**
	 * Convert one input string to all variants and compare the whole result.
	 *
	 * @dataProvider provideAutoConvertToAllVariants
	 * @covers TgConverter::autoConvertToAllVariants
	 */
	public function testAutoConvertToAllVariants( $result, $value ) {
		$converter = $this->getLanguageConverter();
		$actual = $converter->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $actual );
	}

	/**
	 * @return array[] Pairs of [ expected conversions by variant, input string ]
	 */
	public static function provideAutoConvertToAllVariants() {
		return [
			[
				[
					'tg' => 'г',
					'tg-latn' => 'g',
				],
				'г'
			],
			[
				[
					'tg' => 'g',
					'tg-latn' => 'g',
				],
				'g'
			],
		];
	}
}

View file

@ -0,0 +1,129 @@
<?php
/**
 * @group Language
 * @covers ZhConverter
 */
class ZhConverterTest extends MediaWikiTestCase {
	use LanguageConverterTestTrait;

	/**
	 * Convert one input string to all variants and compare the whole result.
	 *
	 * @dataProvider provideAutoConvertToAllVariants
	 * @covers ZhConverter::autoConvertToAllVariants
	 */
	public function testAutoConvertToAllVariants( $result, $value ) {
		$converter = $this->getLanguageConverter();
		$actual = $converter->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $actual );
	}

	/**
	 * @return array[] Pairs of [ expected conversions by variant, input string ]
	 */
	public static function provideAutoConvertToAllVariants() {
		return [
			// Plain hant -> hans
			[
				[
					'zh' => '㑯',
					'zh-hans' => '㑔',
					'zh-hant' => '㑯',
					'zh-cn' => '㑔',
					'zh-hk' => '㑯',
					'zh-mo' => '㑯',
					'zh-my' => '㑔',
					'zh-sg' => '㑔',
					'zh-tw' => '㑯',
				],
				'㑯'
			],
			// Plain hans -> hant
			[
				[
					'zh' => '㐷',
					'zh-hans' => '㐷',
					'zh-hant' => '傌',
					'zh-cn' => '㐷',
					'zh-hk' => '傌',
					'zh-mo' => '傌',
					'zh-my' => '㐷',
					'zh-sg' => '㐷',
					'zh-tw' => '傌',
				],
				'㐷'
			],
			// zh-cn specific
			[
				[
					'zh' => '仲介',
					'zh-hans' => '仲介',
					'zh-hant' => '仲介',
					'zh-cn' => '中介',
					'zh-hk' => '仲介',
					'zh-mo' => '仲介',
					'zh-my' => '中介',
					'zh-sg' => '中介',
					'zh-tw' => '仲介',
				],
				'仲介'
			],
			// zh-hk specific
			[
				[
					'zh' => '中文里',
					'zh-hans' => '中文里',
					'zh-hant' => '中文裡',
					'zh-cn' => '中文里',
					'zh-hk' => '中文裏',
					'zh-mo' => '中文裏',
					'zh-my' => '中文里',
					'zh-sg' => '中文里',
					'zh-tw' => '中文裡',
				],
				'中文里'
			],
			// zh-tw specific
			[
				[
					'zh' => '甲肝',
					'zh-hans' => '甲肝',
					'zh-hant' => '甲肝',
					'zh-cn' => '甲肝',
					'zh-hk' => '甲肝',
					'zh-mo' => '甲肝',
					'zh-my' => '甲肝',
					'zh-sg' => '甲肝',
					'zh-tw' => 'A肝',
				],
				'甲肝'
			],
			// zh-tw overrides zh-hant
			[
				[
					'zh' => '账',
					'zh-hans' => '账',
					'zh-hant' => '賬',
					'zh-cn' => '账',
					'zh-hk' => '賬',
					'zh-mo' => '賬',
					'zh-my' => '账',
					'zh-sg' => '账',
					'zh-tw' => '帳',
				],
				'账'
			],
			// zh-hk overrides zh-hant
			[
				[
					'zh' => '一地里',
					'zh-hans' => '一地里',
					'zh-hant' => '一地裡',
					'zh-cn' => '一地里',
					'zh-hk' => '一地裏',
					'zh-mo' => '一地裏',
					'zh-my' => '一地里',
					'zh-sg' => '一地里',
					'zh-tw' => '一地裡',
				],
				'一地里'
			],
		];
	}
}