languages: Move Converter and tests to respective files
Bug: T226833, T243760 Change-Id: I6fc7f267098d663fbefd0e78457726c343c9b3e4
This commit is contained in:
parent
61e0908fa2
commit
863a1d5bdd
29 changed files with 1408 additions and 564 deletions
22
autoload.php
22
autoload.php
|
|
@ -330,7 +330,7 @@ $wgAutoloadLocalClasses = [
|
|||
'CreateCommonPasswordCdb' => __DIR__ . '/maintenance/createCommonPasswordCdb.php',
|
||||
'CreateFileOp' => __DIR__ . '/includes/libs/filebackend/fileop/CreateFileOp.php',
|
||||
'CreditsAction' => __DIR__ . '/includes/actions/CreditsAction.php',
|
||||
'CrhConverter' => __DIR__ . '/languages/classes/LanguageCrh.php',
|
||||
'CrhConverter' => __DIR__ . '/languages/converters/CrhConverter.php',
|
||||
'CryptHKDF' => __DIR__ . '/includes/libs/CryptHKDF.php',
|
||||
'CssContent' => __DIR__ . '/includes/content/CssContent.php',
|
||||
'CssContentHandler' => __DIR__ . '/includes/content/CssContentHandler.php',
|
||||
|
|
@ -451,7 +451,7 @@ $wgAutoloadLocalClasses = [
|
|||
'EmaillingJob' => __DIR__ . '/includes/jobqueue/jobs/EmaillingJob.php',
|
||||
'EmptyBagOStuff' => __DIR__ . '/includes/libs/objectcache/EmptyBagOStuff.php',
|
||||
'EmptyUserGroup' => __DIR__ . '/maintenance/emptyUserGroup.php',
|
||||
'EnConverter' => __DIR__ . '/languages/classes/LanguageEn.php',
|
||||
'EnConverter' => __DIR__ . '/languages/converters/EnConverter.php',
|
||||
'EncryptedPassword' => __DIR__ . '/includes/password/EncryptedPassword.php',
|
||||
'EnhancedChangesList' => __DIR__ . '/includes/changes/EnhancedChangesList.php',
|
||||
'EnotifNotifyJob' => __DIR__ . '/includes/jobqueue/jobs/EnotifNotifyJob.php',
|
||||
|
|
@ -556,7 +556,7 @@ $wgAutoloadLocalClasses = [
|
|||
'FormlessAction' => __DIR__ . '/includes/actions/FormlessAction.php',
|
||||
'GIFHandler' => __DIR__ . '/includes/media/GIFHandler.php',
|
||||
'GIFMetadataExtractor' => __DIR__ . '/includes/media/GIFMetadataExtractor.php',
|
||||
'GanConverter' => __DIR__ . '/languages/classes/LanguageGan.php',
|
||||
'GanConverter' => __DIR__ . '/languages/converters/GanConverter.php',
|
||||
'GenderCache' => __DIR__ . '/includes/cache/GenderCache.php',
|
||||
'GenerateCollationData' => __DIR__ . '/maintenance/language/generateCollationData.php',
|
||||
'GenerateJsonI18n' => __DIR__ . '/maintenance/generateJsonI18n.php',
|
||||
|
|
@ -693,7 +693,7 @@ $wgAutoloadLocalClasses = [
|
|||
'InvalidPassword' => __DIR__ . '/includes/password/InvalidPassword.php',
|
||||
'InvalidateUserSesssions' => __DIR__ . '/maintenance/invalidateUserSessions.php',
|
||||
'IteratorDecorator' => __DIR__ . '/includes/libs/iterators/IteratorDecorator.php',
|
||||
'IuConverter' => __DIR__ . '/languages/classes/LanguageIu.php',
|
||||
'IuConverter' => __DIR__ . '/languages/converters/IuConverter.php',
|
||||
'JSCompilerContext' => __DIR__ . '/includes/libs/jsminplus.php',
|
||||
'JSMinPlus' => __DIR__ . '/includes/libs/jsminplus.php',
|
||||
'JSNode' => __DIR__ . '/includes/libs/jsminplus.php',
|
||||
|
|
@ -722,8 +722,8 @@ $wgAutoloadLocalClasses = [
|
|||
'JpegMetadataExtractor' => __DIR__ . '/includes/media/JpegMetadataExtractor.php',
|
||||
'JsonContent' => __DIR__ . '/includes/content/JsonContent.php',
|
||||
'JsonContentHandler' => __DIR__ . '/includes/content/JsonContentHandler.php',
|
||||
'KkConverter' => __DIR__ . '/languages/classes/LanguageKk.php',
|
||||
'KuConverter' => __DIR__ . '/languages/classes/LanguageKu.php',
|
||||
'KkConverter' => __DIR__ . '/languages/converters/KkConverter.php',
|
||||
'KuConverter' => __DIR__ . '/languages/converters/KuConverter.php',
|
||||
'LCStore' => __DIR__ . '/includes/cache/localisation/LCStore.php',
|
||||
'LCStoreCDB' => __DIR__ . '/includes/cache/localisation/LCStoreCDB.php',
|
||||
'LCStoreDB' => __DIR__ . '/includes/cache/localisation/LCStoreDB.php',
|
||||
|
|
@ -1296,7 +1296,7 @@ $wgAutoloadLocalClasses = [
|
|||
'SectionProfiler' => __DIR__ . '/includes/profiler/SectionProfiler.php',
|
||||
'SerializedValueContainer' => __DIR__ . '/includes/libs/objectcache/serialized/SerializedValueContainer.php',
|
||||
'SevenZipStream' => __DIR__ . '/maintenance/includes/SevenZipStream.php',
|
||||
'ShiConverter' => __DIR__ . '/languages/classes/LanguageShi.php',
|
||||
'ShiConverter' => __DIR__ . '/languages/converters/ShiConverter.php',
|
||||
'ShowJobs' => __DIR__ . '/maintenance/showJobs.php',
|
||||
'ShowSiteStats' => __DIR__ . '/maintenance/showSiteStats.php',
|
||||
'Site' => __DIR__ . '/includes/site/Site.php',
|
||||
|
|
@ -1458,7 +1458,7 @@ $wgAutoloadLocalClasses = [
|
|||
'SqliteUpdater' => __DIR__ . '/includes/installer/SqliteUpdater.php',
|
||||
'SquidPurgeClient' => __DIR__ . '/includes/clientpool/SquidPurgeClient.php',
|
||||
'SquidPurgeClientPool' => __DIR__ . '/includes/clientpool/SquidPurgeClientPool.php',
|
||||
'SrConverter' => __DIR__ . '/languages/classes/LanguageSr.php',
|
||||
'SrConverter' => __DIR__ . '/languages/converters/SrConverter.php',
|
||||
'StatsOutput' => __DIR__ . '/maintenance/language/StatOutputs.php',
|
||||
'StatsdAwareInterface' => __DIR__ . '/includes/libs/stats/StatsdAwareInterface.php',
|
||||
'Status' => __DIR__ . '/includes/Status.php',
|
||||
|
|
@ -1494,7 +1494,7 @@ $wgAutoloadLocalClasses = [
|
|||
'TextPassDumper' => __DIR__ . '/maintenance/includes/TextPassDumper.php',
|
||||
'TextSlotDiffRenderer' => __DIR__ . '/includes/diff/TextSlotDiffRenderer.php',
|
||||
'TextStatsOutput' => __DIR__ . '/maintenance/language/StatOutputs.php',
|
||||
'TgConverter' => __DIR__ . '/languages/classes/LanguageTg.php',
|
||||
'TgConverter' => __DIR__ . '/languages/converters/TgConverter.php',
|
||||
'ThrottledError' => __DIR__ . '/includes/exception/ThrottledError.php',
|
||||
'ThumbnailImage' => __DIR__ . '/includes/media/ThumbnailImage.php',
|
||||
'ThumbnailRenderJob' => __DIR__ . '/includes/jobqueue/jobs/ThumbnailRenderJob.php',
|
||||
|
|
@ -1584,7 +1584,7 @@ $wgAutoloadLocalClasses = [
|
|||
'UserRightsProxy' => __DIR__ . '/includes/user/UserRightsProxy.php',
|
||||
'UserrightsPage' => __DIR__ . '/includes/specials/SpecialUserrights.php',
|
||||
'UsersPager' => __DIR__ . '/includes/specials/pagers/UsersPager.php',
|
||||
'UzConverter' => __DIR__ . '/languages/classes/LanguageUz.php',
|
||||
'UzConverter' => __DIR__ . '/languages/converters/UzConverter.php',
|
||||
'VFormHTMLForm' => __DIR__ . '/includes/htmlform/VFormHTMLForm.php',
|
||||
'ValidateRegistrationFile' => __DIR__ . '/maintenance/validateRegistrationFile.php',
|
||||
'VersionChecker' => __DIR__ . '/includes/registration/VersionChecker.php',
|
||||
|
|
@ -1715,7 +1715,7 @@ $wgAutoloadLocalClasses = [
|
|||
'XmlJsCode' => __DIR__ . '/includes/XmlJsCode.php',
|
||||
'XmlSelect' => __DIR__ . '/includes/XmlSelect.php',
|
||||
'XmlTypeCheck' => __DIR__ . '/includes/libs/mime/XmlTypeCheck.php',
|
||||
'ZhConverter' => __DIR__ . '/languages/classes/LanguageZh.php',
|
||||
'ZhConverter' => __DIR__ . '/languages/converters/ZhConverter.php',
|
||||
'ZipDirectoryReader' => __DIR__ . '/includes/utils/ZipDirectoryReader.php',
|
||||
'ZipDirectoryReaderError' => __DIR__ . '/includes/utils/ZipDirectoryReaderError.php',
|
||||
'concatenatedgziphistoryblob' => __DIR__ . '/includes/historyblob/ConcatenatedGzipHistoryBlob.php',
|
||||
|
|
|
|||
|
|
@ -20,62 +20,6 @@
|
|||
* @file
|
||||
*/
|
||||
|
||||
/**
|
||||
* @ingroup Language
|
||||
*/
|
||||
class EnConverter extends LanguageConverter {

	/**
	 * Registers the two variants handled here: plain English and Pig Latin.
	 *
	 * @param Language $langobj
	 */
	public function __construct( Language $langobj ) {
		$variantCodes = [ 'en', 'en-x-piglatin' ];

		parent::__construct( $langobj, 'en', $variantCodes );
	}

	/**
	 * Dummy method required by the base class: installs an empty replacement
	 * table per variant. translate() below does not consult these tables.
	 */
	protected function loadDefaultTables() {
		$tables = [];
		foreach ( [ 'en', 'en-x-piglatin' ] as $code ) {
			$tables[$code] = new ReplacementArray();
		}

		$this->mTables = $tables;
	}

	/**
	 * Translates text into Pig Latin. This lets developers exercise the language
	 * variant machinery and its user interface without switching the wiki away
	 * from the default language.
	 *
	 * @param string $text
	 * @param string $toVariant Any value other than 'en-x-piglatin' returns $text unchanged
	 * @return string
	 */
	public function translate( $text, $toVariant ) {
		if ( $toVariant !== 'en-x-piglatin' ) {
			return $text;
		}

		// Consonant-initial words: move the leading consonant cluster ("qu" and
		// "squ" count as one unit) behind the rest of the word and append "ay".
		// If the cluster began with a capital, the capital moves to the new front.
		$rotate = function ( $parts ) {
			$onset = $parts[1];
			$rest = $parts[2];
			$startsUpper = strtoupper( $onset[0] ) === $onset[0];

			return $startsUpper
				? ucfirst( $rest ) . lcfirst( $onset ) . 'ay'
				: $rest . $onset . 'ay';
		};

		// Vowel-initial words just get "way" appended.
		$convertWord = function ( $found ) use ( $rotate ) {
			$word = $found[0];

			return preg_match( '/^[aeiou]/i', $word )
				? $word . 'way'
				: preg_replace_callback( '/^(s?qu|[^aeiou][^aeiouy]*)(.*)$/i', $rotate, $word );
		};

		// Only words made of the plain English alphabet are processed; everything
		// else is left alone. That skips words such as 'naïve' or 'résumé', which
		// is acceptable here. Single letters and words that are neither lowercase
		// nor uppercase-first are ignored as well.
		return preg_replace_callback( '/[A-Za-z][a-z\']+/', $convertWord, $text );
	}
}
|
||||
|
||||
/**
|
||||
* English
|
||||
*
|
||||
|
|
|
|||
|
|
@ -20,59 +20,6 @@
|
|||
* @file
|
||||
*/
|
||||
|
||||
/**
|
||||
* @ingroup Language
|
||||
*/
|
||||
class GanConverter extends LanguageConverter {
	/**
	 * @param Language $langobj
	 */
	public function __construct( Language $langobj ) {
		// Both separators are assigned before the parent constructor runs.
		$this->mDescCodeSep = ':';
		$this->mDescVarSep = ';';

		parent::__construct(
			$langobj,
			'gan',
			// Variant codes.
			[ 'gan', 'gan-hans', 'gan-hant' ],
			// Fallback chain for each variant.
			[
				'gan' => [ 'gan-hans', 'gan-hant' ],
				'gan-hans' => [ 'gan' ],
				'gan-hant' => [ 'gan' ],
			],
			[],
			// NOTE(review): presumably the per-variant manual conversion level;
			// confirm against LanguageConverter::__construct.
			[ 'gan' => 'disable' ]
		);

		// Display names for the variants, layered over whatever the parent set up.
		$this->mVariantNames = array_merge( $this->mVariantNames, [
			'gan' => '原文',
			'gan-hans' => '简体',
			'gan-hant' => '繁體',
		] );
	}

	/**
	 * Builds the default tables: the two script variants reuse the Chinese
	 * simplified/traditional data, the base 'gan' code gets an empty table.
	 */
	protected function loadDefaultTables() {
		$toHans = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hans );
		$toHant = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hant );

		$this->mTables = [
			'gan-hans' => $toHans,
			'gan-hant' => $toHant,
			'gan' => new ReplacementArray(),
		];
	}

	/**
	 * Runs the key through autoConvert() with the base 'gan' code.
	 *
	 * @param string $key
	 * @return string
	 */
	public function convertCategoryKey( $key ) {
		$converted = $this->autoConvert( $key, 'gan' );

		return $converted;
	}
}
|
||||
|
||||
/**
|
||||
* Gan Chinese
|
||||
*
|
||||
|
|
@ -93,5 +40,4 @@ class LanguageGan extends LanguageZh {
|
|||
// LanguageZh::normalizeForSearch
|
||||
return parent::normalizeForSearch( $string, $autoVariant );
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,344 +21,6 @@
|
|||
* @ingroup Language
|
||||
*/
|
||||
|
||||
define( 'KK_C_UC', 'АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯ' ); # Kazakh Cyrillic uppercase
|
||||
define( 'KK_C_LC', 'аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя' ); # Kazakh Cyrillic lowercase
|
||||
define( 'KK_L_UC', 'AÄBCÇDEÉFGĞHIİÏJKLMNÑOÖPQRSŞTUÜVWXYÝZ' ); # Kazakh Latin uppercase
|
||||
define( 'KK_L_LC', 'aäbcçdeéfgğhıiïjklmnñoöpqrsştuüvwxyýz' ); # Kazakh Latin lowercase
|
||||
// define( 'KK_A', 'ٴابپتجحدرزسشعفقكلمنڭەوۇۋۆىيچھ' ); # Kazakh Arabic
|
||||
define( 'H_HAMZA', 'ٴ' ); # U+0674 ARABIC LETTER HIGH HAMZA
|
||||
// define( 'ZWNJ', '' ); # U+200C ZERO WIDTH NON-JOINER
|
||||
|
||||
/**
|
||||
* Kazakh (Қазақша) converter routines
|
||||
*
|
||||
* @ingroup Language
|
||||
*/
|
||||
class KkConverter extends LanguageConverter {
|
||||
protected $mCyrl2Latn, $mLatn2Cyrl, $mCyLa2Arab;
|
||||
|
||||
/**
|
||||
* @param Language $langobj
|
||||
*/
|
||||
public function __construct( Language $langobj ) {
	parent::__construct(
		$langobj,
		'kk',
		// Variant codes: the base code, three scripts, three regional codes.
		[ 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn' ],
		// Single fallback for each variant.
		[
			'kk' => 'kk-cyrl',
			'kk-cyrl' => 'kk',
			'kk-latn' => 'kk',
			'kk-arab' => 'kk',
			'kk-kz' => 'kk-cyrl',
			'kk-tr' => 'kk-latn',
			'kk-cn' => 'kk-arab'
		],
		[]
	);

	// The regex tables live in code, so there is nothing to gain from loading
	// them lazily. Deferring this to loadDefaultTables() would also mean they
	// never get loaded when the tables themselves come from cache.
	$this->loadRegs();
}
|
||||
|
||||
protected function loadDefaultTables() {
	// Placeholder data until a generated KkConversion.php exists; once it does,
	// it is meant to be required here and these empty declarations removed.
	$kk2Cyrl = $kk2Latn = $kk2Arab = [];
	$kk2KZ = $kk2TR = $kk2CN = [];

	// Script-level tables.
	$cyrl = new ReplacementArray( $kk2Cyrl );
	$latn = new ReplacementArray( $kk2Latn );
	$arab = new ReplacementArray( $kk2Arab );

	// Regional tables: the script data merged with the regional additions.
	$kz = new ReplacementArray( array_merge( $kk2Cyrl, $kk2KZ ) );
	$tr = new ReplacementArray( array_merge( $kk2Latn, $kk2TR ) );
	$cn = new ReplacementArray( array_merge( $kk2Arab, $kk2CN ) );

	$this->mTables = [
		'kk-cyrl' => $cyrl,
		'kk-latn' => $latn,
		'kk-arab' => $arab,
		'kk-kz' => $kz,
		'kk-tr' => $tr,
		'kk-cn' => $cn,
		'kk' => new ReplacementArray()
	];
}
|
||||
|
||||
protected function postLoadTables() {
	// Each regional table absorbs the table of the script it is written in.
	$scriptOf = [
		'kk-kz' => 'kk-cyrl',
		'kk-tr' => 'kk-latn',
		'kk-cn' => 'kk-arab',
	];

	foreach ( $scriptOf as $regional => $script ) {
		$this->mTables[$regional]->merge( $this->mTables[$script] );
	}
}
|
||||
|
||||
private function loadRegs() {
|
||||
$this->mCyrl2Latn = [
|
||||
# # Punctuation
|
||||
'/№/u' => 'No.',
|
||||
# # Е after vowels
|
||||
'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])Е/u' => '$1YE',
|
||||
'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1ye',
|
||||
# # leading ЁЮЯЩ
|
||||
'/^Ё([' . KK_C_UC . ']|$)/u' => 'YO$1', '/^Ё([' . KK_C_LC . ']|$)/u' => 'Yo$1',
|
||||
'/^Ю([' . KK_C_UC . ']|$)/u' => 'YU$1', '/^Ю([' . KK_C_LC . ']|$)/u' => 'Yu$1',
|
||||
'/^Я([' . KK_C_UC . ']|$)/u' => 'YA$1', '/^Я([' . KK_C_LC . ']|$)/u' => 'Ya$1',
|
||||
'/^Щ([' . KK_C_UC . ']|$)/u' => 'ŞÇ$1', '/^Щ([' . KK_C_LC . ']|$)/u' => 'Şç$1',
|
||||
# # other ЁЮЯ
|
||||
'/Ё/u' => 'YO', '/ё/u' => 'yo',
|
||||
'/Ю/u' => 'YU', '/ю/u' => 'yu',
|
||||
'/Я/u' => 'YA', '/я/u' => 'ya',
|
||||
'/Щ/u' => 'ŞÇ', '/щ/u' => 'şç',
|
||||
# # soft and hard signs
|
||||
'/[ъЪ]/u' => 'ʺ', '/[ьЬ]/u' => 'ʹ',
|
||||
# # other characters
|
||||
'/А/u' => 'A', '/а/u' => 'a', '/Ә/u' => 'Ä', '/ә/u' => 'ä',
|
||||
'/Б/u' => 'B', '/б/u' => 'b', '/В/u' => 'V', '/в/u' => 'v',
|
||||
'/Г/u' => 'G', '/г/u' => 'g', '/Ғ/u' => 'Ğ', '/ғ/u' => 'ğ',
|
||||
'/Д/u' => 'D', '/д/u' => 'd', '/Е/u' => 'E', '/е/u' => 'e',
|
||||
'/Ж/u' => 'J', '/ж/u' => 'j', '/З/u' => 'Z', '/з/u' => 'z',
|
||||
'/И/u' => 'Ï', '/и/u' => 'ï', '/Й/u' => 'Ý', '/й/u' => 'ý',
|
||||
'/К/u' => 'K', '/к/u' => 'k', '/Қ/u' => 'Q', '/қ/u' => 'q',
|
||||
'/Л/u' => 'L', '/л/u' => 'l', '/М/u' => 'M', '/м/u' => 'm',
|
||||
'/Н/u' => 'N', '/н/u' => 'n', '/Ң/u' => 'Ñ', '/ң/u' => 'ñ',
|
||||
'/О/u' => 'O', '/о/u' => 'o', '/Ө/u' => 'Ö', '/ө/u' => 'ö',
|
||||
'/П/u' => 'P', '/п/u' => 'p', '/Р/u' => 'R', '/р/u' => 'r',
|
||||
'/С/u' => 'S', '/с/u' => 's', '/Т/u' => 'T', '/т/u' => 't',
|
||||
'/У/u' => 'W', '/у/u' => 'w', '/Ұ/u' => 'U', '/ұ/u' => 'u',
|
||||
'/Ү/u' => 'Ü', '/ү/u' => 'ü', '/Ф/u' => 'F', '/ф/u' => 'f',
|
||||
'/Х/u' => 'X', '/х/u' => 'x', '/Һ/u' => 'H', '/һ/u' => 'h',
|
||||
'/Ц/u' => 'C', '/ц/u' => 'c', '/Ч/u' => 'Ç', '/ч/u' => 'ç',
|
||||
'/Ш/u' => 'Ş', '/ш/u' => 'ş', '/Ы/u' => 'I', '/ы/u' => 'ı',
|
||||
'/І/u' => 'İ', '/і/u' => 'i', '/Э/u' => 'É', '/э/u' => 'é',
|
||||
];
|
||||
|
||||
$this->mLatn2Cyrl = [
|
||||
# # Punctuation
|
||||
'/#|No\./' => '№',
|
||||
# # Şç
|
||||
'/ŞÇʹ/u' => 'ЩЬ', '/Şçʹ/u' => 'Щь',
|
||||
'/Ş[Çç]/u' => 'Щ', '/şç/u' => 'щ',
|
||||
# # soft and hard signs
|
||||
'/([' . KK_L_UC . '])ʺ([' . KK_L_UC . '])/u' => '$1Ъ$2',
|
||||
'/ʺ([' . KK_L_LC . '])/u' => 'ъ$1',
|
||||
'/([' . KK_L_UC . '])ʹ([' . KK_L_UC . '])/u' => '$1Ь$2',
|
||||
'/ʹ([' . KK_L_LC . '])/u' => 'ь$1',
|
||||
'/ʺ/u' => 'ъ',
|
||||
'/ʹ/u' => 'ь',
|
||||
# # Ye Yo Yu Ya.
|
||||
'/Y[Ee]/u' => 'Е', '/ye/u' => 'е',
|
||||
'/Y[Oo]/u' => 'Ё', '/yo/u' => 'ё',
|
||||
'/Y[UWuw]/u' => 'Ю', '/y[uw]/u' => 'ю',
|
||||
'/Y[Aa]/u' => 'Я', '/ya/u' => 'я',
|
||||
# # other characters
|
||||
'/A/u' => 'А', '/a/u' => 'а', '/Ä/u' => 'Ә', '/ä/u' => 'ә',
|
||||
'/B/u' => 'Б', '/b/u' => 'б', '/C/u' => 'Ц', '/c/u' => 'ц',
|
||||
'/Ç/u' => 'Ч', '/ç/u' => 'ч', '/D/u' => 'Д', '/d/u' => 'д',
|
||||
'/E/u' => 'Е', '/e/u' => 'е', '/É/u' => 'Э', '/é/u' => 'э',
|
||||
'/F/u' => 'Ф', '/f/u' => 'ф', '/G/u' => 'Г', '/g/u' => 'г',
|
||||
'/Ğ/u' => 'Ғ', '/ğ/u' => 'ғ', '/H/u' => 'Һ', '/h/u' => 'һ',
|
||||
'/I/u' => 'Ы', '/ı/u' => 'ы', '/İ/u' => 'І', '/i/u' => 'і',
|
||||
'/Ï/u' => 'И', '/ï/u' => 'и', '/J/u' => 'Ж', '/j/u' => 'ж',
|
||||
'/K/u' => 'К', '/k/u' => 'к', '/L/u' => 'Л', '/l/u' => 'л',
|
||||
'/M/u' => 'М', '/m/u' => 'м', '/N/u' => 'Н', '/n/u' => 'н',
|
||||
'/Ñ/u' => 'Ң', '/ñ/u' => 'ң', '/O/u' => 'О', '/o/u' => 'о',
|
||||
'/Ö/u' => 'Ө', '/ö/u' => 'ө', '/P/u' => 'П', '/p/u' => 'п',
|
||||
'/Q/u' => 'Қ', '/q/u' => 'қ', '/R/u' => 'Р', '/r/u' => 'р',
|
||||
'/S/u' => 'С', '/s/u' => 'с', '/Ş/u' => 'Ш', '/ş/u' => 'ш',
|
||||
'/T/u' => 'Т', '/t/u' => 'т', '/U/u' => 'Ұ', '/u/u' => 'ұ',
|
||||
'/Ü/u' => 'Ү', '/ü/u' => 'ү', '/V/u' => 'В', '/v/u' => 'в',
|
||||
'/W/u' => 'У', '/w/u' => 'у', '/Ý/u' => 'Й', '/ý/u' => 'й',
|
||||
'/X/u' => 'Х', '/x/u' => 'х', '/Z/u' => 'З', '/z/u' => 'з',
|
||||
];
|
||||
|
||||
$this->mCyLa2Arab = [
|
||||
# # Punctuation -> Arabic
|
||||
'/#|№|No\./u' => '', # U+0600
|
||||
'/\,/' => '،', # U+060C
|
||||
'/;/' => '؛', # U+061B
|
||||
'/\?/' => '؟', # U+061F
|
||||
'/%/' => '٪', # U+066A
|
||||
'/\*/' => '٭', # U+066D
|
||||
# # Digits -> Arabic
|
||||
'/0/' => '۰', # U+06F0
|
||||
'/1/' => '۱', # U+06F1
|
||||
'/2/' => '۲', # U+06F2
|
||||
'/3/' => '۳', # U+06F3
|
||||
'/4/' => '۴', # U+06F4
|
||||
'/5/' => '۵', # U+06F5
|
||||
'/6/' => '۶', # U+06F6
|
||||
'/7/' => '۷', # U+06F7
|
||||
'/8/' => '۸', # U+06F8
|
||||
'/9/' => '۹', # U+06F9
|
||||
# # Cyrillic -> Arabic
|
||||
'/Аллаһ/ui' => 'ﷲ',
|
||||
'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1يە',
|
||||
'/[еэ]/ui' => 'ە', '/[ъь]/ui' => '',
|
||||
'/[аә]/ui' => 'ا', '/[оө]/ui' => 'و', '/[ұү]/ui' => 'ۇ', '/[ыі]/ui' => 'ى',
|
||||
'/[и]/ui' => 'ىي', '/ё/ui' => 'يو', '/ю/ui' => 'يۋ', '/я/ui' => 'يا', '/[й]/ui' => 'ي',
|
||||
'/ц/ui' => 'تس', '/щ/ui' => 'شش',
|
||||
'/һ/ui' => 'ح', '/ч/ui' => 'تش',
|
||||
# '/һ/ui' => 'ھ', '/ч/ui' => 'چ',
|
||||
'/б/ui' => 'ب', '/в/ui' => 'ۆ', '/г/ui' => 'گ', '/ғ/ui' => 'ع',
|
||||
'/д/ui' => 'د', '/ж/ui' => 'ج', '/з/ui' => 'ز', '/к/ui' => 'ك',
|
||||
'/қ/ui' => 'ق', '/л/ui' => 'ل', '/м/ui' => 'م', '/н/ui' => 'ن',
|
||||
'/ң/ui' => 'ڭ', '/п/ui' => 'پ', '/р/ui' => 'ر', '/с/ui' => 'س',
|
||||
'/т/ui' => 'ت', '/у/ui' => 'ۋ', '/ф/ui' => 'ف', '/х/ui' => 'ح',
|
||||
'/ш/ui' => 'ش',
|
||||
# # Latin -> Arabic // commented for now...
|
||||
/*'/Allah/ui' => 'ﷲ',
|
||||
'/[eé]/ui' => 'ە', '/[yý]/ui' => 'ي', '/[ʺʹ]/ui' => '',
|
||||
'/[aä]/ui' => 'ا', '/[oö]/ui' => 'و', '/[uü]/ui' => 'ۇ',
|
||||
'/[ï]/ui' => 'ىي', '/[ıIiİ]/u' => 'ى',
|
||||
'/c/ui' => 'تس',
|
||||
'/ç/ui' => 'تش', '/h/ui' => 'ح',
|
||||
#'/ç/ui' => 'چ', '/h/ui' => 'ھ',
|
||||
'/b/ui' => 'ب','/d/ui' => 'د',
|
||||
'/f/ui' => 'ف', '/g/ui' => 'گ', '/ğ/ui' => 'ع',
|
||||
'/j/ui' => 'ج', '/k/ui' => 'ك', '/l/ui' => 'ل', '/m/ui' => 'م',
|
||||
'/n/ui' => 'ن', '/ñ/ui' => 'ڭ', '/p/ui' => 'پ', '/q/ui' => 'ق',
|
||||
'/r/ui' => 'ر', '/s/ui' => 'س', '/ş/ui' => 'ش', '/t/ui' => 'ت',
|
||||
'/v/ui' => 'ۆ', '/w/ui' => 'ۋ', '/x/ui' => 'ح', '/z/ui' => 'ز',*/
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* A function wrapper:
|
||||
* - if there is no selected variant, leave the link
|
||||
* names as they were
|
||||
* - do not try to find variants for usernames
|
||||
*
|
||||
* @param string &$link
|
||||
* @param Title &$nt
|
||||
* @param bool $ignoreOtherCond
|
||||
*/
|
||||
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	// Links into the user namespaces are left alone: no variant lookup at all.
	if ( is_object( $nt ) && in_array( $nt->getNamespace(), [ NS_USER, NS_USER_TALK ] ) ) {
		return;
	}

	$originalLink = $link;
	parent::findVariantLink( $link, $nt, $ignoreOtherCond );

	// When the preferred variant is just the main language code, put the link
	// text back the way the caller supplied it.
	$noVariantSelected = $this->getPreferredVariant() == $this->mMainLanguageCode;
	if ( $noVariantSelected ) {
		$link = $originalLink;
	}
}
|
||||
|
||||
/**
|
||||
* It translates text into variant
|
||||
*
|
||||
* @param string $text
|
||||
* @param string $toVariant
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function translate( $text, $toVariant ) {
	// Table-driven conversion first; the regex transliteration below runs on its result.
	$text = parent::translate( $text, $toVariant );

	// $letters lists the characters of the *source* script(s) for the requested
	// variant. It is spliced into a negated character class further down.
	switch ( $toVariant ) {
		case 'kk-cyrl':
		case 'kk-kz':
			$letters = KK_L_UC . KK_L_LC . 'ʺʹ#0123456789';
			break;
		case 'kk-latn':
		case 'kk-tr':
			$letters = KK_C_UC . KK_C_LC . '№0123456789';
			break;
		case 'kk-arab':
		case 'kk-cn':
			// Latin letters are deliberately not part of the Arabic source set for now.
			$letters = KK_C_UC . KK_C_LC . ',;\?%\*№0123456789';
			break;
		default:
			// Unknown variant or the base code: nothing more to do.
			return $text;
	}

	// disable conversion variables like $1, $2...
	$varsfix = '\$[0-9]';

	// Split at every "$N" placeholder followed by a run of characters outside
	// $letters. Offsets are captured so the delimiters can be copied back verbatim.
	$matches = preg_split(
		'/' . $varsfix . '[^' . $letters . ']+/u',
		$text,
		-1,
		PREG_SPLIT_OFFSET_CAPTURE
	);

	if ( $matches === false ) {
		// preg_split() failed, e.g. because the text is not valid UTF-8 (the /u
		// modifier rejects it). Iterating the false result would discard the
		// whole text, so hand back what we have instead.
		return $text;
	}

	$mstart = 0;
	$ret = '';

	foreach ( $matches as $m ) {
		// Copy the delimiter that sits between the previous piece and this one...
		$ret .= substr( $text, $mstart, $m[1] - $mstart );
		// ...then append the transliterated piece.
		$ret .= $this->regsConverter( $m[0], $toVariant );
		$mstart = $m[1] + strlen( $m[0] );
	}

	return $ret;
}
|
||||
|
||||
/**
|
||||
* @param string $text
|
||||
* @param string $toVariant
|
||||
* @return mixed|string
|
||||
*/
|
||||
private function regsConverter( $text, $toVariant ) {
	if ( $text == '' ) {
		return $text;
	}

	// Pick the rule table for the target variant. The Arabic targets first get
	// a word-by-word pass that prefixes a high hamza where required.
	switch ( $toVariant ) {
		case 'kk-arab':
		case 'kk-cn':
			$letters = KK_C_LC . KK_C_UC;
			$front = 'әөүіӘӨҮІ';
			$excludes = 'еэгғкқЕЭГҒКҚ';

			// split text to words
			$words = preg_split( '/[\b\s\-\.:]+/', $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
			$cursor = 0;
			$marked = '';
			foreach ( $words as $entry ) {
				$word = $entry[0];
				$offset = $entry[1];

				// Copy the separator that precedes this word unchanged.
				$marked .= substr( $text, $cursor, $offset - $cursor );

				// A word takes the hamza when it contains a front vowel and none
				// of the letters listed in $excludes.
				$needsHamza = preg_match( '/[' . $front . ']/u', $word ) &&
					!preg_match( '/[' . $excludes . ']/u', $word );

				$marked .= $needsHamza
					? preg_replace( '/[' . $letters . ']+/u', H_HAMZA . '$0', $word )
					: $word;

				$cursor = $offset + strlen( $word );
			}

			$text = $marked;
			$rules = $this->mCyLa2Arab;
			break;
		case 'kk-latn':
		case 'kk-tr':
			$rules = $this->mCyrl2Latn;
			break;
		case 'kk-cyrl':
		case 'kk-kz':
			$rules = $this->mLatn2Cyrl;
			break;
		default:
			return $text;
	}

	// Apply the rules one after another, in table order.
	foreach ( $rules as $pattern => $replacement ) {
		$text = preg_replace( $pattern, $replacement, $text );
	}

	return $text;
}
|
||||
|
||||
/**
|
||||
* @param string $key
|
||||
* @return string
|
||||
*/
|
||||
public function convertCategoryKey( $key ) {
	// Convert with the base 'kk' code as the target.
	$converted = $this->autoConvert( $key, 'kk' );

	return $converted;
}
|
||||
}
|
||||
|
||||
/**
|
||||
* class that handles Cyrillic, Latin and Arabic scripts for Kazakh
|
||||
* right now it only distinguish kk_cyrl, kk_latn, kk_arab and kk_kz, kk_tr, kk_cn.
|
||||
|
|
|
|||
|
|
@ -21,109 +21,6 @@
|
|||
* @ingroup Language
|
||||
*/
|
||||
|
||||
/**
|
||||
* @ingroup Language
|
||||
*/
|
||||
class ZhConverter extends LanguageConverter {
	/**
	 * @param Language $langobj
	 */
	public function __construct( Language $langobj ) {
		// Both separators are assigned before the parent constructor runs.
		$this->mDescCodeSep = ':';
		$this->mDescVarSep = ';';

		parent::__construct(
			$langobj,
			'zh',
			// Variant codes.
			[
				'zh',
				'zh-hans',
				'zh-hant',
				'zh-cn',
				'zh-hk',
				'zh-mo',
				'zh-my',
				'zh-sg',
				'zh-tw'
			],
			// Fallback chain for each variant.
			[
				'zh' => [ 'zh-hans', 'zh-hant', 'zh-cn', 'zh-tw', 'zh-hk', 'zh-sg', 'zh-mo', 'zh-my' ],
				'zh-hans' => [ 'zh-cn', 'zh-sg', 'zh-my' ],
				'zh-hant' => [ 'zh-tw', 'zh-hk', 'zh-mo' ],
				'zh-cn' => [ 'zh-hans', 'zh-sg', 'zh-my' ],
				'zh-sg' => [ 'zh-hans', 'zh-cn', 'zh-my' ],
				'zh-my' => [ 'zh-hans', 'zh-sg', 'zh-cn' ],
				'zh-tw' => [ 'zh-hant', 'zh-hk', 'zh-mo' ],
				'zh-hk' => [ 'zh-hant', 'zh-mo', 'zh-tw' ],
				'zh-mo' => [ 'zh-hant', 'zh-hk', 'zh-tw' ],
			],
			[],
			// NOTE(review): presumably the per-variant manual conversion level;
			// confirm against LanguageConverter::__construct.
			[
				'zh' => 'disable',
				'zh-hans' => 'unidirectional',
				'zh-hant' => 'unidirectional',
			]
		);

		// Display names for the variants, layered over whatever the parent set up.
		$this->mVariantNames = array_merge( $this->mVariantNames, [
			'zh' => '原文',
			'zh-hans' => '简体',
			'zh-hant' => '繁體',
			'zh-cn' => '大陆',
			'zh-tw' => '台灣',
			'zh-hk' => '香港',
			'zh-mo' => '澳門',
			'zh-sg' => '新加坡',
			'zh-my' => '大马',
		] );
	}

	/**
	 * Builds one table per variant from the ZhConversion data. 'zh-mo' starts
	 * from the Hong Kong data, 'zh-my' and 'zh-sg' from the mainland data, and
	 * the base 'zh' code gets an empty table.
	 */
	protected function loadDefaultTables() {
		$hans = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hans );
		$hant = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hant );
		$cn = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN );
		$hk = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2HK );
		$mo = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2HK );
		$my = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN );
		$sg = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN );
		$tw = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2TW );

		$this->mTables = [
			'zh-hans' => $hans,
			'zh-hant' => $hant,
			'zh-cn' => $cn,
			'zh-hk' => $hk,
			'zh-mo' => $mo,
			'zh-my' => $my,
			'zh-sg' => $sg,
			'zh-tw' => $tw,
			'zh' => new ReplacementArray()
		];
	}

	/**
	 * Completes every regional table with the entries of its script table.
	 */
	protected function postLoadTables() {
		$scriptOf = [
			'zh-cn' => 'zh-hans',
			'zh-hk' => 'zh-hant',
			'zh-mo' => 'zh-hant',
			'zh-my' => 'zh-hans',
			'zh-sg' => 'zh-hans',
			'zh-tw' => 'zh-hant',
		];

		foreach ( $scriptOf as $regional => $script ) {
			$own = $this->mTables[$regional]->getArray();
			$inherited = $this->mTables[$script]->getArray();

			// Array union: keys already present in the regional table win.
			$this->mTables[$regional]->setArray( $own + $inherited );
		}
	}

	/**
	 * Runs the key through autoConvert() with the base 'zh' code.
	 *
	 * @param string $key
	 * @return string
	 */
	public function convertCategoryKey( $key ) {
		$converted = $this->autoConvert( $key, 'zh' );

		return $converted;
	}
}
|
||||
|
||||
/**
|
||||
* class that handles both Traditional and Simplified Chinese
|
||||
* right now it only distinguish zh_hans, zh_hant, zh_cn, zh_tw, zh_sg and zh_hk.
|
||||
|
|
@ -174,7 +71,7 @@ class LanguageZh extends LanguageZh_hans {
|
|||
public function convertForSearchResult( $termsArray ) {
	// Work on one '|'-joined string so all terms are processed in a single pass.
	$terms = implode( '|', $termsArray );
	$terms = self::convertDoubleWidth( $terms );

	// Expand the terms into every variant exactly once, going through the
	// getConverter() accessor rather than the mConverter property. The stale
	// property-based call that used to precede this line is gone, so the
	// expansion no longer runs twice.
	$terms = implode( '|', $this->getConverter()->autoConvertToAllVariants( $terms ) );

	// Split back into individual terms and drop duplicates across variants.
	return array_unique( explode( '|', $terms ) );
}
|
||||
|
|
|
|||
74
languages/converters/EnConverter.php
Normal file
74
languages/converters/EnConverter.php
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
<?php
|
||||
/**
|
||||
* English specific code.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
* @file
|
||||
*/
|
||||
|
||||
/**
|
||||
* @ingroup Language
|
||||
*/
|
||||
class EnConverter extends LanguageConverter {

	/**
	 * Registers the two variants handled here: plain English and Pig Latin.
	 *
	 * @param \Language $langobj
	 */
	public function __construct( \Language $langobj ) {
		$variantCodes = [ 'en', 'en-x-piglatin' ];

		parent::__construct( $langobj, 'en', $variantCodes );
	}

	/**
	 * Dummy method required by the base class: installs an empty replacement
	 * table per variant. translate() below does not consult these tables.
	 */
	protected function loadDefaultTables() {
		$tables = [];
		foreach ( [ 'en', 'en-x-piglatin' ] as $code ) {
			$tables[$code] = new ReplacementArray();
		}

		$this->mTables = $tables;
	}

	/**
	 * Translates text into Pig Latin. This lets developers exercise the language
	 * variant machinery and its user interface without switching the wiki away
	 * from the default language.
	 *
	 * @param string $text
	 * @param string $toVariant Any value other than 'en-x-piglatin' returns $text unchanged
	 * @return string
	 */
	public function translate( $text, $toVariant ) {
		if ( $toVariant !== 'en-x-piglatin' ) {
			return $text;
		}

		// Consonant-initial words: move the leading consonant cluster ("qu" and
		// "squ" count as one unit) behind the rest of the word and append "ay".
		// If the cluster began with a capital, the capital moves to the new front.
		$rotate = function ( $parts ) {
			$onset = $parts[1];
			$rest = $parts[2];
			$startsUpper = strtoupper( $onset[0] ) === $onset[0];

			return $startsUpper
				? ucfirst( $rest ) . lcfirst( $onset ) . 'ay'
				: $rest . $onset . 'ay';
		};

		// Vowel-initial words just get "way" appended.
		$convertWord = function ( $found ) use ( $rotate ) {
			$word = $found[0];

			return preg_match( '/^[aeiou]/i', $word )
				? $word . 'way'
				: preg_replace_callback( '/^(s?qu|[^aeiou][^aeiouy]*)(.*)$/i', $rotate, $word );
		};

		// Only words made of the plain English alphabet are processed; everything
		// else is left alone. That skips words such as 'naïve' or 'résumé', which
		// is acceptable here. Single letters and words that are neither lowercase
		// nor uppercase-first are ignored as well.
		return preg_replace_callback( '/[A-Za-z][a-z\']+/', $convertWord, $text );
	}
}
|
||||
74
languages/converters/GanConverter.php
Normal file
74
languages/converters/GanConverter.php
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
<?php
|
||||
/**
|
||||
* Gan Chinese specific code.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
* @file
|
||||
*/
|
||||
|
||||
/**
|
||||
* @ingroup Language
|
||||
*/
|
||||
class GanConverter extends LanguageConverter {
	/**
	 * Configure the Gan converter: description separators, the variant list
	 * with its fallback chains, and the display names of the variants.
	 *
	 * @param Language $langobj
	 */
	public function __construct( Language $langobj ) {
		// The separators are assigned before the parent constructor is invoked.
		$this->mDescCodeSep = ':';
		$this->mDescVarSep = ';';

		parent::__construct(
			$langobj,
			'gan',
			[ 'gan', 'gan-hans', 'gan-hant' ],
			[
				'gan' => [ 'gan-hans', 'gan-hant' ],
				'gan-hans' => [ 'gan' ],
				'gan-hant' => [ 'gan' ],
			],
			[],
			[ 'gan' => 'disable' ]
		);

		// Add the Gan labels on top of whatever names the parent already holds.
		$this->mVariantNames = array_merge( $this->mVariantNames, [
			'gan' => '原文',
			'gan-hans' => '简体',
			'gan-hant' => '繁體',
		] );
	}

	/**
	 * Populate $this->mTables from the shared Chinese conversion data.
	 * The plain 'gan' code gets an empty replacement table.
	 */
	protected function loadDefaultTables() {
		$simplified = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hans );
		$traditional = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hant );

		$this->mTables = [
			'gan-hans' => $simplified,
			'gan-hant' => $traditional,
			'gan' => new ReplacementArray(),
		];
	}

	/**
	 * Convert a category key by running it through autoConvert() for 'gan'.
	 *
	 * @param string $key
	 * @return string
	 */
	public function convertCategoryKey( $key ) {
		$mainCode = 'gan';

		return $this->autoConvert( $key, $mainCode );
	}
}
|
||||
361
languages/converters/KkConverter.php
Normal file
361
languages/converters/KkConverter.php
Normal file
|
|
@ -0,0 +1,361 @@
|
|||
<?php
|
||||
/**
|
||||
* Kazakh (Қазақша) specific code.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
* @file
|
||||
* @ingroup Language
|
||||
*/
|
||||
|
||||
define( 'KK_C_UC', 'АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯ' ); # Kazakh Cyrillic uppercase
|
||||
define( 'KK_C_LC', 'аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя' ); # Kazakh Cyrillic lowercase
|
||||
define( 'KK_L_UC', 'AÄBCÇDEÉFGĞHIİÏJKLMNÑOÖPQRSŞTUÜVWXYÝZ' ); # Kazakh Latin uppercase
|
||||
define( 'KK_L_LC', 'aäbcçdeéfgğhıiïjklmnñoöpqrsştuüvwxyýz' ); # Kazakh Latin lowercase
|
||||
// define( 'KK_A', 'ٴابپتجحدرزسشعفقكلمنڭەوۇۋۆىيچھ' ); # Kazakh Arabic
|
||||
define( 'H_HAMZA', 'ٴ' ); # U+0674 ARABIC LETTER HIGH HAMZA
|
||||
// define( 'ZWNJ', '' ); # U+200C ZERO WIDTH NON-JOINER
|
||||
|
||||
/**
|
||||
* Kazakh (Қазақша) converter routines
|
||||
*
|
||||
* @ingroup Language
|
||||
*/
|
||||
class KkConverter extends LanguageConverter {
|
||||
|
||||
protected $mCyrl2Latn, $mLatn2Cyrl, $mCyLa2Arab;
|
||||
|
||||
/**
 * Register the Kazakh variants with their fallbacks, then build the
 * regex conversion tables.
 *
 * @param Language $langobj
 */
public function __construct( Language $langobj ) {
	$variantCodes = [ 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn' ];

	// Script variants fall back to plain 'kk'; the regional codes fall back
	// to the script variant they are paired with.
	$fallbackFor = [
		'kk' => 'kk-cyrl',
		'kk-cyrl' => 'kk',
		'kk-latn' => 'kk',
		'kk-arab' => 'kk',
		'kk-kz' => 'kk-cyrl',
		'kk-tr' => 'kk-latn',
		'kk-cn' => 'kk-arab'
	];

	parent::__construct( $langobj, 'kk', $variantCodes, $fallbackFor, [] );

	// The regex tables are defined in code, so there is no point delaying
	// them. Waiting until loadDefaultTables() would mean they never get
	// loaded when the tables themselves come from cache.
	$this->loadRegs();
}
|
||||
|
||||
protected function loadDefaultTables() {
	// Placeholder until a generated KkConversion.php exists
	// (require __DIR__."/../../includes/KkConversion.php";).
	// Once it does, load it here and remove these empty declarations.
	$cyrl = [];
	$latn = [];
	$arab = [];
	$kz = [];
	$tr = [];
	$cn = [];

	$tables = [];
	// One table per script variant.
	$tables['kk-cyrl'] = new ReplacementArray( $cyrl );
	$tables['kk-latn'] = new ReplacementArray( $latn );
	$tables['kk-arab'] = new ReplacementArray( $arab );
	// Regional tables: the script pairs merged with the regional pairs.
	$tables['kk-kz'] = new ReplacementArray( array_merge( $cyrl, $kz ) );
	$tables['kk-tr'] = new ReplacementArray( array_merge( $latn, $tr ) );
	$tables['kk-cn'] = new ReplacementArray( array_merge( $arab, $cn ) );
	// Plain 'kk' gets an empty table.
	$tables['kk'] = new ReplacementArray();

	$this->mTables = $tables;
}
|
||||
|
||||
protected function postLoadTables() {
	// Each regional table absorbs the table of the script it is paired with.
	$scriptOf = [
		'kk-kz' => 'kk-cyrl',
		'kk-tr' => 'kk-latn',
		'kk-cn' => 'kk-arab',
	];

	foreach ( $scriptOf as $regional => $script ) {
		$this->mTables[$regional]->merge( $this->mTables[$script] );
	}
}
|
||||
|
||||
private function loadRegs() {
|
||||
$this->mCyrl2Latn = [
|
||||
# # Punctuation
|
||||
'/№/u' => 'No.',
|
||||
# # Е after vowels
|
||||
'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])Е/u' => '$1YE',
|
||||
'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1ye',
|
||||
# # leading ЁЮЯЩ
|
||||
'/^Ё([' . KK_C_UC . ']|$)/u' => 'YO$1', '/^Ё([' . KK_C_LC . ']|$)/u' => 'Yo$1',
|
||||
'/^Ю([' . KK_C_UC . ']|$)/u' => 'YU$1', '/^Ю([' . KK_C_LC . ']|$)/u' => 'Yu$1',
|
||||
'/^Я([' . KK_C_UC . ']|$)/u' => 'YA$1', '/^Я([' . KK_C_LC . ']|$)/u' => 'Ya$1',
|
||||
'/^Щ([' . KK_C_UC . ']|$)/u' => 'ŞÇ$1', '/^Щ([' . KK_C_LC . ']|$)/u' => 'Şç$1',
|
||||
# # other ЁЮЯ
|
||||
'/Ё/u' => 'YO', '/ё/u' => 'yo',
|
||||
'/Ю/u' => 'YU', '/ю/u' => 'yu',
|
||||
'/Я/u' => 'YA', '/я/u' => 'ya',
|
||||
'/Щ/u' => 'ŞÇ', '/щ/u' => 'şç',
|
||||
# # soft and hard signs
|
||||
'/[ъЪ]/u' => 'ʺ', '/[ьЬ]/u' => 'ʹ',
|
||||
# # other characters
|
||||
'/А/u' => 'A', '/а/u' => 'a', '/Ә/u' => 'Ä', '/ә/u' => 'ä',
|
||||
'/Б/u' => 'B', '/б/u' => 'b', '/В/u' => 'V', '/в/u' => 'v',
|
||||
'/Г/u' => 'G', '/г/u' => 'g', '/Ғ/u' => 'Ğ', '/ғ/u' => 'ğ',
|
||||
'/Д/u' => 'D', '/д/u' => 'd', '/Е/u' => 'E', '/е/u' => 'e',
|
||||
'/Ж/u' => 'J', '/ж/u' => 'j', '/З/u' => 'Z', '/з/u' => 'z',
|
||||
'/И/u' => 'Ï', '/и/u' => 'ï', '/Й/u' => 'Ý', '/й/u' => 'ý',
|
||||
'/К/u' => 'K', '/к/u' => 'k', '/Қ/u' => 'Q', '/қ/u' => 'q',
|
||||
'/Л/u' => 'L', '/л/u' => 'l', '/М/u' => 'M', '/м/u' => 'm',
|
||||
'/Н/u' => 'N', '/н/u' => 'n', '/Ң/u' => 'Ñ', '/ң/u' => 'ñ',
|
||||
'/О/u' => 'O', '/о/u' => 'o', '/Ө/u' => 'Ö', '/ө/u' => 'ö',
|
||||
'/П/u' => 'P', '/п/u' => 'p', '/Р/u' => 'R', '/р/u' => 'r',
|
||||
'/С/u' => 'S', '/с/u' => 's', '/Т/u' => 'T', '/т/u' => 't',
|
||||
'/У/u' => 'W', '/у/u' => 'w', '/Ұ/u' => 'U', '/ұ/u' => 'u',
|
||||
'/Ү/u' => 'Ü', '/ү/u' => 'ü', '/Ф/u' => 'F', '/ф/u' => 'f',
|
||||
'/Х/u' => 'X', '/х/u' => 'x', '/Һ/u' => 'H', '/һ/u' => 'h',
|
||||
'/Ц/u' => 'C', '/ц/u' => 'c', '/Ч/u' => 'Ç', '/ч/u' => 'ç',
|
||||
'/Ш/u' => 'Ş', '/ш/u' => 'ş', '/Ы/u' => 'I', '/ы/u' => 'ı',
|
||||
'/І/u' => 'İ', '/і/u' => 'i', '/Э/u' => 'É', '/э/u' => 'é',
|
||||
];
|
||||
|
||||
$this->mLatn2Cyrl = [
|
||||
# # Punctuation
|
||||
'/#|No\./' => '№',
|
||||
# # Şç
|
||||
'/ŞÇʹ/u' => 'ЩЬ', '/Şçʹ/u' => 'Щь',
|
||||
'/Ş[Çç]/u' => 'Щ', '/şç/u' => 'щ',
|
||||
# # soft and hard signs
|
||||
'/([' . KK_L_UC . '])ʺ([' . KK_L_UC . '])/u' => '$1Ъ$2',
|
||||
'/ʺ([' . KK_L_LC . '])/u' => 'ъ$1',
|
||||
'/([' . KK_L_UC . '])ʹ([' . KK_L_UC . '])/u' => '$1Ь$2',
|
||||
'/ʹ([' . KK_L_LC . '])/u' => 'ь$1',
|
||||
'/ʺ/u' => 'ъ',
|
||||
'/ʹ/u' => 'ь',
|
||||
# # Ye Yo Yu Ya.
|
||||
'/Y[Ee]/u' => 'Е', '/ye/u' => 'е',
|
||||
'/Y[Oo]/u' => 'Ё', '/yo/u' => 'ё',
|
||||
'/Y[UWuw]/u' => 'Ю', '/y[uw]/u' => 'ю',
|
||||
'/Y[Aa]/u' => 'Я', '/ya/u' => 'я',
|
||||
# # other characters
|
||||
'/A/u' => 'А', '/a/u' => 'а', '/Ä/u' => 'Ә', '/ä/u' => 'ә',
|
||||
'/B/u' => 'Б', '/b/u' => 'б', '/C/u' => 'Ц', '/c/u' => 'ц',
|
||||
'/Ç/u' => 'Ч', '/ç/u' => 'ч', '/D/u' => 'Д', '/d/u' => 'д',
|
||||
'/E/u' => 'Е', '/e/u' => 'е', '/É/u' => 'Э', '/é/u' => 'э',
|
||||
'/F/u' => 'Ф', '/f/u' => 'ф', '/G/u' => 'Г', '/g/u' => 'г',
|
||||
'/Ğ/u' => 'Ғ', '/ğ/u' => 'ғ', '/H/u' => 'Һ', '/h/u' => 'һ',
|
||||
'/I/u' => 'Ы', '/ı/u' => 'ы', '/İ/u' => 'І', '/i/u' => 'і',
|
||||
'/Ï/u' => 'И', '/ï/u' => 'и', '/J/u' => 'Ж', '/j/u' => 'ж',
|
||||
'/K/u' => 'К', '/k/u' => 'к', '/L/u' => 'Л', '/l/u' => 'л',
|
||||
'/M/u' => 'М', '/m/u' => 'м', '/N/u' => 'Н', '/n/u' => 'н',
|
||||
'/Ñ/u' => 'Ң', '/ñ/u' => 'ң', '/O/u' => 'О', '/o/u' => 'о',
|
||||
'/Ö/u' => 'Ө', '/ö/u' => 'ө', '/P/u' => 'П', '/p/u' => 'п',
|
||||
'/Q/u' => 'Қ', '/q/u' => 'қ', '/R/u' => 'Р', '/r/u' => 'р',
|
||||
'/S/u' => 'С', '/s/u' => 'с', '/Ş/u' => 'Ш', '/ş/u' => 'ш',
|
||||
'/T/u' => 'Т', '/t/u' => 'т', '/U/u' => 'Ұ', '/u/u' => 'ұ',
|
||||
'/Ü/u' => 'Ү', '/ü/u' => 'ү', '/V/u' => 'В', '/v/u' => 'в',
|
||||
'/W/u' => 'У', '/w/u' => 'у', '/Ý/u' => 'Й', '/ý/u' => 'й',
|
||||
'/X/u' => 'Х', '/x/u' => 'х', '/Z/u' => 'З', '/z/u' => 'з',
|
||||
];
|
||||
|
||||
$this->mCyLa2Arab = [
|
||||
# # Punctuation -> Arabic
|
||||
'/#|№|No\./u' => '', # U+0600
|
||||
'/\,/' => '،', # U+060C
|
||||
'/;/' => '؛', # U+061B
|
||||
'/\?/' => '؟', # U+061F
|
||||
'/%/' => '٪', # U+066A
|
||||
'/\*/' => '٭', # U+066D
|
||||
# # Digits -> Arabic
|
||||
'/0/' => '۰', # U+06F0
|
||||
'/1/' => '۱', # U+06F1
|
||||
'/2/' => '۲', # U+06F2
|
||||
'/3/' => '۳', # U+06F3
|
||||
'/4/' => '۴', # U+06F4
|
||||
'/5/' => '۵', # U+06F5
|
||||
'/6/' => '۶', # U+06F6
|
||||
'/7/' => '۷', # U+06F7
|
||||
'/8/' => '۸', # U+06F8
|
||||
'/9/' => '۹', # U+06F9
|
||||
# # Cyrillic -> Arabic
|
||||
'/Аллаһ/ui' => 'ﷲ',
|
||||
'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1يە',
|
||||
'/[еэ]/ui' => 'ە', '/[ъь]/ui' => '',
|
||||
'/[аә]/ui' => 'ا', '/[оө]/ui' => 'و', '/[ұү]/ui' => 'ۇ', '/[ыі]/ui' => 'ى',
|
||||
'/[и]/ui' => 'ىي', '/ё/ui' => 'يو', '/ю/ui' => 'يۋ', '/я/ui' => 'يا', '/[й]/ui' => 'ي',
|
||||
'/ц/ui' => 'تس', '/щ/ui' => 'شش',
|
||||
'/һ/ui' => 'ح', '/ч/ui' => 'تش',
|
||||
# '/һ/ui' => 'ھ', '/ч/ui' => 'چ',
|
||||
'/б/ui' => 'ب', '/в/ui' => 'ۆ', '/г/ui' => 'گ', '/ғ/ui' => 'ع',
|
||||
'/д/ui' => 'د', '/ж/ui' => 'ج', '/з/ui' => 'ز', '/к/ui' => 'ك',
|
||||
'/қ/ui' => 'ق', '/л/ui' => 'ل', '/м/ui' => 'م', '/н/ui' => 'ن',
|
||||
'/ң/ui' => 'ڭ', '/п/ui' => 'پ', '/р/ui' => 'ر', '/с/ui' => 'س',
|
||||
'/т/ui' => 'ت', '/у/ui' => 'ۋ', '/ф/ui' => 'ف', '/х/ui' => 'ح',
|
||||
'/ш/ui' => 'ش',
|
||||
# # Latin -> Arabic // commented for now...
|
||||
/*'/Allah/ui' => 'ﷲ',
|
||||
'/[eé]/ui' => 'ە', '/[yý]/ui' => 'ي', '/[ʺʹ]/ui' => '',
|
||||
'/[aä]/ui' => 'ا', '/[oö]/ui' => 'و', '/[uü]/ui' => 'ۇ',
|
||||
'/[ï]/ui' => 'ىي', '/[ıIiİ]/u' => 'ى',
|
||||
'/c/ui' => 'تس',
|
||||
'/ç/ui' => 'تش', '/h/ui' => 'ح',
|
||||
#'/ç/ui' => 'چ', '/h/ui' => 'ھ',
|
||||
'/b/ui' => 'ب','/d/ui' => 'د',
|
||||
'/f/ui' => 'ف', '/g/ui' => 'گ', '/ğ/ui' => 'ع',
|
||||
'/j/ui' => 'ج', '/k/ui' => 'ك', '/l/ui' => 'ل', '/m/ui' => 'م',
|
||||
'/n/ui' => 'ن', '/ñ/ui' => 'ڭ', '/p/ui' => 'پ', '/q/ui' => 'ق',
|
||||
'/r/ui' => 'ر', '/s/ui' => 'س', '/ş/ui' => 'ش', '/t/ui' => 'ت',
|
||||
'/v/ui' => 'ۆ', '/w/ui' => 'ۋ', '/x/ui' => 'ح', '/z/ui' => 'ز',*/
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Wrapper around the parent variant-link lookup:
 *  - titles in the user and user-talk namespaces are never looked up;
 *  - when the preferred variant is the main language code, $link is put
 *    back to the value it had before the parent call.
 *
 * @param string &$link
 * @param Title &$nt
 * @param bool $ignoreOtherCond
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	// Do not try to find variants for usernames.
	if ( is_object( $nt ) && in_array( $nt->getNamespace(), [ NS_USER, NS_USER_TALK ] ) ) {
		return;
	}

	$linkBefore = $link;
	parent::findVariantLink( $link, $nt, $ignoreOtherCond );

	// No variant selected: leave the link name as it was.
	$noVariantSelected = $this->getPreferredVariant() == $this->mMainLanguageCode;
	if ( $noVariantSelected ) {
		$link = $linkBefore;
	}
}
|
||||
|
||||
/**
 * Translate text into the given variant.
 *
 * The parent's translation is applied first. For the six script and regional
 * variants the result is then cut into pieces with preg_split(), each
 * piece is passed through regsConverter(), and the text between pieces is
 * copied back unchanged. Any other variant gets the parent's result as-is.
 *
 * @param string $text
 * @param string $toVariant
 *
 * @return string
 */
public function translate( $text, $toVariant ) {
	$text = parent::translate( $text, $toVariant );

	// Character set used to build the split pattern for this target.
	switch ( $toVariant ) {
		case 'kk-cyrl':
		case 'kk-kz':
			$charset = KK_L_UC . KK_L_LC . 'ʺʹ#0123456789';
			break;
		case 'kk-latn':
		case 'kk-tr':
			$charset = KK_C_UC . KK_C_LC . '№0123456789';
			break;
		case 'kk-arab':
		case 'kk-cn':
			// The Latin sets (KK_L_UC, KK_L_LC, 'ʺʹ') are deliberately left out here.
			$charset = KK_C_UC . KK_C_LC . ',;\?%\*№0123456789';
			break;
		default:
			return $text;
	}

	// Disable conversion of variables like $1, $2...
	$varsfix = '\$[0-9]';
	$splitPattern = '/' . $varsfix . '[^' . $charset . ']+/u';

	// Offsets are byte offsets, matching the substr()/strlen() use below.
	$pieces = preg_split( $splitPattern, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );

	$converted = '';
	$cursor = 0;
	foreach ( $pieces as $entry ) {
		$piece = $entry[0];
		$offset = $entry[1];

		// Text skipped by the split goes through untouched.
		$converted .= substr( $text, $cursor, $offset - $cursor );
		$converted .= $this->regsConverter( $piece, $toVariant );
		$cursor = $offset + strlen( $piece );
	}

	return $converted;
}
|
||||
|
||||
/**
 * Run the regex replacement table for the target variant over a piece of text.
 *
 * For the Arabic-script targets ('kk-arab', 'kk-cn') the text is first split
 * into words, and every word that contains a front vowel but none of the
 * excluded letters gets a high hamza (H_HAMZA) put in front of each run of
 * Cyrillic letters. Unknown variants and empty input are returned unchanged.
 *
 * @param string $text
 * @param string $toVariant
 * @return string|null Converted text; preg_replace() yields null on a regex error
 */
private function regsConverter( $text, $toVariant ) {
	if ( $text == '' ) {
		return $text;
	}

	switch ( $toVariant ) {
		case 'kk-arab':
		case 'kk-cn':
			// Only the Cyrillic sets are in use; the Latin counterparts are disabled.
			$letters = KK_C_LC . KK_C_UC;
			$front = 'әөүіӘӨҮІ';
			$excludes = 'еэгғкқЕЭГҒКҚ';

			// Split the text into words. Note that \b inside a character class
			// is a backspace, not a word boundary. Offsets are byte offsets, so
			// the separators can be copied back verbatim with substr().
			$words = preg_split( '/[\b\s\-\.:]+/', $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
			$marked = '';
			$mstart = 0;
			foreach ( $words as $m ) {
				$marked .= substr( $text, $mstart, $m[1] - $mstart );
				// Does the word contain a front vowel?
				// Words containing е, э, г, ғ, к or қ are excluded:
				// they should be written without hamza.
				if ( preg_match( '/[' . $front . ']/u', $m[0] ) &&
					!preg_match( '/[' . $excludes . ']/u', $m[0] )
				) {
					$marked .= preg_replace( '/[' . $letters . ']+/u', H_HAMZA . '$0', $m[0] );
				} else {
					$marked .= $m[0];
				}
				$mstart = $m[1] + strlen( $m[0] );
			}
			// Plain assignment: no reference binding is needed here.
			$text = $marked;
			$table = $this->mCyLa2Arab;
			break;
		case 'kk-latn':
		case 'kk-tr':
			$table = $this->mCyrl2Latn;
			break;
		case 'kk-cyrl':
		case 'kk-kz':
			$table = $this->mLatn2Cyrl;
			break;
		default:
			return $text;
	}

	// Patterns are applied one after another, in table order.
	foreach ( $table as $pat => $rep ) {
		$text = preg_replace( $pat, $rep, $text );
	}

	return $text;
}
|
||||
|
||||
/**
 * Convert a category key by running it through autoConvert() for 'kk'.
 *
 * @param string $key
 * @return string
 */
public function convertCategoryKey( $key ) {
	$mainCode = 'kk';

	return $this->autoConvert( $key, $mainCode );
}
|
||||
}
|
||||
125
languages/converters/ZhConverter.php
Normal file
125
languages/converters/ZhConverter.php
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
<?php
|
||||
/**
|
||||
* Chinese specific code.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
* @file
|
||||
* @ingroup Language
|
||||
*/
|
||||
|
||||
/**
|
||||
* @ingroup Language
|
||||
*/
|
||||
class ZhConverter extends LanguageConverter {
	/**
	 * Configure the Chinese converter: description separators, variants,
	 * fallback chains, per-variant flags and variant display names.
	 *
	 * @param Language $langobj
	 */
	public function __construct( Language $langobj ) {
		// The separators are assigned before the parent constructor is invoked.
		$this->mDescCodeSep = ':';
		$this->mDescVarSep = ';';

		$variantCodes = [
			'zh', 'zh-hans', 'zh-hant',
			'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw'
		];

		$fallbackChains = [
			'zh' => [ 'zh-hans', 'zh-hant', 'zh-cn', 'zh-tw', 'zh-hk', 'zh-sg', 'zh-mo', 'zh-my' ],
			'zh-hans' => [ 'zh-cn', 'zh-sg', 'zh-my' ],
			'zh-hant' => [ 'zh-tw', 'zh-hk', 'zh-mo' ],
			'zh-cn' => [ 'zh-hans', 'zh-sg', 'zh-my' ],
			'zh-sg' => [ 'zh-hans', 'zh-cn', 'zh-my' ],
			'zh-my' => [ 'zh-hans', 'zh-sg', 'zh-cn' ],
			'zh-tw' => [ 'zh-hant', 'zh-hk', 'zh-mo' ],
			'zh-hk' => [ 'zh-hant', 'zh-mo', 'zh-tw' ],
			'zh-mo' => [ 'zh-hant', 'zh-hk', 'zh-tw' ],
		];

		// Per-variant flags handed to the parent as its last argument.
		$variantFlags = [
			'zh' => 'disable',
			'zh-hans' => 'unidirectional',
			'zh-hant' => 'unidirectional',
		];

		parent::__construct( $langobj, 'zh', $variantCodes, $fallbackChains, [], $variantFlags );

		// Add the Chinese labels on top of whatever names the parent already holds.
		$this->mVariantNames = array_merge( $this->mVariantNames, [
			'zh' => '原文',
			'zh-hans' => '简体',
			'zh-hant' => '繁體',
			'zh-cn' => '大陆',
			'zh-tw' => '台灣',
			'zh-hk' => '香港',
			'zh-mo' => '澳門',
			'zh-sg' => '新加坡',
			'zh-my' => '大马',
		] );
	}

	/**
	 * Populate $this->mTables from the ZhConversion data. Every variant gets
	 * its own ReplacementArray instance, even where two variants start from
	 * the same data; plain 'zh' gets an empty table.
	 */
	protected function loadDefaultTables() {
		$pairsFor = [
			'zh-hans' => MediaWiki\Languages\Data\ZhConversion::$zh2Hans,
			'zh-hant' => MediaWiki\Languages\Data\ZhConversion::$zh2Hant,
			'zh-cn' => MediaWiki\Languages\Data\ZhConversion::$zh2CN,
			'zh-hk' => MediaWiki\Languages\Data\ZhConversion::$zh2HK,
			'zh-mo' => MediaWiki\Languages\Data\ZhConversion::$zh2HK,
			'zh-my' => MediaWiki\Languages\Data\ZhConversion::$zh2CN,
			'zh-sg' => MediaWiki\Languages\Data\ZhConversion::$zh2CN,
			'zh-tw' => MediaWiki\Languages\Data\ZhConversion::$zh2TW,
		];

		$tables = [];
		foreach ( $pairsFor as $code => $pairs ) {
			$tables[$code] = new ReplacementArray( $pairs );
		}
		$tables['zh'] = new ReplacementArray();

		$this->mTables = $tables;
	}

	/**
	 * Complete each regional table with the entries of its script table.
	 * Array union (+) is used, so entries already in the regional table win.
	 */
	protected function postLoadTables() {
		$scriptOf = [
			'zh-cn' => 'zh-hans',
			'zh-hk' => 'zh-hant',
			'zh-mo' => 'zh-hant',
			'zh-my' => 'zh-hans',
			'zh-sg' => 'zh-hans',
			'zh-tw' => 'zh-hant',
		];

		foreach ( $scriptOf as $regional => $script ) {
			$table = $this->mTables[$regional];
			$table->setArray( $table->getArray() + $this->mTables[$script]->getArray() );
		}
	}

	/**
	 * Convert a category key by running it through autoConvert() for 'zh'.
	 *
	 * @param string $key
	 * @return string
	 */
	public function convertCategoryKey( $key ) {
		$mainCode = 'zh';

		return $this->autoConvert( $key, $mainCode );
	}
}
|
||||
|
|
@ -180,6 +180,7 @@ $wgAutoloadClasses += [
|
|||
|
||||
# tests/phpunit/languages
|
||||
'LanguageClassesTestCase' => "$testDir/phpunit/languages/LanguageClassesTestCase.php",
|
||||
'LanguageConverterTestTrait' => "$testDir/phpunit/languages/LanguageConverterTestTrait.php",
|
||||
|
||||
# tests/phpunit/includes/libs
|
||||
'GenericArrayObjectTest' => "$testDir/phpunit/includes/libs/GenericArrayObjectTest.php",
|
||||
|
|
|
|||
33
tests/phpunit/languages/LanguageConverterTestTrait.php
Normal file
33
tests/phpunit/languages/LanguageConverterTestTrait.php
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
<?php
|
||||
|
||||
use MediaWiki\Languages\LanguageConverterFactory;
|
||||
use MediaWiki\MediaWikiServices;
|
||||
|
||||
trait LanguageConverterTestTrait {

	/** @var string Captures the part of the test class name before "ConverterTest" */
	private $codeRegex = '/^(.+)ConverterTest$/';

	/**
	 * Derive the language code from the name of the using class:
	 * "<Xyz>ConverterTest" yields "xyz"; any other class name yields 'en'.
	 *
	 * @return string
	 */
	protected function code():string {
		$matched = preg_match( $this->codeRegex, get_class( $this ), $parts );

		return $matched ? mb_strtolower( $parts[1] ) : 'en';
	}

	/**
	 * Create and return the LanguageConverter to be tested.
	 *
	 * The converter comes from a fresh LanguageConverterFactory, built with
	 * `false` and a callback that returns the language for code().
	 *
	 * @return ILanguageConverter
	 */
	protected function getLanguageConverter() : ILanguageConverter {
		$langCode = $this->code();
		$language = MediaWikiServices::getInstance()
			->getLanguageFactory()
			->getLanguage( $langCode );

		$languageCallback = function () use ( $language ) {
			return $language;
		};
		$factory = new LanguageConverterFactory( false, $languageCallback );

		return $factory->getLanguageConverter( $language );
	}
}
|
||||
|
|
@ -69,7 +69,6 @@ class LanguageSrTest extends LanguageClassesTestCase {
|
|||
foreach ( $langs as $code => $l ) {
|
||||
$p = $langFactory->getParentLanguage( $code );
|
||||
$this->assertTrue( $p === null, 'no parent for bogus language' );
|
||||
$this->assertFalse( $l instanceof LanguageSr, "$code is not sr" );
|
||||
$this->assertFalse( $this->getLang()->hasVariant( $code ), "$code is not a sr variant" );
|
||||
foreach ( [ 'sr', 'sr-ec', 'sr-EC', 'sr-Cyrl', 'sr-cyrl', 'sr-bogus' ] as $v ) {
|
||||
if ( $v !== $code ) {
|
||||
|
|
|
|||
141
tests/phpunit/languages/converters/CrhConverterTest.php
Normal file
141
tests/phpunit/languages/converters/CrhConverterTest.php
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @group Language
|
||||
* @covers CrhConverter
|
||||
*/
|
||||
class CrhConverterTest extends MediaWikiTestCase {
|
||||
|
||||
use LanguageConverterTestTrait;
|
||||
|
||||
/**
 * @dataProvider provideAutoConvertToAllVariantsByWord
 * @covers CrhConverter::autoConvertToAllVariants
 *
 * Test individual words and test minimal contextual transforms: each
 * word is converted on its own in both scripts, and then as the test
 * strings "<cyrillic> <latin>" and "<latin> <cyrillic>". In every case
 * the 'crh' entry must equal the input.
 *
 * @param string $cyrl Word in Cyrillic script
 * @param string $lat Same word in Latin script
 */
public function testAutoConvertToAllVariantsByWord( $cyrl, $lat ) {
	$single = [ 'crh-cyrl' => $cyrl, 'crh-latn' => $lat ];
	$doubled = [ 'crh-cyrl' => $cyrl . ' ' . $cyrl, 'crh-latn' => $lat . ' ' . $lat ];

	// [ input, expected script variants ]
	$cases = [
		[ $lat, $single ],
		[ $cyrl, $single ],
		[ $cyrl . ' ' . $lat, $doubled ],
		[ $lat . ' ' . $cyrl, $doubled ],
	];

	foreach ( $cases as $case ) {
		$input = $case[0];
		$expected = [ 'crh' => $input ] + $case[1];

		// A new converter is requested for every assertion.
		$actual = $this->getLanguageConverter()->autoConvertToAllVariants( $input );
		$this->assertEquals( $expected, $actual );
	}
}
|
||||
|
||||
public static function provideAutoConvertToAllVariantsByWord() {
|
||||
return [
|
||||
// general words, covering more of the alphabet
|
||||
[ 'рузгярнынъ', 'ruzgârnıñ' ], [ 'Париж', 'Parij' ], [ 'чёкюч', 'çöküç' ],
|
||||
[ 'элифбени', 'elifbeni' ], [ 'полициясы', 'politsiyası' ], [ 'хусусында', 'hususında' ],
|
||||
[ 'акъшамларны', 'aqşamlarnı' ], [ 'опькеленюв', 'öpkelenüv' ],
|
||||
[ 'кулюмсиреди', 'külümsiredi' ], [ 'айтмайджагъым', 'aytmaycağım' ],
|
||||
[ 'козьяшсыз', 'közyaşsız' ],
|
||||
|
||||
// exception words
|
||||
[ 'инструменталь', 'instrumental' ], [ 'гургуль', 'gürgül' ], [ 'тюшюнмемек', 'tüşünmemek' ],
|
||||
|
||||
// specific problem words
|
||||
[ 'куню', 'künü' ], [ 'сюргюнлиги', 'sürgünligi' ], [ 'озю', 'özü' ], [ 'этти', 'etti' ],
|
||||
[ 'эсас', 'esas' ], [ 'дёрт', 'dört' ], [ 'кельди', 'keldi' ], [ 'км²', 'km²' ],
|
||||
[ 'юзь', 'yüz' ], [ 'АКъШ', 'AQŞ' ], [ 'ШСДжБнен', 'ŞSCBnen' ], [ 'июль', 'iyül' ],
|
||||
[ 'ишгъаль', 'işğal' ], [ 'ишгъальджилерине', 'işğalcilerine' ], [ 'район', 'rayon' ],
|
||||
[ 'районынынъ', 'rayonınıñ' ], [ 'Ногъай', 'Noğay' ], [ 'Юрьтю', 'Yürtü' ],
|
||||
[ 'ватандан', 'vatandan' ], [ 'ком-кок', 'köm-kök' ], [ 'АКЪКЪЫ', 'AQQI' ],
|
||||
[ 'ДАГЪГЪА', 'DAĞĞA' ], [ '13-юнджи', '13-ünci' ], [ 'ДЖУРЬМЕК', 'CÜRMEK' ],
|
||||
[ 'джумлеси', 'cümlesi' ], [ 'ильи', 'ilyi' ], [ 'Ильи', 'İlyi' ], [ 'бруцел', 'brutsel' ],
|
||||
[ 'коцюб', 'kotsüb' ], [ 'плацен', 'platsen' ], [ 'эпицентр', 'epitsentr' ],
|
||||
|
||||
// -tsin- words
|
||||
[ 'кетсин', 'ketsin' ], [ 'кирлетсин', 'kirletsin' ], [ 'этсин', 'etsin' ],
|
||||
[ 'етсин', 'yetsin' ], [ 'этсинлерми', 'etsinlermi' ], [ 'принцини', 'printsini' ],
|
||||
[ 'медицина', 'meditsina' ], [ 'Щетсин', 'Şçetsin' ], [ 'Щекоцины', 'Şçekotsinı' ],
|
||||
|
||||
// regex pattern words
|
||||
[ 'коюнден', 'köyünden' ], [ 'аньге', 'ange' ],
|
||||
|
||||
// multi part words
|
||||
[ 'эки юз', 'eki yüz' ],
|
||||
|
||||
// affix patterns
|
||||
[ 'койнинъ', 'köyniñ' ], [ 'Авджыкойде', 'Avcıköyde' ], [ 'экваториаль', 'ekvatorial' ],
|
||||
[ 'Джанкой', 'Canköy' ], [ 'усть', 'üst' ], [ 'роль', 'rol' ], [ 'буюк', 'büyük' ],
|
||||
[ 'джонк', 'cönk' ],
|
||||
|
||||
// Roman numerals vs Initials, part 1 - Roman numeral initials without spaces
|
||||
[ 'А.Б.Дж.Д.М. Къадырова XII', 'A.B.C.D.M. Qadırova XII' ],
|
||||
// Roman numerals vs Initials, part 2 - Roman numeral initials with spaces
|
||||
[ 'Г. Х. Ы. В. X. Л. Меметов III', 'G. H. I. V. X. L. Memetov III' ],
|
||||
|
||||
// ALL CAPS, made up acronyms
|
||||
[ 'НЪАБ', 'ÑAB' ], [ 'КЪЫДЖ', 'QIC' ], [ 'ГЪУК', 'ĞUK' ], [ 'ДЖОТ', 'COT' ], [ 'ДЖА', 'CA' ],
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * @dataProvider provideAutoConvertToAllVariantsByString
 * @covers CrhConverter::autoConvertToAllVariants
 *
 * Run the cases that need some context (like Roman numerals), that have
 * many-to-one mappings, or that give other asymmetric results (like
 * smart quotes).
 *
 * @param string[] $result Expected text keyed by variant code
 * @param string $value Input text
 */
public function testAutoConvertToAllVariantsByString( $result, $value ) {
	$converter = $this->getLanguageConverter();
	$actual = $converter->autoConvertToAllVariants( $value );

	$this->assertEquals( $result, $actual );
}
|
||||
|
||||
public static function provideAutoConvertToAllVariantsByString() {
|
||||
return [
|
||||
[ // Roman numerals and quotes, esp. single-letter Roman numerals at the end of a string
|
||||
[
|
||||
'crh' => 'VI,VII IX “dört” «дёрт» XI XII I V X L C D M',
|
||||
'crh-cyrl' => 'VI,VII IX «дёрт» «дёрт» XI XII I V X L C D M',
|
||||
'crh-latn' => 'VI,VII IX “dört” "dört" XI XII I V X L C D M',
|
||||
],
|
||||
'VI,VII IX “dört” «дёрт» XI XII I V X L C D M'
|
||||
],
|
||||
[ // Many-to-one mappings: many Cyrillic to one Latin
|
||||
[
|
||||
'crh' => 'шофер шофёр şoför корбекул корьбекул корьбекуль körbekül',
|
||||
'crh-cyrl' => 'шофер шофёр шофёр корбекул корьбекул корьбекуль корьбекуль',
|
||||
'crh-latn' => 'şoför şoför şoför körbekül körbekül körbekül körbekül',
|
||||
],
|
||||
'шофер шофёр şoför корбекул корьбекул корьбекуль körbekül'
|
||||
],
|
||||
[ // Many-to-one mappings: many Latin to one Cyrillic
|
||||
[
|
||||
'crh' => 'fevqülade fevqulade февкъульаде beyude beyüde бейуде',
|
||||
'crh-cyrl' => 'февкъульаде февкъульаде февкъульаде бейуде бейуде бейуде',
|
||||
'crh-latn' => 'fevqülade fevqulade fevqulade beyude beyüde beyüde',
|
||||
],
|
||||
'fevqülade fevqulade февкъульаде beyude beyüde бейуде'
|
||||
],
|
||||
];
|
||||
}
|
||||
}
|
||||
41
tests/phpunit/languages/converters/GanConverterTest.php
Normal file
41
tests/phpunit/languages/converters/GanConverterTest.php
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @group Language
|
||||
* @covers GanConverter
|
||||
*/
|
||||
class GanConverterTest extends MediaWikiTestCase {

	use LanguageConverterTestTrait;

	/**
	 * The converter output for $value must equal the expected variant map.
	 *
	 * @dataProvider provideAutoConvertToAllVariants
	 * @covers GanConverter::autoConvertToAllVariants
	 *
	 * @param string[] $result Expected text keyed by variant code
	 * @param string $value Input text
	 */
	public function testAutoConvertToAllVariants( $result, $value ) {
		$converter = $this->getLanguageConverter();
		$actual = $converter->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $actual );
	}

	/**
	 * @return array[] Rows of [ expected variant map, input ]
	 */
	public static function provideAutoConvertToAllVariants() {
		// zh2Hans: only the gan-hans output differs from the input.
		$viaHans = [
			[
				'gan' => '㑯',
				'gan-hans' => '㑔',
				'gan-hant' => '㑯',
			],
			'㑯'
		];

		// zh2Hant: only the gan-hant output differs from the input.
		$viaHant = [
			[
				'gan' => '㐷',
				'gan-hans' => '㐷',
				'gan-hant' => '傌',
			],
			'㐷'
		];

		return [ $viaHans, $viaHant ];
	}
}
|
||||
41
tests/phpunit/languages/converters/IuConverterTest.php
Normal file
41
tests/phpunit/languages/converters/IuConverterTest.php
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @group Language
|
||||
* @covers IuConverter
|
||||
*/
|
||||
class IuConverterTest extends MediaWikiTestCase {

	use LanguageConverterTestTrait;

	/**
	 * The converter output for $value must equal the expected variant map.
	 *
	 * @dataProvider provideAutoConvertToAllVariants
	 * @covers IuConverter::autoConvertToAllVariants
	 *
	 * @param string[] $result Expected text keyed by variant code
	 * @param string $value Input text
	 */
	public function testAutoConvertToAllVariants( $result, $value ) {
		$converter = $this->getLanguageConverter();
		$actual = $converter->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $actual );
	}

	/**
	 * @return array[] Rows of [ expected variant map, input ]
	 */
	public static function provideAutoConvertToAllVariants() {
		// ike-cans: Latin input, only the ike-cans output differs from it.
		$latinInput = [
			[
				'ike-cans' => 'ᐴ',
				'ike-latn' => 'PUU',
				'iu' => 'PUU',
			],
			'PUU'
		];

		// ike-latn: syllabics input, only the ike-latn output differs from it.
		$syllabicsInput = [
			[
				'ike-cans' => 'ᐴ',
				'ike-latn' => 'puu',
				'iu' => 'ᐴ',
			],
			'ᐴ'
		];

		return [ $latinInput, $syllabicsInput ];
	}
}
|
||||
47
tests/phpunit/languages/converters/KkConverterTest.php
Normal file
47
tests/phpunit/languages/converters/KkConverterTest.php
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @group Language
|
||||
* @covers KkConverter
|
||||
*/
|
||||
class KkConverterTest extends MediaWikiTestCase {
|
||||
|
||||
use LanguageConverterTestTrait;
|
||||
|
||||
/**
 * The converter output for $value must equal the expected variant map.
 *
 * @dataProvider provideAutoConvertToAllVariants
 * @covers KkConverter::autoConvertToAllVariants
 *
 * @param string[] $result Expected text keyed by variant code
 * @param string $value Input text
 */
public function testAutoConvertToAllVariants( $result, $value ) {
	$converter = $this->getLanguageConverter();
	$actual = $converter->autoConvertToAllVariants( $value );

	$this->assertEquals( $result, $actual );
}
|
||||
|
||||
public static function provideAutoConvertToAllVariants() {
|
||||
return [
|
||||
[
|
||||
[
|
||||
'kk' => 'Адамдарға ақыл-парасат, ар-ождан берілген',
|
||||
'kk-cyrl' => 'Адамдарға ақыл-парасат, ар-ождан берілген',
|
||||
'kk-latn' => 'Adamdarğa aqıl-parasat, ar-ojdan berilgen',
|
||||
'kk-arab' => 'ادامدارعا اقىل-پاراسات، ار-وجدان بەرىلگەن',
|
||||
'kk-kz' => 'Адамдарға ақыл-парасат, ар-ождан берілген',
|
||||
'kk-tr' => 'Adamdarğa aqıl-parasat, ar-ojdan berilgen',
|
||||
'kk-cn' => 'ادامدارعا اقىل-پاراسات، ار-وجدان بەرىلگەن'
|
||||
],
|
||||
'Адамдарға ақыл-парасат, ар-ождан берілген'
|
||||
],
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * findVariantLink() must leave the link text untouched for this title.
 *
 * @covers KkConverter::findVariantLink
 * @covers LanguageConverter::findVariantLink
 */
public function testFindVariantLinks() {
	$old = "sample_link";
	// $newLink is passed by reference and may be rewritten by the converter.
	$newLink = $old;
	$title = Title::newFromText( "Same page for link" );
	$this->getLanguageConverter()->findVariantLink( $newLink, $title );
	$this->assertSame( $old, $newLink, "Links shouldn't be changed" );
}
|
||||
}
|
||||
47
tests/phpunit/languages/converters/KuConverterTest.php
Normal file
47
tests/phpunit/languages/converters/KuConverterTest.php
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
<?php
|
||||
|
||||
/**
 * Tests for KuConverter: conversion of a text into all variants.
 *
 * @group Language
 * @covers KuConverter
 */
class KuConverterTest extends MediaWikiTestCase {

	use LanguageConverterTestTrait;

	/**
	 * Converts $value into every variant and compares the resulting
	 * variant => text map with the expected one.
	 *
	 * @dataProvider provideAutoConvertToAllVariants
	 * @covers KuConverter::autoConvertToAllVariants
	 *
	 * @param string[] $result Expected text keyed by variant code
	 * @param string $value Text to convert
	 */
	public function testAutoConvertToAllVariants( $result, $value ) {
		$converter = $this->getLanguageConverter();
		$converted = $converter->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $converted );
	}

	/**
	 * Data sets for testAutoConvertToAllVariants().
	 *
	 * @return array[] Each entry is [ expected variant => text map, input text ]
	 */
	public static function provideAutoConvertToAllVariants() {
		$cases = [];

		// A single digit
		$cases[] = [
			[
				'ku' => '١',
				'ku-arab' => '١',
				'ku-latn' => '1',
			],
			'١'
		];

		// A sentence given in the Latin alphabet
		$cases[] = [
			[
				'ku' => 'Wîkîpediya ensîklopediyeke azad bi rengê wîkî ye.',
				'ku-arab' => 'ویکیپەدیائە نسیکلۆپەدیەکەئا زاد ب رەنگێ ویکی یە.',
				'ku-latn' => 'Wîkîpediya ensîklopediyeke azad bi rengê wîkî ye.',
			],
			'Wîkîpediya ensîklopediyeke azad bi rengê wîkî ye.'
		];

		// A sentence given in the Arabic alphabet
		$cases[] = [
			[
				'ku' => 'ویکیپەدیا ەنسیکلۆپەدیەکەئا زاد ب رەنگێ ویکی یە.',
				'ku-arab' => 'ویکیپەدیا ەنسیکلۆپەدیەکەئا زاد ب رەنگێ ویکی یە.',
				'ku-latn' => 'wîkîpedîa ensîklopedîekea zad b rengê wîkî îe.',
			],
			'ویکیپەدیا ەنسیکلۆپەدیەکەئا زاد ب رەنگێ ویکی یە.'
		];

		return $cases;
	}
}
|
||||
40
tests/phpunit/languages/converters/ShiConverterTest.php
Normal file
40
tests/phpunit/languages/converters/ShiConverterTest.php
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
<?php
|
||||
|
||||
/**
 * Tests for ShiConverter: conversion of a text into all variants.
 *
 * @group Language
 * @covers ShiConverter
 */
class ShiConverterTest extends MediaWikiTestCase {

	use LanguageConverterTestTrait;

	/**
	 * Converts $value into every variant and compares the resulting
	 * variant => text map with the expected one.
	 *
	 * @dataProvider provideAutoConvertToAllVariants
	 * @covers ShiConverter::autoConvertToAllVariants
	 *
	 * @param string[] $result Expected text keyed by variant code
	 * @param string $value Text to convert
	 */
	public function testAutoConvertToAllVariants( $result, $value ) {
		$converter = $this->getLanguageConverter();
		$converted = $converter->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $converted );
	}

	/**
	 * Data sets for testAutoConvertToAllVariants().
	 *
	 * @return array[] Each entry is [ expected variant => text map, input text ]
	 */
	public static function provideAutoConvertToAllVariants() {
		$cases = [];

		// Input 'AƔ'
		$cases[] = [
			[
				'shi' => 'AƔ',
				'shi-tfng' => 'ⴰⵖ',
				'shi-latn' => 'AƔ',
			],
			'AƔ'
		];

		// Input 'ⴰⵖ'
		$cases[] = [
			[
				'shi' => 'ⴰⵖ',
				'shi-tfng' => 'ⴰⵖ',
				'shi-latn' => 'aɣ',
			],
			'ⴰⵖ'
		];

		return $cases;
	}
}
|
||||
206
tests/phpunit/languages/converters/SrConverterTest.php
Normal file
206
tests/phpunit/languages/converters/SrConverterTest.php
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
<?php
|
||||
|
||||
/**
 * Tests for SrConverter: variant detection and conversion between the
 * 'sr-ec' and 'sr-el' variants.
 *
 * @group Language
 * @covers SrConverter
 */
class SrConverterTest extends MediaWikiTestCase {

	use LanguageConverterTestTrait;

	/**
	 * @covers SrConverter::hasVariants
	 */
	public function testHasVariants() {
		$this->assertTrue( $this->getLanguageConverter()->hasVariants(), 'sr has variants' );
	}

	/**
	 * Known variants must be reported as present, and a made-up variant
	 * code must be rejected.
	 *
	 * @covers SrConverter::hasVariant
	 */
	public function testHasVariantBogus() {
		$variants = [
			'sr-ec',
			'sr-el',
		];

		foreach ( $variants as $variant ) {
			$this->assertTrue( $this->getLanguageConverter()->hasVariant( $variant ),
				"no variant for $variant language" );
		}

		// The negative case this test is named after: an unknown variant
		// code must not be accepted.
		$this->assertFalse( $this->getLanguageConverter()->hasVariant( 'sr-bogus' ),
			'sr-bogus is not a valid variant' );
	}

	/**
	 * @covers SrConverter::convertTo
	 */
	public function testEasyConversions() {
		$this->assertCyrillic(
			'шђчћжШЂЧЋЖ',
			'Cyrillic guessing characters'
		);
		$this->assertLatin(
			'šđč枊ĐČĆŽ',
			'Latin guessing characters'
		);
	}

	/**
	 * @covers SrConverter::convertTo
	 */
	public function testMixedConversions() {
		$this->assertCyrillic(
			'шђчћжШЂЧЋЖ - šđčćž',
			'Mostly Cyrillic characters'
		);
		$this->assertLatin(
			'šđč枊ĐČĆŽ - шђчћж',
			'Mostly Latin characters'
		);
	}

	/**
	 * A text with as many Latin as Cyrillic letters must be changed by a
	 * conversion to either variant.
	 *
	 * @covers SrConverter::convertTo
	 */
	public function testSameAmountOfLatinAndCyrillicGetConverted() {
		$this->assertConverted(
			'4 Latin: šđčć | 4 Cyrillic: шђчћ',
			'sr-ec'
		);
		$this->assertConverted(
			'4 Latin: šđčć | 4 Cyrillic: шђчћ',
			'sr-el'
		);
	}

	/**
	 * @author Nikola Smolenski
	 * @covers SrConverter::convertTo
	 */
	public function testConversionToCyrillic() {
		// A simple conversion of Latin to Cyrillic
		$this->assertEquals( 'абвг',
			$this->convertToCyrillic( 'abvg' )
		);
		// Same as above, but assert that -{}-s must be removed and not converted
		$this->assertEquals( 'ljабnjвгdž',
			$this->convertToCyrillic( '-{lj}-ab-{nj}-vg-{dž}-' )
		);
		// A simple conversion of Cyrillic to Cyrillic
		$this->assertEquals( 'абвг',
			$this->convertToCyrillic( 'абвг' )
		);
		// Same as above, but assert that -{}-s must be removed and not converted
		$this->assertEquals( 'ljабnjвгdž',
			$this->convertToCyrillic( '-{lj}-аб-{nj}-вг-{dž}-' )
		);
		// This text has some Latin, but is recognized as Cyrillic, so it should not be converted
		$this->assertEquals( 'abvgшђжчћ',
			$this->convertToCyrillic( 'abvgшђжчћ' )
		);
		// Same as above, but assert that -{}-s must be removed
		$this->assertEquals( 'љabvgњшђжчћџ',
			$this->convertToCyrillic( '-{љ}-abvg-{њ}-шђжчћ-{џ}-' )
		);
		// This text has some Cyrillic, but is recognized as Latin, so it should be converted
		$this->assertEquals( 'абвгшђжчћ',
			$this->convertToCyrillic( 'абвгšđžčć' )
		);
		// Same as above, but assert that -{}-s must be removed and not converted
		$this->assertEquals( 'ljабвгnjшђжчћdž',
			$this->convertToCyrillic( '-{lj}-абвг-{nj}-šđžčć-{dž}-' )
		);
		// Roman numerals are not converted
		$this->assertEquals( 'а I б II в III г IV шђжчћ',
			$this->convertToCyrillic( 'a I b II v III g IV šđžčć' )
		);
	}

	/**
	 * @covers SrConverter::convertTo
	 */
	public function testConversionToLatin() {
		// A simple conversion of Latin to Latin
		$this->assertEquals( 'abcd',
			$this->convertToLatin( 'abcd' )
		);
		// A simple conversion of Cyrillic to Latin
		$this->assertEquals( 'abcd',
			$this->convertToLatin( 'абцд' )
		);
		// This text has some Latin, but is recognized as Cyrillic, so it should be converted
		$this->assertEquals( 'abcdšđžčć',
			$this->convertToLatin( 'abcdшђжчћ' )
		);
		// This text has some Cyrillic, but is recognized as Latin, so it should not be converted
		$this->assertEquals( 'абцдšđžčć',
			$this->convertToLatin( 'абцдšđžčć' )
		);
	}

	# #### HELPERS #####################################################

	/**
	 * Wrapper to verify that a text stays the same after applying conversion.
	 *
	 * @param string $text Text to convert
	 * @param string $variant Language variant 'sr-ec' or 'sr-el'
	 * @param string $msg Optional message
	 */
	protected function assertUnConverted( $text, $variant, $msg = '' ) {
		$this->assertEquals(
			$text,
			$this->convertTo( $text, $variant ),
			$msg
		);
	}

	/**
	 * Wrapper to verify that a text is different once converted to a variant.
	 *
	 * @param string $text Text to convert
	 * @param string $variant Language variant 'sr-ec' or 'sr-el'
	 * @param string $msg Optional message
	 */
	protected function assertConverted( $text, $variant, $msg = '' ) {
		$this->assertNotEquals(
			$text,
			$this->convertTo( $text, $variant ),
			$msg
		);
	}

	/**
	 * Verify that the given Cyrillic text is not converted when using
	 * the Cyrillic variant and is converted when using the Latin variant.
	 *
	 * @param string $text Text to convert
	 * @param string $msg Optional message
	 */
	protected function assertCyrillic( $text, $msg = '' ) {
		$this->assertUnConverted( $text, 'sr-ec', $msg );
		$this->assertConverted( $text, 'sr-el', $msg );
	}

	/**
	 * Verify that the given Latin text is not converted when using
	 * the Latin variant and is converted when using the Cyrillic variant.
	 *
	 * @param string $text Text to convert
	 * @param string $msg Optional message
	 */
	protected function assertLatin( $text, $msg = '' ) {
		$this->assertUnConverted( $text, 'sr-el', $msg );
		$this->assertConverted( $text, 'sr-ec', $msg );
	}

	/**
	 * Wrapper for the converter's convertTo() method.
	 *
	 * @param string $text Text to convert
	 * @param string $variant Language variant 'sr-ec' or 'sr-el'
	 * @return string Whatever the converter returns for this text and variant
	 */
	protected function convertTo( $text, $variant ) {
		return $this->getLanguageConverter()->convertTo( $text, $variant );
	}

	/**
	 * Convert a text to the 'sr-ec' variant.
	 *
	 * @param string $text Text to convert
	 * @return string
	 */
	protected function convertToCyrillic( $text ) {
		return $this->convertTo( $text, 'sr-ec' );
	}

	/**
	 * Convert a text to the 'sr-el' variant.
	 *
	 * @param string $text Text to convert
	 * @return string
	 */
	protected function convertToLatin( $text ) {
		return $this->convertTo( $text, 'sr-el' );
	}
}
|
||||
36
tests/phpunit/languages/converters/TgConverterTest.php
Normal file
36
tests/phpunit/languages/converters/TgConverterTest.php
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
<?php
|
||||
|
||||
/**
 * Tests for TgConverter: conversion of a text into all variants.
 *
 * @group Language
 * @covers TgConverter
 */
class TgConverterTest extends MediaWikiTestCase {

	use LanguageConverterTestTrait;

	/**
	 * Converts $value into every variant and compares the resulting
	 * variant => text map with the expected one.
	 *
	 * @dataProvider provideAutoConvertToAllVariants
	 * @covers TgConverter::autoConvertToAllVariants
	 *
	 * @param string[] $result Expected text keyed by variant code
	 * @param string $value Text to convert
	 */
	public function testAutoConvertToAllVariants( $result, $value ) {
		$converter = $this->getLanguageConverter();
		$converted = $converter->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $converted );
	}

	/**
	 * Data sets for testAutoConvertToAllVariants().
	 *
	 * @return array[] Each entry is [ expected variant => text map, input text ]
	 */
	public static function provideAutoConvertToAllVariants() {
		$cases = [];

		// Cyrillic input letter
		$cases[] = [
			[
				'tg' => 'г',
				'tg-latn' => 'g',
			],
			'г'
		];

		// Latin input letter
		$cases[] = [
			[
				'tg' => 'g',
				'tg-latn' => 'g',
			],
			'g'
		];

		return $cases;
	}
}
|
||||
129
tests/phpunit/languages/converters/ZhConverterTest.php
Normal file
129
tests/phpunit/languages/converters/ZhConverterTest.php
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
<?php
|
||||
|
||||
/**
 * Tests for ZhConverter: conversion of a text into all variants.
 *
 * @group Language
 * @covers ZhConverter
 */
class ZhConverterTest extends MediaWikiTestCase {

	use LanguageConverterTestTrait;

	/**
	 * Converts $value into every variant and compares the resulting
	 * variant => text map with the expected one.
	 *
	 * @dataProvider provideAutoConvertToAllVariants
	 * @covers ZhConverter::autoConvertToAllVariants
	 *
	 * @param string[] $result Expected text keyed by variant code
	 * @param string $value Text to convert
	 */
	public function testAutoConvertToAllVariants( $result, $value ) {
		$converter = $this->getLanguageConverter();
		$converted = $converter->autoConvertToAllVariants( $value );

		$this->assertEquals( $result, $converted );
	}

	/**
	 * Data sets for testAutoConvertToAllVariants().
	 *
	 * @return array[] Each entry is [ expected variant => text map, input text ]
	 */
	public static function provideAutoConvertToAllVariants() {
		$cases = [];

		// Plain hant -> hans
		$cases[] = [
			[
				'zh' => '㑯',
				'zh-hans' => '㑔',
				'zh-hant' => '㑯',
				'zh-cn' => '㑔',
				'zh-hk' => '㑯',
				'zh-mo' => '㑯',
				'zh-my' => '㑔',
				'zh-sg' => '㑔',
				'zh-tw' => '㑯',
			],
			'㑯'
		];

		// Plain hans -> hant
		$cases[] = [
			[
				'zh' => '㐷',
				'zh-hans' => '㐷',
				'zh-hant' => '傌',
				'zh-cn' => '㐷',
				'zh-hk' => '傌',
				'zh-mo' => '傌',
				'zh-my' => '㐷',
				'zh-sg' => '㐷',
				'zh-tw' => '傌',
			],
			'㐷'
		];

		// zh-cn specific
		$cases[] = [
			[
				'zh' => '仲介',
				'zh-hans' => '仲介',
				'zh-hant' => '仲介',
				'zh-cn' => '中介',
				'zh-hk' => '仲介',
				'zh-mo' => '仲介',
				'zh-my' => '中介',
				'zh-sg' => '中介',
				'zh-tw' => '仲介',
			],
			'仲介'
		];

		// zh-hk specific
		$cases[] = [
			[
				'zh' => '中文里',
				'zh-hans' => '中文里',
				'zh-hant' => '中文裡',
				'zh-cn' => '中文里',
				'zh-hk' => '中文裏',
				'zh-mo' => '中文裏',
				'zh-my' => '中文里',
				'zh-sg' => '中文里',
				'zh-tw' => '中文裡',
			],
			'中文里'
		];

		// zh-tw specific
		$cases[] = [
			[
				'zh' => '甲肝',
				'zh-hans' => '甲肝',
				'zh-hant' => '甲肝',
				'zh-cn' => '甲肝',
				'zh-hk' => '甲肝',
				'zh-mo' => '甲肝',
				'zh-my' => '甲肝',
				'zh-sg' => '甲肝',
				'zh-tw' => 'A肝',
			],
			'甲肝'
		];

		// zh-tw overrides zh-hant
		$cases[] = [
			[
				'zh' => '账',
				'zh-hans' => '账',
				'zh-hant' => '賬',
				'zh-cn' => '账',
				'zh-hk' => '賬',
				'zh-mo' => '賬',
				'zh-my' => '账',
				'zh-sg' => '账',
				'zh-tw' => '帳',
			],
			'账'
		];

		// zh-hk overrides zh-hant
		$cases[] = [
			[
				'zh' => '一地里',
				'zh-hans' => '一地里',
				'zh-hant' => '一地裡',
				'zh-cn' => '一地里',
				'zh-hk' => '一地裏',
				'zh-mo' => '一地裏',
				'zh-my' => '一地里',
				'zh-sg' => '一地里',
				'zh-tw' => '一地裡',
			],
			'一地里'
		];

		return $cases;
	}
}
|
||||
Loading…
Reference in a new issue