wiki.techinc.nl/languages/classes/LanguageSr.php

192 lines
6.3 KiB
PHP
Raw Normal View History

<?php
/**
* Serbian (Српски / Srpski) specific code.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Language
*/
/**
* There are two levels of conversion for Serbian: the script level
* (Cyrillic <-> Latin) and the variant level (Ekavian <-> Iyekavian).
* The two are orthogonal, so we really only need two dictionaries:
* one for Cyrillic and Latin, and one for Ekavian and Iyekavian.
*
* @ingroup Language
*/
class SrConverter extends LanguageConverter {
	/**
	 * Cyrillic => Latin transliteration table (lower- and upper-case letters).
	 * The digraph letters љ, њ and џ map to two Latin characters each.
	 *
	 * @var string[]
	 */
	public $mToLatin = [
		'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd',
		'ђ' => 'đ', 'е' => 'e', 'ж' => 'ž', 'з' => 'z', 'и' => 'i',
		'ј' => 'j', 'к' => 'k', 'л' => 'l', 'љ' => 'lj', 'м' => 'm',
		'н' => 'n', 'њ' => 'nj', 'о' => 'o', 'п' => 'p', 'р' => 'r',
		'с' => 's', 'т' => 't', 'ћ' => 'ć', 'у' => 'u', 'ф' => 'f',
		'х' => 'h', 'ц' => 'c', 'ч' => 'č', 'џ' => 'dž', 'ш' => 'š',

		'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G', 'Д' => 'D',
		'Ђ' => 'Đ', 'Е' => 'E', 'Ж' => 'Ž', 'З' => 'Z', 'И' => 'I',
		'Ј' => 'J', 'К' => 'K', 'Л' => 'L', 'Љ' => 'Lj', 'М' => 'M',
		'Н' => 'N', 'Њ' => 'Nj', 'О' => 'O', 'П' => 'P', 'Р' => 'R',
		'С' => 'S', 'Т' => 'T', 'Ћ' => 'Ć', 'У' => 'U', 'Ф' => 'F',
		'Х' => 'H', 'Ц' => 'C', 'Ч' => 'Č', 'Џ' => 'Dž', 'Ш' => 'Š',
	];

	/**
	 * Latin => Cyrillic transliteration table.
	 *
	 * Besides single letters it lists the digraphs (dž, lj, nj) in their
	 * different capitalisations, plus "x!y" keys (e.g. 'n!j') that map to
	 * the two separate Cyrillic letters instead of the single digraph letter.
	 *
	 * @var string[]
	 */
	public $mToCyrillics = [
		'a' => 'а', 'b' => 'б', 'c' => 'ц', 'č' => 'ч', 'ć' => 'ћ',
		'd' => 'д', 'dž' => 'џ', 'đ' => 'ђ', 'e' => 'е', 'f' => 'ф',
		'g' => 'г', 'h' => 'х', 'i' => 'и', 'j' => 'ј', 'k' => 'к',
		'l' => 'л', 'lj' => 'љ', 'm' => 'м', 'n' => 'н', 'nj' => 'њ',
		'o' => 'о', 'p' => 'п', 'r' => 'р', 's' => 'с', 'š' => 'ш',
		't' => 'т', 'u' => 'у', 'v' => 'в', 'z' => 'з', 'ž' => 'ж',

		'A' => 'А', 'B' => 'Б', 'C' => 'Ц', 'Č' => 'Ч', 'Ć' => 'Ћ',
		'D' => 'Д', 'Dž' => 'Џ', 'Đ' => 'Ђ', 'E' => 'Е', 'F' => 'Ф',
		'G' => 'Г', 'H' => 'Х', 'I' => 'И', 'J' => 'Ј', 'K' => 'К',
		'L' => 'Л', 'LJ' => 'Љ', 'M' => 'М', 'N' => 'Н', 'NJ' => 'Њ',
		'O' => 'О', 'P' => 'П', 'R' => 'Р', 'S' => 'С', 'Š' => 'Ш',
		'T' => 'Т', 'U' => 'У', 'V' => 'В', 'Z' => 'З', 'Ž' => 'Ж',

		'DŽ' => 'Џ', 'd!ž' => 'дж', 'D!ž' => 'Дж', 'D!Ž' => 'ДЖ',
		'Lj' => 'Љ', 'l!j' => 'лј', 'L!j' => 'Лј', 'L!J' => 'ЛЈ',
		'Nj' => 'Њ', 'n!j' => 'нј', 'N!j' => 'Нј', 'N!J' => 'НЈ'
	];

	/**
	 * Build the per-variant replacement tables.
	 *
	 * 'sr-ec' (Cyrillic) converts from Latin, 'sr-el' (Latin) converts from
	 * Cyrillic, and plain 'sr' gets an empty table, i.e. no replacements.
	 */
	public function loadDefaultTables() {
		$this->mTables = [
			'sr-ec' => new ReplacementArray( $this->mToCyrillics ),
			'sr-el' => new ReplacementArray( $this->mToLatin ),
			'sr' => new ReplacementArray()
		];
	}

	/**
	 * A function wrapper:
	 * - if there is no selected variant, leave the link
	 *   names as they were
	 * - do not try to find variants for usernames
	 *
	 * @param string &$link
	 * @param Title &$nt
	 * @param bool $ignoreOtherCond
	 */
	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
		// Links into the user namespaces are never converted.
		if ( is_object( $nt ) ) {
			$ns = $nt->getNamespace();
			if ( $ns == NS_USER || $ns == NS_USER_TALK ) {
				return;
			}
		}

		$oldlink = $link;
		parent::findVariantLink( $link, $nt, $ignoreOtherCond );

		// When the preferred variant is the main language code (no specific
		// variant selected), restore the original link text. Only $link is
		// restored here; $nt is left as the parent call set it.
		if ( $this->getPreferredVariant() === $this->mMainLanguageCode ) {
			$link = $oldlink;
		}
	}

	/**
	 * It translates text into variant, specials:
	 * - omitting Roman numerals
	 *
	 * The text is split on Roman numerals (together with the break bytes
	 * around them); the pieces in between are converted and the numerals
	 * are copied through verbatim.
	 *
	 * If the split itself fails (preg_split() returns false), the whole text
	 * is converted without Roman-numeral protection instead of being lost.
	 *
	 * @param string $text
	 * @param string $toVariant
	 *
	 * @throws MWException If there is no replacement table for $toVariant
	 * @return string
	 */
	public function translate( $text, $toVariant ) {
		$this->loadTables();
		if ( !isset( $this->mTables[$toVariant] ) ) {
			throw new MWException( "Broken variant table: "
				. implode( ',', array_keys( $this->mTables ) ) );
		}

		// A "break" is a single byte that is neither an ASCII word character
		// nor a high byte (0x80-0xff). The pattern has no /u modifier, so it
		// works on bytes.
		$breaks = '[^\w\x80-\xff]';

		// regexp for roman numbers
		// Lookahead assertion ensures $roman doesn't match the empty string
		$roman = '(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})';

		$reg = '/^' . $roman . '$|^' . $roman . $breaks . '|' . $breaks
			. $roman . '$|' . $breaks . $roman . $breaks . '/';

		$matches = preg_split( $reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
		if ( $matches === false ) {
			// PCRE error (e.g. backtrack limit hit on very large input):
			// convert everything rather than returning an empty string.
			return parent::translate( $text, $toVariant );
		}

		// Each entry is [ piece, byte offset of piece in $text ].
		$m = array_shift( $matches );
		$ret = $this->mTables[$toVariant]->replace( $m[0] );
		$mstart = $m[1] + strlen( $m[0] );
		foreach ( $matches as $m ) {
			// Copy the delimiter (Roman numeral plus breaks) unchanged...
			$ret .= substr( $text, $mstart, $m[1] - $mstart );
			// ...then convert the following piece of ordinary text.
			$ret .= parent::translate( $m[0], $toVariant );
			$mstart = $m[1] + strlen( $m[0] );
		}

		return $ret;
	}

	/**
	 * Guess if a text is written in Cyrillic or Latin.
	 * Overrides LanguageConverter::guessVariant()
	 *
	 * The guess counts a handful of letters that exist in only one of the
	 * two scripts and compares the two counts.
	 *
	 * @param string $text The text to be checked
	 * @param string $variant Language code of the variant to be checked for
	 * @return bool True if $text appears to be written in $variant
	 *
	 * @author Nikola Smolenski <smolensk@eunet.rs>
	 * @since 1.19
	 */
	public function guessVariant( $text, $variant ) {
		$numCyrillic = preg_match_all( "/[шђчћжШЂЧЋЖ]/u", $text, $dummy );
		$numLatin = preg_match_all( "/[šđč枊ĐČĆŽ]/u", $text, $dummy );

		if ( $variant === 'sr-ec' ) {
			return $numCyrillic > $numLatin;
		} elseif ( $variant === 'sr-el' ) {
			return $numLatin > $numCyrillic;
		} else {
			return false;
		}
	}
}
/**
 * Serbian (Српски / Srpski)
 *
 * @ingroup Language
 */
class LanguageSr extends Language {
	/**
	 * Set up the Serbian language object and attach an SrConverter to it,
	 * with 'sr' as the main code and 'sr-ec' / 'sr-el' as variants.
	 */
	public function __construct() {
		parent::__construct();

		$variants = [ 'sr', 'sr-ec', 'sr-el' ];

		// 'sr' falls back to 'sr-ec'; both specific variants fall back to 'sr'.
		$variantfallbacks = [
			'sr' => 'sr-ec',
			'sr-ec' => 'sr',
			'sr-el' => 'sr',
		];

		// Localised aliases for the 'S' and 'W' flags, in both scripts
		// (писмо/pismo = "script"; реч/reč and ријеч/riječ = "word").
		// NOTE(review): the meaning of 'S' and 'W' is defined by the
		// converter base class, not here - confirm there.
		$flags = [
			'S' => 'S', 'писмо' => 'S', 'pismo' => 'S',
			'W' => 'W', 'реч' => 'W', 'reč' => 'W', 'ријеч' => 'W', 'riječ' => 'W'
		];

		$this->mConverter = new SrConverter( $this, 'sr', $variants, $variantfallbacks, $flags );
	}
}