Add detection for unicode normalization. Next step: use what we find! :)
I think I want to point to an as-yet-to-be-created page on MediaWiki.org to help people understand what to do if they're stuck with pure PHP normalization, but any pointers here would help.
This commit is contained in:
parent
8fa4c45bd6
commit
a21fb8651f
2 changed files with 68 additions and 0 deletions
|
|
@ -79,6 +79,10 @@ You cannot install MediaWiki.',
|
|||
'config-env-latest-old' => "'''Warning:''' You are installing an outdated version of Mediawiki.",
|
||||
'config-env-latest-help' => 'You are installing version $1, but the latest version is $2.
|
||||
You are advised to use the latest release, which can be downloaded from [http://www.mediawiki.org/wiki/Download mediawiki.org]',
|
||||
'config-unicode-php' => "Using pure PHP to normalize Unicode characters.",
|
||||
'config-unicode-pure-php-warning' => "'''Warning''': Either the PECL Intl extension is not available, or it uses an older version of [http://site.icu-project.org/ the ICU project's] library for handling Unicode normalization. If you run a high-traffic site, you should read a little on [http://www.mediawiki.org/wiki/Unicode_normalization_considerations Unicode normalization].",
|
||||
'config-unicode-utf8' => "Using Brion Vibber's utf8_normalize.so for UTF",
|
||||
'config-unicode-intl' => "Using the [http://pecl.php.net/intl intl PECL extension] for UTF-8 normalization.",
|
||||
'config-no-db' => 'Could not find a suitable database driver!',
|
||||
'config-no-db-help' => 'You need to install a database driver for PHP.
|
||||
The following database types are supported: $1.
|
||||
|
|
|
|||
|
|
@ -88,6 +88,7 @@ abstract class Installer {
|
|||
'envCheckExtension',
|
||||
'envCheckShellLocale',
|
||||
'envCheckUploadsDirectory',
|
||||
'envCheckLibicu'
|
||||
);
|
||||
|
||||
/**
|
||||
|
|
@ -811,6 +812,69 @@ abstract class Installer {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Convert a hex string representing a Unicode code point to the UTF-8
 * encoding of that code point.
 *
 * @param string $c Code point written in hexadecimal, e.g. "FA6C"
 * @return string|false The UTF-8 byte sequence (1 to 4 bytes), or false
 *   when the value is above U+10FFFF or lies in the UTF-16 surrogate
 *   range U+D800..U+DFFF, neither of which can be encoded as UTF-8
 */
protected function unicodeChar( $c ) {
	$c = hexdec( $c );

	// Surrogates are reserved for UTF-16 pairs; encoding them with the
	// 3-byte pattern below would yield ill-formed UTF-8.
	if ( $c >= 0xD800 && $c <= 0xDFFF ) {
		return false;
	}

	if ( $c <= 0x7F ) {
		// Single byte: plain ASCII
		return chr( $c );
	} elseif ( $c <= 0x7FF ) {
		// Two bytes: 110xxxxx 10xxxxxx
		return chr( 0xC0 | ( $c >> 6 ) )
			. chr( 0x80 | ( $c & 0x3F ) );
	} elseif ( $c <= 0xFFFF ) {
		// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
		return chr( 0xE0 | ( $c >> 12 ) )
			. chr( 0x80 | ( ( $c >> 6 ) & 0x3F ) )
			. chr( 0x80 | ( $c & 0x3F ) );
	} elseif ( $c <= 0x10FFFF ) {
		// Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		return chr( 0xF0 | ( $c >> 18 ) )
			. chr( 0x80 | ( ( $c >> 12 ) & 0x3F ) )
			. chr( 0x80 | ( ( $c >> 6 ) & 0x3F ) )
			. chr( 0x80 | ( $c & 0x3F ) );
	}

	// Beyond the Unicode code space
	return false;
}
|
||||
|
||||
|
||||
/**
 * Report which Unicode normalization implementation is available.
 *
 * Looks for the utf8_normalize() and normalizer_normalize() functions,
 * runs a probe normalization through whichever exist, and shows a
 * message naming the implementation. When both are present the intl
 * message is shown; when neither is present the pure-PHP message is
 * shown together with an additional warning.
 *
 * NOTE: the probe results are computed but not yet compared against the
 * expected value, so they do not influence which message is shown.
 */
public function envCheckLibicu() {
	$hasUtf8Ext = function_exists( 'utf8_normalize' );
	$hasIntlExt = function_exists( 'normalizer_normalize' );

	/**
	 * Probe pair: an un-normalized character and the form it should
	 * normalize to. This needs to be updated to something that the
	 * latest libicu will properly normalize. This normalization was
	 * found at
	 * http://www.unicode.org/versions/Unicode5.2.0/#Character_Additions
	 * The characters are built from their hex code points so that the
	 * source file cannot have its Unicode mangled in transit.
	 */
	$probeInput = $this->unicodeChar("FA6C");
	$probeExpected = $this->unicodeChar("242EE");

	// Fallback when no native normalizer is found.
	$messageKey = 'config-unicode-php';

	/**
	 * The checks run in increasing order of preference, so the intl
	 * PECL extension wins whenever it is present.
	 */
	if ( $hasUtf8Ext ) {
		$utf8Result = utf8_normalize( $probeInput, UNORM_NFC );
		$messageKey = 'config-unicode-utf8';
	}
	if ( $hasIntlExt ) {
		$intlResult = normalizer_normalize( $probeInput, Normalizer::FORM_C );
		$messageKey = 'config-unicode-intl';
	}

	$this->showMessage( $messageKey );

	if ( $messageKey !== 'config-unicode-php' ) {
		return;
	}

	// Only the pure-PHP fallback gets the extra warning.
	$this->showMessage( 'config-unicode-pure-php-warning' );
}
|
||||
|
||||
|
||||
/**
|
||||
* Search a path for any of the given executable names. Returns the
|
||||
* executable name if found. Also checks the version string returned
|
||||
|
|
|
|||
Loading…
Reference in a new issue