Use PHP \u{xxxx} syntax
Let PHP do the UTF-8 encoding of Unicode characters in PHP strings. Also use faster str_replace instead of preg_replace. Change-Id: I4e99de694a607e2b5df52c6efcd3d863bb42f76e
This commit is contained in:
parent
a7786688dd
commit
3a322ef9b0
3 changed files with 10 additions and 10 deletions
|
|
@ -47,7 +47,7 @@ class LanguageBe_tarask extends Language {
|
|||
# need to fold cases and convert to hex
|
||||
|
||||
# Replacing apostrophe sign U+2019 with U+0027
|
||||
$s = preg_replace( '/\xe2\x80\x99/', '\'', $string );
|
||||
$s = str_replace( "\u{2019}", '\'', $string );
|
||||
|
||||
$s = parent::normalizeForSearch( $s );
|
||||
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ class FormatJsonTest extends MediaWikiUnitTestCase {
|
|||
public function testEncodePhpBug46944() {
|
||||
$this->assertNotEquals(
|
||||
'\ud840\udc00',
|
||||
strtolower( FormatJson::encode( "\xf0\xa0\x80\x80" ) ),
|
||||
strtolower( FormatJson::encode( "\u{20000}" ) ),
|
||||
'Test encoding an broken json_encode character (U+20000)'
|
||||
);
|
||||
}
|
||||
|
|
@ -286,12 +286,12 @@ class FormatJsonTest extends MediaWikiUnitTestCase {
|
|||
'\\u00e9' => '\\\u00e9', // security check for Unicode unescaping
|
||||
|
||||
// Line terminators
|
||||
"\xe2\x80\xa8" => '\u2028',
|
||||
"\xe2\x80\xa9" => '\u2029',
|
||||
"\u{2028}" => '\u2028',
|
||||
"\u{2029}" => '\u2029',
|
||||
],
|
||||
'unicode' => [
|
||||
"\xc3\xa9" => '\u00e9',
|
||||
"\xf0\x9d\x92\x9e" => '\ud835\udc9e', // U+1D49E, outside the BMP
|
||||
"\u{00E9}" => '\u00e9',
|
||||
"\u{1D49E}" => '\ud835\udc9e', // U+1D49E, outside the BMP
|
||||
],
|
||||
'xmlmeta' => [
|
||||
'<' => '\u003C', // JSON_HEX_TAG uses uppercase hex digits
|
||||
|
|
|
|||
|
|
@ -16,19 +16,19 @@ class SanitizerUnitTest extends MediaWikiUnitTestCase {
|
|||
public function provideDecodeCharReferences() {
|
||||
return [
|
||||
'decode named entities' => [
|
||||
"\xc3\xa9cole",
|
||||
"\u{00E9}cole",
|
||||
'&eacute;cole',
|
||||
],
|
||||
'decode numeric entities' => [
|
||||
"\xc4\x88io bonas dans l'\xc3\xa9cole!",
|
||||
"\u{0108}io bonas dans l'\u{00E9}cole!",
|
||||
"&#x108;io bonas dans l'&#233;cole!",
|
||||
],
|
||||
'decode mixed numeric/named entities' => [
|
||||
"\xc4\x88io bonas dans l'\xc3\xa9cole!",
|
||||
"\u{0108}io bonas dans l'\u{00E9}cole!",
|
||||
"&#x108;io bonas dans l'&eacute;cole!",
|
||||
],
|
||||
'decode mixed complex entities' => [
|
||||
"\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas Ĉio dans l'école)",
|
||||
"\u{0108}io bonas dans l'\u{00E9}cole! (mais pas Ĉio dans l'école)",
|
||||
"Ĉio bonas dans l'école! (mais pas &#x108;io dans l'&eacute;cole)",
|
||||
],
|
||||
'Invalid ampersand' => [
|
||||
|
|
|
|||
Loading…
Reference in a new issue