Merge "Language::truncate(): don't chop up multibyte characters when input contains newlines"

This commit is contained in:
jenkins-bot 2015-10-27 03:33:15 +00:00 committed by Gerrit Code Review
commit d3aee2413e
2 changed files with 12 additions and 1 deletion

View file

@@ -3691,8 +3691,9 @@ class Language {
 # We got the first byte only of a multibyte char; remove it.
 $string = substr( $string, 0, -1 );
 } elseif ( $char >= 0x80 &&
+// Use the /s modifier (PCRE_DOTALL) so (.*) also matches newlines
 preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
-'[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m )
+'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m )
 ) {
 # We chopped in the middle of a character; remove it
 $string = $m[1];

View file

@@ -261,6 +261,16 @@ class LanguageTest extends LanguageClassesTestCase {
 $this->getLang()->truncate( "1234567890", 5, 'XXX', false ),
 'truncate without adjustment'
 );
+$this->assertEquals(
+"泰乐菌...",
+$this->getLang()->truncate( "泰乐菌素123456789", 11, '...', false ),
+'truncate does not chop Unicode characters in half'
+);
+$this->assertEquals(
+"\n泰乐菌...",
+$this->getLang()->truncate( "\n泰乐菌素123456789", 12, '...', false ),
+'truncate does not chop Unicode characters in half if there is a preceding newline'
+);
 }
 /**