segmentForDiff() is supposed to allow character-level diffing of Chinese text, by adding spaces and then removing them after the diff is complete. But when I tested it for I2d0a6996b02d37a3, unsegmentForDiff() failed to remove the spaces, since there was an <ins> tag between the space and the Chinese character. So instead, use formfeed characters to separate the Chinese characters, and strip them unconditionally instead of relying on them being next to Chinese characters. Add test. Change-Id: I230d8261bbda34ad313785a1f7c31d4db7bf989b
20 lines
592 B
PHP
20 lines
592 B
PHP
<?php
|
|
|
|
use MediaWiki\MainConfigNames;
|
|
|
|
/**
 * Tests for the Chinese language class, exercised through the diff renderer.
 *
 * @covers LanguageZh
 */
class LanguageZhTest extends LanguageClassesTestCase {

	/**
	 * Diffs a short Chinese string against a longer one that only appends
	 * characters, using the 'php' diff engine and 'zh' as content language,
	 * and checks the rendered inline-change markup.
	 */
	public function testSegmentForDiff() {
		$this->overrideConfigValue( MainConfigNames::DiffEngine, 'php' );

		$before = '维基';
		$after = '维基百科';
		$expectedFragment = '<div>维基<ins class="diffchange diffchange-inline">百科</ins></div>';

		$rendered = TextSlotDiffRenderer::diff(
			$before,
			$after,
			[ 'contentLanguage' => 'zh' ]
		);

		// Only the appended characters should be wrapped in <ins>, and no
		// word segmentation markers should remain in the rendered output.
		$this->assertStringContainsString( $expectedFragment, $rendered );
	}
}
|