Don't apply French spacing in raw text elements

This also means we don't need to take special care for French spacing in
attributes, since it's no longer applied there.

Adds a test that captures this change.

Note that the test "Nowiki and french spacing" wonders whether this
escaping should be applied to nowiki content.

Bug: T255007
Change-Id: Ic8965e81882d7cf024bdced437f684064a30ac86
This commit is contained in:
Arlo Breault 2021-02-10 10:42:26 -05:00
parent 5d66187122
commit c44a3958a3
6 changed files with 54 additions and 15 deletions

View file

@ -36,6 +36,7 @@ use MediaWiki\Revision\RevisionAccessException;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;
use MediaWiki\SpecialPage\SpecialPageFactory;
use MediaWiki\Tidy\RemexDriver;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
use Wikimedia\IPUtils;
@ -354,6 +355,9 @@ class Parser {
/** @var HookRunner */
private $hookRunner;
/** @var RemexDriver */
private $remexDriver;
/**
* @internal For use by ServiceWiring
*/
@ -378,7 +382,8 @@ class Parser {
'StylePath',
'TranscludeCacheExpiry',
'PreprocessorCacheThreshold',
'DisableLangConversion'
'DisableLangConversion',
'TidyConfig',
];
/**
@ -468,6 +473,10 @@ class Parser {
MediaWikiServices::getInstance()->getHookContainer();
$this->hookRunner = new HookRunner( $this->hookContainer );
$this->remexDriver = new RemexDriver(
$this->svcOptions->get( 'TidyConfig' ) ?? []
);
// T250444: This will eventually be inlined here and the
// standalone method removed.
$this->firstCallInit();
@ -1673,12 +1682,9 @@ class Parser {
$text = $this->mStripState->unstripGeneral( $text );
# Clean up special characters, only run once, after doBlockLevels
$text = Sanitizer::armorFrenchSpaces( $text );
$text = Sanitizer::normalizeCharReferences( $text );
$text = MWTidy::tidy( $text );
$text = $this->remexDriver->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );
if ( $isMain ) {
$this->hookRunner->onParserAfterTidy( $this, $text );

View file

@ -784,9 +784,6 @@ class Sanitizer {
'__' => '__',
] );
# Armor against French spaces detection (T5158)
$encValue = self::armorFrenchSpaces( $encValue, ' ' );
# Stupid hack
$encValue = preg_replace_callback(
'/((?i)' . wfUrlProtocols() . ')/',

View file

@ -16,18 +16,34 @@ class RemexCompatFormatter extends HtmlFormatter {
'tr' => true,
];
/** @var ?callable Callback applied to text outside raw text elements, or null for none */
private $textProcessor;
/**
 * Set up the compatibility formatter's escape tables and optional text hook.
 *
 * @param array $options Formatter options, forwarded unchanged to the parent
 *  constructor. The optional 'textProcessor' key may hold a callable; when
 *  it is absent the text processor is null and no extra processing is done.
 */
public function __construct( $options = [] ) {
	parent::__construct( $options );

	// Serialize U+00A0 as a numeric character reference in both attribute
	// values and text, so that non-breaking spaces stay visible in the
	// output (the parser tests expect a literal "&#160;" in both places).
	$this->attributeEscapes["\u{00A0}"] = '&#160;';
	$this->textEscapes["\u{00A0}"] = '&#160;';

	// Do not escape "&" in attribute values or text.
	// NOTE(review): presumably safe because character references were
	// already normalized before tidying — confirm against the caller.
	unset( $this->attributeEscapes["&"] );
	unset( $this->textEscapes["&"] );

	$this->textProcessor = $options['textProcessor'] ?? null;
}
/**
 * Produce the output that begins the serialized document.
 *
 * Always returns an empty string; both arguments are ignored.
 * NOTE(review): presumably this overrides a document prologue emitted by
 * the parent formatter — confirm against HtmlFormatter.
 *
 * @param string|null $fragmentNamespace Unused
 * @param string|null $fragmentName Unused
 * @return string Always ''
 */
public function startDocument( $fragmentNamespace, $fragmentName ) {
return '';
}
/**
 * Serialize a run of character data, then optionally post-process it.
 *
 * The parent formatter produces the serialized text first. The configured
 * text processor, if any, is then applied to that result, except when the
 * parent node is an HTML-namespace element listed in $this->rawTextElements,
 * whose contents are returned exactly as the parent formatter produced them.
 *
 * @param SerializerNode $parent Node that contains the text
 * @param string $text Source string holding the character data
 * @param int $start Passed through to the parent formatter
 * @param int $length Passed through to the parent formatter
 * @return string Serialized (and possibly post-processed) text
 */
public function characters( SerializerNode $parent, $text, $start, $length ) {
	$serialized = parent::characters( $parent, $text, $start, $length );

	// Nothing to apply: hand back the parent's output untouched.
	if ( $this->textProcessor === null ) {
		return $serialized;
	}

	// Content of raw text elements must not be rewritten by the processor.
	$insideRawText = $parent->namespace === HTMLData::NS_HTML
		&& isset( $this->rawTextElements[$parent->name] );
	if ( $insideRawText ) {
		return $serialized;
	}

	return call_user_func( $this->textProcessor, $serialized );
}
public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
$data = $node->snData;
if ( $data && $data->isPWrapper ) {

View file

@ -29,12 +29,11 @@ class RemexDriver extends TidyDriverBase {
parent::__construct( $config );
}
public function tidy( $text ) {
public function tidy( $text, callable $textProcessor = null ) {
$traceCallback = static function ( $msg ) {
wfDebug( "RemexHtml: $msg" );
};
$formatter = new RemexCompatFormatter;
$formatter = new RemexCompatFormatter( [ 'textProcessor' => $textProcessor ] );
if ( $this->serializerTrace ) {
$serializer = new SerializerWithTracer( $formatter, null, $traceCallback );
} else {

View file

@ -1754,6 +1754,23 @@ Nowiki and french spacing
<p><span typeof="mw:Nowiki">test<span typeof="mw:DisplaySpace"> </span>: 123</span></p>
!! end
!! test
T255007: French spacing in raw text elements
!! options
wgRawHtml=1
!! wikitext
<html>
<script>test ; 123</script>
<style>test : 123</style>
</html>
!! html/php
<p>
<script>test ; 123</script>
<style>test : 123</style>
</p>
!! end
###
### Comments
###
@ -18253,7 +18270,7 @@ Punctuation: CSS ! important (T13874; with space after)
!! wikitext
<div style="width:50% ! important">important</div>
!! html
<div style="width:50%&#32;! important">important</div>
<div style="width:50% ! important">important</div>
!! end
!! test
@ -22820,7 +22837,7 @@ Play a bit with r67090 and T5158
<div style="width:50% !important">&#160;</div>
<div style="width:50%&#160;!important">&#160;</div>
<div style="width:50%&#160;!important">&#160;</div>
<div style="border&#32;: solid;">&#160;</div>
<div style="border : solid;">&#160;</div>
!! html/parsoid
<div style="width:50% !important" data-parsoid='{"stx":"html"}'><span typeof="mw:Entity" data-parsoid='{"srcContent":" "}'> </span></div>
<div style="width:50% !important" data-parsoid='{"stx":"html","a":{"style":"width:50% !important"},"sa":{"style":"width:50%&amp;nbsp;!important"}}'><span typeof="mw:Entity" data-parsoid='{"srcContent":" "}'> </span></div>
@ -22860,7 +22877,7 @@ T5158: Test for French spaces in attributes
!! wikitext
<br style=" clear : both ; " />
!! html/php
<p><br style="clear&#32;: both&#32;;" />
<p><br style="clear : both ;" />
</p>
!! end

View file

@ -8,7 +8,11 @@ class ParserTest extends MediaWikiIntegrationTestCase {
// Create a mock Config object that will satisfy ServiceOptions::__construct
$mockConfig = $this->createMock( Config::class );
$mockConfig->method( 'has' )->willReturn( true );
$mockConfig->method( 'get' )->willReturn( 'I like otters.' );
$mockConfig->method( 'get' )->will(
$this->returnCallback( function ( $arg ) {
return ( $arg === 'TidyConfig' ) ? null : 'I like otters.';
} )
);
// Stub out a MagicWordFactory so the Parser can initialize its
// function hooks when it is created.