Don't apply French spacing in raw text elements
This also means we no longer need to take special care of French spacing in attributes, since it is no longer applied there.

Adds a test that captures this change. Note that the existing test "Nowiki and french spacing" leaves open the question of whether this escaping should be applied to nowiki content.

Bug: T255007
Change-Id: Ic8965e81882d7cf024bdced437f684064a30ac86
This commit is contained in:
parent
5d66187122
commit
c44a3958a3
6 changed files with 54 additions and 15 deletions
|
|
@@ -36,6 +36,7 @@ use MediaWiki\Revision\RevisionAccessException;
|
|||
use MediaWiki\Revision\RevisionRecord;
|
||||
use MediaWiki\Revision\SlotRecord;
|
||||
use MediaWiki\SpecialPage\SpecialPageFactory;
|
||||
use MediaWiki\Tidy\RemexDriver;
|
||||
use Psr\Log\LoggerInterface;
|
||||
use Psr\Log\NullLogger;
|
||||
use Wikimedia\IPUtils;
|
||||
|
|
@@ -354,6 +355,9 @@ class Parser {
|
|||
/** @var HookRunner */
|
||||
private $hookRunner;
|
||||
|
||||
/** @var RemexDriver */
|
||||
private $remexDriver;
|
||||
|
||||
/**
|
||||
* @internal For use by ServiceWiring
|
||||
*/
|
||||
|
|
@@ -378,7 +382,8 @@ class Parser {
|
|||
'StylePath',
|
||||
'TranscludeCacheExpiry',
|
||||
'PreprocessorCacheThreshold',
|
||||
'DisableLangConversion'
|
||||
'DisableLangConversion',
|
||||
'TidyConfig',
|
||||
];
|
||||
|
||||
/**
|
||||
|
|
@@ -468,6 +473,10 @@ class Parser {
|
|||
MediaWikiServices::getInstance()->getHookContainer();
|
||||
$this->hookRunner = new HookRunner( $this->hookContainer );
|
||||
|
||||
$this->remexDriver = new RemexDriver(
|
||||
$this->svcOptions->get( 'TidyConfig' ) ?? []
|
||||
);
|
||||
|
||||
// T250444: This will eventually be inlined here and the
|
||||
// standalone method removed.
|
||||
$this->firstCallInit();
|
||||
|
|
@@ -1673,12 +1682,9 @@ class Parser {
|
|||
|
||||
$text = $this->mStripState->unstripGeneral( $text );
|
||||
|
||||
# Clean up special characters, only run once, after doBlockLevels
|
||||
$text = Sanitizer::armorFrenchSpaces( $text );
|
||||
|
||||
$text = Sanitizer::normalizeCharReferences( $text );
|
||||
|
||||
$text = MWTidy::tidy( $text );
|
||||
$text = $this->remexDriver->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );
|
||||
|
||||
if ( $isMain ) {
|
||||
$this->hookRunner->onParserAfterTidy( $this, $text );
|
||||
|
|
|
|||
|
|
@@ -784,9 +784,6 @@ class Sanitizer {
|
|||
'__' => '__',
|
||||
] );
|
||||
|
||||
# Armor against French spaces detection (T5158)
|
||||
$encValue = self::armorFrenchSpaces( $encValue, ' ' );
|
||||
|
||||
# Stupid hack
|
||||
$encValue = preg_replace_callback(
|
||||
'/((?i)' . wfUrlProtocols() . ')/',
|
||||
|
|
|
|||
|
|
@@ -16,18 +16,34 @@ class RemexCompatFormatter extends HtmlFormatter {
|
|||
'tr' => true,
|
||||
];
|
||||
|
||||
/* @var ?callable */
|
||||
private $textProcessor;
|
||||
|
||||
public function __construct( $options = [] ) {
|
||||
parent::__construct( $options );
|
||||
$this->attributeEscapes["\u{00A0}"] = ' ';
|
||||
unset( $this->attributeEscapes["&"] );
|
||||
$this->textEscapes["\u{00A0}"] = ' ';
|
||||
unset( $this->textEscapes["&"] );
|
||||
$this->textProcessor = $options['textProcessor'] ?? null;
|
||||
}
|
||||
|
||||
public function startDocument( $fragmentNamespace, $fragmentName ) {
|
||||
return '';
|
||||
}
|
||||
|
||||
public function characters( SerializerNode $parent, $text, $start, $length ) {
|
||||
$text = parent::characters( $parent, $text, $start, $length );
|
||||
if ( $parent->namespace !== HTMLData::NS_HTML
|
||||
|| !isset( $this->rawTextElements[$parent->name] )
|
||||
) {
|
||||
if ( $this->textProcessor !== null ) {
|
||||
$text = call_user_func( $this->textProcessor, $text );
|
||||
}
|
||||
}
|
||||
return $text;
|
||||
}
|
||||
|
||||
public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
|
||||
$data = $node->snData;
|
||||
if ( $data && $data->isPWrapper ) {
|
||||
|
|
|
|||
|
|
@@ -29,12 +29,11 @@ class RemexDriver extends TidyDriverBase {
|
|||
parent::__construct( $config );
|
||||
}
|
||||
|
||||
public function tidy( $text ) {
|
||||
public function tidy( $text, callable $textProcessor = null ) {
|
||||
$traceCallback = static function ( $msg ) {
|
||||
wfDebug( "RemexHtml: $msg" );
|
||||
};
|
||||
|
||||
$formatter = new RemexCompatFormatter;
|
||||
$formatter = new RemexCompatFormatter( [ 'textProcessor' => $textProcessor ] );
|
||||
if ( $this->serializerTrace ) {
|
||||
$serializer = new SerializerWithTracer( $formatter, null, $traceCallback );
|
||||
} else {
|
||||
|
|
|
|||
|
|
@@ -1754,6 +1754,23 @@ Nowiki and french spacing
|
|||
<p><span typeof="mw:Nowiki">test<span typeof="mw:DisplaySpace"> </span>: 123</span></p>
|
||||
!! end
|
||||
|
||||
!! test
|
||||
T255007: French spacing in raw text elements
|
||||
!! options
|
||||
wgRawHtml=1
|
||||
!! wikitext
|
||||
<html>
|
||||
<script>test ; 123</script>
|
||||
<style>test : 123</style>
|
||||
</html>
|
||||
!! html/php
|
||||
<p>
|
||||
<script>test ; 123</script>
|
||||
<style>test : 123</style>
|
||||
|
||||
</p>
|
||||
!! end
|
||||
|
||||
###
|
||||
### Comments
|
||||
###
|
||||
|
|
@@ -18253,7 +18270,7 @@ Punctuation: CSS ! important (T13874; with space after)
|
|||
!! wikitext
|
||||
<div style="width:50% ! important">important</div>
|
||||
!! html
|
||||
<div style="width:50% ! important">important</div>
|
||||
<div style="width:50% ! important">important</div>
|
||||
!! end
|
||||
|
||||
!! test
|
||||
|
|
@@ -22820,7 +22837,7 @@ Play a bit with r67090 and T5158
|
|||
<div style="width:50% !important"> </div>
|
||||
<div style="width:50% !important"> </div>
|
||||
<div style="width:50% !important"> </div>
|
||||
<div style="border : solid;"> </div>
|
||||
<div style="border : solid;"> </div>
|
||||
!! html/parsoid
|
||||
<div style="width:50% !important" data-parsoid='{"stx":"html"}'><span typeof="mw:Entity" data-parsoid='{"srcContent":" "}'> </span></div>
|
||||
<div style="width:50% !important" data-parsoid='{"stx":"html","a":{"style":"width:50% !important"},"sa":{"style":"width:50%&nbsp;!important"}}'><span typeof="mw:Entity" data-parsoid='{"srcContent":" "}'> </span></div>
|
||||
|
|
@@ -22860,7 +22877,7 @@ T5158: Test for French spaces in attributes
|
|||
!! wikitext
|
||||
<br style=" clear : both ; " />
|
||||
!! html/php
|
||||
<p><br style="clear : both ;" />
|
||||
<p><br style="clear : both ;" />
|
||||
</p>
|
||||
!! end
|
||||
|
||||
|
|
|
|||
|
|
@@ -8,7 +8,11 @@ class ParserTest extends MediaWikiIntegrationTestCase {
|
|||
// Create a mock Config object that will satisfy ServiceOptions::__construct
|
||||
$mockConfig = $this->createMock( Config::class );
|
||||
$mockConfig->method( 'has' )->willReturn( true );
|
||||
$mockConfig->method( 'get' )->willReturn( 'I like otters.' );
|
||||
$mockConfig->method( 'get' )->will(
|
||||
$this->returnCallback( function ( $arg ) {
|
||||
return ( $arg === 'TidyConfig' ) ? null : 'I like otters.';
|
||||
} )
|
||||
);
|
||||
|
||||
// Stub out a MagicWordFactory so the Parser can initialize its
|
||||
// function hooks when it is created.
|
||||
|
|
|
|||
Loading…
Reference in a new issue