Parser: Move Sanitizer::normalizeCharReferences into RemexCompatFormatter
Choosing a particular encoding of HTML entities is logically a task of the Remex formatter (which serializes HTML). Move it out of the Parser so that it is part of the serialization specification. This is a follow-up to Ic8965e81882d7cf024bdced437f684064a30ac86. Change-Id: If45907baf24d60987b39cd1f7709c5f7caf19f37
This commit is contained in:
parent
5c25863bd9
commit
5d317c25be
3 changed files with 7 additions and 2 deletions
|
|
@ -1682,8 +1682,6 @@ class Parser {
|
|||
|
||||
$text = $this->mStripState->unstripGeneral( $text );
|
||||
|
||||
$text = Sanitizer::normalizeCharReferences( $text );
|
||||
|
||||
$text = $this->remexDriver->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );
|
||||
|
||||
if ( $isMain ) {
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ namespace MediaWiki\Tidy;
|
|||
use RemexHtml\HTMLData;
|
||||
use RemexHtml\Serializer\HtmlFormatter;
|
||||
use RemexHtml\Serializer\SerializerNode;
|
||||
use Sanitizer;
|
||||
|
||||
/**
|
||||
* @internal
|
||||
|
|
@ -34,6 +35,7 @@ class RemexCompatFormatter extends HtmlFormatter {
|
|||
|
||||
public function characters( SerializerNode $parent, $text, $start, $length ) {
|
||||
$text = parent::characters( $parent, $text, $start, $length );
|
||||
|
||||
if ( $parent->namespace !== HTMLData::NS_HTML
|
||||
|| !isset( $this->rawTextElements[$parent->name] )
|
||||
) {
|
||||
|
|
@ -41,6 +43,9 @@ class RemexCompatFormatter extends HtmlFormatter {
|
|||
$text = call_user_func( $this->textProcessor, $text );
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure a consistent representation for all entities
|
||||
$text = Sanitizer::normalizeCharReferences( $text );
|
||||
return $text;
|
||||
}
|
||||
|
||||
|
|
@ -65,6 +70,7 @@ class RemexCompatFormatter extends HtmlFormatter {
|
|||
$s = "<$name";
|
||||
foreach ( $attrs->getValues() as $attrName => $attrValue ) {
|
||||
$encValue = strtr( $attrValue, $this->attributeEscapes );
|
||||
$encValue = Sanitizer::normalizeCharReferences( $encValue );
|
||||
$s .= " $attrName=\"$encValue\"";
|
||||
}
|
||||
if ( $node->namespace === HTMLData::NS_HTML && isset( $this->voidElements[$name] ) ) {
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ class TidyTest extends \MediaWikiUnitTestCase {
|
|||
</mrow>
|
||||
</math>
|
||||
MathML;
|
||||
$testMathML = Sanitizer::normalizeCharReferences( $testMathML );
|
||||
return [
|
||||
[
|
||||
'<mw:editsection page="foo" section="bar">foo</mw:editsection>',
|
||||
|
|
|
|||
Loading…
Reference in a new issue