SECURITY: Ensure emitted HTML is safe against Unicode NFC normalization
CVE-2025-32699 Ensure that Unicode NFC normalization can be applied to our HTML output safely. Even though the W3C officially recommends against normalizing HTML https://www.w3.org/International/questions/qa-html-css-normalization#converting this is still easily done inadvertently, especially when using the MediaWiki action API which normalizes parameters and results by default. See also I671648603c4635a35585c860b4857f5ea085e47f in Parsoid, and T266140 / I2e78e660ba1867744e34eda7d00ea527ec016b71 for another similar issue. The following changes are made: * The various HTML serializers (Remex/Tidy-derived, as well as the Html::* helpers) are tweaked to entity-escape U+0338 wherever it appears. * Similarly, Message::escaped() is tweaked to entity-escape U+0338. * Finally, a post-processing pass is added to the OutputTransform pipeline to catch any remaining U+0338 and entity-escape them. This catches U+0338 added during any of the previous OutputTransform stages (like TOC insertion, section edit links, etc). *When backporting* this code will likely need to be moved to ParserOutput::getText(), as the OutputTransform pipeline wasn't added until MW 1.42. Bug: T387130 Change-Id: I66564e14e730f5393f4fa5780b80f24de6075af5
This commit is contained in:
parent
8702751d5e
commit
94f193a894
14 changed files with 116 additions and 9 deletions
|
|
@ -1873,6 +1873,7 @@ $wgAutoloadLocalClasses = [
|
||||||
'MediaWiki\\OutputTransform\\Stages\\HandleParsoidSectionLinks' => __DIR__ . '/includes/OutputTransform/Stages/HandleParsoidSectionLinks.php',
|
'MediaWiki\\OutputTransform\\Stages\\HandleParsoidSectionLinks' => __DIR__ . '/includes/OutputTransform/Stages/HandleParsoidSectionLinks.php',
|
||||||
'MediaWiki\\OutputTransform\\Stages\\HandleSectionLinks' => __DIR__ . '/includes/OutputTransform/Stages/HandleSectionLinks.php',
|
'MediaWiki\\OutputTransform\\Stages\\HandleSectionLinks' => __DIR__ . '/includes/OutputTransform/Stages/HandleSectionLinks.php',
|
||||||
'MediaWiki\\OutputTransform\\Stages\\HandleTOCMarkers' => __DIR__ . '/includes/OutputTransform/Stages/HandleTOCMarkers.php',
|
'MediaWiki\\OutputTransform\\Stages\\HandleTOCMarkers' => __DIR__ . '/includes/OutputTransform/Stages/HandleTOCMarkers.php',
|
||||||
|
'MediaWiki\\OutputTransform\\Stages\\HardenNFC' => __DIR__ . '/includes/OutputTransform/Stages/HardenNFC.php',
|
||||||
'MediaWiki\\OutputTransform\\Stages\\HydrateHeaderPlaceholders' => __DIR__ . '/includes/OutputTransform/Stages/HydrateHeaderPlaceholders.php',
|
'MediaWiki\\OutputTransform\\Stages\\HydrateHeaderPlaceholders' => __DIR__ . '/includes/OutputTransform/Stages/HydrateHeaderPlaceholders.php',
|
||||||
'MediaWiki\\OutputTransform\\Stages\\ParsoidLocalization' => __DIR__ . '/includes/OutputTransform/Stages/ParsoidLocalization.php',
|
'MediaWiki\\OutputTransform\\Stages\\ParsoidLocalization' => __DIR__ . '/includes/OutputTransform/Stages/ParsoidLocalization.php',
|
||||||
'MediaWiki\\OutputTransform\\Stages\\RenderDebugInfo' => __DIR__ . '/includes/OutputTransform/Stages/RenderDebugInfo.php',
|
'MediaWiki\\OutputTransform\\Stages\\RenderDebugInfo' => __DIR__ . '/includes/OutputTransform/Stages/RenderDebugInfo.php',
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ namespace MediaWiki\Html;
|
||||||
use MediaWiki\Json\FormatJson;
|
use MediaWiki\Json\FormatJson;
|
||||||
use MediaWiki\MainConfigNames;
|
use MediaWiki\MainConfigNames;
|
||||||
use MediaWiki\MediaWikiServices;
|
use MediaWiki\MediaWikiServices;
|
||||||
|
use MediaWiki\Parser\Sanitizer;
|
||||||
use MediaWiki\Request\ContentSecurityPolicy;
|
use MediaWiki\Request\ContentSecurityPolicy;
|
||||||
use UnexpectedValueException;
|
use UnexpectedValueException;
|
||||||
|
|
||||||
|
|
@ -193,6 +194,7 @@ class Html {
|
||||||
if ( isset( self::$voidElements[$element] ) ) {
|
if ( isset( self::$voidElements[$element] ) ) {
|
||||||
return $start;
|
return $start;
|
||||||
} else {
|
} else {
|
||||||
|
$contents = Sanitizer::escapeCombiningChar( $contents ?? '' );
|
||||||
return $start . $contents . self::closeElement( $element );
|
return $start . $contents . self::closeElement( $element );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@
|
||||||
namespace MediaWiki\Html;
|
namespace MediaWiki\Html;
|
||||||
|
|
||||||
use Wikimedia\Assert\Assert;
|
use Wikimedia\Assert\Assert;
|
||||||
|
use Wikimedia\RemexHtml\Serializer\HtmlFormatter;
|
||||||
use Wikimedia\RemexHtml\Serializer\SerializerNode;
|
use Wikimedia\RemexHtml\Serializer\SerializerNode;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -23,6 +24,9 @@ trait HtmlHelperTrait {
|
||||||
parent::__construct( $options );
|
parent::__construct( $options );
|
||||||
$this->shouldModifyCallback = $shouldModifyCallback;
|
$this->shouldModifyCallback = $shouldModifyCallback;
|
||||||
$this->modifyCallback = $modifyCallback;
|
$this->modifyCallback = $modifyCallback;
|
||||||
|
// Escape U+0338 (T387130)
|
||||||
|
'@phan-var HtmlFormatter $this';
|
||||||
|
$this->textEscapes["\u{0338}"] = '̸';
|
||||||
}
|
}
|
||||||
|
|
||||||
public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
|
public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ use MediaWiki\Page\PageReference;
|
||||||
use MediaWiki\Page\PageReferenceValue;
|
use MediaWiki\Page\PageReferenceValue;
|
||||||
use MediaWiki\Parser\Parser;
|
use MediaWiki\Parser\Parser;
|
||||||
use MediaWiki\Parser\ParserOutput;
|
use MediaWiki\Parser\ParserOutput;
|
||||||
|
use MediaWiki\Parser\Sanitizer;
|
||||||
use MediaWiki\StubObject\StubUserLang;
|
use MediaWiki\StubObject\StubUserLang;
|
||||||
use MediaWiki\Title\Title;
|
use MediaWiki\Title\Title;
|
||||||
use RuntimeException;
|
use RuntimeException;
|
||||||
|
|
@ -1042,7 +1043,7 @@ class Message implements Stringable, MessageSpecifier, Serializable {
|
||||||
// '⧼' is used instead of '<' to side-step any
|
// '⧼' is used instead of '<' to side-step any
|
||||||
// double-escaping issues.
|
// double-escaping issues.
|
||||||
// (Keep synchronised with mw.Message#toString in JS.)
|
// (Keep synchronised with mw.Message#toString in JS.)
|
||||||
return '⧼' . htmlspecialchars( $this->key ) . '⧽';
|
return '⧼' . Sanitizer::escapeCombiningChar( htmlspecialchars( $this->key ) ) . '⧽';
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( in_array( $this->getLanguage()->getCode(), [ 'qqx', 'x-xss' ] ) ) {
|
if ( in_array( $this->getLanguage()->getCode(), [ 'qqx', 'x-xss' ] ) ) {
|
||||||
|
|
@ -1078,6 +1079,7 @@ class Message implements Stringable, MessageSpecifier, Serializable {
|
||||||
} elseif ( $format === self::FORMAT_ESCAPED ) {
|
} elseif ( $format === self::FORMAT_ESCAPED ) {
|
||||||
$string = $this->transformText( $string );
|
$string = $this->transformText( $string );
|
||||||
$string = htmlspecialchars( $string, ENT_QUOTES, 'UTF-8', false );
|
$string = htmlspecialchars( $string, ENT_QUOTES, 'UTF-8', false );
|
||||||
|
$string = Sanitizer::escapeCombiningChar( $string );
|
||||||
}
|
}
|
||||||
|
|
||||||
# Raw parameter replacement
|
# Raw parameter replacement
|
||||||
|
|
@ -1591,7 +1593,7 @@ class Message implements Stringable, MessageSpecifier, Serializable {
|
||||||
case self::FORMAT_BLOCK_PARSE:
|
case self::FORMAT_BLOCK_PARSE:
|
||||||
case self::FORMAT_ESCAPED:
|
case self::FORMAT_ESCAPED:
|
||||||
default:
|
default:
|
||||||
return htmlspecialchars( $plaintext, ENT_QUOTES );
|
return Sanitizer::escapeCombiningChar( htmlspecialchars( $plaintext, ENT_QUOTES ) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ use MediaWiki\OutputTransform\Stages\ExtractBody;
|
||||||
use MediaWiki\OutputTransform\Stages\HandleParsoidSectionLinks;
|
use MediaWiki\OutputTransform\Stages\HandleParsoidSectionLinks;
|
||||||
use MediaWiki\OutputTransform\Stages\HandleSectionLinks;
|
use MediaWiki\OutputTransform\Stages\HandleSectionLinks;
|
||||||
use MediaWiki\OutputTransform\Stages\HandleTOCMarkers;
|
use MediaWiki\OutputTransform\Stages\HandleTOCMarkers;
|
||||||
|
use MediaWiki\OutputTransform\Stages\HardenNFC;
|
||||||
use MediaWiki\OutputTransform\Stages\HydrateHeaderPlaceholders;
|
use MediaWiki\OutputTransform\Stages\HydrateHeaderPlaceholders;
|
||||||
use MediaWiki\OutputTransform\Stages\ParsoidLocalization;
|
use MediaWiki\OutputTransform\Stages\ParsoidLocalization;
|
||||||
use MediaWiki\OutputTransform\Stages\RenderDebugInfo;
|
use MediaWiki\OutputTransform\Stages\RenderDebugInfo;
|
||||||
|
|
@ -98,6 +99,10 @@ class DefaultOutputPipelineFactory {
|
||||||
'HydrateHeaderPlaceholders' => [
|
'HydrateHeaderPlaceholders' => [
|
||||||
'class' => HydrateHeaderPlaceholders::class,
|
'class' => HydrateHeaderPlaceholders::class,
|
||||||
],
|
],
|
||||||
|
# This should be last, in order to ensure final output is hardened
|
||||||
|
'HardenNFC' => [
|
||||||
|
'class' => HardenNFC::class,
|
||||||
|
],
|
||||||
];
|
];
|
||||||
|
|
||||||
public function __construct(
|
public function __construct(
|
||||||
|
|
|
||||||
23
includes/OutputTransform/Stages/HardenNFC.php
Normal file
23
includes/OutputTransform/Stages/HardenNFC.php
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace MediaWiki\OutputTransform\Stages;
|
||||||
|
|
||||||
|
use MediaWiki\OutputTransform\ContentTextTransformStage;
|
||||||
|
use MediaWiki\Parser\ParserOptions;
|
||||||
|
use MediaWiki\Parser\ParserOutput;
|
||||||
|
use MediaWiki\Parser\Sanitizer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hardens the output against NFC normalization (T387130).
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
class HardenNFC extends ContentTextTransformStage {
|
||||||
|
|
||||||
|
public function shouldRun( ParserOutput $po, ?ParserOptions $popts, array $options = [] ): bool {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected function transformText( string $text, ParserOutput $po, ?ParserOptions $popts, array &$options ): string {
|
||||||
|
return Sanitizer::escapeCombiningChar( $text );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1038,6 +1038,12 @@ class Sanitizer {
|
||||||
$class ), '_' );
|
$class ), '_' );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static function escapeCombiningChar( string $html ): string {
|
||||||
|
return strtr( $html, [
|
||||||
|
"\u{0338}" => '̸', # T387130
|
||||||
|
] );
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given HTML input, escape with htmlspecialchars but un-escape entities.
|
* Given HTML input, escape with htmlspecialchars but un-escape entities.
|
||||||
* This allows (generally harmless) entities like   to survive.
|
* This allows (generally harmless) entities like   to survive.
|
||||||
|
|
@ -1053,7 +1059,7 @@ class Sanitizer {
|
||||||
# hurt. Use ENT_SUBSTITUTE so that incorrectly truncated multibyte characters
|
# hurt. Use ENT_SUBSTITUTE so that incorrectly truncated multibyte characters
|
||||||
# don't cause the entire string to disappear.
|
# don't cause the entire string to disappear.
|
||||||
$html = htmlspecialchars( $html, ENT_QUOTES | ENT_SUBSTITUTE );
|
$html = htmlspecialchars( $html, ENT_QUOTES | ENT_SUBSTITUTE );
|
||||||
return $html;
|
return self::escapeCombiningChar( $html );
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,8 @@ class RemexCompatFormatter extends HtmlFormatter {
|
||||||
// Escape non-breaking space
|
// Escape non-breaking space
|
||||||
$this->attributeEscapes["\u{00A0}"] = ' ';
|
$this->attributeEscapes["\u{00A0}"] = ' ';
|
||||||
$this->textEscapes["\u{00A0}"] = ' ';
|
$this->textEscapes["\u{00A0}"] = ' ';
|
||||||
|
// Escape U+0338 (T387130)
|
||||||
|
$this->textEscapes["\u{0338}"] = '̸';
|
||||||
// Disable escaping of '&', because we expect to see entities, due to 'ignoreCharRefs'
|
// Disable escaping of '&', because we expect to see entities, due to 'ignoreCharRefs'
|
||||||
unset( $this->attributeEscapes["&"] );
|
unset( $this->attributeEscapes["&"] );
|
||||||
unset( $this->textEscapes["&"] );
|
unset( $this->textEscapes["&"] );
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -96,6 +96,18 @@ class HtmlTest extends MediaWikiIntegrationTestCase {
|
||||||
Html::element( 'element', [], '' ),
|
Html::element( 'element', [], '' ),
|
||||||
'Close tag for empty element (array, string)'
|
'Close tag for empty element (array, string)'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
$this->assertEquals(
|
||||||
|
"<p test=\"\u{0338}"&\"≯ & < ></p>",
|
||||||
|
Html::element( 'p', [ 'test' => "\u{0338}\"&" ], "\u{0338} & < >" ),
|
||||||
|
'Attribute and content escaping'
|
||||||
|
);
|
||||||
|
|
||||||
|
$this->assertEquals(
|
||||||
|
'<p≯ &</p>',
|
||||||
|
Html::rawElement( 'p', [], "\u{0338} &" ),
|
||||||
|
"Combining characters escaped even in raw contents (T387130)"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function dataXmlMimeType() {
|
public function dataXmlMimeType() {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,41 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace MediaWiki\Tests\OutputTransform\Stages;
|
||||||
|
|
||||||
|
use MediaWiki\Config\ServiceOptions;
|
||||||
|
use MediaWiki\OutputTransform\OutputTransformStage;
|
||||||
|
use MediaWiki\OutputTransform\Stages\HardenNFC;
|
||||||
|
use MediaWiki\Parser\ParserOutput;
|
||||||
|
use MediaWiki\Tests\OutputTransform\OutputTransformStageTestBase;
|
||||||
|
use Psr\Log\NullLogger;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @covers \MediaWiki\OutputTransform\Stages\HardenNFC
|
||||||
|
*/
|
||||||
|
class HardenNFCTest extends OutputTransformStageTestBase {
|
||||||
|
|
||||||
|
public function createStage(): OutputTransformStage {
|
||||||
|
return new HardenNFC(
|
||||||
|
new ServiceOptions( [] ),
|
||||||
|
new NullLogger()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function provideShouldRun(): array {
|
||||||
|
return [
|
||||||
|
[ new ParserOutput(), null, [] ]
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
public function provideShouldNotRun(): array {
|
||||||
|
$this->markTestSkipped( 'HydrateHeaderPlaceHolders should always run' );
|
||||||
|
}
|
||||||
|
|
||||||
|
public function provideTransform(): array {
|
||||||
|
$text = "<h1>\u{0338}</h1>";
|
||||||
|
$expectedText = "<h1≯</h1>";
|
||||||
|
return [
|
||||||
|
[ new ParserOutput( $text ), null, [], new ParserOutput( $expectedText ) ],
|
||||||
|
];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -282,6 +282,7 @@ class MessageTest extends MediaWikiLangTestCase {
|
||||||
'⧼script>alert(1)</script⧽' ],
|
'⧼script>alert(1)</script⧽' ],
|
||||||
[ 'script>alert(1)</script', 'plain', '⧼script>alert(1)</script⧽',
|
[ 'script>alert(1)</script', 'plain', '⧼script>alert(1)</script⧽',
|
||||||
'⧼script>alert(1)</script⧽' ],
|
'⧼script>alert(1)</script⧽' ],
|
||||||
|
[ "\u{0338}isolated combining char", 'escaped', '⧼̸isolated combining char⧽', '⧼̸isolated combining char⧽' ],
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -309,6 +310,7 @@ class MessageTest extends MediaWikiLangTestCase {
|
||||||
'<script>alert(1)</script>' ],
|
'<script>alert(1)</script>' ],
|
||||||
[ '<script>alert(1)</script>', 'plain', '<script>alert(1)</script>',
|
[ '<script>alert(1)</script>', 'plain', '<script>alert(1)</script>',
|
||||||
'<script>alert(1)</script>' ],
|
'<script>alert(1)</script>' ],
|
||||||
|
[ "\u{0338}isolated combining char", 'escaped', '̸isolated combining char', '̸isolated combining char' ],
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -584,28 +586,28 @@ class MessageTest extends MediaWikiLangTestCase {
|
||||||
public static function providePlaintextParams() {
|
public static function providePlaintextParams() {
|
||||||
return [
|
return [
|
||||||
[
|
[
|
||||||
'one $2 <div>foo</div> [[Bar]] {{Baz}} <',
|
"one $2 <div>\u{0338}foo</div> [[Bar]] {{Baz}} <",
|
||||||
'plain',
|
'plain',
|
||||||
],
|
],
|
||||||
|
|
||||||
[
|
[
|
||||||
// expect
|
// expect
|
||||||
'one $2 <div>foo</div> [[Bar]] {{Baz}} <',
|
"one $2 <div>\u{0338}foo</div> [[Bar]] {{Baz}} <",
|
||||||
// format
|
// format
|
||||||
'text',
|
'text',
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
'one $2 <div>foo</div> [[Bar]] {{Baz}} &lt;',
|
'one $2 <div≯foo</div> [[Bar]] {{Baz}} &lt;',
|
||||||
'escaped',
|
'escaped',
|
||||||
],
|
],
|
||||||
|
|
||||||
[
|
[
|
||||||
'one $2 <div>foo</div> [[Bar]] {{Baz}} &lt;',
|
'one $2 <div≯foo</div> [[Bar]] {{Baz}} &lt;',
|
||||||
'parse',
|
'parse',
|
||||||
],
|
],
|
||||||
|
|
||||||
[
|
[
|
||||||
"<p>one $2 <div>foo</div> [[Bar]] {{Baz}} &lt;\n</p>",
|
"<p>one $2 <div≯foo</div> [[Bar]] {{Baz}} &lt;\n</p>",
|
||||||
'parseAsBlock',
|
'parseAsBlock',
|
||||||
],
|
],
|
||||||
];
|
];
|
||||||
|
|
@ -618,7 +620,7 @@ class MessageTest extends MediaWikiLangTestCase {
|
||||||
$msg = new RawMessage( '$1 $2' );
|
$msg = new RawMessage( '$1 $2' );
|
||||||
$params = [
|
$params = [
|
||||||
'one $2',
|
'one $2',
|
||||||
'<div>foo</div> [[Bar]] {{Baz}} <',
|
"<div>\u{0338}foo</div> [[Bar]] {{Baz}} <",
|
||||||
];
|
];
|
||||||
$this->assertSame(
|
$this->assertSame(
|
||||||
$expect,
|
$expect,
|
||||||
|
|
|
||||||
|
|
@ -204,6 +204,8 @@ class SanitizerUnitTest extends MediaWikiUnitTestCase {
|
||||||
[ 'a¡b', 'a¡b' ],
|
[ 'a¡b', 'a¡b' ],
|
||||||
[ 'foo'bar', "foo'bar" ],
|
[ 'foo'bar', "foo'bar" ],
|
||||||
[ '<script>foo</script>', '<script>foo</script>' ],
|
[ '<script>foo</script>', '<script>foo</script>' ],
|
||||||
|
[ '̸', "\u{0338}" ],
|
||||||
|
[ '̸', '̸' ],
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -309,6 +309,11 @@ class RemexDriverTest extends MediaWikiUnitTestCase {
|
||||||
'<meta foo="bar"/>foo',
|
'<meta foo="bar"/>foo',
|
||||||
"<meta foo=\"bar\" /><p>foo</p>",
|
"<meta foo=\"bar\" /><p>foo</p>",
|
||||||
],
|
],
|
||||||
|
[
|
||||||
|
'Unicode combining characters (T387130)',
|
||||||
|
"<p>\u{0338} <!--comment-->\u{0338}</p>",
|
||||||
|
'<p≯ <!--comment--≯</p>',
|
||||||
|
],
|
||||||
];
|
];
|
||||||
|
|
||||||
public static function provider() {
|
public static function provider() {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue