CVE-2025-32699

Ensure that Unicode NFC normalization can be applied to our HTML output safely.

Even though the W3C officially recommends against normalizing HTML (see https://www.w3.org/International/questions/qa-html-css-normalization#converting), this is still easily done inadvertently, especially when using the MediaWiki action API, which normalizes parameters and results by default. See also I671648603c4635a35585c860b4857f5ea085e47f in Parsoid, and T266140 / I2e78e660ba1867744e34eda7d00ea527ec016b71 for another similar issue.

The following changes are made:
* The various HTML serializers (Remex/Tidy-derived, as well as the Html::* helpers) are tweaked to entity-escape U+0338 wherever it appears.
* Similarly, Message::escaped() is tweaked to entity-escape U+0338.
* Finally, a post-processing pass is added to the OutputTransform pipeline to catch any remaining U+0338 and entity-escape them. This catches U+0338 added during any of the previous OutputTransform stages (like TOC insertion, section edit links, etc).

*When backporting*, this code will likely need to be moved to ParserOutput::getText(), as the OutputTransform pipeline wasn't added until MW 1.42.

Bug: T387130
Change-Id: I66564e14e730f5393f4fa5780b80f24de6075af5
47 lines
1.3 KiB
PHP
47 lines
1.3 KiB
PHP
<?php
|
|
|
|
namespace MediaWiki\Html;
|
|
|
|
use Wikimedia\Assert\Assert;
|
|
use Wikimedia\RemexHtml\Serializer\HtmlFormatter;
|
|
use Wikimedia\RemexHtml\Serializer\SerializerNode;
|
|
|
|
/**
 * Internal helper trait for HtmlHelper::modifyHtml.
 *
 * This is designed to extend a HtmlFormatter: it calls parent::__construct()
 * and parent::element() and writes to $this->textEscapes, so the class using
 * this trait must have a parent class that provides all three.
 *
 * @phan-file-suppress PhanTraitParentReference
 */
trait HtmlHelperTrait {
	/**
	 * @var callable Invoked with a SerializerNode; a truthy result means the
	 *  node is handed to $modifyCallback before being serialized.
	 */
	private $shouldModifyCallback;

	/**
	 * @var callable Invoked with a copy of the SerializerNode; its return
	 *  value must be a SerializerNode and is serialized in place of the original.
	 */
	private $modifyCallback;

	/**
	 * @param mixed $options Passed unchanged to the parent constructor
	 * @param callable $shouldModifyCallback Decides, per node, whether
	 *  $modifyCallback is applied
	 * @param callable $modifyCallback Receives a copy of the node and returns
	 *  the SerializerNode to serialize
	 */
	public function __construct( $options, callable $shouldModifyCallback, callable $modifyCallback ) {
		parent::__construct( $options );
		$this->shouldModifyCallback = $shouldModifyCallback;
		$this->modifyCallback = $modifyCallback;
		// Escape U+0338 (T387130). The replacement has to be the character
		// reference: mapping U+0338 to the raw character itself would be a
		// no-op and leave the combining character unescaped in text output.
		'@phan-var HtmlFormatter $this';
		$this->textEscapes["\u{0338}"] = '&#x338;';
	}

	/**
	 * Serialize an element, first passing it through the modify callback when
	 * the should-modify callback accepts it.
	 *
	 * @param SerializerNode $parent Forwarded unchanged to parent::element()
	 * @param SerializerNode $node The element being serialized
	 * @param mixed $contents Forwarded unchanged to parent::element()
	 * @return mixed Whatever parent::element() returns
	 */
	public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
		if ( !( $this->shouldModifyCallback )( $node ) ) {
			return parent::element( $parent, $node, $contents );
		}

		// The callback gets a copy of the node with a separately cloned attrs
		// object, so whatever it changes does not touch the original node.
		$node = clone $node;
		$node->attrs = clone $node->attrs;
		$newNode = ( $this->modifyCallback )( $node );
		// Reject callbacks that return anything other than a SerializerNode.
		Assert::parameterType( SerializerNode::class, $newNode, 'return value' );

		return parent::element( $parent, $newNode, $contents );
	}

	/**
	 * Always returns an empty string for the start of the document.
	 *
	 * NOTE(review): presumably this suppresses whatever prologue the parent
	 * formatter would emit, so that fragments round-trip cleanly - confirm
	 * against HtmlFormatter::startDocument().
	 *
	 * @param mixed $fragmentNamespace Unused
	 * @param mixed $fragmentName Unused
	 * @return string
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		return '';
	}
}
|