CVE-2025-32699: Ensure that Unicode NFC normalization can be applied to our HTML output safely.
Even though the W3C officially recommends against normalizing HTML (https://www.w3.org/International/questions/qa-html-css-normalization#converting), this is still easily done inadvertently, especially when using the MediaWiki action API, which normalizes parameters and results by default. See also I671648603c4635a35585c860b4857f5ea085e47f in Parsoid, and T266140 / I2e78e660ba1867744e34eda7d00ea527ec016b71 for another similar issue.
The following changes are made:
* The various HTML serializers (Remex/Tidy-derived, as well as the Html::* helpers) are tweaked to entity-escape U+0338 wherever it appears.
* Similarly, Message::escaped() is tweaked to entity-escape U+0338.
* Finally, a post-processing pass is added to the OutputTransform pipeline to catch any remaining U+0338 and entity-escape them. This catches U+0338 added during any of the previous OutputTransform stages (like TOC insertion, section edit links, etc.).
*When backporting*, this code will likely need to be moved to ParserOutput::getText(), as the OutputTransform pipeline wasn't added until MW 1.42.
Bug: T387130
Change-Id: I66564e14e730f5393f4fa5780b80f24de6075af5
151 lines
4 KiB
PHP
151 lines
4 KiB
PHP
<?php
|
|
|
|
namespace MediaWiki\OutputTransform;
|
|
|
|
use MediaWiki\Config\Config;
|
|
use MediaWiki\Config\ServiceOptions;
|
|
use MediaWiki\MainConfigNames;
|
|
use MediaWiki\OutputTransform\Stages\AddRedirectHeader;
|
|
use MediaWiki\OutputTransform\Stages\AddWrapperDivClass;
|
|
use MediaWiki\OutputTransform\Stages\DeduplicateStyles;
|
|
use MediaWiki\OutputTransform\Stages\ExecutePostCacheTransformHooks;
|
|
use MediaWiki\OutputTransform\Stages\ExpandToAbsoluteUrls;
|
|
use MediaWiki\OutputTransform\Stages\ExtractBody;
|
|
use MediaWiki\OutputTransform\Stages\HandleParsoidSectionLinks;
|
|
use MediaWiki\OutputTransform\Stages\HandleSectionLinks;
|
|
use MediaWiki\OutputTransform\Stages\HandleTOCMarkers;
|
|
use MediaWiki\OutputTransform\Stages\HardenNFC;
|
|
use MediaWiki\OutputTransform\Stages\HydrateHeaderPlaceholders;
|
|
use MediaWiki\OutputTransform\Stages\ParsoidLocalization;
|
|
use MediaWiki\OutputTransform\Stages\RenderDebugInfo;
|
|
use Psr\Log\LoggerInterface;
|
|
use Wikimedia\ObjectFactory\ObjectFactory;
|
|
|
|
/**
 * This class contains the default output transformation pipeline factory for wikitext. It is a postprocessor for
 * ParserOutput objects either directly resulting from a parse or fetched from ParserCache.
 *
 * The pipeline is assembled from the core stages in CORE_LIST plus any stages supplied through the
 * MainConfigNames::OutputPipelineStages option. The stage named by FINAL_STAGE is always placed last.
 * @unstable
 */
class DefaultOutputPipelineFactory {

	/** Options holder; must provide every name in CONSTRUCTOR_OPTIONS. */
	private ServiceOptions $options;
	/** Configuration from which each stage's own ServiceOptions are built. */
	private Config $config;
	/** Logger handed to every stage as an extra constructor argument. */
	private LoggerInterface $logger;
	/** Factory used to instantiate each stage from its spec. */
	private ObjectFactory $objectFactory;

	public const CONSTRUCTOR_OPTIONS = [
		MainConfigNames::OutputPipelineStages,
	];

	/** Key of the stage that has to run after every other stage, including extension stages. */
	private const FINAL_STAGE = 'HardenNFC';

	/**
	 * Object specs of the core stages, keyed by stage name, in execution order.
	 */
	private const CORE_LIST = [
		'ExtractBody' => [
			'class' => ExtractBody::class,
			'services' => [
				'UrlUtils',
			],
			'optional_services' => [
				'MobileFrontend.Context',
			],
		],
		'AddRedirectHeader' => [
			'class' => AddRedirectHeader::class,
		],
		'RenderDebugInfo' => [
			'class' => RenderDebugInfo::class,
			'services' => [
				'HookContainer',
			],
		],
		'ParsoidLocalization' => [
			'class' => ParsoidLocalization::class,
		],
		'ExecutePostCacheTransformHooks' => [
			'class' => ExecutePostCacheTransformHooks::class,
			'services' => [
				'HookContainer',
			],
		],
		'AddWrapperDivClass' => [
			'class' => AddWrapperDivClass::class,
			'services' => [
				'LanguageFactory',
				'ContentLanguage',
			],
		],
		'HandleSectionLinks' => [
			'class' => HandleSectionLinks::class,
			'services' => [
				'TitleFactory',
			],
		],
		'HandleParsoidSectionLinks' => [
			'class' => HandleParsoidSectionLinks::class,
			'services' => [
				'TitleFactory',
			],
		],
		'HandleTOCMarkers' => [
			'class' => HandleTOCMarkers::class,
			'services' => [
				'Tidy',
			],
		],
		'DeduplicateStyles' => [
			'class' => DeduplicateStyles::class,
		],
		'ExpandToAbsoluteUrls' => [
			'class' => ExpandToAbsoluteUrls::class,
		],
		'HydrateHeaderPlaceholders' => [
			'class' => HydrateHeaderPlaceholders::class,
		],
		// This should be last, in order to ensure final output is hardened.
		// buildPipeline() enforces this even when extension stages are added.
		'HardenNFC' => [
			'class' => HardenNFC::class,
		],
	];

	/**
	 * @param ServiceOptions $options Must contain all of CONSTRUCTOR_OPTIONS
	 * @param Config $config Used to build the ServiceOptions of each stage
	 * @param LoggerInterface $logger Passed on to each stage
	 * @param ObjectFactory $objectFactory Creates the stage objects from their specs
	 */
	public function __construct(
		ServiceOptions $options,
		Config $config,
		LoggerInterface $logger,
		ObjectFactory $objectFactory
	) {
		$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		$this->options = $options;
		$this->config = $config;
		$this->logger = $logger;
		$this->objectFactory = $objectFactory;
	}

	/**
	 * Creates a pipeline of transformations to transform the content of the ParserOutput object from "parsed HTML"
	 * to "output HTML" and returns it.
	 *
	 * Stages are added in CORE_LIST order, followed by the additional stages from
	 * MainConfigNames::OutputPipelineStages, followed by the FINAL_STAGE entry.
	 * @internal
	 * @return OutputTransformPipeline
	 */
	public function buildPipeline(): OutputTransformPipeline {
		// Add extension stages. For string keys array_merge() overwrites an
		// existing entry in place and appends entries with new keys, so new
		// extension stages land after everything in CORE_LIST.
		$list = array_merge(
			self::CORE_LIST,
			$this->options->get( MainConfigNames::OutputPipelineStages )
		);

		// Re-append the hardening stage so that it also covers the output of
		// the extension stages appended above. Whatever spec is registered
		// under that key after the merge is the one that is kept.
		$finalSpec = $list[self::FINAL_STAGE];
		unset( $list[self::FINAL_STAGE] );
		$list[self::FINAL_STAGE] = $finalSpec;

		$otp = new OutputTransformPipeline();
		foreach ( $list as $spec ) {
			$class = $spec['class'];
			// Each stage class declares the config options it needs.
			$svcOptions = new ServiceOptions(
				$class::CONSTRUCTOR_OPTIONS, $this->config
			);
			$transform = $this->objectFactory->createObject(
				$spec,
				[
					'assertClass' => OutputTransformStage::class,
					'extraArgs' => [ $svcOptions, $this->logger ],
				]
			);
			$otp->addStage( $transform );
		}
		return $otp;
	}
}
|