wiki.techinc.nl/includes/OutputTransform/DefaultOutputPipelineFactory.php
C. Scott Ananian 94f193a894 SECURITY: Ensure emitted HTML is safe against Unicode NFC normalization
CVE-2025-32699

Ensure that Unicode NFC normalization can be applied to our HTML
output safely.  Even though the W3C officially recommends against
normalizing HTML

https://www.w3.org/International/questions/qa-html-css-normalization#converting

this is still easily done inadvertently, especially when using the
MediaWiki action API which normalizes parameters and results by
default.

See also I671648603c4635a35585c860b4857f5ea085e47f in Parsoid, and
T266140 / I2e78e660ba1867744e34eda7d00ea527ec016b71 for another similar
issue.

The following changes are made:

* The various HTML serializers (Remex/Tidy-derived, as well as the
  Html::* helpers) are tweaked to entity-escape U+0338 wherever it
  appears.

* Similarly, Message::escaped() is tweaked to entity-escape U+0338.

* Finally, a post-processing pass is added to the OutputTransform
  pipeline to catch any remaining U+0338 and entity-escape them.
  This catches U+0338 added during any of the previous OutputTransform
  stages (like TOC insertion, section edit links, etc).
  *When backporting* this code will likely need to be moved to
  ParserOutput::getText(), as the OutputTransform pipeline wasn't added
  until MW 1.42.

Bug: T387130
Change-Id: I66564e14e730f5393f4fa5780b80f24de6075af5
2025-04-10 15:56:06 +01:00

151 lines
4 KiB
PHP

<?php
namespace MediaWiki\OutputTransform;
use MediaWiki\Config\Config;
use MediaWiki\Config\ServiceOptions;
use MediaWiki\MainConfigNames;
use MediaWiki\OutputTransform\Stages\AddRedirectHeader;
use MediaWiki\OutputTransform\Stages\AddWrapperDivClass;
use MediaWiki\OutputTransform\Stages\DeduplicateStyles;
use MediaWiki\OutputTransform\Stages\ExecutePostCacheTransformHooks;
use MediaWiki\OutputTransform\Stages\ExpandToAbsoluteUrls;
use MediaWiki\OutputTransform\Stages\ExtractBody;
use MediaWiki\OutputTransform\Stages\HandleParsoidSectionLinks;
use MediaWiki\OutputTransform\Stages\HandleSectionLinks;
use MediaWiki\OutputTransform\Stages\HandleTOCMarkers;
use MediaWiki\OutputTransform\Stages\HardenNFC;
use MediaWiki\OutputTransform\Stages\HydrateHeaderPlaceholders;
use MediaWiki\OutputTransform\Stages\ParsoidLocalization;
use MediaWiki\OutputTransform\Stages\RenderDebugInfo;
use Psr\Log\LoggerInterface;
use Wikimedia\ObjectFactory\ObjectFactory;
/**
 * This class contains the default output transformation pipeline factory for wikitext. It is a postprocessor for
 * ParserOutput objects either directly resulting from a parse or fetched from ParserCache.
 *
 * The pipeline is assembled from the core stage specs in CORE_LIST plus any specs configured through
 * MainConfigNames::OutputPipelineStages. The HardenNFC stage is always scheduled last, so that output
 * produced by configured (extension) stages passes through it as well.
 *
 * @unstable
 */
class DefaultOutputPipelineFactory {

	/** Options of this factory; must provide every key listed in CONSTRUCTOR_OPTIONS */
	private ServiceOptions $options;

	/** Configuration from which the per-stage ServiceOptions are built */
	private Config $config;

	/** Logger handed to every stage that is created */
	private LoggerInterface $logger;

	/** Factory used to instantiate the stages from their specs */
	private ObjectFactory $objectFactory;

	public const CONSTRUCTOR_OPTIONS = [
		MainConfigNames::OutputPipelineStages,
	];

	/** Key of the stage that has to run after every other stage */
	private const FINAL_STAGE = 'HardenNFC';

	/**
	 * Specs of the core stages, in execution order. Each entry is a spec
	 * that buildPipeline() passes to ObjectFactory::createObject().
	 */
	private const CORE_LIST = [
		'ExtractBody' => [
			'class' => ExtractBody::class,
			'services' => [
				'UrlUtils',
			],
			'optional_services' => [
				'MobileFrontend.Context',
			],
		],
		'AddRedirectHeader' => [
			'class' => AddRedirectHeader::class,
		],
		'RenderDebugInfo' => [
			'class' => RenderDebugInfo::class,
			'services' => [
				'HookContainer',
			],
		],
		'ParsoidLocalization' => [
			'class' => ParsoidLocalization::class,
		],
		'ExecutePostCacheTransformHooks' => [
			'class' => ExecutePostCacheTransformHooks::class,
			'services' => [
				'HookContainer',
			],
		],
		'AddWrapperDivClass' => [
			'class' => AddWrapperDivClass::class,
			'services' => [
				'LanguageFactory',
				'ContentLanguage',
			],
		],
		'HandleSectionLinks' => [
			'class' => HandleSectionLinks::class,
			'services' => [
				'TitleFactory',
			],
		],
		'HandleParsoidSectionLinks' => [
			'class' => HandleParsoidSectionLinks::class,
			'services' => [
				'TitleFactory',
			],
		],
		'HandleTOCMarkers' => [
			'class' => HandleTOCMarkers::class,
			'services' => [
				'Tidy',
			],
		],
		'DeduplicateStyles' => [
			'class' => DeduplicateStyles::class,
		],
		'ExpandToAbsoluteUrls' => [
			'class' => ExpandToAbsoluteUrls::class,
		],
		'HydrateHeaderPlaceholders' => [
			'class' => HydrateHeaderPlaceholders::class,
		],
		# This should be last, in order to ensure final output is hardened.
		# buildPipeline() re-appends it after merging in the configured stages.
		'HardenNFC' => [
			'class' => HardenNFC::class,
		],
	];

	/**
	 * @param ServiceOptions $options Must provide the options listed in CONSTRUCTOR_OPTIONS
	 * @param Config $config Source of the options requested by the individual stages
	 * @param LoggerInterface $logger Passed to every stage as an extra constructor argument
	 * @param ObjectFactory $objectFactory Used to create the stage objects from their specs
	 */
	public function __construct(
		ServiceOptions $options,
		Config $config,
		LoggerInterface $logger,
		ObjectFactory $objectFactory
	) {
		$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		$this->options = $options;
		$this->config = $config;
		$this->logger = $logger;
		$this->objectFactory = $objectFactory;
	}

	/**
	 * Creates a pipeline of transformations to transform the content of the ParserOutput object from "parsed HTML"
	 * to "output HTML" and returns it.
	 *
	 * Stages run in the order of CORE_LIST, followed by the configured stages that use a key not present in
	 * CORE_LIST, followed by the HardenNFC stage. A configured spec that reuses a core key replaces the core
	 * spec at the core stage's position.
	 *
	 * @internal
	 * @return OutputTransformPipeline
	 */
	public function buildPipeline(): OutputTransformPipeline {
		// Add extension stages. array_merge() overwrites the value of an
		// already-present string key in place and appends everything else.
		$list = array_merge(
			self::CORE_LIST,
			$this->options->get( MainConfigNames::OutputPipelineStages )
		);

		// The merge above puts newly added stages *after* the hardening
		// stage, which would let their output bypass it. Move the (possibly
		// overridden) spec back to the end of the list.
		if ( isset( $list[self::FINAL_STAGE] ) ) {
			$finalSpec = $list[self::FINAL_STAGE];
			unset( $list[self::FINAL_STAGE] );
			$list[self::FINAL_STAGE] = $finalSpec;
		}

		$otp = new OutputTransformPipeline();
		foreach ( $list as $spec ) {
			$class = $spec['class'];
			// Every stage gets its own ServiceOptions, restricted to the
			// config keys the stage class declares in CONSTRUCTOR_OPTIONS.
			$svcOptions = new ServiceOptions(
				$class::CONSTRUCTOR_OPTIONS, $this->config
			);
			$transform = $this->objectFactory->createObject(
				$spec,
				[
					'assertClass' => OutputTransformStage::class,
					'extraArgs' => [ $svcOptions, $this->logger ],
				]
			);
			$otp->addStage( $transform );
		}
		return $otp;
	}
}