Re-apply "Use Remex for DeduplicateStyles transform"

This reverts commit 7f63d5250e,
re-applying commit 82da9cf14b.
It can be re-applied safely after T354361 was fixed.

Most of the incidental changes from the original patch are
no longer needed, as they were made unnecessary by other work,
or were applied in I4cb2f29cf890af90f295624c586d9e1eb1939b95.

Change-Id: I1ff9a7c94244bffffe5574c0b99379ed1121a86d
(cherry picked from commit 09703c2c774a65dd9ee57ec83154aa1eab5a9d03)
This commit is contained in:
Bartosz Dziewoński 2024-01-10 00:48:18 +01:00 committed by jenkins-bot
parent a6739e066e
commit 1bf9db0779
4 changed files with 29 additions and 24 deletions

View file

@ -2,11 +2,12 @@
namespace MediaWiki\OutputTransform\Stages;
use MediaWiki\Html\Html;
use MediaWiki\Html\HtmlHelper;
use MediaWiki\OutputTransform\ContentTextTransformStage;
use MediaWiki\Parser\ParserOptions;
use MediaWiki\Parser\ParserOutput;
use MediaWiki\Parser\Sanitizer;
use Wikimedia\RemexHtml\Serializer\SerializerNode;
use Wikimedia\RemexHtml\Tokenizer\PlainAttributes;
/**
* Generates a list of unique style links
@ -20,28 +21,32 @@ class DeduplicateStyles extends ContentTextTransformStage {
protected function transformText( string $text, ParserOutput $po, ?ParserOptions $popts, array &$options ): string {
$seen = [];
return preg_replace_callback( '#<style\s+([^>]*data-mw-deduplicate\s*=[\'"][^>]*)>.*?</style>#s',
static function ( $m ) use ( &$seen ) {
$attr = Sanitizer::decodeTagAttributes( $m[1] );
if ( !isset( $attr['data-mw-deduplicate'] ) ) {
return $m[0];
}
$key = $attr['data-mw-deduplicate'];
return HtmlHelper::modifyElements(
$text,
static function ( SerializerNode $node ): bool {
return $node->name === 'style' &&
( $node->attrs['data-mw-deduplicate'] ?? '' ) !== '';
},
static function ( SerializerNode $node ) use ( &$seen ): SerializerNode {
$key = $node->attrs['data-mw-deduplicate'];
if ( !isset( $seen[$key] ) ) {
$seen[$key] = true;
return $m[0];
return $node;
}
// We were going to use an empty <style> here, but there
// was concern that would be too much overhead for browsers.
// So let's hope a <link> with a non-standard rel and href isn't
// going to be misinterpreted or mangled by any subsequent processing.
return Html::element( 'link', [
$node->name = 'link';
$node->attrs = new PlainAttributes( [
'rel' => 'mw-deduplicated-inline-style',
'href' => "mw-data:" . wfUrlencode( $key ),
] );
}, $text );
$node->children = [];
$node->void = true;
return $node;
},
$options['isParsoidContent'] ?? false
);
}
}

View file

@ -39,12 +39,12 @@ class DeduplicateStylesTest extends OutputTransformStageTestBase {
$dedup = <<<EOF
<p>This is a test document.</p>
<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1">
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
<style data-mw-deduplicate="duplicate2">.Duplicate2 {}</style>
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1">
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate2">
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate2" />
<style data-mw-not-deduplicate="duplicate1">.Duplicate1 {}</style>
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1">
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
<style data-mw-deduplicate="duplicate3">.Duplicate1 {}</style>
<style>.Duplicate1 {}</style>
EOF;

View file

@ -99,7 +99,7 @@ class WikitextContentHandlerIntegrationTest extends TextContentHandlerIntegratio
'title' => 'WikitextContentTest_testGetParserOutput',
'model' => CONTENT_MODEL_WIKITEXT,
'text' => "#REDIRECT [[Main Page]]",
'expectedHtml' => "<div class=\"mw-content-ltr mw-parser-output\" lang=\"en\" dir=\"ltr\" $parsoidVersion><div class=\"redirectMsg\"><p>Redirect to:</p><ul class=\"redirectText\"><li><a href=\"/w/index.php?title=Main_Page&amp;action=edit&amp;redlink=1\" class=\"new\" title=\"Main Page (page does not exist)\">Main Page</a></li></ul></div><section data-mw-section-id=\"0\" id=\"mwAQ\"><link rel=\"mw:PageProp/redirect\" href=\"./Main_Page\" id=\"mwAg\" /></section></div>",
'expectedHtml' => "<div class=\"mw-content-ltr mw-parser-output\" lang=\"en\" dir=\"ltr\" $parsoidVersion><div class=\"redirectMsg\"><p>Redirect to:</p><ul class=\"redirectText\"><li><a href=\"/w/index.php?title=Main_Page&amp;action=edit&amp;redlink=1\" class=\"new\" title=\"Main Page (page does not exist)\">Main Page</a></li></ul></div><section data-mw-section-id=\"0\" id=\"mwAQ\"><link rel=\"mw:PageProp/redirect\" href=\"./Main_Page\" id=\"mwAg\"></section></div>",
'expectedFields' => [
'Links' => [
[ 'Main_Page' => 0 ],

View file

@ -495,12 +495,12 @@ EOF
[], $dedupText, <<<EOF
<p>This is a test document.</p>
<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1">
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
<style data-mw-deduplicate="duplicate2">.Duplicate2 {}</style>
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1">
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate2">
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate2" />
<style data-mw-not-deduplicate="duplicate1">.Duplicate1 {}</style>
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1">
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
<style data-mw-deduplicate="duplicate3">.Duplicate1 {}</style>
<style>.Duplicate1 {}</style>
EOF