Revert "Use Remex for DeduplicateStyles transform"

This reverts commit 82da9cf14b.

Passing the output through Remex seems to have unexpected consequences
that still need to be investigated. For the sake of unbreaking the UBN
("Unbreak Now!") task, let's revert this first.

Bug: T353920
Change-Id: Iaac7942aa77aee5ab525852ac5b41dd516ff13c9
This commit is contained in:
Isabelle Hurbain-Palatin 2023-12-22 11:24:19 +01:00
parent 9555038716
commit 7f63d5250e
9 changed files with 26 additions and 45 deletions

View file

@ -2,12 +2,11 @@
namespace Mediawiki\OutputTransform\Stages;
use MediaWiki\Html\HtmlHelper;
use MediaWiki\Html\Html;
use Mediawiki\OutputTransform\ContentTextTransformStage;
use MediaWiki\Parser\ParserOutput;
use MediaWiki\Parser\Sanitizer;
use ParserOptions;
use Wikimedia\RemexHtml\Serializer\SerializerNode;
use Wikimedia\RemexHtml\Tokenizer\PlainAttributes;
/**
* Generates a list of unique style links
@ -21,32 +20,28 @@ class DeduplicateStyles extends ContentTextTransformStage {
protected function transformText( string $text, ParserOutput $po, ?ParserOptions $popts, array &$options ): string {
$seen = [];
return HtmlHelper::modifyElements(
$text,
static function ( SerializerNode $node ): bool {
return $node->name === 'style' &&
( $node->attrs['data-mw-deduplicate'] ?? '' ) !== '';
},
static function ( SerializerNode $node ) use ( &$seen ): SerializerNode {
$key = $node->attrs['data-mw-deduplicate'];
return preg_replace_callback( '#<style\s+([^>]*data-mw-deduplicate\s*=[\'"][^>]*)>.*?</style>#s',
static function ( $m ) use ( &$seen ) {
$attr = Sanitizer::decodeTagAttributes( $m[1] );
if ( !isset( $attr['data-mw-deduplicate'] ) ) {
return $m[0];
}
$key = $attr['data-mw-deduplicate'];
if ( !isset( $seen[$key] ) ) {
$seen[$key] = true;
return $node;
return $m[0];
}
// We were going to use an empty <style> here, but there
// was concern that would be too much overhead for browsers.
// So let's hope a <link> with a non-standard rel and href isn't
// going to be misinterpreted or mangled by any subsequent processing.
$node->name = 'link';
$node->attrs = new PlainAttributes( [
return Html::element( 'link', [
'rel' => 'mw-deduplicated-inline-style',
'href' => "mw-data:" . wfUrlencode( $key ),
] );
$node->children = [];
$node->void = true;
return $node;
},
$options['isParsoidContent'] ?? false
);
}, $text );
}
}

View file

@ -1447,12 +1447,6 @@ class Message implements MessageSpecifier, Serializable {
// they're inside they already are from the outer div.
'unwrap' => true,
'userLang' => $this->getLanguage(),
// Although we *could* have template styles in messages,
// they are rare enough that it's not worth the performance
// hit on all messages to run deduplication (and the
// deduplication would only extend within the message anyway,
// not to the article content)
'deduplicateStyles' => false,
] )
: $out;
}

View file

@ -1456,7 +1456,6 @@ class ParserTestRunner {
'allowTOC' => !isset( $opts['notoc'] ),
'unwrap' => !isset( $opts['wrap'] ),
'skin' => $this->getSkin( $opts['skin'] ?? 'fallback' ),
'deduplicateStyles' => isset( $opts['deduplicateStyles'] ),
] );
$out = preg_replace( '/\s+$/', '', $out );
}

View file

@ -46,7 +46,6 @@ version=2
# thumbsize=NNN set the default thumb size to NNNpx for this test
# wrap include the normal wrapper <div class="mw-parser-output"> (since 1.30)
# local format section links in edit comment text as local links
# deduplicateStyles apply style deduplication to the output
#
# Configuration globals:
#

View file

@ -33,12 +33,12 @@ class DeduplicateStylesTest extends OutputTransformStageTest {
$dedup = <<<EOF
<p>This is a test document.</p>
<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1">
<style data-mw-deduplicate="duplicate2">.Duplicate2 {}</style>
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate2" />
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1">
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate2">
<style data-mw-not-deduplicate="duplicate1">.Duplicate1 {}</style>
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1">
<style data-mw-deduplicate="duplicate3">.Duplicate1 {}</style>
<style>.Duplicate1 {}</style>
EOF;

View file

@ -75,9 +75,7 @@ class JsonContentHandlerIntegrationTest extends MediaWikiLangTestCase {
true
);
$this->assertInstanceOf( ParserOutput::class, $parserOutput );
$this->assertEquals( $expected, $parserOutput->getText( [
'deduplicateStyles' => false,
] ) );
$this->assertEquals( $expected, $parserOutput->getText() );
}
/**

View file

@ -38,9 +38,7 @@ class TextContentHandlerIntegrationTest extends MediaWikiLangTestCase {
$contentRenderer = $this->getServiceContainer()->getContentRenderer();
$po = $contentRenderer->getParserOutput( $content, $title, null, $parserOptions );
$html = $po->getText( [
'deduplicateStyles' => false,
] );
$html = $po->getText();
$html = preg_replace( '#<!--.*?-->#sm', '', $html ); // strip comments
if ( $expectedHtml !== null ) {

View file

@ -937,9 +937,7 @@ class WikiPageDbTest extends MediaWikiLangTestCase {
$opt = $page->makeParserOptions( 'canonical' );
$po = $page->getParserOutput( $opt );
$text = $po->getText( [
'deduplicateStyles' => false,
] );
$text = $po->getText();
$text = trim( preg_replace( '/<!--.*?-->/sm', '', $text ) ); # strip injected comments
$text = preg_replace( '!\s*(</p>|</div>)!m', '\1', $text ); # don't let tidy confuse us

View file

@ -449,12 +449,12 @@ EOF
[], $dedupText, <<<EOF
<p>This is a test document.</p>
<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1">
<style data-mw-deduplicate="duplicate2">.Duplicate2 {}</style>
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate2" />
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1">
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate2">
<style data-mw-not-deduplicate="duplicate1">.Duplicate1 {}</style>
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1">
<style data-mw-deduplicate="duplicate3">.Duplicate1 {}</style>
<style>.Duplicate1 {}</style>
EOF