diff --git a/includes/Html/HtmlHelper.php b/includes/Html/HtmlHelper.php index 66a8399dbe3..0ff50bf75c0 100644 --- a/includes/Html/HtmlHelper.php +++ b/includes/Html/HtmlHelper.php @@ -23,7 +23,8 @@ class HtmlHelper { * RemexHtml\Serializer\SerializerNode argument, and returns true if it should be modified. * @param callable $modifyCallback A callback which takes a single * RemexHtml\Serializer\SerializerNode argument and actually performs the modification on it. - * It must return the new node (which can be the original node object). + * It must return the new node (which can be the original node object) + * or a string, which is treated as the outerHTML of a replacement. * @param bool $html5format Defaults to true, which uses standard HTML5 * serialization for the parsed HTML. If set to false, uses a * serialization which is more compatible with the output of the diff --git a/includes/Html/HtmlHelperTrait.php b/includes/Html/HtmlHelperTrait.php index 270d84df9e1..1a938d8825e 100644 --- a/includes/Html/HtmlHelperTrait.php +++ b/includes/Html/HtmlHelperTrait.php @@ -34,7 +34,11 @@ trait HtmlHelperTrait { $node = clone $node; $node->attrs = clone $node->attrs; $newNode = ( $this->modifyCallback )( $node ); - Assert::parameterType( SerializerNode::class, $newNode, 'return value' ); + Assert::parameterType( [ SerializerNode::class, 'string' ], $newNode, 'return value' ); + if ( is_string( $newNode ) ) { + // Replace this element with an "outerHTML" string. + return $newNode; + } return parent::element( $parent, $newNode, $contents ); } else { return parent::element( $parent, $node, $contents ); diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index c9a3329e509..11a8dd0408e 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -42,6 +42,7 @@ use MediaWiki\Debug\DeprecationHelper; use MediaWiki\HookContainer\HookContainer; use MediaWiki\HookContainer\HookRunner; use MediaWiki\Html\Html; +use MediaWiki\Html\HtmlHelper; use MediaWiki\Http\HttpRequestFactory; use MediaWiki\Language\ILanguageConverter; use MediaWiki\Language\Language; @@ -97,6 +98,7 @@ use Wikimedia\Parsoid\DOM\Element; use Wikimedia\Parsoid\DOM\Node; use Wikimedia\Parsoid\Utils\DOMCompat; use Wikimedia\Parsoid\Utils\DOMUtils; +use Wikimedia\RemexHtml\Serializer\SerializerNode; use Wikimedia\ScopedCallback; /** @@ -225,15 +227,6 @@ class Parser { */ public const TOC_PLACEHOLDER = ''; - /** - * Permissive regexp matching TOC_PLACEHOLDER. This allows for some - * minor modifications to the placeholder to be made by extensions - * without breaking the TOC (T317857); note also that Parsoid's version - * of the placeholder might include additional attributes. - * @var string - */ - private const TOC_PLACEHOLDER_REGEX = '/]*\\bproperty\\s*=\\s*"mw:PageProp\\/toc"[^>]*>/'; - # Persistent: /** @var array */ private array $mTagHooks = []; @@ -4878,20 +4871,24 @@ class Parser { */ public static function replaceTableOfContentsMarker( $text, $toc ) { $replaced = false; - // remove the additional metas. while not strictly necessary, this also ensures idempotence if we run - // the pass more than once on a given content and TOC markers are not inserted by $toc. At the same time, - // if $toc inserts TOC markers (which, as of 2024-05, it shouldn't be able to), these are preserved by the - // fact that we run a single pass with a callback (rather than doing a first replacement with the $toc and - // a replacement of leftover markers as a second pass). - $callback = static function ( array $matches ) use( &$replaced, $toc ): string { - if ( !$replaced ) { + return HtmlHelper::modifyElements( + $text, + static function ( SerializerNode $node ): bool { + $prop = $node->attrs['property'] ?? ''; + return $node->name === 'meta' && $prop === 'mw:PageProp/toc'; + }, + static function ( SerializerNode $node ) use ( &$replaced, $toc ) { + if ( $replaced ) { + // Remove the additional metas. While not strictly + // necessary, this also ensures idempotence if we + // run the pass more than once on a given content. + return ''; + } $replaced = true; - return $toc; - } - return ''; - }; - - return preg_replace_callback( self::TOC_PLACEHOLDER_REGEX, $callback, $text ); + return $toc; // outerHTML replacement. + }, + false /* use legacy-compatible serialization */ + ); } /** diff --git a/tests/parser/headings.txt b/tests/parser/headings.txt index 271747addb0..4ddb94d2eac 100644 --- a/tests/parser/headings.txt +++ b/tests/parser/headings.txt @@ -1860,7 +1860,7 @@ section 5

text & text

[edit]

section 3

-

text ' text

[edit]
+

text ' text

[edit]

section 4

text " text

[edit]
@@ -2444,7 +2444,7 @@ wgParserEnableLegacyHeadingDOM=false !! wikitext ==   A   B   == !! html/php -

  A   B  

[edit]
+

  A   B  

[edit]
!! html/parsoid

  A   B  

!! end @@ -2690,7 +2690,7 @@ wgParserEnableLegacyHeadingDOM=false

Тест

[edit]

Тест

[edit]

тест

[edit]
-

Hey < # " > % : '

[edit]
+

Hey < # " > % : '

[edit]

#Foo bar #foo Bar #Тест #тест #Hey < # " > % : '

💩

#啤酒 #啤酒 @@ -2757,7 +2757,7 @@ wgParserEnableLegacyHeadingDOM=false

Тест

[edit]

Тест

[edit]

тест

[edit]
-

Hey < # " > % : '

[edit]
+

Hey < # " > % : '

[edit]

#Foo bar #foo Bar #Тест #тест #Hey < # " > % : '

.F0.9F.92.A9

#啤酒 #啤酒 @@ -2806,7 +2806,7 @@ wgParserEnableLegacyHeadingDOM=false

Тест

[edit]

Тест

[edit]

тест

[edit]
-

Hey < # " > % : '

[edit]
+

Hey < # " > % : '

[edit]

#Foo bar #foo Bar #Тест #тест #Hey < # " > % : '

💩

#啤酒 #啤酒 @@ -2823,7 +2823,7 @@ wgParserEnableLegacyHeadingDOM=false [[#Foo bar]] !! html/php -

Foo bar

[edit]
+

Foo bar

[edit]

#Foo bar

!! html/parsoid diff --git a/tests/parser/legacyHeadings.txt b/tests/parser/legacyHeadings.txt index a4023e6dce1..ae3408d5047 100644 --- a/tests/parser/legacyHeadings.txt +++ b/tests/parser/legacyHeadings.txt @@ -1857,7 +1857,7 @@ section 5

text & text[edit]

section 3

-

text ' text[edit]

+

text ' text[edit]

section 4

text " text[edit]

@@ -2441,7 +2441,7 @@ wgParserEnableLegacyHeadingDOM=true !! wikitext ==   A   B   == !! html/php -

  A   B  [edit]

+

  A   B  [edit]

!! html/parsoid

  A   B  

!! end @@ -2686,7 +2686,7 @@ wgParserEnableLegacyHeadingDOM=true

Тест[edit]

Тест[edit]

тест[edit]

-

Hey < # " > % : '[edit]

+

Hey < # " > % : '[edit]

#Foo bar #foo Bar #Тест #тест #Hey < # " > % : '

💩

#啤酒 #啤酒 @@ -2753,7 +2753,7 @@ wgParserEnableLegacyHeadingDOM=true

Тест[edit]

Тест[edit]

тест[edit]

-

Hey < # " > % : '[edit]

+

Hey < # " > % : '[edit]

#Foo bar #foo Bar #Тест #тест #Hey < # " > % : '

.F0.9F.92.A9

#啤酒 #啤酒 @@ -2802,7 +2802,7 @@ wgParserEnableLegacyHeadingDOM=true

Тест[edit]

Тест[edit]

тест[edit]

-

Hey < # " > % : '[edit]

+

Hey < # " > % : '[edit]

#Foo bar #foo Bar #Тест #тест #Hey < # " > % : '

💩

#啤酒 #啤酒 @@ -2819,7 +2819,7 @@ wgParserEnableLegacyHeadingDOM=true [[#Foo bar]] !! html/php -

Foo bar[edit]

+

Foo bar[edit]

#Foo bar

!! html/parsoid diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index de840c1428c..256b4f4353d 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -11090,7 +11090,7 @@ parsoid={ "modes": ["wt2html","html2html"], "normalizePhp": true }
This is an okay multi-codepoint HTML5 entity
∾̳
This are valid wikitext-specific entities
-
‏ ‏
+
‏ ‏
Highest valid characters
􏿽 􏿾 􏿿
!! end diff --git a/tests/phpunit/includes/content/CssContentHandlerIntegrationTest.php b/tests/phpunit/includes/content/CssContentHandlerIntegrationTest.php index 6bb2fea2776..74e654dde08 100644 --- a/tests/phpunit/includes/content/CssContentHandlerIntegrationTest.php +++ b/tests/phpunit/includes/content/CssContentHandlerIntegrationTest.php @@ -12,7 +12,7 @@ class CssContentHandlerIntegrationTest extends TextContentHandlerIntegrationTest 'title' => 'MediaWiki:Test.css', 'model' => null, 'text' => "hello x\n", - 'expectedHtml' => "
\nhello <world>x\n\n
", + 'expectedHtml' => "
hello <world>x\n\n
", 'expectedFields' => [ 'Links' => [ ], @@ -24,7 +24,7 @@ class CssContentHandlerIntegrationTest extends TextContentHandlerIntegrationTest 'title' => 'MediaWiki:Test.css', 'model' => null, 'text' => "/* hello [[world]] */\n", - 'expectedHtml' => "
\n/* hello [[world]] */\n\n
", + 'expectedHtml' => "
/* hello [[world]] */\n\n
", 'expectedFields' => [ 'Links' => [ [ 'World' => 0, ], @@ -37,7 +37,7 @@ class CssContentHandlerIntegrationTest extends TextContentHandlerIntegrationTest 'title' => 'MediaWiki:Test.css', 'model' => null, 'text' => "==One==\n

Two

", - 'expectedHtml' => "
\n==One==\n<h2>Two</h2>\n
", + 'expectedHtml' => "
==One==\n<h2>Two</h2>\n
", 'expectedFields' => [ 'Links' => [ ], diff --git a/tests/phpunit/includes/content/JavaScriptContentHandlerIntegrationTest.php b/tests/phpunit/includes/content/JavaScriptContentHandlerIntegrationTest.php index 52fa08cbe06..2b04512f14c 100644 --- a/tests/phpunit/includes/content/JavaScriptContentHandlerIntegrationTest.php +++ b/tests/phpunit/includes/content/JavaScriptContentHandlerIntegrationTest.php @@ -12,7 +12,7 @@ class JavaScriptContentHandlerIntegrationTest extends TextContentHandlerIntegrat 'title' => 'MediaWiki:Test.js', 'model' => null, 'text' => "hello \n", - 'expectedHtml' => "
\nhello <world>\n\n
", + 'expectedHtml' => "
hello <world>\n\n
", 'expectedFields' => [ 'Links' => [ ], @@ -24,7 +24,7 @@ class JavaScriptContentHandlerIntegrationTest extends TextContentHandlerIntegrat 'title' => 'MediaWiki:Test.js', 'model' => null, 'text' => "hello(); // [[world]]\n", - 'expectedHtml' => "
\nhello(); // [[world]]\n\n
", + 'expectedHtml' => "
hello(); // [[world]]\n\n
", 'expectedFields' => [ 'Links' => [ [ 'World' => 0, ], @@ -37,7 +37,7 @@ class JavaScriptContentHandlerIntegrationTest extends TextContentHandlerIntegrat 'title' => 'MediaWiki:Test.js', 'model' => null, 'text' => "==One==\n

Two

", - 'expectedHtml' => "
\n==One==\n<h2>Two</h2>\n
", + 'expectedHtml' => "
==One==\n<h2>Two</h2>\n
", 'expectedFields' => [ 'Links' => [ ], diff --git a/tests/phpunit/includes/content/JsonContentHandlerIntegrationTest.php b/tests/phpunit/includes/content/JsonContentHandlerIntegrationTest.php index 04810575a05..6fea47ca7e5 100644 --- a/tests/phpunit/includes/content/JsonContentHandlerIntegrationTest.php +++ b/tests/phpunit/includes/content/JsonContentHandlerIntegrationTest.php @@ -54,7 +54,7 @@ class JsonContentHandlerIntegrationTest extends MediaWikiLangTestCase { [ (object)[ '' ], '
0"' . - '<script>alert("evil!")</script>"' . + '<script>alert("evil!")</script>"' . '
', ], [ diff --git a/tests/phpunit/includes/content/WikitextContentHandlerIntegrationTest.php b/tests/phpunit/includes/content/WikitextContentHandlerIntegrationTest.php index 3bf9415ff49..3ebb7364880 100644 --- a/tests/phpunit/includes/content/WikitextContentHandlerIntegrationTest.php +++ b/tests/phpunit/includes/content/WikitextContentHandlerIntegrationTest.php @@ -99,7 +99,7 @@ class WikitextContentHandlerIntegrationTest extends TextContentHandlerIntegratio 'title' => 'WikitextContentTest_testGetParserOutput', 'model' => CONTENT_MODEL_WIKITEXT, 'text' => "#REDIRECT [[Main Page]]", - 'expectedHtml' => "

Redirect to:

", + 'expectedHtml' => "

Redirect to:

", 'expectedFields' => [ 'Links' => [ [ 'Main_Page' => 0 ], diff --git a/tests/phpunit/includes/page/WikiPageDbTest.php b/tests/phpunit/includes/page/WikiPageDbTest.php index b3e114c937e..40c841fd8bc 100644 --- a/tests/phpunit/includes/page/WikiPageDbTest.php +++ b/tests/phpunit/includes/page/WikiPageDbTest.php @@ -926,12 +926,12 @@ class WikiPageDbTest extends MediaWikiLangTestCase { [ CONTENT_MODEL_JAVASCRIPT, "var test='

not really a heading

';", - "
\nvar test='<h2>not really a heading</h2>';\n
", + "
var test='<h2>not really a heading</h2>';\n
", ], [ CONTENT_MODEL_CSS, "/* Not ''wikitext'' */", - "
\n/* Not ''wikitext'' */\n
", + "
/* Not ''wikitext'' */\n
", ], // @todo more...? ];