Follow-up to Ibce512b3c4a52f74b2d2124f0159e306f2689ea5. HEADING_REGEX will now correctly match opening tags when one of the attributes contains an unencoded > character. In a better world, this would not use regular expressions. However, while implementing it as a DOM transformation is easy enough, doing so causes never-ending test failures due to changes in HTML serialization, so we gave up on it for now in after discussion on the original patch. Bug: T358810 Change-Id: Ibad4b29a988c2a4911ebe6512791042c46dd1a9b
122 lines
4.7 KiB
PHP
122 lines
4.7 KiB
PHP
<?php
|
|
|
|
namespace MediaWiki\Tests\OutputTransform;
|
|
|
|
use MediaWiki\Parser\ParserOutput;
|
|
use Wikimedia\Parsoid\Core\SectionMetadata;
|
|
use Wikimedia\Parsoid\Core\TOCData;
|
|
|
|
/**
|
|
* Consts and utils used for OutputTransform tests
|
|
*/
|
|
class TestUtils {
|
|
public const TEST_DOC = <<<EOF
|
|
<p>Test document.
|
|
</p>
|
|
<meta property="mw:PageProp/toc" />
|
|
<h2 data-mw-anchor="Section_1">Section 1<mw:editsection page="Test Page" section="1">Section 1</mw:editsection></h2>
|
|
<p>One
|
|
</p>
|
|
<h2 data-mw-anchor="Section_2">Section 2<mw:editsection page="Test Page" section="2">Section 2</mw:editsection></h2>
|
|
<p>Two
|
|
</p>
|
|
<h3 data-mw-anchor="Section_2.1">Section 2.1<mw:editsection page="Talk:User:Bug_T261347" section="3">Section 2.1</mw:editsection></h3>
|
|
<p>Two point one
|
|
</p>
|
|
<h2 data-mw-anchor="Section_3">Section 3<mw:editsection page="Test Page" section="4">Section 3</mw:editsection></h2>
|
|
<p>Three
|
|
</p>
|
|
EOF;
|
|
public const TEST_DOC_WITH_LINKS = <<<EOF
|
|
<p>Test document.
|
|
</p>
|
|
<meta property="mw:PageProp/toc" />
|
|
<h2><span class="mw-headline" id="Section_1">Section 1</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&action=edit&section=1" title="Edit section: Section 1">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
|
|
<p>One
|
|
</p>
|
|
<h2><span class="mw-headline" id="Section_2">Section 2</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&action=edit&section=2" title="Edit section: Section 2">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
|
|
<p>Two
|
|
</p>
|
|
<h3><span class="mw-headline" id="Section_2.1">Section 2.1</span></h3>
|
|
<p>Two point one
|
|
</p>
|
|
<h2><span class="mw-headline" id="Section_3">Section 3</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&action=edit&section=4" title="Edit section: Section 3">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
|
|
<p>Three
|
|
</p>
|
|
EOF;
|
|
public const TEST_DOC_WITHOUT_LINKS = <<<EOF
|
|
<p>Test document.
|
|
</p>
|
|
<meta property="mw:PageProp/toc" />
|
|
<h2><span class="mw-headline" id="Section_1">Section 1</span></h2>
|
|
<p>One
|
|
</p>
|
|
<h2><span class="mw-headline" id="Section_2">Section 2</span></h2>
|
|
<p>Two
|
|
</p>
|
|
<h3><span class="mw-headline" id="Section_2.1">Section 2.1</span></h3>
|
|
<p>Two point one
|
|
</p>
|
|
<h2><span class="mw-headline" id="Section_3">Section 3</span></h2>
|
|
<p>Three
|
|
</p>
|
|
EOF;
|
|
|
|
// In this test, the `>` is not escaped in the 'data-mw-anchor' attribute and tag content, which
|
|
// is allowed in HTML, but it's not the serialization used by the Parser. Extensions that modify
|
|
// the HTML, e.g. DiscussionTools, can cause this to appear.
|
|
public const TEST_DOC_ANGLE_BRACKETS = <<<HTML
|
|
<h2 data-mw-anchor=">">><mw:editsection page="Test Page" section="1">></mw:editsection></h2>
|
|
HTML;
|
|
public const TEST_DOC_ANGLE_BRACKETS_WITH_LINKS = <<<HTML
|
|
<h2><span class="mw-headline" id=">">></span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&action=edit&section=1" title="Edit section: >">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
|
|
HTML;
|
|
public const TEST_DOC_ANGLE_BRACKETS_WITHOUT_LINKS = <<<HTML
|
|
<h2><span class="mw-headline" id=">">></span></h2>
|
|
HTML;
|
|
|
|
public const TEST_TO_DEDUP = <<<EOF
|
|
<p>This is a test document.</p>
|
|
<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
|
|
<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
|
|
<style data-mw-deduplicate="duplicate2">.Duplicate2 {}</style>
|
|
<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
|
|
<style data-mw-deduplicate="duplicate2">.Duplicate2 {}</style>
|
|
<style data-mw-not-deduplicate="duplicate1">.Duplicate1 {}</style>
|
|
<style data-mw-deduplicate="duplicate1">.Same-attribute-different-content {}</style>
|
|
<style data-mw-deduplicate="duplicate3">.Duplicate1 {}</style>
|
|
<style>.Duplicate1 {}</style>
|
|
EOF;
|
|
|
|
public static function initSections( ParserOutput $po ): void {
|
|
$po->setTOCData( new TOCData( SectionMetadata::fromLegacy( [
|
|
'index' => "1",
|
|
'level' => 1,
|
|
'toclevel' => 1,
|
|
'number' => "1",
|
|
'line' => "Section 1",
|
|
'anchor' => "Section_1"
|
|
] ), SectionMetadata::fromLegacy( [
|
|
'index' => "2",
|
|
'level' => 1,
|
|
'toclevel' => 1,
|
|
'number' => "2",
|
|
'line' => "Section 2",
|
|
'anchor' => "Section_2"
|
|
] ), SectionMetadata::fromLegacy( [
|
|
'index' => "3",
|
|
'level' => 2,
|
|
'toclevel' => 2,
|
|
'number' => "2.1",
|
|
'line' => "Section 2.1",
|
|
'anchor' => "Section_2.1"
|
|
] ), SectionMetadata::fromLegacy( [
|
|
'index' => "4",
|
|
'level' => 1,
|
|
'toclevel' => 1,
|
|
'number' => "3",
|
|
'line' => "Section 3",
|
|
'anchor' => "Section_3"
|
|
] ), ) );
|
|
}
|
|
}
|