From 24949480eb22947298e6a82db06c55af0d262db2 Mon Sep 17 00:00:00 2001
From: jdlrobson
Date: Tue, 14 Sep 2021 18:00:06 -0700
Subject: [PATCH] Give skins more flexibility over table of contents render
* Do not store table of contents in parser output
* Instead inject table of contents via strpos where needed
inside Article based on Skin "toc" option
* Use as a TOC placeholder; for Parsoid compatibility
this will be replaced with a tag in a followup patch.
Bug: T287767
Change-Id: I44045b3b9e78e7ab793da3f37e3c0dbc91cd7d39
---
includes/Linker.php | 2 +-
includes/OutputPage.php | 35 ++++-
includes/api/ApiParse.php | 7 +-
includes/page/Article.php | 9 +-
includes/parser/Parser.php | 73 ++++++++--
includes/parser/ParserOutput.php | 38 +++++-
includes/skins/Skin.php | 50 ++++++-
.../includes/parser/ParserOutputTest.php | 129 +++++++++++++++++-
.../includes/skins/SkinTemplateTest.php | 120 ++++++++++++++++
9 files changed, 438 insertions(+), 25 deletions(-)
diff --git a/includes/Linker.php b/includes/Linker.php
index 750b0456bb1..d0b02c4c4be 100644
--- a/includes/Linker.php
+++ b/includes/Linker.php
@@ -1644,7 +1644,7 @@ class Linker {
'class' => 'toctogglelabel',
] )
. ''
- . "\n"
+ . ""
. $toc
. "\n\n";
}
diff --git a/includes/OutputPage.php b/includes/OutputPage.php
index e61833cabff..1df9b2c50c6 100644
--- a/includes/OutputPage.php
+++ b/includes/OutputPage.php
@@ -103,6 +103,11 @@ class OutputPage extends ContextSource {
*/
private $mPrintable = false;
+ /**
+ * @var array sections from ParserOutput
+ */
+ private $mSections = [];
+
/**
* @var array Contains the page subtitle. Special pages usually have some
* links here. Don't confuse with site subtitle added by skins.
@@ -1880,10 +1885,28 @@ class OutputPage extends ContextSource {
] );
}
+ /**
+ * Adds sections to OutputPage from ParserOutput
+ * @param array $sections
+ * @internal For use by Article.php
+ */
+ public function setSections( array $sections ) {
+ $this->mSections = $sections;
+ }
+
+ /**
+ * @internal For usage in Skin::getSectionsData() only.
+ * @return array Array of sections.
+ * Empty if OutputPage::setSections() has not been called.
+ */
+ public function getSections(): array {
+ return $this->mSections;
+ }
+
/**
* Add all metadata associated with a ParserOutput object, but without the actual HTML. This
* includes categories, language links, ResourceLoader modules, effects of certain magic words,
- * and so on.
+ * and so on. It does *not* include section information.
*
* @since 1.24
* @param ParserOutput $parserOutput
@@ -1893,13 +1916,18 @@ class OutputPage extends ContextSource {
array_merge( $this->mLanguageLinks, $parserOutput->getLanguageLinks() );
$this->addCategoryLinks( $parserOutput->getCategories() );
$this->setIndicators( $parserOutput->getIndicators() );
+
+ // FIXME: Best practice is for OutputPage to be an accumulator, as
+ // addParserOutputMetadata() may be called multiple times, but the
+ // following lines overwrite any previous data. These should
+ // be migrated to an injection pattern.
$this->mNewSectionLink = $parserOutput->getNewSection();
$this->mHideNewSectionLink = $parserOutput->getHideNewSection();
+ $this->mNoGallery = $parserOutput->getNoGallery();
if ( !$parserOutput->isCacheable() ) {
$this->enableClientCache( false );
}
- $this->mNoGallery = $parserOutput->getNoGallery();
$this->mHeadItems = array_merge( $this->mHeadItems, $parserOutput->getHeadItems() );
$this->addModules( $parserOutput->getModules() );
$this->addModuleStyles( $parserOutput->getModuleStyles() );
@@ -1969,6 +1997,9 @@ class OutputPage extends ContextSource {
}
// Include parser limit report
+ // FIXME: This should append, rather than overwrite, or else this
+ // data should be injected into the OutputPage like is done for the
+ // other page-level things (like OutputPage::setSections()).
if ( !$this->limitReportJSData ) {
$this->limitReportJSData = $parserOutput->getLimitReportJSData();
}
diff --git a/includes/api/ApiParse.php b/includes/api/ApiParse.php
index aa3a13521ce..3000632ed65 100644
--- a/includes/api/ApiParse.php
+++ b/includes/api/ApiParse.php
@@ -491,12 +491,17 @@ class ApiParse extends ApiBase {
}
if ( isset( $prop['text'] ) ) {
+ $skin = $context ? $context->getSkin() : null;
+ $options = $skin ? $skin->getOptions() : [
+ 'toc' => true,
+ ];
$result_array['text'] = $p_result->getText( [
'allowTOC' => !$params['disabletoc'],
+ 'injectTOC' => $options['toc'],
'enableSectionEditLinks' => !$params['disableeditsection'],
'wrapperDivClass' => $params['wrapoutputclass'],
'deduplicateStyles' => !$params['disablestylededuplication'],
- 'skin' => $context ? $context->getSkin() : null,
+ 'skin' => $skin,
] );
$result_array[ApiResult::META_BC_SUBELEMENTS][] = 'text';
if ( $context ) {
diff --git a/includes/page/Article.php b/includes/page/Article.php
index 67af54ea783..7a13f1f16a8 100644
--- a/includes/page/Article.php
+++ b/includes/page/Article.php
@@ -744,8 +744,13 @@ class Article implements Page {
# Ensure that UI elements requiring revision ID have
# the correct version information.
$outputPage->setRevisionId( $pOutput->getCacheRevisionId() ?? $this->getRevIdFetched() );
-
- $outputPage->addParserOutput( $pOutput, $textOptions );
+ # Ensure that the skin has the necessary ToC information
+ # (and do this before OutputPage::addParserOutput() calls the
+ # OutputPageParserOutput hook)
+ $outputPage->setSections( $pOutput->getSections() );
+ $outputPage->addParserOutput( $pOutput, $textOptions + [
+ 'injectTOC' => true,
+ ] );
# Preload timestamp to avoid a DB hit
$cachedTimestamp = $pOutput->getTimestamp();
if ( $cachedTimestamp !== null ) {
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 1f20b91bc23..10660b4058a 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -149,10 +149,46 @@ class Parser {
public const MARKER_SUFFIX = "-QINU`\"'\x7f";
public const MARKER_PREFIX = "\x7f'\"`UNIQ-";
- # Markers used for wrapping the table of contents
+ /**
+ * Internal Markers used for wrapping the table of contents.
+ *
+ * The use of the `mw:` prefix makes sure that the table of contents is
+ * identified as a block element, and prevents the introduction of `p` tags
+ * wrapping the table of contents; see BlockLevelPass.
+ *
+ * @var string
+ * @deprecated since 1.38. These markers are used in old cached
+ * content but not generated from the current parser (or from Parsoid).
+ * The constants will be removed in a future MediaWiki release.
+ */
public const TOC_START = '';
+
+ /**
+ * See ::TOC_START
+ * @var string
+ * @deprecated since 1.38. See ::TOC_START
+ */
public const TOC_END = '';
+ /**
+ * Internal marker used by parser to track where the table of
+ * contents should be. Various magic words can change the position
+ * during the parse. The table of contents is generated during
+ * the parse, however skins have the final decision on whether the
+ * table of contents is injected. This placeholder element
+ * identifies where in the page the table of contents should be
+ * injected, if at all.
+ * @var string
+ * @see Keep this in sync with BlockLevelPass::execute() and
+ * RemexCompatMunger::isTableOfContentsMarker()
+ * @internal This will be made private as soon as old content
+ * has expired from the cache (at the moment it is needed in
+ * ParserOutput for a compatibility fallback). Skins should
+ * *not* directly reference TOC_PLACEHOLDER but instead use
+ * Parser::replaceTableOfContentsMarker().
+ */
+ public const TOC_PLACEHOLDER = '';
+
# Persistent:
private $mTagHooks = [];
private $mFunctionHooks = [];
@@ -4049,7 +4085,7 @@ class Parser {
$this->mForceTocPosition = true;
# Set a placeholder. At the end we'll fill it in with the TOC.
- $text = $mw->replace( '', $text, 1 );
+ $text = $mw->replace( self::TOC_PLACEHOLDER, $text, 1 );
# Only keep the first one.
$text = $mw->replace( '', $text );
@@ -4457,7 +4493,6 @@ class Parser {
}
$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
$this->mOutput->setTOCHTML( $toc );
- $toc = self::TOC_START . $toc . self::TOC_END;
}
if ( $isMain ) {
@@ -4496,16 +4531,12 @@ class Parser {
if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
// append the TOC at the beginning
// Top anchor now in skin
- $sections[0] .= $toc . "\n";
+ $sections[0] .= self::TOC_PLACEHOLDER . "\n";
}
$full .= implode( '', $sections );
- if ( $this->mForceTocPosition ) {
- return str_replace( '', $toc, $full );
- } else {
- return $full;
- }
+ return $full;
}
/**
@@ -4766,6 +4797,30 @@ class Parser {
return $text;
}
+ /**
+ * Replace table of contents marker in parsed HTML.
+ *
+ * Used to remove or replace the marker. This method should be
+ * used instead of direct access to Parser::TOC_PLACEHOLDER, since
+ * in the future the placeholder might have additional attributes
+ * attached which should be ignored when the replacement is made.
+ *
+ * @since 1.38
+ * @stable
+ *
+ * @param string $text Parsed HTML
+ * @param string $toc HTML table of contents string, or else an empty
+ * string to remove the marker.
+ * @return string Result HTML
+ */
+ public static function replaceTableOfContentsMarker( $text, $toc ) {
+ return str_replace(
+ self::TOC_PLACEHOLDER,
+ $toc,
+ $text
+ );
+ }
+
/**
* Set up some variables which are usually set up in parse()
* so that an external function can call some class members with confidence
diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php
index 5a7f6346f49..19ef812ec5c 100644
--- a/includes/parser/ParserOutput.php
+++ b/includes/parser/ParserOutput.php
@@ -333,6 +333,10 @@ class ParserOutput extends CacheTime {
* - allowTOC: (bool) Show the TOC, assuming there were enough headings
* to generate one and `__NOTOC__` wasn't used. Default is true,
* but might be statefully overridden.
+ * - injectTOC: (bool) Replace the TOC_PLACEHOLDER with TOC contents;
+ * otherwise the marker will be left in the article (and the skin
+ * will be responsible for replacing or removing it). Default is
+ * true.
* - enableSectionEditLinks: (bool) Include section edit links, assuming
* section edit link tokens are present in the HTML. Default is true,
* but might be statefully overridden.
@@ -356,6 +360,7 @@ class ParserOutput extends CacheTime {
public function getText( $options = [] ) {
$options += [
'allowTOC' => true,
+ 'injectTOC' => true,
'enableSectionEditLinks' => true,
'skin' => null,
'unwrap' => false,
@@ -371,7 +376,9 @@ class ParserOutput extends CacheTime {
}
if ( $options['enableSectionEditLinks'] ) {
- // TODO: Passing the skin should be required
+ // TODO: Skin should not be required.
+ // It would be better to define one or more narrow interfaces to use here,
+ // so this code doesn't have to depend on all of Skin.
$skin = $options['skin'] ?: RequestContext::getMain()->getSkin();
$text = preg_replace_callback(
@@ -409,8 +416,32 @@ class ParserOutput extends CacheTime {
}
if ( $options['allowTOC'] ) {
- $text = str_replace( [ Parser::TOC_START, Parser::TOC_END ], '', $text );
+ if ( $options['injectTOC'] ) {
+ // XXX Use DI to inject this once ::getText() is moved out
+ // of ParserOutput.
+ $tidy = MediaWikiServices::getInstance()->getTidy();
+ $toc = $tidy->tidy(
+ $this->getTOCHTML(),
+ [ Sanitizer::class, 'armorFrenchSpaces' ]
+ );
+ $text = Parser::replaceTableOfContentsMarker( $text, $toc );
+ // The line below can be removed once old content has expired
+ // from the parser cache
+ $text = str_replace( [ Parser::TOC_START, Parser::TOC_END ], '', $text );
+ } else {
+ // The line below can be removed once old content has expired
+ // from the parser cache (and Parser::TOC_PLACEHOLDER should
+ // then be made private)
+ $text = preg_replace(
+ '#' . preg_quote( Parser::TOC_START, '#' ) . '.*?' . preg_quote( Parser::TOC_END, '#' ) . '#s',
+ Parser::TOC_PLACEHOLDER,
+ $text
+ );
+ }
} else {
+ $text = Parser::replaceTableOfContentsMarker( $text, '' );
+ // The line below can be removed once old content has expired
+ // from the parser cache
$text = preg_replace(
'#' . preg_quote( Parser::TOC_START, '#' ) . '.*?' . preg_quote( Parser::TOC_END, '#' ) . '#s',
'',
@@ -634,6 +665,9 @@ class ParserOutput extends CacheTime {
return $this->mTitleText;
}
+ /**
+ * @return array
+ */
public function getSections() {
return $this->mSections;
}
diff --git a/includes/skins/Skin.php b/includes/skins/Skin.php
index 15f729f097e..7f8eac403ac 100644
--- a/includes/skins/Skin.php
+++ b/includes/skins/Skin.php
@@ -90,6 +90,40 @@ abstract class Skin extends ContextSource {
return self::VERSION_MAJOR;
}
+ /**
+ * Enriches section data with has-subsections and is-last-subsection
+ * properties so that the table of contents can be rendered in Mustache.
+ *
+ * Example Mustache template code:
+ *
+ * {{#array-sections}}
+ * - {{number}} {{line}}
+ * {{#has-subsections}}
{{/has-subsections}}
+ * {{^has-subsections}}
{{/has-subsections}}
+ * {{#is-last-item}}
{{/is-last-item}}
+ * {{/array-sections}}
+ *
+ *
+ * @return array
+ */
+ private function getSectionsData(): array {
+ $sections = $this->getOutput()->getSections();
+ $data = [];
+ $parent = null;
+ $lastLevel = 0;
+ foreach ( $sections as $i => $section ) {
+ $nextSection = $sections[$i + 1] ?? null;
+ $level = $section['toclevel'];
+
+ $data[] = $section + [
+ 'has-subsections' => $nextSection !== null && $nextSection['toclevel'] > $level,
+ 'is-last-item' => $nextSection === null || $nextSection['toclevel'] < $level,
+ ];
+ }
+
+ return $data;
+ }
+
/**
* Subclasses may extend this method to add additional
* template data.
@@ -113,6 +147,9 @@ abstract class Skin extends ContextSource {
$out = $this->getOutput();
$user = $this->getUser();
$data = [
+ // Array values
+ 'array-sections' => $this->getSectionsData(),
+
// Boolean values
'is-anon' => $user->isAnon(),
'is-article' => $out->isArticle(),
@@ -185,6 +222,10 @@ abstract class Skin extends ContextSource {
* tag can be set on the skin. Note, users can disable this feature via user
* preference.
* `link` an array of link options that will be used in makeLink calls. See Skin::makeLink
+ * `toc` Whether a table of contents is included in the main article
+ * content area. It defaults to `true`, but if your skins wants to
+ * place a table of contents elsewhere (for example, in a sidebar),
+ * set `toc` to `false`.
*/
public function __construct( $options = null ) {
if ( is_string( $options ) ) {
@@ -199,6 +240,7 @@ abstract class Skin extends ContextSource {
if ( isset( $options['link'] ) ) {
$this->defaultLinkOptions = $options['link'];
}
+ // Defaults are set in Skin::getOptions()
$this->options = $options;
$this->skinname = $name;
}
@@ -2578,8 +2620,12 @@ abstract class Skin extends ContextSource {
* @internal
* @return array Skin options passed into constructor
*/
- public function getOptions(): array {
- return $this->options;
+ final public function getOptions(): array {
+ return $this->options + [
+ // Whether the table of contents will be inserted on page views
+ // See ParserOutput::getText() for the implementation logic
+ 'toc' => true,
+ ];
}
/**
diff --git a/tests/phpunit/includes/parser/ParserOutputTest.php b/tests/phpunit/includes/parser/ParserOutputTest.php
index f02f0904b0b..86029cf8313 100644
--- a/tests/phpunit/includes/parser/ParserOutputTest.php
+++ b/tests/phpunit/includes/parser/ParserOutputTest.php
@@ -195,6 +195,7 @@ class ParserOutputTest extends MediaWikiLangTestCase {
/**
* @covers ParserOutput::getText
* @dataProvider provideGetText
+ * @dataProvider provideGetTextBackCompat
* @param array $options Options to getText()
* @param string $text Parser text
* @param string $expect Expected output
@@ -207,16 +208,15 @@ class ParserOutputTest extends MediaWikiLangTestCase {
] );
$po = new ParserOutput( $text );
+ $po->setTOCHTML( self::provideGetTextToC() );
$actual = $po->getText( $options );
$this->assertSame( $expect, $actual );
}
- public static function provideGetText() {
+ public static function provideGetTextToC() {
// phpcs:disable Generic.Files.LineLength
- $text = <<Test document.
-
-Contents
+ $toc = <<
Contents
-
+
+EOF;
+ return $toc;
+ }
+
+ // REMOVE THIS ONCE Parser::TOC_START IS REMOVED
+ public static function provideGetTextBackCompat() {
+ // phpcs:disable Generic.Files.LineLength
+ $toc = self::provideGetTextToc();
+ $text = <<Test document.
+
+$toc
+Section 1Section 1
+One
+
+Section 2Section 2
+Two
+
+Section 2.1Section 2.1
+Two point one
+
+Section 3Section 3
+Three
+
+EOF;
+
+ return [
+ 'No options (mw:toc)' => [
+ [], $text, <<Test document.
+
+
+
+Section 1[edit]
+One
+
+Section 2[edit]
+Two
+
+Section 2.1
+Two point one
+
+Section 3[edit]
+Three
+
+EOF
+ ],
+ 'Disable section edit links (mw:toc)' => [
+ [ 'enableSectionEditLinks' => false ], $text, <<Test document.
+
+
+
+Section 1
+One
+
+Section 2
+Two
+
+Section 2.1
+Two point one
+
+Section 3
+Three
+
+EOF
+ ],
+ 'Disable TOC, but wrap (mw:toc)' => [
+ [ 'allowTOC' => false, 'wrapperDivClass' => 'mw-parser-output' ], $text, <<Test document.
+
+
+Section 1[edit]
+One
+
+Section 2[edit]
+Two
+
+Section 2.1
+Two point one
+
+Section 3[edit]
+Three
+
+EOF
+ ],
+ ];
+ // phpcs:enable
+ }
+
+ public static function provideGetText() {
+ // phpcs:disable Generic.Files.LineLength
+ $toc = self::provideGetTextToc();
+ $text = <<Test document.
+
+
Section 1Section 1
One
diff --git a/tests/phpunit/includes/skins/SkinTemplateTest.php b/tests/phpunit/includes/skins/SkinTemplateTest.php
index 562547dfd1c..20e2a0600ee 100644
--- a/tests/phpunit/includes/skins/SkinTemplateTest.php
+++ b/tests/phpunit/includes/skins/SkinTemplateTest.php
@@ -205,6 +205,126 @@ class SkinTemplateTest extends MediaWikiIntegrationTestCase {
);
}
+ public function provideGetSectionsData(): array {
+ $END_SUBSECTION = [
+ 'is-last-item' => true,
+ ];
+ $NOT_END_SUBSECTION = [
+ 'is-last-item' => false,
+ ];
+ $WITH_SUBSECTIONS = [
+ 'has-subsections' => true,
+ ];
+ $WITHOUT_SUBSECTIONS = [
+ 'has-subsections' => false,
+ ];
+ // byteoffset and fromtitle are redacted from this test.
+ $SECTION_1 = [
+ 'toclevel' => 1,
+ 'line' => 'Section 1',
+ 'anchor' => 'section_1',
+ ];
+ $SECTION_1_1 = [
+ 'toclevel' => 2,
+ 'line' => 'Section 1.1',
+ 'anchor' => 'section_1_1',
+ ];
+ $SECTION_1_2 = [
+ 'toclevel' => 2,
+ 'line' => 'Section 1.2',
+ 'anchor' => 'section_1_2',
+ ];
+ $SECTION_1_2_1 = [
+ 'toclevel' => 3,
+ 'line' => 'Section 1.2.1',
+ 'anchor' => 'section_1_2_1',
+ ];
+ $SECTION_1_3 = [
+ 'toclevel' => 2,
+ 'line' => 'Section 1.3',
+ 'anchor' => 'section_1_3',
+ ];
+ $SECTION_2 = [
+ 'toclevel' => 1,
+ 'line' => 'Section 2',
+ 'anchor' => 'section_2',
+ ];
+
+ return [
+ [
+ // sections data
+ [],
+ []
+ ],
+ [
+ [
+ $SECTION_1,
+ $SECTION_2,
+ ],
+ [
+ $SECTION_1 + $NOT_END_SUBSECTION + $WITHOUT_SUBSECTIONS,
+ $SECTION_2 + $END_SUBSECTION + $WITHOUT_SUBSECTIONS,
+ ]
+ ],
+ [
+ [
+ $SECTION_1,
+ $SECTION_1_1,
+ $SECTION_2,
+ ],
+ [
+ $SECTION_1 + $NOT_END_SUBSECTION + $WITH_SUBSECTIONS,
+ $SECTION_1_1 + $END_SUBSECTION + $WITHOUT_SUBSECTIONS,
+ $SECTION_2 + $END_SUBSECTION + $WITHOUT_SUBSECTIONS,
+ ]
+ ],
+ [
+ [
+ $SECTION_1,
+ $SECTION_1_1,
+ $SECTION_1_2,
+ $SECTION_1_2_1,
+ $SECTION_1_3,
+ $SECTION_2,
+ ],
+ [
+ $SECTION_1 + $NOT_END_SUBSECTION + $WITH_SUBSECTIONS,
+ $SECTION_1_1 + $NOT_END_SUBSECTION + $WITHOUT_SUBSECTIONS,
+ $SECTION_1_2 + $NOT_END_SUBSECTION + $WITH_SUBSECTIONS,
+ $SECTION_1_2_1 + $END_SUBSECTION + $WITHOUT_SUBSECTIONS,
+ $SECTION_1_3 + $END_SUBSECTION + $WITHOUT_SUBSECTIONS,
+ $SECTION_2 + $END_SUBSECTION + $WITHOUT_SUBSECTIONS,
+ ]
+ ]
+ ];
+ }
+
+ /**
+ * @covers Skin::getSectionsData
+ * @dataProvider provideGetSectionsData
+ *
+ * @param array $sectionsData
+ * @param array $expected
+ */
+ public function testGetSectionsData( $sectionsData, $expected ) {
+ $skin = new SkinTemplate();
+ $context = new DerivativeContext( $skin->getContext() );
+ $mock = $this->createMock( OutputPage::class );
+ $mock->expects( $this->any() )
+ ->method( 'getSections' )
+ ->willReturn( $sectionsData );
+
+ $reflectionMethod = new ReflectionMethod( Skin::class, 'getSectionsData' );
+ $reflectionMethod->setAccessible( true );
+
+ $context->setOutput( $mock );
+ $skin->setContext( $context );
+ $data = $reflectionMethod->invoke(
+ $skin
+ );
+ $this->assertEquals( $data, $expected );
+ }
+
public function provideContentNavigation(): array {
return [
'No userpage set' => [