From 24949480eb22947298e6a82db06c55af0d262db2 Mon Sep 17 00:00:00 2001 From: jdlrobson Date: Tue, 14 Sep 2021 18:00:06 -0700 Subject: [PATCH] Give skins more flexibility over table of contents render * Do not store table of contents in parser output * Instead inject table of contents via strpos where needed inside Article based on Skin "toc" option * Use as a TOC placeholder; for Parsoid compatibility this will be replaced with a tag in a followup patch. Bug: T287767 Change-Id: I44045b3b9e78e7ab793da3f37e3c0dbc91cd7d39 --- includes/Linker.php | 2 +- includes/OutputPage.php | 35 ++++- includes/api/ApiParse.php | 7 +- includes/page/Article.php | 9 +- includes/parser/Parser.php | 73 ++++++++-- includes/parser/ParserOutput.php | 38 +++++- includes/skins/Skin.php | 50 ++++++- .../includes/parser/ParserOutputTest.php | 129 +++++++++++++++++- .../includes/skins/SkinTemplateTest.php | 120 ++++++++++++++++ 9 files changed, 438 insertions(+), 25 deletions(-) diff --git a/includes/Linker.php b/includes/Linker.php index 750b0456bb1..d0b02c4c4be 100644 --- a/includes/Linker.php +++ b/includes/Linker.php @@ -1644,7 +1644,7 @@ class Linker { 'class' => 'toctogglelabel', ] ) . '' - . "\n" + . "" . $toc . "\n\n"; } diff --git a/includes/OutputPage.php b/includes/OutputPage.php index e61833cabff..1df9b2c50c6 100644 --- a/includes/OutputPage.php +++ b/includes/OutputPage.php @@ -103,6 +103,11 @@ class OutputPage extends ContextSource { */ private $mPrintable = false; + /** + * @var array sections from ParserOutput + */ + private $mSections = []; + /** * @var array Contains the page subtitle. Special pages usually have some * links here. Don't confuse with site subtitle added by skins. @@ -1880,10 +1885,28 @@ class OutputPage extends ContextSource { ] ); } + /** + * Adds sections to OutputPage from ParserOutput + * @param array $sections + * @internal For use by Article.php + */ + public function setSections( array $sections ) { + $this->mSections = $sections; + } + + /** + * @internal For usage in Skin::getSectionsData() only. + * @return array Array of sections. + * Empty if OutputPage::setSections() has not been called. + */ + public function getSections(): array { + return $this->mSections; + } + /** * Add all metadata associated with a ParserOutput object, but without the actual HTML. This * includes categories, language links, ResourceLoader modules, effects of certain magic words, - * and so on. + * and so on. It does *not* include section information. * * @since 1.24 * @param ParserOutput $parserOutput @@ -1893,13 +1916,18 @@ class OutputPage extends ContextSource { array_merge( $this->mLanguageLinks, $parserOutput->getLanguageLinks() ); $this->addCategoryLinks( $parserOutput->getCategories() ); $this->setIndicators( $parserOutput->getIndicators() ); + + // FIXME: Best practice is for OutputPage to be an accumulator, as + // addParserOutputMetadata() may be called multiple times, but the + // following lines overwrite any previous data. These should + // be migrated to an injection pattern. $this->mNewSectionLink = $parserOutput->getNewSection(); $this->mHideNewSectionLink = $parserOutput->getHideNewSection(); + $this->mNoGallery = $parserOutput->getNoGallery(); if ( !$parserOutput->isCacheable() ) { $this->enableClientCache( false ); } - $this->mNoGallery = $parserOutput->getNoGallery(); $this->mHeadItems = array_merge( $this->mHeadItems, $parserOutput->getHeadItems() ); $this->addModules( $parserOutput->getModules() ); $this->addModuleStyles( $parserOutput->getModuleStyles() ); @@ -1969,6 +1997,9 @@ class OutputPage extends ContextSource { } // Include parser limit report + // FIXME: This should append, rather than overwrite, or else this + // data should be injected into the OutputPage like is done for the + // other page-level things (like OutputPage::setSections()). if ( !$this->limitReportJSData ) { $this->limitReportJSData = $parserOutput->getLimitReportJSData(); } diff --git a/includes/api/ApiParse.php b/includes/api/ApiParse.php index aa3a13521ce..3000632ed65 100644 --- a/includes/api/ApiParse.php +++ b/includes/api/ApiParse.php @@ -491,12 +491,17 @@ class ApiParse extends ApiBase { } if ( isset( $prop['text'] ) ) { + $skin = $context ? $context->getSkin() : null; + $options = $skin ? $skin->getOptions() : [ + 'toc' => true, + ]; $result_array['text'] = $p_result->getText( [ 'allowTOC' => !$params['disabletoc'], + 'injectTOC' => $options['toc'], 'enableSectionEditLinks' => !$params['disableeditsection'], 'wrapperDivClass' => $params['wrapoutputclass'], 'deduplicateStyles' => !$params['disablestylededuplication'], - 'skin' => $context ? $context->getSkin() : null, + 'skin' => $skin, ] ); $result_array[ApiResult::META_BC_SUBELEMENTS][] = 'text'; if ( $context ) { diff --git a/includes/page/Article.php b/includes/page/Article.php index 67af54ea783..7a13f1f16a8 100644 --- a/includes/page/Article.php +++ b/includes/page/Article.php @@ -744,8 +744,13 @@ class Article implements Page { # Ensure that UI elements requiring revision ID have # the correct version information. $outputPage->setRevisionId( $pOutput->getCacheRevisionId() ?? $this->getRevIdFetched() ); - - $outputPage->addParserOutput( $pOutput, $textOptions ); + # Ensure that the skin has the necessary ToC information + # (and do this before OutputPage::addParserOutput() calls the + # OutputPageParserOutput hook) + $outputPage->setSections( $pOutput->getSections() ); + $outputPage->addParserOutput( $pOutput, $textOptions + [ + 'injectTOC' => true, + ] ); # Preload timestamp to avoid a DB hit $cachedTimestamp = $pOutput->getTimestamp(); if ( $cachedTimestamp !== null ) { diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 1f20b91bc23..10660b4058a 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -149,10 +149,46 @@ class Parser { public const MARKER_SUFFIX = "-QINU`\"'\x7f"; public const MARKER_PREFIX = "\x7f'\"`UNIQ-"; - # Markers used for wrapping the table of contents + /** + * Internal Markers used for wrapping the table of contents. + * + * The use of the `mw:` prefix makes sure that the table of contents is + * identified as a block element, and prevents the introduction of `p` tags + * wrapping the table of contents; see BlockLevelPass. + * + * @var string + * @deprecated since 1.38. These markers are used in old cached + * content but not generated from the current parser (or from Parsoid). + * The constants will be removed in a future MediaWiki release. + */ public const TOC_START = ''; + + /** + * See ::TOC_START + * @var string + * @deprecated since 1.38. See ::TOC_START + */ public const TOC_END = ''; + /** + * Internal marker used by parser to track where the table of + * contents should be. Various magic words can change the position + * during the parse. The table of contents is generated during + * the parse, however skins have the final decision on whether the + * table of contents is injected. This placeholder element + * identifies where in the page the table of contents should be + * injected, if at all. + * @var string + * @see Keep this in sync with BlockLevelPass::execute() and + * RemexCompatMunger::isTableOfContentsMarker() + * @internal This will be made private as soon as old content + * has expired from the cache (at the moment it is needed in + * ParserOutput for a compatibility fallback). Skins should + * *not* directly reference TOC_PLACEHOLDER but instead use + * Parser::replaceTableOfContentsMarker(). + */ + public const TOC_PLACEHOLDER = ''; + # Persistent: private $mTagHooks = []; private $mFunctionHooks = []; @@ -4049,7 +4085,7 @@ class Parser { $this->mForceTocPosition = true; # Set a placeholder. At the end we'll fill it in with the TOC. - $text = $mw->replace( '', $text, 1 ); + $text = $mw->replace( self::TOC_PLACEHOLDER, $text, 1 ); # Only keep the first one. $text = $mw->replace( '', $text ); @@ -4457,7 +4493,6 @@ class Parser { } $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() ); $this->mOutput->setTOCHTML( $toc ); - $toc = self::TOC_START . $toc . self::TOC_END; } if ( $isMain ) { @@ -4496,16 +4531,12 @@ class Parser { if ( $enoughToc && $isMain && !$this->mForceTocPosition ) { // append the TOC at the beginning // Top anchor now in skin - $sections[0] .= $toc . "\n"; + $sections[0] .= self::TOC_PLACEHOLDER . "\n"; } $full .= implode( '', $sections ); - if ( $this->mForceTocPosition ) { - return str_replace( '', $toc, $full ); - } else { - return $full; - } + return $full; } /** @@ -4766,6 +4797,30 @@ class Parser { return $text; } + /** + * Replace table of contents marker in parsed HTML. + * + * Used to remove or replace the marker. This method should be + * used instead of direct access to Parser::TOC_PLACEHOLDER, since + * in the future the placeholder might have additional attributes + * attached which should be ignored when the replacement is made. + * + * @since 1.38 + * @stable + * + * @param string $text Parsed HTML + * @param string $toc HTML table of contents string, or else an empty + * string to remove the marker. + * @return string Result HTML + */ + public static function replaceTableOfContentsMarker( $text, $toc ) { + return str_replace( + self::TOC_PLACEHOLDER, + $toc, + $text + ); + } + /** * Set up some variables which are usually set up in parse() * so that an external function can call some class members with confidence diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php index 5a7f6346f49..19ef812ec5c 100644 --- a/includes/parser/ParserOutput.php +++ b/includes/parser/ParserOutput.php @@ -333,6 +333,10 @@ class ParserOutput extends CacheTime { * - allowTOC: (bool) Show the TOC, assuming there were enough headings * to generate one and `__NOTOC__` wasn't used. Default is true, * but might be statefully overridden. + * - injectTOC: (bool) Replace the TOC_PLACEHOLDER with TOC contents; + * otherwise the marker will be left in the article (and the skin + * will be responsible for replacing or removing it). Default is + * true. * - enableSectionEditLinks: (bool) Include section edit links, assuming * section edit link tokens are present in the HTML. Default is true, * but might be statefully overridden. @@ -356,6 +360,7 @@ class ParserOutput extends CacheTime { public function getText( $options = [] ) { $options += [ 'allowTOC' => true, + 'injectTOC' => true, 'enableSectionEditLinks' => true, 'skin' => null, 'unwrap' => false, @@ -371,7 +376,9 @@ class ParserOutput extends CacheTime { } if ( $options['enableSectionEditLinks'] ) { - // TODO: Passing the skin should be required + // TODO: Skin should not be required. + // It would be better to define one or more narrow interfaces to use here, + // so this code doesn't have to depend on all of Skin. $skin = $options['skin'] ?: RequestContext::getMain()->getSkin(); $text = preg_replace_callback( @@ -409,8 +416,32 @@ class ParserOutput extends CacheTime { } if ( $options['allowTOC'] ) { - $text = str_replace( [ Parser::TOC_START, Parser::TOC_END ], '', $text ); + if ( $options['injectTOC'] ) { + // XXX Use DI to inject this once ::getText() is moved out + // of ParserOutput. + $tidy = MediaWikiServices::getInstance()->getTidy(); + $toc = $tidy->tidy( + $this->getTOCHTML(), + [ Sanitizer::class, 'armorFrenchSpaces' ] + ); + $text = Parser::replaceTableOfContentsMarker( $text, $toc ); + // The line below can be removed once old content has expired + // from the parser cache + $text = str_replace( [ Parser::TOC_START, Parser::TOC_END ], '', $text ); + } else { + // The line below can be removed once old content has expired + // from the parser cache (and Parser::TOC_PLACEHOLDER should + // then be made private) + $text = preg_replace( + '#' . preg_quote( Parser::TOC_START, '#' ) . '.*?' . preg_quote( Parser::TOC_END, '#' ) . '#s', + Parser::TOC_PLACEHOLDER, + $text + ); + } } else { + $text = Parser::replaceTableOfContentsMarker( $text, '' ); + // The line below can be removed once old content has expired + // from the parser cache $text = preg_replace( '#' . preg_quote( Parser::TOC_START, '#' ) . '.*?' . preg_quote( Parser::TOC_END, '#' ) . '#s', '', @@ -634,6 +665,9 @@ class ParserOutput extends CacheTime { return $this->mTitleText; } + /** + * @return array + */ public function getSections() { return $this->mSections; } diff --git a/includes/skins/Skin.php b/includes/skins/Skin.php index 15f729f097e..7f8eac403ac 100644 --- a/includes/skins/Skin.php +++ b/includes/skins/Skin.php @@ -90,6 +90,40 @@ abstract class Skin extends ContextSource { return self::VERSION_MAJOR; } + /** + * Enriches section data with has-subsections and is-last-subsection + * properties so that the table of contents can be rendered in Mustache. + * + * Example Mustache template code: + * + * + * @return array + */ + private function getSectionsData(): array { + $sections = $this->getOutput()->getSections(); + $data = []; + $parent = null; + $lastLevel = 0; + foreach ( $sections as $i => $section ) { + $nextSection = $sections[$i + 1] ?? null; + $level = $section['toclevel']; + + $data[] = $section + [ + 'has-subsections' => $nextSection !== null && $nextSection['toclevel'] > $level, + 'is-last-item' => $nextSection === null || $nextSection['toclevel'] < $level, + ]; + } + + return $data; + } + /** * Subclasses may extend this method to add additional * template data. @@ -113,6 +147,9 @@ abstract class Skin extends ContextSource { $out = $this->getOutput(); $user = $this->getUser(); $data = [ + // Array values + 'array-sections' => $this->getSectionsData(), + // Boolean values 'is-anon' => $user->isAnon(), 'is-article' => $out->isArticle(), @@ -185,6 +222,10 @@ abstract class Skin extends ContextSource { * tag can be set on the skin. Note, users can disable this feature via user * preference. * `link` an array of link options that will be used in makeLink calls. See Skin::makeLink + * `toc` Whether a table of contents is included in the main article + * content area. It defaults to `true`, but if your skins wants to + * place a table of contents elsewhere (for example, in a sidebar), + * set `toc` to `false`. */ public function __construct( $options = null ) { if ( is_string( $options ) ) { @@ -199,6 +240,7 @@ abstract class Skin extends ContextSource { if ( isset( $options['link'] ) ) { $this->defaultLinkOptions = $options['link']; } + // Defaults are set in Skin::getOptions() $this->options = $options; $this->skinname = $name; } @@ -2578,8 +2620,12 @@ abstract class Skin extends ContextSource { * @internal * @return array Skin options passed into constructor */ - public function getOptions(): array { - return $this->options; + final public function getOptions(): array { + return $this->options + [ + // Whether the table of contents will be inserted on page views + // See ParserOutput::getText() for the implementation logic + 'toc' => true, + ]; } /** diff --git a/tests/phpunit/includes/parser/ParserOutputTest.php b/tests/phpunit/includes/parser/ParserOutputTest.php index f02f0904b0b..86029cf8313 100644 --- a/tests/phpunit/includes/parser/ParserOutputTest.php +++ b/tests/phpunit/includes/parser/ParserOutputTest.php @@ -195,6 +195,7 @@ class ParserOutputTest extends MediaWikiLangTestCase { /** * @covers ParserOutput::getText * @dataProvider provideGetText + * @dataProvider provideGetTextBackCompat * @param array $options Options to getText() * @param string $text Parser text * @param string $expect Expected output @@ -207,16 +208,15 @@ class ParserOutputTest extends MediaWikiLangTestCase { ] ); $po = new ParserOutput( $text ); + $po->setTOCHTML( self::provideGetTextToC() ); $actual = $po->getText( $options ); $this->assertSame( $expect, $actual ); } - public static function provideGetText() { + public static function provideGetTextToC() { // phpcs:disable Generic.Files.LineLength - $text = <<Test document. -

-

Contents

+ $toc = <<

Contents

-
+ +EOF; + return $toc; + } + + // REMOVE THIS ONCE Parser::TOC_START IS REMOVED + public static function provideGetTextBackCompat() { + // phpcs:disable Generic.Files.LineLength + $toc = self::provideGetTextToc(); + $text = <<Test document. +

+$toc +

Section 1Section 1

+

One +

+

Section 2Section 2

+

Two +

+

Section 2.1Section 2.1

+

Two point one +

+

Section 3Section 3

+

Three +

+EOF; + + return [ + 'No options (mw:toc)' => [ + [], $text, <<Test document. +

+ + +

Section 1[edit]

+

One +

+

Section 2[edit]

+

Two +

+

Section 2.1

+

Two point one +

+

Section 3[edit]

+

Three +

+EOF + ], + 'Disable section edit links (mw:toc)' => [ + [ 'enableSectionEditLinks' => false ], $text, <<Test document. +

+ + +

Section 1

+

One +

+

Section 2

+

Two +

+

Section 2.1

+

Two point one +

+

Section 3

+

Three +

+EOF + ], + 'Disable TOC, but wrap (mw:toc)' => [ + [ 'allowTOC' => false, 'wrapperDivClass' => 'mw-parser-output' ], $text, <<

Test document. +

+ +

Section 1[edit]

+

One +

+

Section 2[edit]

+

Two +

+

Section 2.1

+

Two point one +

+

Section 3[edit]

+

Three +

+EOF + ], + ]; + // phpcs:enable + } + + public static function provideGetText() { + // phpcs:disable Generic.Files.LineLength + $toc = self::provideGetTextToc(); + $text = <<Test document. +

+

Section 1Section 1

One

diff --git a/tests/phpunit/includes/skins/SkinTemplateTest.php b/tests/phpunit/includes/skins/SkinTemplateTest.php index 562547dfd1c..20e2a0600ee 100644 --- a/tests/phpunit/includes/skins/SkinTemplateTest.php +++ b/tests/phpunit/includes/skins/SkinTemplateTest.php @@ -205,6 +205,126 @@ class SkinTemplateTest extends MediaWikiIntegrationTestCase { ); } + public function provideGetSectionsData(): array { + $END_SUBSECTION = [ + 'is-last-item' => true, + ]; + $NOT_END_SUBSECTION = [ + 'is-last-item' => false, + ]; + $WITH_SUBSECTIONS = [ + 'has-subsections' => true, + ]; + $WITHOUT_SUBSECTIONS = [ + 'has-subsections' => false, + ]; + // byteoffset and fromtitle are redacted from this test. + $SECTION_1 = [ + 'toclevel' => 1, + 'line' => 'Section 1', + 'anchor' => 'section_1', + ]; + $SECTION_1_1 = [ + 'toclevel' => 2, + 'line' => 'Section 1.1', + 'anchor' => 'section_1_1', + ]; + $SECTION_1_2 = [ + 'toclevel' => 2, + 'line' => 'Section 1.2', + 'anchor' => 'section_1_2', + ]; + $SECTION_1_2_1 = [ + 'toclevel' => 3, + 'line' => 'Section 1.2.1', + 'anchor' => 'section_1_2_1', + ]; + $SECTION_1_3 = [ + 'toclevel' => 2, + 'line' => 'Section 1.3', + 'anchor' => 'section_1_3', + ]; + $SECTION_2 = [ + 'toclevel' => 1, + 'line' => 'Section 2', + 'anchor' => 'section_2', + ]; + + return [ + [ + // sections data + [], + [] + ], + [ + [ + $SECTION_1, + $SECTION_2, + ], + [ + $SECTION_1 + $NOT_END_SUBSECTION + $WITHOUT_SUBSECTIONS, + $SECTION_2 + $END_SUBSECTION + $WITHOUT_SUBSECTIONS, + ] + ], + [ + [ + $SECTION_1, + $SECTION_1_1, + $SECTION_2, + ], + [ + $SECTION_1 + $NOT_END_SUBSECTION + $WITH_SUBSECTIONS, + $SECTION_1_1 + $END_SUBSECTION + $WITHOUT_SUBSECTIONS, + $SECTION_2 + $END_SUBSECTION + $WITHOUT_SUBSECTIONS, + ] + ], + [ + [ + $SECTION_1, + $SECTION_1_1, + $SECTION_1_2, + $SECTION_1_2_1, + $SECTION_1_3, + $SECTION_2, + ], + [ + $SECTION_1 + $NOT_END_SUBSECTION + $WITH_SUBSECTIONS, + $SECTION_1_1 + $NOT_END_SUBSECTION + $WITHOUT_SUBSECTIONS, + $SECTION_1_2 + $NOT_END_SUBSECTION + $WITH_SUBSECTIONS, + $SECTION_1_2_1 + $END_SUBSECTION + $WITHOUT_SUBSECTIONS, + $SECTION_1_3 + $END_SUBSECTION + $WITHOUT_SUBSECTIONS, + $SECTION_2 + $END_SUBSECTION + $WITHOUT_SUBSECTIONS, + ] + ] + ]; + } + + /** + * @covers Skin::getSectionsData + * @dataProvider provideGetSectionsData + * + * @param array $sectionsData + * @param array $expected + */ + public function testGetSectionsData( $sectionsData, $expected ) { + $skin = new SkinTemplate(); + $context = new DerivativeContext( $skin->getContext() ); + $mock = $this->createMock( OutputPage::class ); + $mock->expects( $this->any() ) + ->method( 'getSections' ) + ->willReturn( $sectionsData ); + + $reflectionMethod = new ReflectionMethod( Skin::class, 'getSectionsData' ); + $reflectionMethod->setAccessible( true ); + + $context->setOutput( $mock ); + $skin->setContext( $context ); + $data = $reflectionMethod->invoke( + $skin + ); + $this->assertEquals( $data, $expected ); + } + public function provideContentNavigation(): array { return [ 'No userpage set' => [