diff --git a/includes/linker/Linker.php b/includes/linker/Linker.php index 3b6c5e0a12b..746512caaeb 100644 --- a/includes/linker/Linker.php +++ b/includes/linker/Linker.php @@ -49,6 +49,7 @@ use TitleValue; use User; use WatchedItem; use Wikimedia\IPUtils; +use Wikimedia\Parsoid\Core\SectionMetadata; use Wikimedia\Rdbms\SelectQueryBuilder; use Xml; @@ -1685,12 +1686,16 @@ class Linker { * @param string $tocline * @param string $tocnumber * @param int $level - * @param int|false $sectionIndex + * @param string|false $sectionIndex * @return string */ public static function tocLine( $linkAnchor, $tocline, $tocnumber, $level, $sectionIndex = false ) { $classes = "toclevel-$level"; - if ( $sectionIndex !== false ) { + + // Parser.php used to suppress tocLine by setting $sectionindex to false. + // In those circumstances, we can now encounter '' or a "T-" prefixed index + // for when the section comes from templates. + if ( $sectionIndex !== false && $sectionIndex !== '' && !str_starts_with( $sectionIndex, "T-" ) ) { $classes .= " tocsection-$sectionIndex"; } @@ -1758,29 +1763,43 @@ class Linker { * Generate a table of contents from a section tree. * * @since 1.16.3. $lang added in 1.17 - * @param array[] $tree Return value of ParserOutput::getSections() + * @param array[]|SectionMetadata[] $tree Return value of ParserOutput::getSections() * @param Language|null $lang Language for the toc title, defaults to user language + * @param array $options Optional settings. 'maxtoclevel' (int): only sections whose toclevel is below this value are listed; when absent or null, every section is listed. * @return string HTML fragment */ - public static function generateTOC( $tree, Language $lang = null ) { + public static function generateTOC( $tree, Language $lang = null, array $options = [] ): string { $toc = ''; $lastLevel = 0; + $maxTocLevel = $options['maxtoclevel'] ?? 
PHP_INT_MAX; foreach ( $tree as $section ) { - if ( $section['toclevel'] > $lastLevel ) { - $toc .= self::tocIndent(); - } elseif ( $section['toclevel'] < $lastLevel ) { - $toc .= self::tocUnindent( - $lastLevel - $section['toclevel'] ); - } else { - $toc .= self::tocLineEnd(); + if ( $section instanceof SectionMetadata ) { + $section = $section->toLegacy(); } + $tocLevel = $section['toclevel']; + if ( $tocLevel < $maxTocLevel ) { + if ( $tocLevel > $lastLevel ) { + $toc .= self::tocIndent(); + } elseif ( $tocLevel < $lastLevel ) { + if ( $lastLevel < $maxTocLevel ) { + $toc .= self::tocUnindent( + $lastLevel - $tocLevel ); + } else { + $toc .= self::tocLineEnd(); + } + } else { + $toc .= self::tocLineEnd(); + } - $toc .= self::tocLine( $section['linkAnchor'], - $section['line'], $section['number'], - $section['toclevel'], $section['index'] ); - $lastLevel = $section['toclevel']; + $toc .= self::tocLine( $section['linkAnchor'], + $section['line'], $section['number'], + $tocLevel, $section['index'] ); + $lastLevel = $tocLevel; + } + } + if ( $lastLevel < $maxTocLevel && $lastLevel > 0 ) { + $toc .= self::tocUnindent( $lastLevel - 1 ); } - $toc .= self::tocLineEnd(); return self::tocList( $toc, $lang ); } diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index efe3ff2cc9c..fd3a04a4c67 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -4225,11 +4225,10 @@ class Parser { # headline counter $headlineCount = 0; - $numVisible = 0; + $haveTocEntries = false; # Ugh .. the TOC should have neat indentation levels which can be # passed to the skin functions. These are determined here - $toc = ''; $full = ''; $head = []; $sublevelCount = []; @@ -4237,7 +4236,6 @@ class Parser { $level = 0; $prevlevel = 0; $toclevel = 0; - $prevtoclevel = 0; $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . 
self::MARKER_SUFFIX; $baseTitleText = $this->getTitle()->getPrefixedDBkey(); $oldType = $this->mOutputType; @@ -4275,9 +4273,7 @@ class Parser { $toclevel++; $sublevelCount[$toclevel] = 0; if ( $toclevel < $maxTocLevel ) { - $prevtoclevel = $toclevel; - $toc .= Linker::tocIndent(); - $numVisible++; + $haveTocEntries = true; } } elseif ( $level < $prevlevel && $toclevel > 1 ) { # Decrease TOC level, find level to jump to @@ -4298,20 +4294,6 @@ class Parser { if ( $i == 0 ) { $toclevel = 1; } - if ( $toclevel < $maxTocLevel ) { - if ( $prevtoclevel < $maxTocLevel ) { - # Unindent only if the previous toc level was shown :p - $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel ); - $prevtoclevel = $toclevel; - } else { - $toc .= Linker::tocLineEnd(); - } - } - } else { - # No change in level, end TOC line - if ( $toclevel < $maxTocLevel ) { - $toc .= Linker::tocLineEnd(); - } } $levelCount[$toclevel] = $level; @@ -4422,16 +4404,6 @@ class Parser { $refers[$fallbackArrayKey] = true; } - if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) { - $toc .= Linker::tocLine( - $linkAnchor, - $tocline, - $numbering, - $toclevel, - ( $isTemplate ? false : $sectionIndex ) - ); - } - # Add the section to the section tree # Find the DOM node for this header $noOffset = ( $isTemplate || $sectionIndex === false ); @@ -4454,7 +4426,7 @@ class Parser { $level, $tocline, $numbering, - ( $isTemplate ? 'T-' : '' ) . $sectionIndex, + $sectionIndex === false ? '' : ( ( $isTemplate ? 'T-' : '' ) . $sectionIndex ), $titleText ?: null, ( $noOffset ? 
null : $byteOffset ), $anchor, @@ -4508,16 +4480,11 @@ class Parser { # Never ever show TOC if no headers (or suppressed) $suppressToc = $this->mOptions->getSuppressTOC(); - if ( $numVisible < 1 || $suppressToc ) { + if ( !$haveTocEntries || $suppressToc ) { $enoughToc = false; } if ( $enoughToc ) { - if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) { - $toc .= Linker::tocUnindent( $prevtoclevel - 1 ); - } - $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() ); - $this->mOutput->setTOCHTML( $toc ); // Record the fact that the TOC should be shown. T294950 // (We shouldn't be looking at ::getTOCHTML() for this because // eventually that will be replaced (T293513) and diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php index 51d1ccbf680..3103b03ece8 100644 --- a/includes/parser/ParserOutput.php +++ b/includes/parser/ParserOutput.php @@ -382,6 +382,9 @@ class ParserOutput extends CacheTime implements ContentMetadataCollector { * - enableSectionEditLinks: (bool) Include section edit links, assuming * section edit link tokens are present in the HTML. Default is true, * but might be statefully overridden. + * - userLang: (Language) Language object used for localizing UX messages, + * for example the heading of the table of contents. If omitted, will + * use the language of the main request context. * - skin: (Skin) Skin object used for transforming section edit links. * - unwrap: (bool) Return text without a wrapper div. 
Default is false, * meaning a wrapper div will be added if getWrapperDivClass() returns @@ -406,6 +409,7 @@ class ParserOutput extends CacheTime implements ContentMetadataCollector { 'injectTOC' => true, 'enableSectionEditLinks' => true, 'skin' => null, + 'userLang' => null, 'unwrap' => false, 'deduplicateStyles' => true, 'wrapperDivClass' => $this->getWrapperDivClass(), @@ -466,37 +470,46 @@ class ParserOutput extends CacheTime implements ContentMetadataCollector { if ( $options['allowTOC'] ) { if ( $options['injectTOC'] ) { - $toc = $this->getTOCHTML(); - // language conversion needs to be done on the TOC fetched - // from parser cache - if ( !$this->getOutputFlag( ParserOutputFlags::NO_TOC_CONVERSION ) ) { - // XXX Use DI to inject this once ::getText() is moved out - // of ParserOutput + if ( count( $this->getSections() ) === 0 ) { + $toc = ''; + } else { $services = MediaWikiServices::getInstance(); - $languageFactory = - $services->getLanguageFactory(); - $languageConverterFactory = - $services->getLanguageConverterFactory(); - // T303329: this should migrate out of extension data - $langCode = $this->getExtensionData( 'core:target-lang' ) - // This is a temporary fallback while the ParserCache fills - ?? $services->getContentLanguage()->getCode(); - $langConv = $languageConverterFactory->getLanguageConverter( - $languageFactory->getLanguage( $langCode ) - ); - $variant = $this->getExtensionData( 'core:target-lang-variant' ) - // This is a temporary fallback while the ParserCache fills - ?? $langConv->getPreferredVariant(); - $toc = $langConv->convertTo( $toc, $variant ); - } + $userLang = $options['userLang']; + $skin = $options['skin']; + if ( ( !$userLang ) && $skin ) { + // TODO: See above comment about replacing the use + // of 'skin' here. 
+ $userLang = $skin->getLanguage(); + } + if ( !$userLang ) { + $userLang = RequestContext::getMain()->getLanguage(); + } + $config = $services->getMainConfig(); + $maxTocLevel = $config->get( MainConfigNames::MaxTocLevel ); + $toc = Linker::generateTOC( + $this->getSections(), $userLang, [ "maxtoclevel" => $maxTocLevel ] ); + // language conversion needs to be done on the TOC fetched + // from parser cache + if ( !$this->getOutputFlag( ParserOutputFlags::NO_TOC_CONVERSION ) ) { + $languageFactory = $services->getLanguageFactory(); + $languageConverterFactory = $services->getLanguageConverterFactory(); + // T303329: this should migrate out of extension data + $langCode = $this->getExtensionData( 'core:target-lang' ) + // This is a temporary fallback while the ParserCache fills + ?? $services->getContentLanguage()->getCode(); + $langConv = $languageConverterFactory->getLanguageConverter( + $languageFactory->getLanguage( $langCode ) + ); + $variant = $this->getExtensionData( 'core:target-lang-variant' ) + // This is a temporary fallback while the ParserCache fills + ?? $langConv->getPreferredVariant(); + $toc = $langConv->convertTo( $toc, $variant ); + } - // XXX Use DI to inject this once ::getText() is moved out - // of ParserOutput. - $tidy = MediaWikiServices::getInstance()->getTidy(); - $toc = $tidy->tidy( - $toc, - [ Sanitizer::class, 'armorFrenchSpaces' ] - ); + // XXX Use DI to inject this once ::getText() is moved out of ParserOutput. 
+ $toc = $services->getTidy()->tidy( $toc, [ Sanitizer::class, 'armorFrenchSpaces' ] ); + } + $this->mTOCHTML = $toc; $text = Parser::replaceTableOfContentsMarker( $text, $toc ); // The line below can be removed once old content has expired // from the parser cache @@ -946,6 +959,14 @@ class ParserOutput extends CacheTime implements ContentMetadataCollector { return $old; } + /** + * @internal + * @deprecated since 1.40 + * T293513: We can remove this once we get rid of MW 1.38 and older + * parsercache serialization tests since those serialized + * files have artificial TOC data (which we cannot replicate + * via on-demand TOC generation). + */ public function setTOCHTML( $tochtml ) { return wfSetVar( $this->mTOCHTML, $tochtml ); } diff --git a/tests/phpunit/includes/parser/ParserOutputTest.php b/tests/phpunit/includes/parser/ParserOutputTest.php index 55280652709..284a7a31669 100644 --- a/tests/phpunit/includes/parser/ParserOutputTest.php +++ b/tests/phpunit/includes/parser/ParserOutputTest.php @@ -4,6 +4,7 @@ use MediaWiki\MainConfigNames; use MediaWiki\Page\PageReferenceValue; use MediaWiki\Tests\Parser\ParserCacheSerializationTestCases; use Wikimedia\Parsoid\Core\SectionMetadata; +use Wikimedia\Parsoid\Core\TOCData; use Wikimedia\TestingAccessWrapper; use Wikimedia\Tests\SerializationTestTrait; @@ -256,32 +257,55 @@ class ParserOutputTest extends MediaWikiLangTestCase { ] ); $po = new ParserOutput( $text ); - $po->setTOCHTML( self::provideGetTextToC() ); + self::initSections( $po ); $actual = $po->getText( $options ); $this->assertSame( $expect, $actual ); } - public static function provideGetTextToC() { - $toc = <<

Contents

- - - -EOF; - return $toc; + private static function initSections( ParserOutput $po ): void { + $po->setTOCData( new TOCData( + SectionMetadata::fromLegacy( [ + 'index' => "1", + 'level' => 1, + 'toclevel' => 1, + 'number' => "1", + 'line' => "Section 1", + 'anchor' => "Section_1" + ] ), + SectionMetadata::fromLegacy( [ + 'index' => "2", + 'level' => 1, + 'toclevel' => 1, + 'number' => "2", + 'line' => "Section 2", + 'anchor' => "Section_2" + ] ), + SectionMetadata::fromLegacy( [ + 'index' => "3", + 'level' => 2, + 'toclevel' => 2, + 'number' => "2.1", + 'line' => "Section 2.1", + 'anchor' => "Section_2.1" + ] ), + SectionMetadata::fromLegacy( [ + 'index' => "4", + 'level' => 1, + 'toclevel' => 1, + 'number' => "3", + 'line' => "Section 3", + 'anchor' => "Section_3" + ] ), + ) ); } // REMOVE THIS ONCE Parser::TOC_START IS REMOVED public static function provideGetTextBackCompat() { - $toc = self::provideGetTextToC(); + $dummyPO = new ParserOutput( '' ); + self::initSections( $dummyPO ); + $dummyPO->getText(); // force TOC generation + $toc = $dummyPO->getTOCHTML(); + $text = <<Test document.

@@ -305,7 +329,7 @@ EOF; [], $text, <<Test document.

-

Contents

+