From 059e62cde6cdf4a87dfa45a79191bed531045501 Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Fri, 5 Nov 2021 19:26:00 -0400 Subject: [PATCH] Regression fix: do language conversion on ToC in ParserOutput::getText() We moved the ToC insertion from the parser to ParserOutput::getText() in T287767 but forgot to ensure that the ToC contents are properly language converted -- in the old Parser code, that conversion happens *after* the call to ParserOutput::setTOCHTML(), so the stored ToC HTML is unconverted. This is a quick and dirty fix, which does the language conversion but probably misses a few corner cases of the original behavior (marked by XXX comments). For example, it doesn't disable language conversion on interface messages -- but there shouldn't be any ToC on interface messages. Not heeding __NOCONTENTCONVERT__ in the article is a legit problem, but probably not as bad as the UBN regression we're fixing. We'll clean this up in a follow-up (T295209), but it will involve passing some additional information from the Parser to ParserOutput which won't be present in "old" parser cache entries anyway. This is a UBN and this patch is the quickest way to ensure that existing parser cache content renders correctly. It's preferable to the alternative (Iffcff96fd9b4749794ac78414c1801979a652792) which handles all the corner cases but can't fix up existing parser cache content, which has "always" been stored without language conversion. 
Bug: T295187 Change-Id: Ic14b3a49a8ee7ed600485d4f8a363a206035a847 --- includes/parser/ParserOutput.php | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php index 19ef812ec5c..c0cd9bc5e05 100644 --- a/includes/parser/ParserOutput.php +++ b/includes/parser/ParserOutput.php @@ -417,11 +417,27 @@ class ParserOutput extends CacheTime { if ( $options['allowTOC'] ) { if ( $options['injectTOC'] ) { + $toc = $this->getTOCHTML(); + // language conversion needs to be done on the TOC fetched + // from parser cache + // XXX doesn't check ParserOptions::getDisableContentConversion() + // XXX doesn't check Parser::$mDoubleUnderscores['nocontentconvert'] + // XXX doesn't check ParserOptions::getInterfaceMessage() + // XXX Use DI to inject this once ::getText() is moved out + // of ParserOutput + $services = MediaWikiServices::getInstance(); + $languageConverterFactory = + $services->getLanguageConverterFactory(); + $toc = $languageConverterFactory->getLanguageConverter( + // XXX This was Parser::getTargetLanguage() + $services->getContentLanguage() + )->convert( $toc ); + // XXX Use DI to inject this once ::getText() is moved out // of ParserOutput. $tidy = MediaWikiServices::getInstance()->getTidy(); $toc = $tidy->tidy( - $this->getTOCHTML(), + $toc, [ Sanitizer::class, 'armorFrenchSpaces' ] ); $text = Parser::replaceTableOfContentsMarker( $text, $toc );