Remove codepaths which ran parser in 'untidy' mode

Disabling tidy has been deprecated since 1.33.  This cleans up the code
paths which still used untidy output.

Bug: T198214
Change-Id: I821ef3b8f59b272d983583d407b2f0794fe1e791
This commit is contained in:
C. Scott Ananian 2020-04-01 17:24:13 -04:00 committed by C. Scott Ananian
parent f92b914449
commit 83a22b7fcd
14 changed files with 45 additions and 327 deletions

View file

@ -2562,7 +2562,7 @@ TODO: No more strip, deprecated ?
&$text: text being parsed
[&]$stripState: stripState used (object)
'ParserAfterTidy': Called after Parser::tidy() in Parser::parse()
'ParserAfterTidy': Called after MWTidy::tidy() in Parser::parse()
[&]$parser: Parser object being used
&$text: text that will be returned

View file

@ -593,16 +593,7 @@ class ApiParse extends ApiBase {
$popts->enableLimitReport( !$params['disablepp'] && !$params['disablelimitreport'] );
$popts->setIsPreview( $params['preview'] || $params['sectionpreview'] );
$popts->setIsSectionPreview( $params['sectionpreview'] );
if ( $params['disabletidy'] ) {
// Don't spam the log w/ deprecation warnings caused by API requests
// The API request is already returning a deprecation warning to
// the caller.
Wikimedia\suppressWarnings();
wfDeprecated( 'disabling tidy', '1.33' );
Wikimedia\restoreWarnings();
// Allow this for now.
$popts->setTidy( false );
}
if ( $params['wrapoutputclass'] !== '' ) {
$popts->setWrapOutputClass( $params['wrapoutputclass'] );
}
@ -946,10 +937,6 @@ class ApiParse extends ApiBase {
],
'disablelimitreport' => false,
'disableeditsection' => false,
'disabletidy' => [
ApiBase::PARAM_DFLT => false,
ApiBase::PARAM_DEPRECATED => true, // Since 1.32
],
'disablestylededuplication' => false,
'generatexml' => [
ApiBase::PARAM_DFLT => false,

View file

@ -401,7 +401,6 @@
"apihelp-parse-param-disablelimitreport": "Omit the limit report (\"NewPP limit report\") from the parser output.",
"apihelp-parse-param-disablepp": "Use <var>$1disablelimitreport</var> instead.",
"apihelp-parse-param-disableeditsection": "Omit edit section links from the parser output.",
"apihelp-parse-param-disabletidy": "Do not run HTML cleanup (e.g. tidy) on the parser output.",
"apihelp-parse-param-disablestylededuplication": "Do not deduplicate inline stylesheets in the parser output.",
"apihelp-parse-param-generatexml": "Generate XML parse tree (requires content model <code>$1</code>; replaced by <kbd>$2prop=parsetree</kbd>).",
"apihelp-parse-param-preview": "Parse in preview mode.",

View file

@ -381,7 +381,6 @@
"apihelp-parse-param-disablelimitreport": "{{doc-apihelp-param|parse|disablelimitreport}}",
"apihelp-parse-param-disablepp": "{{doc-apihelp-param|parse|disablepp}}",
"apihelp-parse-param-disableeditsection": "{{doc-apihelp-param|parse|disableeditsection}}",
"apihelp-parse-param-disabletidy": "{{doc-apihelp-param|parse|disabletidy}}",
"apihelp-parse-param-disablestylededuplication": "{{doc-apihelp-param|parse|disablestylededuplication}}",
"apihelp-parse-param-generatexml": "{{doc-apihelp-param|parse|generatexml|params=* $1 - Value of the constant CONTENT_MODEL_WIKITEXT|paramstart=2}}",
"apihelp-parse-param-preview": "{{doc-apihelp-param|parse|preview}}",

View file

@ -41,10 +41,6 @@ class MWTidy {
*/
public static function tidy( $text ) {
$driver = self::singleton();
if ( !$driver ) {
throw new MWException( __METHOD__ .
': tidy is disabled, caller should have checked MWTidy::isEnabled()' );
}
return $driver->tidy( $text );
}
@ -52,7 +48,7 @@ class MWTidy {
* @return bool
*/
public static function isEnabled() {
return self::singleton() !== false;
return true;
}
/**
@ -77,16 +73,6 @@ class MWTidy {
return new MediaWiki\Tidy\RemexDriver( $config ?? [] );
}
/**
* Set the driver to be used. This is for testing.
* @param MediaWiki\Tidy\TidyDriverBase|false|null $instance
* @deprecated Since 1.33
*/
public static function setInstance( $instance ) {
wfDeprecated( __METHOD__, '1.33' );
self::$instance = $instance;
}
/**
* Destroy the current singleton instance
*/

View file

@ -1617,41 +1617,7 @@ class Parser {
$text = Sanitizer::normalizeCharReferences( $text );
if ( MWTidy::isEnabled() ) {
if ( $this->mOptions->getTidy() ) {
$text = MWTidy::tidy( $text );
} else {
wfDeprecated( 'disabling tidy', '1.33' );
}
} else {
# attempt to sanitize at least some nesting problems
# (T4702 and quite a few others)
# This code path is buggy and deprecated!
wfDeprecated( 'disabling tidy', '1.33' );
$tidyregs = [
# ''Something [http://www.cool.com cool''] -->
# <i>Something</i><a href="http://www.cool.com"..><i>cool</i></a>
'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
# fix up an anchor inside another anchor, only
# at least for a single nested link (T5695)
'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
'\\1\\2</a>\\3</a>\\1\\4</a>',
# fix div inside inline elements- doBlockLevels won't wrap a line which
# contains a div, so fix it up here; replace
# div with escaped text
'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
# remove empty italic or bold tag pairs, some
# introduced by rules above
'/<([bi])><\/\\1>/' => '',
];
$text = preg_replace(
array_keys( $tidyregs ),
array_values( $tidyregs ),
$text );
}
$text = MWTidy::tidy( $text );
if ( $isMain ) {
Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );

View file

@ -512,187 +512,49 @@ class Sanitizer {
$text = self::removeHTMLcomments( $text );
$bits = explode( '<', $text );
$text = str_replace( '>', '&gt;', array_shift( $bits ) );
if ( !MWTidy::isEnabled() ) {
wfDeprecated( 'disabling tidy', '1.33' );
$tagstack = $tablestack = [];
foreach ( $bits as $x ) {
$regs = [];
# $slash: Does the current element start with a '/'?
# $t: Current element name
# $params: String between element name and >
# $brace: Ending '>' or '/>'
# $rest: Everything until the next element of $bits
if ( preg_match( self::ELEMENT_BITS_REGEX, $x, $regs ) ) {
list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
} else {
$slash = $t = $params = $brace = $rest = null;
}
# this might be possible using remex tidy itself
foreach ( $bits as $x ) {
if ( preg_match( self::ELEMENT_BITS_REGEX, $x, $regs ) ) {
list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
$badtag = false;
$t = strtolower( $t );
if ( isset( $htmlelements[$t] ) ) {
# Check our stack
if ( $slash && isset( $htmlsingleonly[$t] ) ) {
$badtag = true;
} elseif ( $slash ) {
# Closing a tag... is it the one we just opened?
Wikimedia\suppressWarnings();
$ot = array_pop( $tagstack );
Wikimedia\restoreWarnings();
if ( $ot != $t ) {
if ( isset( $htmlsingleallowed[$ot] ) ) {
# Pop all elements with an optional close tag
# and see if we find a match below them
$optstack = [];
array_push( $optstack, $ot );
Wikimedia\suppressWarnings();
$ot = array_pop( $tagstack );
Wikimedia\restoreWarnings();
while ( $ot != $t && isset( $htmlsingleallowed[$ot] ) ) {
array_push( $optstack, $ot );
Wikimedia\suppressWarnings();
$ot = array_pop( $tagstack );
Wikimedia\restoreWarnings();
}
if ( $t != $ot ) {
# No match. Push the optional elements back again
$badtag = true;
Wikimedia\suppressWarnings();
$ot = array_pop( $optstack );
Wikimedia\restoreWarnings();
while ( $ot ) {
array_push( $tagstack, $ot );
Wikimedia\suppressWarnings();
$ot = array_pop( $optstack );
Wikimedia\restoreWarnings();
}
}
} else {
Wikimedia\suppressWarnings();
array_push( $tagstack, $ot );
Wikimedia\restoreWarnings();
# <li> can be nested in <ul> or <ol>, skip those cases:
if ( !isset( $htmllist[$ot] ) || !isset( $listtags[$t] ) ) {
$badtag = true;
}
}
} elseif ( $t == 'table' ) {
$tagstack = array_pop( $tablestack );
}
$newparams = '';
} else {
# Keep track for later
if ( isset( $tabletags[$t] ) && !in_array( 'table', $tagstack ) ) {
$badtag = true;
} elseif ( in_array( $t, $tagstack ) && !isset( $htmlnest[$t] ) ) {
$badtag = true;
# Is it a self-closed htmlpair? (T7487)
} elseif ( $brace == '/>' && isset( $htmlpairs[$t] ) ) {
// Eventually we'll just remove the self-closing
// slash, in order to be consistent with HTML5
// semantics.
// $brace = '>';
// For now, let's just warn authors to clean up.
if ( is_callable( $warnCallback ) ) {
call_user_func_array( $warnCallback, [ 'deprecated-self-close-category' ] );
}
$badtag = true;
} elseif ( isset( $htmlsingleonly[$t] ) ) {
# Hack to force empty tag for unclosable elements
$brace = '/>';
} elseif ( isset( $htmlsingle[$t] ) ) {
# Hack to not close $htmlsingle tags
$brace = null;
# Still need to push this optionally-closed tag to
# the tag stack so that we can match end tags
# instead of marking them as bad.
array_push( $tagstack, $t );
} elseif ( isset( $tabletags[$t] ) && in_array( $t, $tagstack ) ) {
// New table tag but forgot to close the previous one
$text .= "</$t>";
} else {
if ( $t == 'table' ) {
array_push( $tablestack, $tagstack );
$tagstack = [];
}
array_push( $tagstack, $t );
}
# Replace any variables or template parameters with
# plaintext results.
if ( is_callable( $processCallback ) ) {
call_user_func_array( $processCallback, [ &$params, $args ] );
}
if ( !self::validateTag( $params, $t ) ) {
$badtag = true;
}
# Strip non-approved attributes from the tag
$newparams = self::fixTagAttributes( $params, $t );
if ( is_callable( $processCallback ) ) {
call_user_func_array( $processCallback, [ &$params, $args ] );
}
if ( $brace == '/>' && !( isset( $htmlsingle[$t] ) || isset( $htmlsingleonly[$t] ) ) ) {
// Eventually we'll just remove the self-closing
// slash, in order to be consistent with HTML5
// semantics.
// $brace = '>';
// For now, let's just warn authors to clean up.
if ( is_callable( $warnCallback ) ) {
call_user_func_array( $warnCallback, [ 'deprecated-self-close-category' ] );
}
}
if ( !self::validateTag( $params, $t ) ) {
$badtag = true;
}
$newparams = self::fixTagAttributes( $params, $t );
if ( !$badtag ) {
if ( $brace === '/>' && !isset( $htmlsingleonly[$t] ) ) {
# Interpret self-closing tags as empty tags even when
# HTML 5 would interpret them as start tags. Such input
# is commonly seen on Wikimedia wikis with this intention.
$brace = "></$t>";
}
$rest = str_replace( '>', '&gt;', $rest );
$close = ( $brace == '/>' && !$slash ) ? ' /' : '';
$text .= "<$slash$t$newparams$close>$rest";
$text .= "<$slash$t$newparams$brace$rest";
continue;
}
}
$text .= '&lt;' . str_replace( '>', '&gt;', $x );
}
# Close off any remaining tags
while ( is_array( $tagstack ) && ( $t = array_pop( $tagstack ) ) ) {
$text .= "</$t>\n";
if ( $t == 'table' ) {
$tagstack = array_pop( $tablestack );
}
}
} else {
# this might be possible using tidy itself
foreach ( $bits as $x ) {
if ( preg_match( self::ELEMENT_BITS_REGEX, $x, $regs ) ) {
list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
$badtag = false;
$t = strtolower( $t );
if ( isset( $htmlelements[$t] ) ) {
if ( is_callable( $processCallback ) ) {
call_user_func_array( $processCallback, [ &$params, $args ] );
}
if ( $brace == '/>' && !( isset( $htmlsingle[$t] ) || isset( $htmlsingleonly[$t] ) ) ) {
// Eventually we'll just remove the self-closing
// slash, in order to be consistent with HTML5
// semantics.
// $brace = '>';
// For now, let's just warn authors to clean up.
if ( is_callable( $warnCallback ) ) {
call_user_func_array( $warnCallback, [ 'deprecated-self-close-category' ] );
}
}
if ( !self::validateTag( $params, $t ) ) {
$badtag = true;
}
$newparams = self::fixTagAttributes( $params, $t );
if ( !$badtag ) {
if ( $brace === '/>' && !isset( $htmlsingleonly[$t] ) ) {
# Interpret self-closing tags as empty tags even when
# HTML 5 would interpret them as start tags. Such input
# is commonly seen on Wikimedia wikis with this intention.
$brace = "></$t>";
}
$rest = str_replace( '>', '&gt;', $rest );
$text .= "<$slash$t$newparams$brace$rest";
continue;
}
}
}
$text .= '&lt;' . str_replace( '>', '&gt;', $x );
}
$text .= '&lt;' . str_replace( '>', '&gt;', $x );
}
return $text;
}

View file

@ -118,11 +118,8 @@ class SpecialExpandTemplates extends SpecialPage {
}
$config = $this->getConfig();
if ( MWTidy::isEnabled() && $options->getTidy() ) {
$tmp = MWTidy::tidy( $tmp );
} else {
wfDeprecated( 'disabling tidy', '1.33' );
}
$tmp = MWTidy::tidy( $tmp );
$out->addHTML( $tmp );

View file

@ -58,7 +58,6 @@ class CompareParsers extends DumpIterator {
$this->addDescription( 'Run a file or dump with several parsers' );
$this->addOption( 'parser1', 'The first parser to compare.', true, true );
$this->addOption( 'parser2', 'The second parser to compare.', true, true );
$this->addOption( 'tidy', 'Run tidy on the articles.', false, false );
$this->addOption(
'save-failed',
'Folder in which articles which differ will be stored.',
@ -106,13 +105,6 @@ class CompareParsers extends DumpIterator {
$user = new User();
$this->options = ParserOptions::newFromUser( $user );
if ( $this->hasOption( 'tidy' ) ) {
if ( !MWTidy::isEnabled() ) {
$this->fatalError( 'Tidy was requested but $wgTidyConfig is not set in LocalSettings.php' );
}
$this->options->setTidy( true );
}
$this->failed = 0;
}

View file

@ -71,7 +71,6 @@ class CLIParser extends Maintenance {
false,
true
);
$this->addOption( 'no-tidy', 'Don\'t tidy the output (deprecated)' );
$this->addArg( 'file', 'File containing wikitext (Default: stdin)', false );
}
@ -129,9 +128,6 @@ class CLIParser extends Maintenance {
protected function parse( $wikitext ) {
$options = ParserOptions::newCanonical();
$options->setOption( 'enableLimitReport', false );
if ( $this->getOption( 'no-tidy' ) ) {
$options->setTidy( false );
}
return $this->parser->parse(
$wikitext,
$this->getTitle(),

View file

@ -27,7 +27,6 @@
*/
use MediaWiki\MediaWikiServices;
use MediaWiki\Tidy\TidyDriverBase;
use Wikimedia\Rdbms\IDatabase;
use Wikimedia\ScopedCallback;
use Wikimedia\TestingAccessWrapper;
@ -76,11 +75,6 @@ class ParserTestRunner {
*/
private $dbClone;
/**
* @var TidyDriverBase
*/
private $tidyDriver = null;
/**
* @var TestRecorder
*/
@ -832,10 +826,6 @@ class ParserTestRunner {
$options = ParserOptions::newFromContext( $context );
$options->setTimestamp( $this->getFakeTimestamp() );
if ( isset( $opts['tidy'] ) ) {
$options->setTidy( true );
}
$revId = 1337; // see Parser::getRevisionId()
$title = isset( $opts['title'] )
? Title::newFromText( $opts['title'] )
@ -920,9 +910,7 @@ class ParserTestRunner {
'allowTOC' => !isset( $opts['notoc'] ),
'unwrap' => !isset( $opts['wrap'] ),
] );
if ( isset( $opts['tidy'] ) ) {
$out = preg_replace( '/\s+$/', '', $out );
}
$out = preg_replace( '/\s+$/', '', $out );
if ( isset( $opts['showtitle'] ) ) {
if ( $output->getTitleText() ) {
@ -1147,28 +1135,6 @@ class ParserTestRunner {
/** @since 1.20 */
Hooks::run( 'ParserTestGlobals', [ &$setup ] );
// Create tidy driver
if ( isset( $opts['tidy'] ) ) {
// Cache a driver instance
if ( $this->tidyDriver === null ) {
$this->tidyDriver = MWTidy::factory();
}
$tidy = $this->tidyDriver;
} else {
$tidy = false;
}
# Suppress warnings about running tests without tidy
Wikimedia\suppressWarnings();
wfDeprecated( 'disabling tidy' );
wfDeprecated( 'MWTidy::setInstance' );
Wikimedia\restoreWarnings();
MWTidy::setInstance( $tidy );
$teardown[] = function () {
MWTidy::destroySingleton();
};
// Set content language. This invalidates the magic word cache and title services
$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( $langCode );
$lang->resetNamespaces();

View file

@ -157,12 +157,6 @@ class TestFileReader {
];
if ( $nonTidySection !== false ) {
// Add non-tidy test
$this->tests[] = [
'result' => $data[$nonTidySection],
'resultSection' => $nonTidySection
] + $commonInfo;
if ( $tidySection !== false ) {
// Add tidy subtest
$this->tests[] = [
@ -172,6 +166,10 @@ class TestFileReader {
'options' => $data['options'] . ' tidy',
'isSubtest' => true,
] + $commonInfo;
} else {
// We can no longer run the non-tidy test, and we don't have
// a tidy alternative.
wfDeprecated( 'skipping non-tidy test', '1.35' );
}
} elseif ( $tidySection !== false ) {
// No need to override desc when there is no subtest

View file

@ -857,29 +857,6 @@ class ApiParseTest extends ApiTestCase {
$this->assertArrayNotHasKey( 'warnings', $res[0] );
}
public function testDisableTidy() {
$this->setMwGlobals( 'wgTidyConfig', [ 'driver' => 'RemexHtml' ] );
// Check that disabletidy doesn't have an effect just because tidying
// doesn't work for some other reason
$res1 = $this->doApiRequest( [
'action' => 'parse',
'text' => "<b>Mixed <i>up</b></i>",
'contentmodel' => 'wikitext',
] );
$this->assertParsedTo( "<p><b>Mixed <i>up</i></b>\n</p>", $res1 );
$res2 = $this->doApiRequest( [
'action' => 'parse',
'text' => "<b>Mixed <i>up</b></i>",
'contentmodel' => 'wikitext',
'disabletidy' => '',
] );
$this->assertParsedTo( "<p><b>Mixed <i>up</b></i>\n</p>", $res2,
'The parameter "disabletidy" has been deprecated.' );
}
public function testFormatCategories() {
$name = ucfirst( __FUNCTION__ );

View file

@ -20,17 +20,13 @@ class SanitizerTest extends MediaWikiTestCase {
* @param bool $escaped Whether the sanitizer lets the tag in or escapes it (i.e. '&lt;video&gt;')
*/
public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
$this->hideDeprecated( 'disabling tidy' );
$this->hideDeprecated( 'MWTidy::setInstance' );
MWTidy::setInstance( false );
if ( $escaped ) {
$this->assertEquals( "&lt;$tag&gt;",
Sanitizer::removeHTMLtags( "<$tag>" )
);
} else {
$this->assertEquals( "<$tag></$tag>\n",
Sanitizer::removeHTMLtags( "<$tag>" )
Sanitizer::removeHTMLtags( "<$tag></$tag>\n" )
);
}
}
@ -54,7 +50,7 @@ class SanitizerTest extends MediaWikiTestCase {
// former testSelfClosingTag
[
'<div>Hello world</div />',
'<div>Hello world</div>',
'<div>Hello world</div></div>',
'Self-closing closing div'
],
// Make sure special nested HTML5 semantics are not broken
@ -84,9 +80,6 @@ class SanitizerTest extends MediaWikiTestCase {
* @covers Sanitizer::removeHTMLtags
*/
public function testRemoveHTMLtags( $input, $output, $msg = null ) {
$this->hideDeprecated( 'disabling tidy' );
$this->hideDeprecated( 'MWTidy::setInstance' );
MWTidy::setInstance( false );
$this->assertEquals( $output, Sanitizer::removeHTMLtags( $input ), $msg );
}