Tidy up tidy usage
* There's a branch path in the sanitizer that depends on $wgUseTidy, which means the test output differs from the output on wiki.
* In general, we should set these variables to match the wiki behaviour in tests.
* Exposes T92892: the Sanitizer removes empty tags when tidy is disabled.
* Tweaked tests for T19663 to use an extension tag to show that HTML5 tags with non-word characters make it through the parser intact (before being ultimately sanitized).

Change-Id: I09c72fd739e11a8b757f37dc4c790758d782ad73
This commit is contained in:
parent
510dfa7d9a
commit
78c3f2f4b1
6 changed files with 101 additions and 62 deletions
|
|
@ -174,6 +174,8 @@ production.
|
|||
This requires the fa_sha1 field being populated.
|
||||
* Removed rel="archives" from the "View history" link, as it did not pass
|
||||
HTML validation.
|
||||
* $wgUseTidy is now set when parserTests are run with the tidy option to match
|
||||
output on wiki.
|
||||
|
||||
=== Action API changes in 1.25 ===
|
||||
* (T67403) XML tag highlighting is now only performed for formats
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
* @brief Set options of the Parser
|
||||
*
|
||||
* All member variables are supposed to be private in theory, although in
|
||||
* practise this is not the case.
|
||||
* practice this is not the case.
|
||||
*
|
||||
* @ingroup Parser
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -593,6 +593,14 @@ class ParserTest {
|
|||
}
|
||||
}
|
||||
|
||||
if ( isset( $opts['tidy'] ) ) {
|
||||
if ( !$this->tidySupport->isEnabled() ) {
|
||||
return $this->showSkipped();
|
||||
} else {
|
||||
$options->setTidy( true );
|
||||
}
|
||||
}
|
||||
|
||||
if ( isset( $opts['title'] ) ) {
|
||||
$titleText = $opts['title'];
|
||||
} else {
|
||||
|
|
@ -624,10 +632,6 @@ class ParserTest {
|
|||
$output->setTOCEnabled( !isset( $opts['notoc'] ) );
|
||||
$out = $output->getText();
|
||||
if ( isset( $opts['tidy'] ) ) {
|
||||
if ( !$this->tidySupport->isEnabled() ) {
|
||||
return $this->showSkipped();
|
||||
}
|
||||
$out = MWTidy::tidy( $out );
|
||||
$out = preg_replace( '/\s+$/', '', $out );
|
||||
}
|
||||
|
||||
|
|
@ -877,10 +881,7 @@ class ParserTest {
|
|||
'wgDisableLangConversion' => false,
|
||||
'wgDisableTitleConversion' => false,
|
||||
// Tidy options.
|
||||
// We always set 'wgUseTidy' to false when parsing, but certain
|
||||
// test-running modes still use tidy if available, so ensure
|
||||
// that the tidy-related options are all set to their defaults.
|
||||
'wgUseTidy' => false,
|
||||
'wgUseTidy' => isset( $opts['tidy'] ),
|
||||
'wgAlwaysUseTidy' => false,
|
||||
'wgDebugTidy' => false,
|
||||
'wgTidyConf' => $IP . '/includes/tidy.conf',
|
||||
|
|
|
|||
|
|
@ -1209,6 +1209,8 @@ Ruby markup (W3C-style)
|
|||
!! test
|
||||
Non-word characters don't terminate tag names (bug 17663, 40670, 52022)
|
||||
!! wikitext
|
||||
<blockquote|>a</blockquote>
|
||||
|
||||
<b→> doesn't terminate </b→>
|
||||
|
||||
<bä> doesn't terminate </bä>
|
||||
|
|
@ -1219,7 +1221,8 @@ Non-word characters don't terminate tag names (bug 17663, 40670, 52022)
|
|||
|
||||
<sub-ID#1>
|
||||
!! html
|
||||
<p><b→> doesn't terminate </b→>
|
||||
<p><blockquote|>a</blockquote>
|
||||
</p><p><b→> doesn't terminate </b→>
|
||||
</p><p><bä> doesn't terminate </bä>
|
||||
</p><p><boo> doesn't terminate </boo>
|
||||
</p><p><s.foo> doesn't terminate </s.foo>
|
||||
|
|
@ -1228,9 +1231,13 @@ Non-word characters don't terminate tag names (bug 17663, 40670, 52022)
|
|||
!! end
|
||||
|
||||
# There is a tidy bug here: http://sourceforge.net/p/tidy/bugs/946/
|
||||
# If the non-word-character tag made it through the sanitizer, tidy
|
||||
# would munge it up.
|
||||
!! test
|
||||
Non-word characters don't terminate tag names + tidy
|
||||
!! wikitext
|
||||
<blockquote|>a</blockquote>
|
||||
|
||||
<b→> doesn't terminate </b→>
|
||||
|
||||
<bä> doesn't terminate </bä>
|
||||
|
|
@ -1241,6 +1248,7 @@ Non-word characters don't terminate tag names + tidy
|
|||
|
||||
<sub-ID#1>
|
||||
!! html+tidy
|
||||
<p><blockquote|>a</p>
|
||||
<p><b→> doesn't terminate </b→></p>
|
||||
<p><bä> doesn't terminate </bä></p>
|
||||
<p><boo> doesn't terminate </boo></p>
|
||||
|
|
@ -1248,16 +1256,33 @@ Non-word characters don't terminate tag names + tidy
|
|||
<p><sub-ID#1></p>
|
||||
!! end
|
||||
|
||||
###
|
||||
### (See tests/parser/parserTestsParserHook.php for the <tåg> extension.)
|
||||
### This checks that HTML5 tags (with non-word characters in the tag
|
||||
### name) make it safely through the parser -- the Sanitizer will
|
||||
### munge them later, as it should.
|
||||
###
|
||||
!! test
|
||||
Non-word characters are valid in extension tags (T19663)
|
||||
!! wikitext
|
||||
<tåg>tåg</tåg>
|
||||
!! html
|
||||
<pre>
|
||||
'tåg'
|
||||
array (
|
||||
)
|
||||
</pre>
|
||||
|
||||
!! end
|
||||
|
||||
!! test
|
||||
Isolated close tags should be treated as literal text (bug 52760)
|
||||
!! wikitext
|
||||
</b>
|
||||
|
||||
<s.foo>s</s>
|
||||
!! html
|
||||
<p></b>
|
||||
</p><p><s.foo>s</s>
|
||||
</p>
|
||||
!! html+tidy
|
||||
<p><s.foo>s</p>
|
||||
!! end
|
||||
|
||||
###
|
||||
|
|
@ -1745,7 +1770,6 @@ b
|
|||
!! end
|
||||
|
||||
## PHP parser emits output which is broken
|
||||
## XXX The parsoid output doesn't match the tidy output.
|
||||
!! test
|
||||
Unclosed HTML p-tags should be handled properly
|
||||
!! wikitext
|
||||
|
|
@ -1755,9 +1779,10 @@ a
|
|||
b
|
||||
!! html/php+tidy
|
||||
<div>
|
||||
<p>foo</div></p>
|
||||
<p>foo</p>
|
||||
</div>
|
||||
<p>a</p>
|
||||
b</div>
|
||||
<p>b</p>
|
||||
!! html/parsoid
|
||||
<div data-parsoid='{"stx":"html"}'><p data-parsoid='{"stx":"html", "autoInsertedEnd":true}'>foo</p></div>
|
||||
<p>a</p>
|
||||
|
|
@ -7690,9 +7715,6 @@ Broken br tag sanitization
|
|||
!! end
|
||||
|
||||
# TODO: Fix html2html mode (bug 51055)!
|
||||
# This </br> handling was added as part of bug 50831; but it
|
||||
# differs from how PHP+tidy handles this. We should investigate
|
||||
# this.
|
||||
!! test
|
||||
Parsoid: Broken br tag recognition
|
||||
!! options
|
||||
|
|
@ -7701,12 +7723,9 @@ parsoid=wt2html
|
|||
</br>
|
||||
|
||||
<br/ >
|
||||
!! html/php+tidy
|
||||
<p></br></p>
|
||||
!! html+tidy
|
||||
<p><br /></p>
|
||||
<p><br /></p>
|
||||
!! html/parsoid
|
||||
<p><br></p>
|
||||
<p><br/></p>
|
||||
!! end
|
||||
|
||||
!! test
|
||||
|
|
@ -8303,10 +8322,6 @@ List embedded in a non-block tag
|
|||
</small>
|
||||
!!end
|
||||
|
||||
# This is a bug in the PHP parser + tidy combination.
|
||||
# (The </tr> tag gets parsed as text and html-escaped by PHP,
|
||||
# and then fostered out of the table by tidy.)
|
||||
# We believe the Parsoid output to be correct.
|
||||
!! test
|
||||
Table with missing opening <tr> tag
|
||||
!! options
|
||||
|
|
@ -8316,14 +8331,7 @@ parsoid=wt2html,wt2wt
|
|||
<td>foo</td>
|
||||
</tr>
|
||||
</table>
|
||||
!! html/php+tidy
|
||||
<p></tr></p>
|
||||
<table>
|
||||
<tr>
|
||||
<td>foo</td>
|
||||
</tr>
|
||||
</table>
|
||||
!! html/parsoid
|
||||
!! html+tidy
|
||||
<table>
|
||||
<tr>
|
||||
<td>foo</td>
|
||||
|
|
@ -13413,7 +13421,7 @@ Handling of sections up to level 6 and beyond
|
|||
!! end
|
||||
|
||||
!! test
|
||||
TOC regression (bug 9764)
|
||||
TOC regression (T11764)
|
||||
!! wikitext
|
||||
== title 1 ==
|
||||
=== title 1.1 ===
|
||||
|
|
@ -13585,7 +13593,7 @@ Link inside a section heading
|
|||
!! end
|
||||
|
||||
!! test
|
||||
TOC regression (bug 12077)
|
||||
TOC regression (T14077)
|
||||
!! wikitext
|
||||
__TOC__
|
||||
== title 1 ==
|
||||
|
|
@ -14210,16 +14218,17 @@ Media link with text
|
|||
!! end
|
||||
|
||||
# FIXME: this is still bad HTML tag nesting
|
||||
# FIXME: doBlockLevels won't wrap this in a paragraph because it contains a div
|
||||
!! test
|
||||
Media link with nasty text
|
||||
fixme: doBlockLevels won't wrap this in a paragraph because it contains a div
|
||||
!! wikitext
|
||||
[[Media:Foobar.jpg|Safe Link<div style=display:none>" onmouseover="alert(document.cookie)" onfoo="</div>]]
|
||||
!! html
|
||||
<a href="http://example.com/images/3/3a/Foobar.jpg" class="internal" title="Foobar.jpg">Safe Link<div style="display:none">" onmouseover="alert(document.cookie)" onfoo="</div></a>
|
||||
|
||||
!! html+tidy
|
||||
<p><a href="http://example.com/images/3/3a/Foobar.jpg" class="internal" title="Foobar.jpg">Safe Link<div style="display:none">" onmouseover="alert(document.cookie)" onfoo="</div></a></p>
|
||||
<p><a href="http://example.com/images/3/3a/Foobar.jpg" class="internal" title="Foobar.jpg">Safe Link</a></p>
|
||||
<div style="display:none">" onmouseover="alert(document.cookie)" onfoo="</div>
|
||||
!! end
|
||||
|
||||
!! test
|
||||
|
|
@ -15433,6 +15442,7 @@ http://<div id="toc" class="toc"><div id="toctitle"><h2>Contents</h2></div>
|
|||
<li class="toclevel-1 tocsection-1"><a href="#onmouseover.3D"><span class="tocnumber">1</span> <span class="toctext">onmouseover=</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<p></p>
|
||||
!! end
|
||||
|
||||
!! test
|
||||
|
|
@ -19205,6 +19215,7 @@ __TOC__
|
|||
<h2><a href="#Quote" class="mw-headline-anchor" aria-hidden="true" title="Link to this section">§</a><span class="mw-headline" id="Quote"><blockquote>Quote</blockquote></span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Main_Page&action=edit&section=1" title="Edit section: Quote">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
|
||||
|
||||
!! html+tidy
|
||||
<p></p>
|
||||
<div id="toc" class="toc">
|
||||
<div id="toctitle">
|
||||
<h2>Contents</h2>
|
||||
|
|
@ -19213,6 +19224,7 @@ __TOC__
|
|||
<li class="toclevel-1 tocsection-1"><a href="#Quote"><span class="tocnumber">1</span> <span class="toctext">Quote</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<p></p>
|
||||
<h2><a href="#Quote" class="mw-headline-anchor" aria-hidden="true" title="Link to this section">§</a><span class="mw-headline" id="Quote"></span></h2>
|
||||
<blockquote>
|
||||
<p><span class="mw-headline" id="Quote">Quote</span></p>
|
||||
|
|
@ -19261,6 +19273,7 @@ __TOC__
|
|||
<h2><a href="#Foo_Bar_2" class="mw-headline-anchor" aria-hidden="true" title="Link to this section">§</a><span class="mw-headline" id="Foo_Bar_2"><i>Foo</i> <blockquote>Bar</blockquote></span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=2" title="Edit section: Foo Bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
|
||||
|
||||
!! html+tidy
|
||||
<p></p>
|
||||
<div id="toc" class="toc">
|
||||
<div id="toctitle">
|
||||
<h2>Contents</h2>
|
||||
|
|
@ -19270,6 +19283,7 @@ __TOC__
|
|||
<li class="toclevel-1 tocsection-2"><a href="#Foo_Bar_2"><span class="tocnumber">2</span> <span class="toctext"><i>Foo</i> Bar</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<p></p>
|
||||
<h2><a href="#Foo_Bar" class="mw-headline-anchor" aria-hidden="true" title="Link to this section">§</a><span class="mw-headline" id="Foo_Bar"><i>Foo</i> <b>Bar</b></span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=1" title="Edit section: Foo Bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
|
||||
<h2><a href="#Foo_Bar_2" class="mw-headline-anchor" aria-hidden="true" title="Link to this section">§</a><span class="mw-headline" id="Foo_Bar_2"><i>Foo</i></span></h2>
|
||||
<blockquote>
|
||||
|
|
@ -19346,6 +19360,37 @@ __TOC__
|
|||
|
||||
!! end
|
||||
|
||||
# Note that the html output does not have the <p></p>, but the
|
||||
# html+tidy output *does*. This is because the empty <p></p> is
|
||||
# removed by the sanitizer, but only when tidy is *not* enabled (!).
|
||||
!! test
|
||||
Empty <p> tag in TOC, removed by Sanitizer (T92892)
|
||||
!! wikitext
|
||||
__TOC__
|
||||
== x ==
|
||||
!! html
|
||||
<div id="toc" class="toc"><div id="toctitle"><h2>Contents</h2></div>
|
||||
<ul>
|
||||
<li class="toclevel-1 tocsection-1"><a href="#x"><span class="tocnumber">1</span> <span class="toctext">x</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<h2><a href="#x" class="mw-headline-anchor" aria-hidden="true" title="Link to this section">§</a><span class="mw-headline" id="x">x</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=1" title="Edit section: x">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
|
||||
|
||||
!! html+tidy
|
||||
<p></p>
|
||||
<div id="toc" class="toc">
|
||||
<div id="toctitle">
|
||||
<h2>Contents</h2>
|
||||
</div>
|
||||
<ul>
|
||||
<li class="toclevel-1 tocsection-1"><a href="#x"><span class="tocnumber">1</span> <span class="toctext">x</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<p></p>
|
||||
<h2><a href="#x" class="mw-headline-anchor" aria-hidden="true" title="Link to this section">§</a><span class="mw-headline" id="x">x</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=1" title="Edit section: x">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
|
||||
!! end
|
||||
|
||||
!! article
|
||||
MediaWiki:Bug32057
|
||||
!! text
|
||||
|
|
@ -21892,18 +21937,6 @@ a>b
|
|||
</p>
|
||||
!! end
|
||||
|
||||
|
||||
# This was a bug in the PHP parser (see bug 17663 and its dups,
|
||||
# https://bugzilla.wikimedia.org/show_bug.cgi?id=17663)
|
||||
!! test
|
||||
Tag names followed by punctuation should not be recognized as tags
|
||||
!! wikitext
|
||||
<s.ome> text
|
||||
!! html
|
||||
<p><s.ome> text
|
||||
</p>
|
||||
!! end
|
||||
|
||||
!! test
|
||||
HTML tag with necessary entities in attributes
|
||||
!! wikitext
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ class ParserTestParserHook {
|
|||
|
||||
static function setup( &$parser ) {
|
||||
$parser->setHook( 'tag', array( __CLASS__, 'dumpHook' ) );
|
||||
$parser->setHook( 'tåg', array( __CLASS__, 'dumpHook' ) );
|
||||
$parser->setHook( 'statictag', array( __CLASS__, 'staticTagHook' ) );
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -160,9 +160,6 @@ class NewParserTest extends MediaWikiTestCase {
|
|||
$this->djVuSupport = new DjVuSupport();
|
||||
// Tidy support
|
||||
$this->tidySupport = new TidySupport();
|
||||
// We always set 'wgUseTidy' to false when parsing, but certain
|
||||
// test-running modes still use tidy if available, so ensure
|
||||
// that the tidy-related options are all set to their defaults.
|
||||
$tmpGlobals['wgUseTidy'] = false;
|
||||
$tmpGlobals['wgAlwaysUseTidy'] = false;
|
||||
$tmpGlobals['wgDebugTidy'] = false;
|
||||
|
|
@ -419,6 +416,7 @@ class NewParserTest extends MediaWikiTestCase {
|
|||
'wgMathDirectory' => $uploadDir . '/math',
|
||||
'wgDefaultLanguageVariant' => $variant,
|
||||
'wgLinkHolderBatchSize' => $linkHolderBatchSize,
|
||||
'wgUseTidy' => isset( $opts['tidy'] ),
|
||||
);
|
||||
|
||||
if ( $config ) {
|
||||
|
|
@ -727,12 +725,21 @@ class NewParserTest extends MediaWikiTestCase {
|
|||
. "Current configuration is:\n\$wgTexvc = '$wgTexvc'" );
|
||||
}
|
||||
}
|
||||
|
||||
if ( isset( $opts['djvu'] ) ) {
|
||||
if ( !$this->djVuSupport->isEnabled() ) {
|
||||
$this->markTestSkipped( "SKIPPED: djvu binaries do not exist or are not executable.\n" );
|
||||
}
|
||||
}
|
||||
|
||||
if ( isset( $opts['tidy'] ) ) {
|
||||
if ( !$this->tidySupport->isEnabled() ) {
|
||||
$this->markTestSkipped( "SKIPPED: tidy extension is not installed.\n" );
|
||||
} else {
|
||||
$options->setTidy( true );
|
||||
}
|
||||
}
|
||||
|
||||
if ( isset( $opts['pst'] ) ) {
|
||||
$out = $parser->preSaveTransform( $input, $title, $user, $options );
|
||||
} elseif ( isset( $opts['msg'] ) ) {
|
||||
|
|
@ -753,12 +760,7 @@ class NewParserTest extends MediaWikiTestCase {
|
|||
$output->setTOCEnabled( !isset( $opts['notoc'] ) );
|
||||
$out = $output->getText();
|
||||
if ( isset( $opts['tidy'] ) ) {
|
||||
if ( !$this->tidySupport->isEnabled() ) {
|
||||
$this->markTestSkipped( "SKIPPED: tidy extension is not installed.\n" );
|
||||
} else {
|
||||
$out = MWTidy::tidy( $out );
|
||||
$out = preg_replace( '/\s+$/', '', $out );
|
||||
}
|
||||
$out = preg_replace( '/\s+$/', '', $out );
|
||||
}
|
||||
|
||||
if ( isset( $opts['showtitle'] ) ) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue