wiki.techinc.nl/tests/phpunit/unit/includes/parser/SanitizerUnitTest.php
Amir Sarabadani d23af35764 Unset all globals unneeded for unit tests, assert correct directory
* Unset globals to avoid tests that look like unit tests but actually rely on
  globals
* move some tests out of unit directory so that the test suite will pass.
* Assert that tests which extend MediaWikiUnitTestCase are in a directory with
  "/unit/" in its path name

Depends-On: I67b37b1bde94eaa3d4298d9bd98ac57995ce93b9
Depends-On: I90921679518ee95fe393f8b1bbd9134daf0ba032
Bug: T87781
Change-Id: I16691fc8ac063705ba0c2bc63b96c4534ca8660b
2019-07-09 14:09:29 -04:00

328 lines
9.5 KiB
PHP

<?php
/**
* @todo Tests covering decodeCharReferences can be refactored into a single
* method and dataprovider.
*
* @group Sanitizer
*/
class SanitizerUnitTest extends MediaWikiUnitTestCase {

	/**
	 * A named entity is decoded to its UTF-8 character.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeNamedEntities() {
		$this->assertEquals(
			"\xc3\xa9cole",
			Sanitizer::decodeCharReferences( '&eacute;cole' ),
			'decode named entities'
		);
	}

	/**
	 * Hexadecimal and decimal numeric references are both decoded.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeNumericEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&#233;cole!" ),
			'decode numeric entities'
		);
	}

	/**
	 * Numeric and named references can be mixed in the same string.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeMixedEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&eacute;cole!" ),
			'decode mixed numeric/named entities'
		);
	}

	/**
	 * Escaped ampersands (&amp;amp; and &amp;#38;) are decoded exactly once, so
	 * the reference-like text following them stays literal in the output.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeMixedComplexEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)",
			Sanitizer::decodeCharReferences(
				"&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)"
			),
			'decode mixed complex entities'
		);
	}

	/**
	 * A bare ampersand that does not start a reference is left untouched.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidAmpersand() {
		$this->assertEquals(
			'a & b',
			Sanitizer::decodeCharReferences( 'a & b' ),
			'Invalid ampersand'
		);
	}

	/**
	 * An unknown named entity is passed through unchanged.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidEntities() {
		$this->assertEquals(
			'&foo;',
			Sanitizer::decodeCharReferences( '&foo;' ),
			'Invalid named entity'
		);
	}

	/**
	 * An invalid numeric reference decodes to the UTF-8 replacement character.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidNumberedEntities() {
		$this->assertEquals(
			UtfNormal\Constants::UTF8_REPLACEMENT,
			Sanitizer::decodeCharReferences( "&#88888888888888;" ),
			'Invalid numbered entity'
		);
	}

	/**
	 * @dataProvider provideTagAttributesToDecode
	 * @covers Sanitizer::decodeTagAttributes
	 *
	 * @param array $expected Attribute map expected from the decoder
	 * @param string $attributes Raw attribute text handed to the decoder
	 * @param string $message Assertion failure message
	 */
	public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
		$this->assertEquals(
			$expected,
			Sanitizer::decodeTagAttributes( $attributes ),
			$message
		);
	}

	/**
	 * Datasets for testDecodeTagAttributes().
	 *
	 * @return array[] Each entry is [ expected attribute map, raw attribute text, message ]
	 */
	public static function provideTagAttributesToDecode() {
		return [
			[ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
			[ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
			[ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
			[ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
			// "n" followed by U+0301 COMBINING ACUTE ACCENT, spelled with byte
			// escapes so that NFC normalization by an editor or tool cannot
			// silently turn it into the precomposed letter U+0144.
			[ [], "n\xcc\x81gh=bar", 'Combining accent is not allowed' ],
			[ [ 'foo' => 'bar' ], ' foo = bar ', 'Spaced attribute' ],
			[ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
			[ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],
			// NOTE(review): the next three datasets are identical as written;
			// presumably they were meant to vary the whitespace separating the
			// attributes - confirm before deduplicating.
			[
				[ 'foo' => 'bar', 'baz' => 'foo' ],
				'foo=\'bar\' baz="foo"',
				'Several attributes'
			],
			[
				[ 'foo' => 'bar', 'baz' => 'foo' ],
				'foo=\'bar\' baz="foo"',
				'Several attributes'
			],
			[
				[ 'foo' => 'bar', 'baz' => 'foo' ],
				'foo=\'bar\' baz="foo"',
				'Several attributes'
			],
			[ [ ':foo' => 'bar' ], ':foo=\'bar\'', 'Leading :' ],
			[ [ '_foo' => 'bar' ], '_foo=\'bar\'', 'Leading _' ],
			[ [ 'foo' => 'bar' ], 'Foo=\'bar\'', 'Leading capital' ],
			[ [ 'foo' => 'BAR' ], 'FOO=BAR', 'Attribute keys are normalized to lowercase' ],

			// Invalid beginning
			[ [], '-foo=bar', 'Leading - is forbidden' ],
			[ [], '.foo=bar', 'Leading . is forbidden' ],
			[ [ 'foo-bar' => 'bar' ], 'foo-bar=bar', 'A - is allowed inside the attribute' ],
			[ [ 'foo-' => 'bar' ], 'foo-=bar', 'A - is allowed inside the attribute' ],
			[ [ 'foo.bar' => 'baz' ], 'foo.bar=baz', 'A . is allowed inside the attribute' ],
			[ [ 'foo.' => 'baz' ], 'foo.=baz', 'A . is allowed as last character' ],
			[ [ 'foo6' => 'baz' ], 'foo6=baz', 'Numbers are allowed' ],

			// This bit is more relaxed than XML rules, but some extensions use
			// it, like ProofreadPage (see T29539)
			[ [ '1foo' => 'baz' ], '1foo=baz', 'Leading numbers are allowed' ],
			[ [], 'foo$=baz', 'Symbols are not allowed' ],
			[ [], 'foo@=baz', 'Symbols are not allowed' ],
			[ [], 'foo~=baz', 'Symbols are not allowed' ],
			[
				[ 'foo' => '1[#^`*%w/(' ],
				'foo=1[#^`*%w/(',
				'All kind of characters are allowed as values'
			],
			[
				[ 'foo' => '1[#^`*%\'w/(' ],
				'foo="1[#^`*%\'w/("',
				'Double quotes are allowed if quoted by single quotes'
			],
			[
				[ 'foo' => '1[#^`*%"w/(' ],
				'foo=\'1[#^`*%"w/(\'',
				'Single quotes are allowed if quoted by double quotes'
			],
			[ [ 'foo' => '&"' ], 'foo=&amp;&quot;', 'Special chars can be provided as entities' ],
			[ [ 'foo' => '&foobar;' ], 'foo=&foobar;', 'Entity-like items are accepted' ],
		];
	}

	/**
	 * @dataProvider provideCssCommentsFixtures
	 * @covers Sanitizer::checkCss
	 *
	 * @param string $expected CSS expected back from the checker
	 * @param string $css CSS handed to the checker
	 * @param string $message Assertion failure message
	 */
	public function testCssCommentsChecking( $expected, $css, $message = '' ) {
		$this->assertEquals(
			$expected,
			Sanitizer::checkCss( $css ),
			$message
		);
	}

	/**
	 * Datasets for testCssCommentsChecking().
	 *
	 * @return array[] Each entry is [ expected, css, optional message ]
	 */
	public static function provideCssCommentsFixtures() {
		return [
			// Valid comments spanning entire input
			[ '/**/', '/**/' ],
			[ '/* comment */', '/* comment */' ],

			// Weird stuff
			[ ' ', '/****/' ],
			[ ' ', '/* /* */' ],
			[ 'display: block;', "display:/* foo */block;" ],
			[
				'display: block;',
				"display:\\2f\\2a foo \\2a\\2f block;",
				'Backslash-escaped comments must be stripped (T30450)'
			],
			[
				'',
				'/* unfinished comment structure',
				'Remove anything after a comment-start token'
			],
			[
				'',
				"\\2f\\2a unifinished comment'",
				'Remove anything after a backslash-escaped comment-start token'
			],

			// Inputs that are expected to be replaced wholesale by the
			// "insecure input" marker
			[
				'/* insecure input */',
				'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
					. '(src=\'asdf.png\',sizingMethod=\'scale\');'
			],
			[
				'/* insecure input */',
				'-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
					. '(src=\'asdf.png\',sizingMethod=\'scale\')";'
			],
			[ '/* insecure input */', 'width: expression(1+1);' ],
			[ '/* insecure input */', 'background-image: image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: -webkit-image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: -moz-image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ],
			[
				'/* insecure input */',
				'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);'
			],
			[
				'/* insecure input */',
				'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);'
			],
			[ '/* insecure input */', 'foo: attr( title, url );' ],
			[ '/* insecure input */', 'foo: attr( title url );' ],
			[ '/* insecure input */', 'foo: var(--evil-attribute)' ],
		];
	}

	/**
	 * @dataProvider provideEscapeHtmlAllowEntities
	 * @covers Sanitizer::escapeHtmlAllowEntities
	 *
	 * @param string $expected Escaped output expected
	 * @param string $html Input to escape
	 */
	public function testEscapeHtmlAllowEntities( $expected, $html ) {
		$this->assertEquals(
			$expected,
			Sanitizer::escapeHtmlAllowEntities( $html )
		);
	}

	/**
	 * Datasets for testEscapeHtmlAllowEntities().
	 *
	 * @return array[] Each entry is [ expected, html ]
	 */
	public static function provideEscapeHtmlAllowEntities() {
		return [
			[ 'foo', 'foo' ],
			[ 'a¡b', 'a&#161;b' ],
			[ 'foo&#039;bar', "foo'bar" ],
			[ '&lt;script&gt;foo&lt;/script&gt;', '<script>foo</script>' ],
		];
	}

	/**
	 * Test Sanitizer::escapeId with the 'noninitial' and 'legacy' options.
	 *
	 * @dataProvider provideEscapeId
	 * @covers Sanitizer::escapeId
	 *
	 * @param string $input Raw id text
	 * @param string $output Escaped id expected
	 */
	public function testEscapeId( $input, $output ) {
		$this->assertEquals(
			$output,
			Sanitizer::escapeId( $input, [ 'noninitial', 'legacy' ] )
		);
	}

	/**
	 * Datasets for testEscapeId().
	 *
	 * @return array[] Each entry is [ input, expected output ]
	 */
	public static function provideEscapeId() {
		return [
			[ '+', '.2B' ],
			[ '&', '.26' ],
			[ '=', '.3D' ],
			[ ':', ':' ],
			[ ';', '.3B' ],
			[ '@', '.40' ],
			[ '$', '.24' ],
			[ '-_.', '-_.' ],
			[ '!', '.21' ],
			[ '*', '.2A' ],
			[ '/', '.2F' ],
			[ '[]', '.5B.5D' ],
			[ '<>', '.3C.3E' ],
			[ '\'', '.27' ],
			[ '§', '.C2.A7' ],
			[ 'Test:A & B/Here', 'Test:A_.26_B.2FHere' ],
			[
				'A&B&amp;C&amp;amp;D&amp;amp;amp;E',
				'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE'
			],
		];
	}

	/**
	 * @dataProvider provideIsReservedDataAttribute
	 * @covers Sanitizer::isReservedDataAttribute
	 *
	 * @param string $attr Attribute name to check
	 * @param bool $expected Whether the attribute is expected to be reported as reserved
	 */
	public function testIsReservedDataAttribute( $attr, $expected ) {
		$this->assertSame( $expected, Sanitizer::isReservedDataAttribute( $attr ) );
	}

	/**
	 * Datasets for testIsReservedDataAttribute().
	 *
	 * @return array[] Each entry is [ attribute name, expected result ]
	 */
	public static function provideIsReservedDataAttribute() {
		return [
			[ 'foo', false ],
			[ 'data', false ],
			[ 'data-foo', false ],
			[ 'data-mw', true ],
			[ 'data-ooui', true ],
			[ 'data-parsoid', true ],
			[ 'data-mw-foo', true ],
			[ 'data-ooui-foo', true ],
			// could be false but this is how it's implemented currently
			[ 'data-mwfoo', true ],
		];
	}

	/**
	 * @dataProvider provideStripAllTags
	 *
	 * @covers Sanitizer::stripAllTags
	 * @covers RemexStripTagHandler
	 *
	 * @param string $input
	 * @param string $expected
	 */
	public function testStripAllTags( $input, $expected ) {
		$this->assertEquals( $expected, Sanitizer::stripAllTags( $input ) );
	}

	/**
	 * Datasets for testStripAllTags().
	 *
	 * Declared static like the other providers in this class; PHPUnit 10 and
	 * later require data providers to be static.
	 *
	 * @return array[] Each entry is [ input HTML, expected plain text ]
	 */
	public static function provideStripAllTags() {
		return [
			[ '<p>Foo</p>', 'Foo' ],
			[ '<p id="one">Foo</p><p id="two">Bar</p>', 'Foo Bar' ],
			[ "<p>Foo</p>\n<p>Bar</p>", 'Foo Bar' ],
			[ '<p>Hello &lt;strong&gt; wor&#x6c;&#100; caf&eacute;</p>', 'Hello <strong> world café' ],
			[
				'<p><small data-foo=\'bar"&lt;baz>quux\'><a href="./Foo">Bar</a></small> Whee!</p>',
				'Bar Whee!'
			],
			[ '1<span class="<?php">2</span>3', '123' ],
			[ '1<span class="<?">2</span>3', '123' ],
			[ '<th>1</th><td>2</td>', '1 2' ],
		];
	}
}