wiki.techinc.nl/tests/phpunit/unit/includes/parser/SanitizerUnitTest.php
C. Scott Ananian b79c1e22ad Hard-deprecate Sanitizer::escapeId()
Deprecated in MW 1.30; time to clean up any remaining uses.

Code search:
https://codesearch.wmflabs.org/deployed/?q=escapeId%5C%28&i=nope&files=&repos=

Depends-On: Ic03a5da2e1d6b8f5656555420dd573a1d698b9cc
Depends-On: I311f44a5035f73c0fb2289f727eb39b73007429b
Depends-On: I76c5b539bae5572c4ac65f28fec9c0c36381348c
Depends-On: Id4cbfc3b113b1b04f949d485187e89ffe0b487f5
Depends-On: I7d5ba4930688ed7f011a4babed5986b8e40910a0
Depends-On: I964f83ce88fb9c66a7c59037c6066f4567bcf4c9
Change-Id: I89504cfdf8e02831d54a26900bfdc63a33b4eade
2020-01-26 22:05:45 +00:00

288 lines
8.7 KiB
PHP

<?php
/**
 * Unit tests for Sanitizer methods that are reachable through plain static calls.
 *
 * Every data provider in this class is static and returns a list of datasets
 * whose element order matches the parameter order of the test that consumes it.
 *
 * @group Sanitizer
 */
class SanitizerUnitTest extends MediaWikiUnitTestCase {

	/**
	 * Sanitizer::decodeCharReferences() must turn $input into exactly $expected.
	 *
	 * @dataProvider provideDecodeCharReferences
	 * @covers Sanitizer::decodeCharReferences
	 *
	 * @param string $expected Decoded text
	 * @param string $input Text containing character references
	 */
	public function testDecodeCharReferences( string $expected, string $input ) {
		$this->assertSame( $expected, Sanitizer::decodeCharReferences( $input ) );
	}

	/**
	 * Datasets for testDecodeCharReferences(), keyed by a description.
	 *
	 * @return array[] Each dataset is [ expected, input ]
	 */
	public static function provideDecodeCharReferences() {
		return [
			'decode named entities' => [
				"\xc3\xa9cole",
				'&eacute;cole',
			],
			'decode numeric entities' => [
				"\xc4\x88io bonas dans l'\xc3\xa9cole!",
				"&#x108;io bonas dans l'&#233;cole!",
			],
			'decode mixed numeric/named entities' => [
				"\xc4\x88io bonas dans l'\xc3\xa9cole!",
				"&#x108;io bonas dans l'&eacute;cole!",
			],
			'decode mixed complex entities' => [
				"\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)",
				"&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)",
			],
			'Invalid ampersand' => [
				'a & b',
				'a & b',
			],
			'Invalid named entity' => [
				'&foo;',
				'&foo;',
			],
			'Invalid numbered entity' => [
				UtfNormal\Constants::UTF8_REPLACEMENT,
				"&#88888888888888;",
			],
		];
	}

	/**
	 * Sanitizer::decodeTagAttributes() must parse $attributes into exactly $expected.
	 *
	 * @dataProvider provideTagAttributesToDecode
	 * @covers Sanitizer::decodeTagAttributes
	 *
	 * @param array $expected Attribute name => value map
	 * @param string $attributes Raw attribute text
	 * @param string $message Assertion message shown on failure
	 */
	public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
		$this->assertSame(
			$expected,
			Sanitizer::decodeTagAttributes( $attributes ),
			$message
		);
	}

	/**
	 * Datasets for testDecodeTagAttributes().
	 *
	 * @return array[] Each dataset is [ expected, attributes, message ]
	 */
	public static function provideTagAttributesToDecode() {
		return [
			[ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
			[ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
			[ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
			[ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
			// "n" followed by U+0301 COMBINING ACUTE ACCENT, spelled out as bytes so
			// that Unicode normalization of this file cannot merge it into one letter.
			[ [], "n\xcc\x81gh=bar", 'Combining accent is not allowed' ],
			[ [ 'foo' => 'bar' ], ' foo = bar ', 'Spaced attribute' ],
			[ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
			[ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],
			[
				[ 'foo' => 'bar', 'baz' => 'foo' ],
				'foo=\'bar\' baz="foo"',
				'Several attributes'
			],
			[ [ ':foo' => 'bar' ], ':foo=\'bar\'', 'Leading :' ],
			[ [ '_foo' => 'bar' ], '_foo=\'bar\'', 'Leading _' ],
			[ [ 'foo' => 'bar' ], 'Foo=\'bar\'', 'Leading capital' ],
			[ [ 'foo' => 'BAR' ], 'FOO=BAR', 'Attribute keys are normalized to lowercase' ],

			# Invalid beginning
			[ [], '-foo=bar', 'Leading - is forbidden' ],
			[ [], '.foo=bar', 'Leading . is forbidden' ],

			# The same characters are accepted after the first position
			[ [ 'foo-bar' => 'bar' ], 'foo-bar=bar', 'A - is allowed inside the attribute' ],
			[ [ 'foo-' => 'bar' ], 'foo-=bar', 'A - is allowed as last character' ],
			[ [ 'foo.bar' => 'baz' ], 'foo.bar=baz', 'A . is allowed inside the attribute' ],
			[ [ 'foo.' => 'baz' ], 'foo.=baz', 'A . is allowed as last character' ],
			[ [ 'foo6' => 'baz' ], 'foo6=baz', 'Numbers are allowed' ],

			# This bit is more relaxed than XML rules, but some extensions use
			# it, like ProofreadPage (see T29539)
			[ [ '1foo' => 'baz' ], '1foo=baz', 'Leading numbers are allowed' ],

			[ [], 'foo$=baz', 'Symbols are not allowed' ],
			[ [], 'foo@=baz', 'Symbols are not allowed' ],
			[ [], 'foo~=baz', 'Symbols are not allowed' ],

			[
				[ 'foo' => '1[#^`*%w/(' ],
				'foo=1[#^`*%w/(',
				'All kind of characters are allowed as values'
			],
			[
				// Value contains ' and is wrapped in "
				[ 'foo' => '1[#^`*%\'w/(' ],
				'foo="1[#^`*%\'w/("',
				'Single quotes are allowed if quoted by double quotes'
			],
			[
				// Value contains " and is wrapped in '
				[ 'foo' => '1[#^`*%"w/(' ],
				'foo=\'1[#^`*%"w/(\'',
				'Double quotes are allowed if quoted by single quotes'
			],
			[ [ 'foo' => '&"' ], 'foo=&amp;&quot;', 'Special chars can be provided as entities' ],
			[ [ 'foo' => '&foobar;' ], 'foo=&foobar;', 'Entity-like items are accepted' ],
		];
	}

	/**
	 * Sanitizer::checkCss() must turn $css into exactly $expected.
	 *
	 * @dataProvider provideCssCommentsFixtures
	 * @covers Sanitizer::checkCss
	 *
	 * @param string $expected Output expected from checkCss()
	 * @param string $css CSS text handed to checkCss()
	 * @param string $message Assertion message shown on failure
	 */
	public function testCssCommentsChecking( $expected, $css, $message = '' ) {
		$this->assertSame(
			$expected,
			Sanitizer::checkCss( $css ),
			$message
		);
	}

	/**
	 * Datasets for testCssCommentsChecking().
	 *
	 * @return array[] Each dataset is [ expected, css ] or [ expected, css, message ]
	 */
	public static function provideCssCommentsFixtures() {
		/** [ <expected>, <css>, [message] ] */
		return [
			// Valid comments spanning entire input
			[ '/**/', '/**/' ],
			[ '/* comment */', '/* comment */' ],
			// Weird stuff
			[ ' ', '/****/' ],
			[ ' ', '/* /* */' ],
			[ 'display: block;', "display:/* foo */block;" ],
			[ 'display: block;', "display:\\2f\\2a foo \\2a\\2f block;",
				'Backslash-escaped comments must be stripped (T30450)' ],
			[ '', '/* unfinished comment structure',
				'Remove anything after a comment-start token' ],
			[ '', "\\2f\\2a unifinished comment'",
				'Remove anything after a backslash-escaped comment-start token' ],
			// Inputs that the expected value marks as insecure
			[
				'/* insecure input */',
				'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
					. '(src=\'asdf.png\',sizingMethod=\'scale\');'
			],
			[
				'/* insecure input */',
				'-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
					. '(src=\'asdf.png\',sizingMethod=\'scale\')";'
			],
			[ '/* insecure input */', 'width: expression(1+1);' ],
			[ '/* insecure input */', 'background-image: image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: -webkit-image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: -moz-image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ],
			[
				'/* insecure input */',
				'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);'
			],
			[
				'/* insecure input */',
				'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);'
			],
			[ '/* insecure input */', 'foo: attr( title, url );' ],
			[ '/* insecure input */', 'foo: attr( title url );' ],
			[ '/* insecure input */', 'foo: var(--evil-attribute)' ],
		];
	}

	/**
	 * Sanitizer::escapeHtmlAllowEntities() must turn $html into exactly $expected.
	 *
	 * @dataProvider provideEscapeHtmlAllowEntities
	 * @covers Sanitizer::escapeHtmlAllowEntities
	 *
	 * @param string $expected Escaped output
	 * @param string $html Input text
	 */
	public function testEscapeHtmlAllowEntities( $expected, $html ) {
		$this->assertSame(
			$expected,
			Sanitizer::escapeHtmlAllowEntities( $html )
		);
	}

	/**
	 * Datasets for testEscapeHtmlAllowEntities().
	 *
	 * @return array[] Each dataset is [ expected, html ]
	 */
	public static function provideEscapeHtmlAllowEntities() {
		return [
			[ 'foo', 'foo' ],
			[ 'a¡b', 'a&#161;b' ],
			[ 'foo&#039;bar', "foo'bar" ],
			[ '&lt;script&gt;foo&lt;/script&gt;', '<script>foo</script>' ],
		];
	}

	/**
	 * Test Sanitizer::escapeId with the 'noninitial' and 'legacy' options.
	 *
	 * The deprecation notice for Sanitizer::escapeId is hidden so that the
	 * call itself does not fail the test.
	 *
	 * @dataProvider provideEscapeId
	 * @covers Sanitizer::escapeId
	 *
	 * @param string $input Text passed to escapeId()
	 * @param string $output Expected escaped ID
	 */
	public function testEscapeId( $input, $output ) {
		$this->hideDeprecated( 'Sanitizer::escapeId' );
		$this->assertSame(
			$output,
			Sanitizer::escapeId( $input, [ 'noninitial', 'legacy' ] )
		);
	}

	/**
	 * Datasets for testEscapeId().
	 *
	 * Note the order: unlike most providers in this class, the input comes first.
	 *
	 * @return array[] Each dataset is [ input, expected output ]
	 */
	public static function provideEscapeId() {
		return [
			[ '+', '.2B' ],
			[ '&', '.26' ],
			[ '=', '.3D' ],
			[ ':', ':' ],
			[ ';', '.3B' ],
			[ '@', '.40' ],
			[ '$', '.24' ],
			[ '-_.', '-_.' ],
			[ '!', '.21' ],
			[ '*', '.2A' ],
			[ '/', '.2F' ],
			[ '[]', '.5B.5D' ],
			[ '<>', '.3C.3E' ],
			[ '\'', '.27' ],
			[ '§', '.C2.A7' ],
			[ 'Test:A & B/Here', 'Test:A_.26_B.2FHere' ],
			[ 'A&B&amp;C&amp;amp;D&amp;amp;amp;E', 'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE' ],
		];
	}

	/**
	 * Sanitizer::isReservedDataAttribute() must return exactly $expected for $attr.
	 *
	 * @dataProvider provideIsReservedDataAttribute
	 * @covers Sanitizer::isReservedDataAttribute
	 *
	 * @param string $attr Attribute name
	 * @param bool $expected Expected return value
	 */
	public function testIsReservedDataAttribute( $attr, $expected ) {
		$this->assertSame( $expected, Sanitizer::isReservedDataAttribute( $attr ) );
	}

	/**
	 * Datasets for testIsReservedDataAttribute().
	 *
	 * @return array[] Each dataset is [ attribute name, expected result ]
	 */
	public static function provideIsReservedDataAttribute() {
		return [
			[ 'foo', false ],
			[ 'data', false ],
			[ 'data-foo', false ],
			[ 'data-mw', true ],
			[ 'data-ooui', true ],
			[ 'data-parsoid', true ],
			[ 'data-mw-foo', true ],
			[ 'data-ooui-foo', true ],
			[ 'data-mwfoo', true ], // could be false but this is how it's implemented currently
		];
	}

	/**
	 * Sanitizer::stripAllTags() must turn $input into exactly $expected.
	 *
	 * @dataProvider provideStripAllTags
	 *
	 * @covers Sanitizer::stripAllTags
	 * @covers RemexStripTagHandler
	 *
	 * @param string $input
	 * @param string $expected
	 */
	public function testStripAllTags( $input, $expected ) {
		$this->assertSame( $expected, Sanitizer::stripAllTags( $input ) );
	}

	/**
	 * Datasets for testStripAllTags().
	 *
	 * @return array[] Each dataset is [ input, expected output ]
	 */
	public static function provideStripAllTags() {
		return [
			[ '<p>Foo</p>', 'Foo' ],
			[ '<p id="one">Foo</p><p id="two">Bar</p>', 'Foo Bar' ],
			[ "<p>Foo</p>\n<p>Bar</p>", 'Foo Bar' ],
			[ '<p>Hello &lt;strong&gt; wor&#x6c;&#100; caf&eacute;</p>', 'Hello <strong> world café' ],
			[
				'<p><small data-foo=\'bar"&lt;baz>quux\'><a href="./Foo">Bar</a></small> Whee!</p>',
				'Bar Whee!'
			],
			[ '1<span class="<?php">2</span>3', '123' ],
			[ '1<span class="<?">2</span>3', '123' ],
			[ '<th>1</th><td>2</td>', '1 2' ],
		];
	}
}