2016-03-15 19:13:43 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
/**
 * Runs the html5lib tree-construction cases stored in html5lib-tests.json
 * (next to this file) through MediaWiki\Tidy\Balancer and compares the
 * result with the serialization recorded in each case.
 */
class BalancerTest extends MediaWikiTestCase {
	/** @var MediaWiki\Tidy\Balancer Instance under test, rebuilt in setUp() */
	private $balancer;

	/**
	 * Anything that needs to happen before your tests should go here.
	 */
	protected function setUp() {
		// Be sure to call the parent setup and teardown functions.
		// This makes sure that all the various cleanup and restorations
		// happen as they should (including the restoration for setMwGlobals).
		parent::setUp();
		$this->balancer = new MediaWiki\Tidy\Balancer( [
			'strict' => false, /* not strict */
			'allowedHtmlElements' => null, /* no sanitization */
			'tidyCompat' => false, /* standard parser */
			'allowComments' => true, /* comment parsing */
		] );
	}

	/**
	 * Any cleanup you need to do should go here.
	 */
	protected function tearDown() {
		parent::tearDown();
	}

	/**
	 * Balance one input string and compare it with the expected output.
	 *
	 * @covers MediaWiki\Tidy\Balancer::balance
	 * @dataProvider provideBalancerTests
	 * @param string $description Used as the assertion message
	 * @param string $input Markup handed to the balancer
	 * @param string $expected Expected balancer output
	 */
	public function testBalancer( $description, $input, $expected ) {
		$output = $this->balancer->balance( $input );

		// Ignore self-closing tags
		$output = preg_replace( '/\s*\/>/', '>', $output );

		$this->assertEquals( $expected, $output, $description );
	}

	/**
	 * Data provider: loads html5lib-tests.json and drops every case that
	 * exercises a construct the balancer doesn't support.
	 *
	 * @return array[] List of [ description, input, expected output ]
	 * @throws RuntimeException If the fixture cannot be read or decoded
	 */
	public static function provideBalancerTests() {
		// Get the tests from html5lib-tests.json. Fail loudly if the fixture
		// is unusable, rather than silently providing zero test cases.
		$path = __DIR__ . '/html5lib-tests.json';
		$raw = file_get_contents( $path );
		if ( $raw === false ) {
			throw new RuntimeException( "Unable to read test fixture $path" );
		}
		$json = json_decode( $raw, true );
		if ( !is_array( $json ) ) {
			throw new RuntimeException(
				"Unable to decode test fixture $path (JSON error code " . json_last_error() . ')'
			);
		}

		// Munge this slightly into the format phpunit expects
		// for providers, and filter out HTML constructs which
		// the balancer doesn't support.
		$tests = [];
		$start = '<html><head></head><body>';
		$end = '</body></html>';
		// Unsupported tags which *do* show up in the expected output.
		$unsupportedTags = [
			'iframe',
			'noembed',
			'noscript',
			'script',
			'svg script',
			'svg title',
			'title',
			'xmp',
		];
		foreach ( $json as $filename => $cases ) {
			foreach ( $cases as $case ) {
				$html = $case['document']['html'];
				if (
					substr( $html, 0, strlen( $start ) ) !== $start ||
					substr( $html, -strlen( $end ) ) !== $end
				) {
					// Skip tests which involve stuff in the <head> or
					// weird doctypes.
					continue;
				}
				// We used to strip $start and $end from the full document
				// serialization here. Now we use a different field in the
				// test case, which reports how domino would parse this case
				// in a no-quirks <body> context. (The original test case may
				// have had a different context, or relied on quirks mode.)
				$html = $case['document']['noQuirksBodyHtml'];
				// Lower-case the SVG foreignObject element name in the
				// expected output.
				$html = str_replace( 'foreignObject', 'foreignobject', $html );

				$data = $case['data'];
				$props = isset( $case['document']['props'] ) ? $case['document']['props'] : [];
				$tags = isset( $props['tags'] ) ? $props['tags'] : [];

				if (
					isset( $props['comment'] ) &&
					preg_match( ',<!--[^>]*<,', $html )
				) {
					// Skip tests which include HTML comments containing
					// the < character, which we don't support.
					continue;
				}
				if ( strpos( $data, '<![CDATA[' ) !== false ) {
					// Skip tests involving <![CDATA[ ]]> quoting.
					continue;
				}
				if ( stripos( $data, '<!DOCTYPE' ) !== false ) {
					// Skip tests involving doctypes.
					continue;
				}
				if ( preg_match( ',</?(html|head|body|frame|plaintext)>|<rdar:|<isindex,i', $data ) ) {
					// Skip tests involving some literal tags, which are
					// unsupported but don't show up in the expected output.
					continue;
				}
				foreach ( $unsupportedTags as $tag ) {
					if ( isset( $tags[$tag] ) ) {
						// Skip tests with unsupported tags which *do* show
						// up in the expected output ("continue 2" moves on
						// to the next test case).
						continue 2;
					}
				}
				// NOTE(review): the second pattern lists '&' twice, which is
				// redundant; confirm whether another literal was intended.
				if (
					$filename === 'entities01.dat' ||
					$filename === 'entities02.dat' ||
					preg_match( '/&([a-z]+|#x[0-9A-F]+);/i', $data ) ||
					preg_match( '/^(&|&#|&#X|&#x|-|&x-test|&)$/', $data )
				) {
					// Skip tests involving entity encoding.
					continue;
				}
				if (
					isset( $props['tagWithLt'] ) ||
					isset( $props['attrWithFunnyChar'] ) ||
					preg_match( ':^(</b test|<di|<foo bar=qux/>)$:', $data ) ||
					preg_match( ':</p<p>:', $data )
				) {
					// Skip tests with funny tag or attribute names,
					// which are really tests of the HTML tokenizer, not
					// the tree builder.
					continue;
				}
				if ( stripos( $data, 'encoding=" text/html "' ) !== false ) {
					// The Sanitizer normalizes whitespace in attribute
					// values, which makes this test case invalid.
					continue;
				}
				if ( $filename === 'plain-text-unsafe.dat' ) {
					// Skip tests with ASCII null, etc.
					continue;
				}
				$tests[] = [
					$filename, # use better description?
					$data,
					$html
				];
			}
		}
		return $tests;
	}
}
|