2013-09-16 16:54:32 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
/**
 * Tests for HtmlFormatter: content filtering, tag removal and tag flattening.
 *
 * @group HtmlFormatter
 */
class HtmlFormatterTest extends MediaWikiTestCase {

	/**
	 * Use TidySupport to check whether we should use $wgTidyInternal.
	 *
	 * The Tidy extension in HHVM does not support error text return, so it is
	 * nominally usable, but does not pass tests which require error text from
	 * Tidy.
	 */
	protected function setUp() {
		parent::setUp();
		$tidySupport = new TidySupport();
		$this->setMwGlobals( 'wgTidyInternal', $tidySupport->isInternal() );
	}

	/**
	 * Runs $input through an HtmlFormatter (optionally configured by $callback)
	 * and checks both the remaining text and the fragments that were removed.
	 *
	 * Line breaks are stripped from every compared string, and the removed
	 * fragments are compared without regard to order.
	 *
	 * NOTE(review): the removed-fragment comparison used to compare the boolean
	 * results of asort() and therefore always passed. If this assertion now
	 * fails, verify the fixtures in getHtmlData() against the formatter's real
	 * serialisation of removed elements.
	 *
	 * @dataProvider getHtmlData
	 *
	 * @param string $input HTML fed to the formatter
	 * @param string $expectedText Expected result of HtmlFormatter::getText()
	 * @param array $expectedRemoved Expected HTML of each removed element
	 * @param callable|bool $callback Receives the formatter before filtering, or false for none
	 */
	public function testTransform( $input, $expectedText,
		$expectedRemoved = array(), $callback = false
	) {
		$input = self::normalize( $input );
		$formatter = new HtmlFormatter( HtmlFormatter::wrapHTML( $input ) );
		if ( $callback ) {
			$callback( $formatter );
		}
		$removedElements = $formatter->filterContent();
		$html = $formatter->getText();

		$removed = array();
		foreach ( $removedElements as $removedElement ) {
			$removed[] = self::normalize( $formatter->getText( $removedElement ) );
		}
		// Array callable rather than the 'self::normalize' string form, which
		// newer PHP versions deprecate.
		$expectedRemoved = array_map( array( __CLASS__, 'normalize' ), $expectedRemoved );

		$this->assertValidHtmlSnippet( $html );
		$this->assertEquals( self::normalize( $expectedText ), self::normalize( $html ) );

		// sort() works in place and returns a bool, so sort first and then
		// compare the arrays themselves. It also reindexes, which makes the
		// comparison independent of the original order and keys.
		sort( $expectedRemoved );
		sort( $removed );
		$this->assertEquals( $expectedRemoved, $removed );
	}

	/**
	 * Strips all carriage returns and line feeds so that strings can be
	 * compared regardless of line-ending style or line wrapping.
	 *
	 * @param string $s
	 * @return string
	 */
	private static function normalize( $s ) {
		return str_replace( array( "\r", "\n" ), '', $s );
	}

	/**
	 * Data provider for testTransform().
	 *
	 * Each case is: input HTML, expected text, and optionally the expected
	 * removed fragments and a callback that configures the formatter.
	 *
	 * @return array
	 */
	public function getHtmlData() {
		$removeImages = function ( HtmlFormatter $f ) {
			$f->setRemoveMedia();
		};
		$removeTags = function ( HtmlFormatter $f ) {
			$f->remove( array( 'table', '.foo', '#bar', 'div.baz' ) );
		};
		$flattenSomeStuff = function ( HtmlFormatter $f ) {
			$f->flatten( array( 's', 'div' ) );
		};
		$flattenEverything = function ( HtmlFormatter $f ) {
			$f->flattenAllTags();
		};

		return array(
			// remove images if asked
			array(
				'<img src="/foo/bar.jpg" alt="Blah"/>',
				'',
				array( '<img src="/foo/bar.jpg" alt="Blah">' ),
				$removeImages,
			),
			// basic tag removal
			array(
				// @codingStandardsIgnoreStart Ignore long line warnings.
				'<table><tr><td>foo</td></tr></table><div class="foo">foo</div><div class="foo quux">foo</div><span id="bar">bar</span>
<strong class="foo" id="bar">foobar</strong><div class="notfoo">test</div><div class="baz"/>
<span class="baz">baz</span>',
				// @codingStandardsIgnoreEnd
				'<div class="notfoo">test</div>
<span class="baz">baz</span>',
				array(
					'<table><tr><td>foo</td></tr></table>',
					'<div class="foo">foo</div>',
					'<div class="foo quux">foo</div>',
					'<span id="bar">bar</span>',
					'<strong class="foo" id="bar">foobar</strong>',
					'<div class="baz"/>',
				),
				$removeTags,
			),
			// don't flatten tags that start like chosen ones
			array(
				'<div><s>foo</s> <span>bar</span></div>',
				'foo <span>bar</span>',
				array(),
				$flattenSomeStuff,
			),
			// total flattening
			array(
				'<div style="foo">bar<sup>2</sup></div>',
				'bar2',
				array(),
				$flattenEverything,
			),
			// UTF-8 preservation and security
			// NOTE(review): input and expected text are identical as written
			// here; confirm that no HTML entities were lost from these literals.
			array(
				'<span title="" \' &"><Тест!></span> &<&&&&',
				'<span title="" \' &"><Тест!></span> &<&&&&',
				array(),
				$removeTags, // Have some rules to trigger a DOM parse
			),
			// https://phabricator.wikimedia.org/T55086
			array(
				'Foo<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">[1]</a></sup>'
					. ' <a href="/wiki/Bar" title="Bar" class="mw-redirect">Bar</a>',
				'Foo<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">[1]</a></sup>'
					. ' <a href="/wiki/Bar" title="Bar" class="mw-redirect">Bar</a>',
			),
		);
	}

	/**
	 * With no filtering rules configured, filterContent() must not call
	 * getDoc(); MockHtmlFormatter records whether it was called.
	 */
	public function testQuickProcessing() {
		$formatter = new MockHtmlFormatter( 'foo' );
		$formatter->filterContent();
		$this->assertFalse(
			$formatter->hasDoc,
			'HtmlFormatter should not needlessly parse HTML'
		);
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * HtmlFormatter subclass that records whether getDoc() has been called.
 */
class MockHtmlFormatter extends HtmlFormatter {
	/** @var bool Set to true the first time getDoc() is invoked */
	public $hasDoc = false;

	/**
	 * Flags the call, then defers to the parent implementation.
	 *
	 * @return mixed Whatever HtmlFormatter::getDoc() returns
	 */
	public function getDoc() {
		// Record the call before delegating so the flag is set even if the
		// parent call does not return normally.
		$this->hasDoc = true;

		return parent::getDoc();
	}
}
|