Make HtmlFormatter return removed elements
This shouldn't cause much overhead and is useful for getting the text of the removed tags. Change-Id: I97cf66014719244b8bb2b0509b419c82202bdb01
This commit is contained in:
parent
0c04628d6c
commit
6289f3c48c
2 changed files with 34 additions and 12 deletions
|
|
@ -128,7 +128,9 @@ class HtmlFormatter {
|
|||
}
|
||||
|
||||
/**
|
||||
* Removes content we've chosen to remove
|
||||
* Removes content we've chosen to remove. The text of the removed elements can be
|
||||
* extracted with the getText method.
|
||||
* @return array of removed DOMElements
|
||||
*/
|
||||
public function filterContent() {
|
||||
wfProfileIn( __METHOD__ );
|
||||
|
|
@ -156,8 +158,7 @@ class HtmlFormatter {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
$this->removeElements( $domElemsToRemove );
|
||||
$removed = $this->removeElements( $domElemsToRemove );
|
||||
|
||||
// Elements with named IDs
|
||||
$domElemsToRemove = array();
|
||||
|
|
@ -167,7 +168,7 @@ class HtmlFormatter {
|
|||
$domElemsToRemove[] = $itemToRemoveNode;
|
||||
}
|
||||
}
|
||||
$this->removeElements( $domElemsToRemove );
|
||||
$removed = array_merge( $removed, $this->removeElements( $domElemsToRemove ) );
|
||||
|
||||
// CSS Classes
|
||||
$domElemsToRemove = array();
|
||||
|
|
@ -183,7 +184,7 @@ class HtmlFormatter {
|
|||
}
|
||||
}
|
||||
}
|
||||
$this->removeElements( $domElemsToRemove );
|
||||
$removed = array_merge( $removed, $this->removeElements( $domElemsToRemove ) );
|
||||
|
||||
// Tags with CSS Classes
|
||||
foreach ( $removals['TAG_CLASS'] as $classToRemove ) {
|
||||
|
|
@ -192,16 +193,17 @@ class HtmlFormatter {
|
|||
$elements = $xpath->query(
|
||||
'//' . $parts[0] . '[@class="' . $parts[1] . '"]'
|
||||
);
|
||||
|
||||
$this->removeElements( $elements );
|
||||
$removed = array_merge( $removed, $this->removeElements( $elements ) );
|
||||
}
|
||||
|
||||
wfProfileOut( __METHOD__ );
|
||||
return $removed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes a list of elements from DOMDocument
|
||||
* @param array|DOMNodeList $elements
|
||||
* @return array of removed elements
|
||||
*/
|
||||
private function removeElements( $elements ) {
|
||||
$list = $elements;
|
||||
|
|
@ -217,6 +219,7 @@ class HtmlFormatter {
|
|||
$element->parentNode->removeChild( $element );
|
||||
}
|
||||
}
|
||||
return $list;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -245,7 +248,10 @@ class HtmlFormatter {
|
|||
}
|
||||
|
||||
/**
|
||||
* Performs final transformations and returns resulting HTML
|
||||
* Performs final transformations and returns resulting HTML. Note that if you want to call this
|
||||
* both without an element and with an element you should call it without an element first. If you
|
||||
* specify $element, the method will change the underlying DOM and you won't be able to get
|
||||
* it back.
|
||||
*
|
||||
* @param DOMElement|string|null $element Element, or ID of the element, to get HTML from; null to get it from the whole tree
|
||||
* @return string Processed HTML
|
||||
|
|
|
|||
|
|
@ -8,17 +8,23 @@ class HtmlFormatterTest extends MediaWikiTestCase {
|
|||
* @dataProvider getHtmlData
|
||||
* @covers HtmlFormatter::getText
|
||||
*/
|
||||
public function testTransform( $input, $expected, $callback = false ) {
|
||||
public function testTransform( $input, $expectedText, $expectedRemoved = array(), $callback = false ) {
|
||||
$input = self::normalize( $input );
|
||||
$formatter = new HtmlFormatter( HtmlFormatter::wrapHTML( $input ) );
|
||||
if ( $callback ) {
|
||||
$callback( $formatter );
|
||||
}
|
||||
$formatter->filterContent();
|
||||
$removedElements = $formatter->filterContent();
|
||||
$html = $formatter->getText();
|
||||
$removed = array();
|
||||
foreach ( $removedElements as $removedElement ) {
|
||||
$removed[] = self::normalize( $formatter->getText( $removedElement ) );
|
||||
}
|
||||
$expectedRemoved = array_map( 'self::normalize', $expectedRemoved );
|
||||
|
||||
$this->assertValidHtmlSnippet( $html );
|
||||
$this->assertEquals( self::normalize( $expected ), self::normalize( $html ) );
|
||||
$this->assertEquals( self::normalize( $expectedText ), self::normalize( $html ) );
|
||||
// asort()/sort() sort in place and return a bool, so comparing their return
// values would always pass. Sort both lists first (sort() also reindexes, so
// keys line up) and then compare the actual contents, order-insensitively.
sort( $expectedRemoved );
sort( $removed );
$this->assertEquals( $expectedRemoved, $removed );
|
||||
}
|
||||
|
||||
private static function normalize( $s ) {
|
||||
|
|
@ -45,6 +51,7 @@ class HtmlFormatterTest extends MediaWikiTestCase {
|
|||
array(
|
||||
'<img src="/foo/bar.jpg" alt="Blah"/>',
|
||||
'',
|
||||
array( '<img src="/foo/bar.jpg" alt="Blah">' ),
|
||||
$removeImages,
|
||||
),
|
||||
// basic tag removal
|
||||
|
|
@ -52,21 +59,30 @@ class HtmlFormatterTest extends MediaWikiTestCase {
|
|||
'<table><tr><td>foo</td></tr></table><div class="foo">foo</div><div class="foo quux">foo</div><span id="bar">bar</span>
|
||||
<strong class="foo" id="bar">foobar</strong><div class="notfoo">test</div><div class="baz"/>
|
||||
<span class="baz">baz</span>',
|
||||
|
||||
'<div class="notfoo">test</div>
|
||||
<span class="baz">baz</span>',
|
||||
array(
|
||||
'<table><tr><td>foo</td></tr></table>',
|
||||
'<div class="foo">foo</div>',
|
||||
'<div class="foo quux">foo</div>',
|
||||
'<span id="bar">bar</span>',
|
||||
'<strong class="foo" id="bar">foobar</strong>',
|
||||
'<div class="baz"/>',
|
||||
),
|
||||
$removeTags,
|
||||
),
|
||||
// don't flatten tags that start like chosen ones
|
||||
array(
|
||||
'<div><s>foo</s> <span>bar</span></div>',
|
||||
'foo <span>bar</span>',
|
||||
array(),
|
||||
$flattenSomeStuff,
|
||||
),
|
||||
// total flattening
|
||||
array(
|
||||
'<div style="foo">bar<sup>2</sup></div>',
|
||||
'bar2',
|
||||
array(),
|
||||
$flattenEverything,
|
||||
),
|
||||
// UTF-8 preservation and security
|
||||
|
|
|
|||
Loading…
Reference in a new issue