Walk the DOM instead of using querySelectorAll in cleanUpTocLine
Change-Id: Ic59a4883f5b830c0c513e1836ad0de7c29a4b96d
This commit is contained in:
parent
89ddae6805
commit
b05e4b98ce
1 changed files with 36 additions and 27 deletions
|
|
@ -102,6 +102,8 @@ use Wikimedia\IPUtils;
|
|||
use Wikimedia\Parsoid\Core\SectionMetadata;
|
||||
use Wikimedia\Parsoid\Core\TOCData;
|
||||
use Wikimedia\Parsoid\DOM\DocumentFragment;
|
||||
use Wikimedia\Parsoid\DOM\Element;
|
||||
use Wikimedia\Parsoid\DOM\Node;
|
||||
use Wikimedia\Parsoid\Utils\DOMCompat;
|
||||
use Wikimedia\Parsoid\Utils\DOMUtils;
|
||||
use Wikimedia\ScopedCallback;
|
||||
|
|
@ -4142,7 +4144,8 @@ class Parser {
|
|||
->page( $this->getPage() );
|
||||
}
|
||||
|
||||
private function cleanUpTocLine( DocumentFragment $container ) {
|
||||
private function cleanUpTocLine( Node $container ) {
|
||||
'@phan-var Element|DocumentFragment $container'; // @var Element|DocumentFragment $container
|
||||
# Strip out HTML
|
||||
# Allowed tags are:
|
||||
# * <sup> and <sub> (T10393)
|
||||
|
|
@ -4155,35 +4158,41 @@ class Parser {
|
|||
# We strip any parameter from accepted tags, except dir="rtl|ltr" from <span>,
|
||||
# to allow setting directionality in toc items.
|
||||
$allowedTags = [ 'span', 'sup', 'sub', 'bdi', 'i', 'b', 's', 'strike', 'q' ];
|
||||
$allNodes = DOMCompat::querySelectorAll( $container, '*' );
|
||||
foreach ( $allNodes as $node ) {
|
||||
$nodeName = DOMCompat::nodeName( $node );
|
||||
if ( in_array( $nodeName, [ 'style', 'script' ], true ) ) {
|
||||
# Remove any <style> or <script> tags (T198618)
|
||||
DOMCompat::remove( $node );
|
||||
} elseif ( in_array( $nodeName, $allowedTags, true ) ) {
|
||||
// Keep tag, remove attributes
|
||||
$removeAttrs = [];
|
||||
foreach ( $node->attributes as $attr ) {
|
||||
if (
|
||||
$nodeName === 'span' && $attr->name === 'dir'
|
||||
&& ( $attr->value === 'rtl' || $attr->value === 'ltr' )
|
||||
) {
|
||||
// Keep <span dir="rtl"> and <span dir="ltr">
|
||||
continue;
|
||||
$node = $container->firstChild;
|
||||
while ( $node !== null ) {
|
||||
$next = $node->nextSibling;
|
||||
if ( $node instanceof Element ) {
|
||||
$nodeName = DOMCompat::nodeName( $node );
|
||||
if ( in_array( $nodeName, [ 'style', 'script' ], true ) ) {
|
||||
# Remove any <style> or <script> tags (T198618)
|
||||
DOMCompat::remove( $node );
|
||||
} elseif ( in_array( $nodeName, $allowedTags, true ) ) {
|
||||
// Keep tag, remove attributes
|
||||
$removeAttrs = [];
|
||||
foreach ( $node->attributes as $attr ) {
|
||||
if (
|
||||
$nodeName === 'span' && $attr->name === 'dir'
|
||||
&& ( $attr->value === 'rtl' || $attr->value === 'ltr' )
|
||||
) {
|
||||
// Keep <span dir="rtl"> and <span dir="ltr">
|
||||
continue;
|
||||
}
|
||||
$removeAttrs[] = $attr;
|
||||
}
|
||||
$removeAttrs[] = $attr;
|
||||
foreach ( $removeAttrs as $attr ) {
|
||||
$node->removeAttributeNode( $attr );
|
||||
}
|
||||
$this->cleanUpTocLine( $node );
|
||||
} else {
|
||||
// Strip tag
|
||||
$next = $node->firstChild;
|
||||
while ( $childNode = $node->firstChild ) {
|
||||
$node->parentNode->insertBefore( $childNode, $node );
|
||||
}
|
||||
DOMCompat::remove( $node );
|
||||
}
|
||||
foreach ( $removeAttrs as $attr ) {
|
||||
$node->removeAttributeNode( $attr );
|
||||
}
|
||||
} else {
|
||||
// Strip tag
|
||||
while ( $childNode = $node->firstChild ) {
|
||||
$node->parentNode->insertBefore( $childNode, $node );
|
||||
}
|
||||
DOMCompat::remove( $node );
|
||||
}
|
||||
$node = $next;
|
||||
}
|
||||
|
||||
# Strip '<span></span>', which is the result from the above if
|
||||
|
|
|
|||
Loading…
Reference in a new issue