2017-02-17 04:10:15 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
namespace MediaWiki\Tidy;
|
|
|
|
|
|
2024-02-08 23:12:50 +00:00
|
|
|
use InvalidArgumentException;
|
2021-08-08 20:50:34 +00:00
|
|
|
use Wikimedia\RemexHtml\HTMLData;
|
|
|
|
|
use Wikimedia\RemexHtml\Serializer\Serializer;
|
|
|
|
|
use Wikimedia\RemexHtml\Serializer\SerializerNode;
|
|
|
|
|
use Wikimedia\RemexHtml\Tokenizer\Attributes;
|
|
|
|
|
use Wikimedia\RemexHtml\Tokenizer\PlainAttributes;
|
|
|
|
|
use Wikimedia\RemexHtml\TreeBuilder\Element;
|
|
|
|
|
use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder;
|
|
|
|
|
use Wikimedia\RemexHtml\TreeBuilder\TreeHandler;
|
2017-02-17 04:10:15 +00:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @internal
|
|
|
|
|
*/
|
|
|
|
|
class RemexCompatMunger implements TreeHandler {
|
2024-09-11 20:54:17 +00:00
|
|
|
/**
 * Names of the elements that insertElement() classifies as inline.
 * Only the keys are significant; membership is tested with isset().
 */
private const ONLY_INLINE_ELEMENTS = [
	'a' => true,
	'abbr' => true,
	'acronym' => true,
	'applet' => true,
	'b' => true,
	'basefont' => true,
	'bdo' => true,
	'big' => true,
	'br' => true,
	'button' => true,
	'cite' => true,
	'code' => true,
	'del' => true,
	'dfn' => true,
	'em' => true,
	'font' => true,
	'i' => true,
	'iframe' => true,
	'img' => true,
	'input' => true,
	'ins' => true,
	'kbd' => true,
	'label' => true,
	'legend' => true,
	'map' => true,
	'object' => true,
	'param' => true,
	'q' => true,
	'rb' => true,
	'rbc' => true,
	'rp' => true,
	'rt' => true,
	'rtc' => true,
	'ruby' => true,
	's' => true,
	'samp' => true,
	'select' => true,
	'small' => true,
	'span' => true,
	'strike' => true,
	'strong' => true,
	'sub' => true,
	'sup' => true,
	'textarea' => true,
	'tt' => true,
	'u' => true,
	'var' => true,
	// Those defined in tidy.conf
	'video' => true,
	'audio' => true,
	'bdi' => true,
	'data' => true,
	'time' => true,
	'mark' => true,
];
|
|
|
|
|
|
2019-01-27 22:13:10 +00:00
|
|
|
/**
 * "Metadata" elements, in the sense used by this class: elements that
 * must not start a p-wrapper and must not end an enclosing one either.
 * These are typically elements that a browser does not render visibly.
 *
 * The list is deliberately incomplete; it only names the tags that are
 * likely to be encountered in practice. Only the keys are significant.
 */
private const METADATA_ELEMENTS = [
	'style' => true,
	'script' => true,
	'link' => true,
	// Except for the TableOfContentsMarker (see ::isTableOfContentsMarker()
	// and Parser::TOC_PLACEHOLDER) which should break a paragraph.
	'meta' => true,
];
|
|
|
|
|
|
2024-09-11 20:54:17 +00:00
|
|
|
/**
 * Names of the formatting elements that insertElement() may mark as
 * splittable when they are inserted under a p-wrapper or under another
 * splittable element. Only the keys are significant.
 */
private const FORMATTING_ELEMENTS = [
	'a' => true,
	'b' => true,
	'big' => true,
	'code' => true,
	'em' => true,
	'font' => true,
	'i' => true,
	'nobr' => true,
	's' => true,
	'small' => true,
	'strike' => true,
	'strong' => true,
	'tt' => true,
	'u' => true,
];
|
|
|
|
|
|
2018-08-06 02:15:25 +00:00
|
|
|
/**
 * The downstream serializer that every tree event is forwarded to.
 *
 * @var Serializer
 */
private $serializer;

/**
 * Whether trace() writes its messages to the debug log.
 *
 * @var bool
 */
private $trace;
|
|
|
|
|
|
2017-02-17 04:10:15 +00:00
|
|
|
/**
 * @param Serializer $serializer Serializer that receives the munged events
 * @param bool $trace Whether trace() should emit debug messages
 */
public function __construct( Serializer $serializer, $trace = false ) {
	$this->trace = $trace;
	$this->serializer = $serializer;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Start the document in the serializer, then attach munger data to the
 * serializer's root node and flag it as needing p-wrapping.
 *
 * Both arguments are forwarded to the serializer unchanged.
 */
public function startDocument( $fragmentNamespace, $fragmentName ) {
	$this->serializer->startDocument( $fragmentNamespace, $fragmentName );

	$rootNode = $this->serializer->getRootNode();
	$rootNode->snData = new RemexMungerData;
	$rootNode->snData->needsPWrapping = true;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Proxy: forward the end-of-document event to the serializer unchanged.
 */
public function endDocument( $pos ) {
	$this->serializer->endDocument(
		$pos
	);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Work out which serializer node an insertion really targets.
 *
 * - ROOT: the parent is the serializer's root node, with no reference node.
 * - BEFORE: the reference node is the element's own node, and the parent
 *   is that node's parent.
 * - Otherwise: the element's own node, unless it has been diverted. A
 *   chain of currentCloneElement links is followed to its end (and the end
 *   is cached on the requested element's data); failing that, an open
 *   childPElement is used instead. The same node is returned as both
 *   parent and reference.
 *
 * @param int $preposition
 * @param Element|null $refElement
 * @return array Two items: the parent node and the reference node
 */
private function getParentForInsert( $preposition, $refElement ) {
	if ( $preposition === TreeBuilder::ROOT ) {
		return [ $this->serializer->getRootNode(), null ];
	}

	$node = $refElement->userData;
	if ( $preposition === TreeBuilder::BEFORE ) {
		return [ $this->serializer->getParentNode( $node ), $node ];
	}

	$data = $node->snData;
	if ( $data->currentCloneElement ) {
		// Walk to the last clone in the chain
		$requestedData = $data;
		do {
			$cloneElement = $data->currentCloneElement;
			$node = $cloneElement->userData;
			$data = $node->snData;
		} while ( $data->currentCloneElement );
		// Remember the end of the chain so the next lookup is a single hop
		$requestedData->currentCloneElement = $cloneElement;
	} elseif ( $data->childPElement ) {
		$node = $data->childPElement->userData;
	}

	return [ $node, $node ];
}
|
|
|
|
|
|
|
|
|
|
/**
 * Create an mw:p-wrap element under $parent and register it as the
 * parent's current p-wrapper (childPElement).
 *
 * @param SerializerNode $parent Node that becomes the wrap base
 * @param int $sourceStart
 * @return SerializerNode The serializer node of the new p-wrapper
 */
private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
	$wrapperElement = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
	// Inserting downstream is what gives the element its userData node
	$this->serializer->insertElement(
		TreeBuilder::UNDER,
		$parent,
		$wrapperElement,
		false,
		$sourceStart,
		0
	);

	$wrapperData = new RemexMungerData;
	$wrapperData->isPWrapper = true;
	$wrapperData->wrapBaseNode = $parent;

	$wrapperNode = $wrapperElement->userData;
	$wrapperNode->snData = $wrapperData;
	$parent->snData->childPElement = $wrapperElement;

	return $wrapperNode;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Insert a run of text.
 *
 * For insertions UNDER a parent: non-blank text under a node that needs
 * p-wrapping gets a new p-wrapper; otherwise, if the parent is splittable
 * and has no p-wrapper ancestor, the tag stack is split in inline mode.
 * In both cases the text is then inserted under the node so created.
 * Non-blank text increments the nonblank node count of its final parent.
 *
 * "Blank" means the selected range consists only of tab, LF, FF, CR and
 * space characters.
 */
public function characters( $preposition, $refElement, $text, $start, $length,
	$sourceStart, $sourceLength
) {
	$isBlank = strspn( $text, "\t\n\f\r ", $start, $length ) === $length;

	[ $parent, $refNode ] = $this->getParentForInsert( $preposition, $refElement );
	$parentData = $parent->snData;

	if ( $preposition === TreeBuilder::UNDER ) {
		if ( !$isBlank && $parentData->needsPWrapping ) {
			// Add a p-wrapper for bare text under body/blockquote
			$refNode = $this->insertPWrapper( $refNode, $sourceStart );
			$parentData = $refNode->snData;
		} elseif ( $parentData->isSplittable && !$parentData->ancestorPNode ) {
			// The parent is splittable and in block mode, so split the tag stack
			$refNode = $this->splitTagStack( $refNode, true, $sourceStart );
			$parentData = $refNode->snData;
		}
	}

	if ( !$isBlank ) {
		// Non-whitespace characters detected
		$parentData->nonblankNodeCount++;
	}

	$this->serializer->characters(
		$preposition,
		$refNode,
		$text,
		$start,
		$length,
		$sourceStart,
		$sourceLength
	);
}
|
|
|
|
|
|
Fix RemexCompatMunger infinite recursion
When TreeBuilder requests reparenting of all child nodes of a given
element, we do this by removing the existing child nodes, and then
inserting the proposed new parent under the old parent. However, when a
p-wrap diversion is in place, the insertion of the new parent is
diverted into the p-wrap, and the p-wrap then becomes a child of the new
parent, causing a reference loop, and ultimately infinite recursion in
Serializer.
Instead, divert the entire reparent request to the p-wrap, so that the
new parent is a child of the p-wrap. This makes sense since the new
parent is always a formatting element. The only caller of
reparentChildren(), apart from proxies, is AAA step 17, which reparents
children under the formatting element cloned from the AFE list.
Left in some debug code for next time.
Bug: T178632
Change-Id: Id77d21d99748e94c064ef24c43ee0033de627b8e
2017-11-17 11:15:59 +00:00
|
|
|
/**
 * Write a debug message, prefixed with "[RCM] ", when tracing is enabled.
 *
 * @param string $msg
 */
private function trace( $msg ) {
	if ( !$this->trace ) {
		return;
	}
	wfDebug( "[RCM] $msg" );
}
|
|
|
|
|
|
2017-02-17 04:10:15 +00:00
|
|
|
/**
 * Insert or reparent an element, creating p-wrappers or splitting the tag
 * stack as necessary.
 *
 * The parent of the insertion may be one of:
 *
 *   - A: A body or blockquote (!!needsPWrapping)
 *   - B: A p-wrapper (!!isPWrapper)
 *   - C: A descendant of a p-wrapper (!!ancestorPNode)
 *     - CS: With splittable formatting elements in the stack region up to
 *       the p-wrapper
 *     - CU: With one or more unsplittable elements in the stack region up
 *       to the p-wrapper
 *   - D: Not a descendant of a p-wrapper (!ancestorPNode)
 *     - DS: With splittable formatting elements in the stack region up to
 *       the body or blockquote
 *     - DU: With one or more unsplittable elements in the stack region up
 *       to the body or blockquote
 *
 * The inserted element may be one of:
 *   - b: block
 *   - i: inline
 *
 * The combinations are handled as follows:
 *
 *   - A/i: Create a p-wrapper, insert under it
 *   - A/b: Insert as normal
 *   - B/i: Insert as normal
 *   - B/b: Close the p-wrapper, insert under the body/blockquote (the wrap
 *     base) instead
 *   - C/i: Insert as normal
 *   - CS/b: Split the tag stack, insert the block under cloned formatting
 *     elements which have the wrap base (the parent of the p-wrap) as
 *     their ultimate parent.
 *   - CU/b: Disable the p-wrap, by reparenting the currently open child
 *     of the p-wrap under the p-wrap's parent. Then insert the block as
 *     normal.
 *   - D/b: Insert as normal
 *   - DS/i: Split the tag stack, creating a new p-wrapper as the ultimate
 *     parent of the formatting elements thus cloned. The parent of the
 *     p-wrapper is the body or blockquote.
 *   - DU/i: Insert as normal
 *
 * Metadata elements (other than the table of contents marker) are always
 * inserted as normal, whatever the context.
 *
 * FIXME: fostering ($preposition == BEFORE) is mostly done by inserting as
 * normal, the full algorithm is not followed.
 *
 * @param int $preposition
 * @param Element|SerializerNode|null $refElement
 * @param Element $element
 * @param bool $void
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function insertElement( $preposition, $refElement, Element $element, $void,
	$sourceStart, $sourceLength
) {
	[ $parent, $newRef ] = $this->getParentForInsert( $preposition, $refElement );
	$parentData = $parent->snData;
	$elementName = $element->htmlName;

	// Classify the insertion once, before any branch mutates the tree
	$under = $preposition === TreeBuilder::UNDER;
	$inline = isset( self::ONLY_INLINE_ELEMENTS[$elementName] );
	$isMetadata = isset( self::METADATA_ELEMENTS[$elementName] )
		&& !self::isTableOfContentsMarker( $element );
	$hasPWrapAncestor = (bool)$parentData->ancestorPNode;

	if ( $isMetadata ) {
		// The element is a metadata element, that we allow to appear in
		// both inline and block contexts.
		$this->trace( 'insert metadata' );
	} elseif ( $under && !$inline && $parentData->isPWrapper ) {
		// [B/b] A non-inline element under a p-wrapper: close the wrapper
		// and insert into the wrapper's parent instead
		$this->trace( 'insert B/b' );
		$parent = $this->serializer->getParentNode( $parent );
		$parentData = $parent->snData;
		$parentData->childPElement = null;
		$newRef = $refElement->userData;
	} elseif ( $under && $parentData->isSplittable && $hasPWrapAncestor !== $inline ) {
		// [CS/b, DS/i] The parent is splittable and we have either an
		// inline element in block context or a block element under a
		// p-wrapper: split the tag stack.
		$this->trace( $inline ? 'insert DS/i' : 'insert CS/b' );
		$newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
		$parent = $newRef;
		$parentData = $parent->snData;
	} elseif ( $under && $inline && $parentData->needsPWrapping ) {
		// [A/i] An inline element directly in body/blockquote needs a
		// new p-wrapper
		$this->trace( 'insert A/i' );
		$newRef = $this->insertPWrapper( $newRef, $sourceStart );
		$parent = $newRef;
		$parentData = $parent->snData;
	} elseif ( $hasPWrapAncestor && !$inline ) {
		// [CU/b] A non-inline element that still has an ancestor p-wrap
		// (no split was possible): disable that p-wrap
		$this->trace( 'insert CU/b' );
		$this->disablePWrapper( $parent, $sourceStart );
	} else {
		// [A/b, B/i, C/i, D/b, DU/i] insert as normal
		$this->trace( 'insert normal' );
	}

	// An element with element children is a non-blank element
	$parentData->nonblankNodeCount++;

	// Insert the element downstream and so initialise its userData
	$this->serializer->insertElement(
		$preposition,
		$newRef,
		$element,
		$void,
		$sourceStart,
		$sourceLength
	);

	// Initialise snData, keeping any data that is already attached
	$elementNode = $element->userData;
	if ( !$elementNode->snData ) {
		$elementNode->snData = new RemexMungerData;
	}
	$elementData = $elementNode->snData;

	if ( isset( self::FORMATTING_ELEMENTS[$elementName] )
		&& ( $parentData->isPWrapper || $parentData->isSplittable )
	) {
		$elementData->isSplittable = true;
	}

	// Inherit the nearest p-wrapper: the parent itself, or the parent's own
	$pWrapAncestor = $parentData->isPWrapper ? $parent : $parentData->ancestorPNode;
	if ( $pWrapAncestor ) {
		$elementData->ancestorPNode = $pWrapAncestor;
	}

	// Inherit the wrap base: the parent's own, or the parent if it is one
	$wrapBase = $parentData->wrapBaseNode
		?: ( $parentData->needsPWrapping ? $parent : null );
	if ( $wrapBase ) {
		$elementData->wrapBaseNode = $wrapBase;
	}

	if ( in_array( $elementName, [ 'body', 'blockquote', 'html' ], true ) ) {
		$elementData->needsPWrapping = true;
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Clone the nodes in a stack range and return the new parent.
 *
 * The range runs from $parentNode up to, but not including, its ancestor
 * p-wrapper, or its wrap base when there is no ancestor p-wrapper. The
 * clones are created outermost first, under a fresh p-wrapper when $inline
 * is true and directly under the wrap base otherwise. Original nodes whose
 * nonblank node count is zero are removed afterwards.
 *
 * @param SerializerNode $parentNode
 * @param bool $inline
 * @param int $pos The source position
 * @return SerializerNode The innermost clone, or the clone parent itself
 *   when the range was empty
 * @throws InvalidArgumentException If the root is reached before the end
 *   of the clone range
 */
private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
	$parentData = $parentNode->snData;
	$wrapBase = $parentData->wrapBaseNode;
	$pWrap = $parentData->ancestorPNode;
	$cloneEnd = $pWrap ?: $wrapBase;

	// Collect the range innermost first, noting which nodes are empty
	$rootNode = $this->serializer->getRootNode();
	$stackRange = [];
	$emptyNodes = [];
	for ( $cursor = $parentNode; $cursor !== $cloneEnd; $cursor = $ancestor ) {
		$ancestor = $this->serializer->getParentNode( $cursor );
		if ( $ancestor === $rootNode ) {
			throw new InvalidArgumentException( 'Did not find end of clone range' );
		}
		$stackRange[] = $cursor;
		if ( $cursor->snData->nonblankNodeCount === 0 ) {
			$emptyNodes[] = $cursor;
			$ancestor->snData->nonblankNodeCount--;
		}
	}

	if ( $inline ) {
		$pWrap = $this->insertPWrapper( $wrapBase, $pos );
		$attachTo = $pWrap;
	} else {
		if ( $pWrap ) {
			// End the p-wrap which was open, cancel the diversion
			$wrapBase->snData->childPElement = null;
		}
		$pWrap = null;
		$attachTo = $wrapBase;
	}

	// Rebuild the range outermost first, each clone nested in the previous
	foreach ( array_reverse( $stackRange ) as $oldNode ) {
		$oldData = $oldNode->snData;
		$clone = new Element( $oldNode->namespace, $oldNode->name, $oldNode->attrs );
		$this->serializer->insertElement(
			TreeBuilder::UNDER,
			$attachTo,
			$clone,
			false,
			$pos,
			0
		);
		// Divert future insertions under the old element into its clone
		$oldData->currentCloneElement = $clone;

		$cloneData = new RemexMungerData;
		if ( $pWrap ) {
			$cloneData->ancestorPNode = $pWrap;
		}
		$cloneData->isSplittable = true;
		$cloneData->wrapBaseNode = $wrapBase;
		$cloneData->isPWrapper = $oldData->isPWrapper;

		$cloneNode = $clone->userData;
		$cloneNode->snData = $cloneData;

		$attachTo->snData->nonblankNodeCount++;
		$attachTo = $cloneNode;
	}

	foreach ( $emptyNodes as $emptyNode ) {
		// removeNode() takes an Element, so wrap the node in a stand-in
		$standIn = new Element( $emptyNode->namespace, $emptyNode->name, $emptyNode->attrs );
		$standIn->userData = $emptyNode;
		$this->serializer->removeNode( $standIn, $pos );
	}

	// @phan-suppress-next-line PhanTypeMismatchReturnNullable False positive
	return $attachTo;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Find the ancestor of $node (possibly $node itself) which is a child of
 * a p-wrapper, and reparent it under the p-wrapper's parent, so that it
 * ends up after the end of the p-wrapper.
 *
 * Nothing is done unless the p-wrapper is the last child of its parent.
 *
 * @param SerializerNode $node
 * @param int $sourceStart
 */
private function disablePWrapper( SerializerNode $node, $sourceStart ) {
	$pWrapNode = $node->snData->ancestorPNode;
	$newParent = $this->serializer->getParentNode( $pWrapNode );
	if ( $this->serializer->getLastChild( $newParent ) !== $pWrapNode ) {
		// Fostering or something? Abort!
		return;
	}

	// Climb from $node to the direct child of the p-wrapper, detaching
	// every node on the way from the p-wrapper
	$victim = $node;
	while ( true ) {
		$victim->snData->ancestorPNode = null;
		$ancestor = $this->serializer->getParentNode( $victim );
		if ( $ancestor === $pWrapNode ) {
			break;
		}
		$victim = $ancestor;
	}

	// Make a fake Element to use in a reparenting operation
	$victimElement = new Element( $victim->namespace, $victim->name, $victim->attrs );
	$victimElement->userData = $victim;

	// Reparent
	$this->serializer->insertElement(
		TreeBuilder::UNDER,
		$newParent,
		$victimElement,
		false,
		$sourceStart,
		0
	);

	// Decrement nonblank node count
	$pWrapNode->snData->nonblankNodeCount--;

	// Cancel the diversion so that no more elements are inserted under this p-wrap
	$newParent->snData->childPElement = null;
}
|
|
|
|
|
|
|
|
|
|
/**
 * End an element. If the element still has an open p-wrapper child, that
 * wrapper is ended first. Afterwards the munger data and the serializer
 * node are detached from the element.
 */
public function endTag( Element $element, $sourceStart, $sourceLength ) {
	$openPWrapper = $element->userData->snData->childPElement;
	if ( $openPWrapper ) {
		$this->endTag( $openPWrapper, $sourceStart, 0 );
	}

	$this->serializer->endTag( $element, $sourceStart, $sourceLength );

	// Drop the references in both directions once the element is closed
	$element->userData->snData = null;
	$element->userData = null;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Proxy: forward the doctype event to the serializer unchanged.
 */
public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
	$this->serializer->doctype(
		$name,
		$public,
		$system,
		$quirks,
		$sourceStart,
		$sourceLength
	);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Insert a comment at the reference node chosen by getParentForInsert(),
 * so that comments follow the same diversions as other insertions.
 */
public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
	$target = $this->getParentForInsert( $preposition, $refElement );
	// Only the reference node (second item) is needed here
	$this->serializer->comment(
		$preposition,
		$target[1],
		$text,
		$sourceStart,
		$sourceLength
	);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Proxy: forward the error report to the serializer unchanged.
 */
public function error( $text, $pos ) {
	$this->serializer->error(
		$text,
		$pos
	);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Proxy: forward the attribute merge to the serializer unchanged.
 */
public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
	$this->serializer->mergeAttributes(
		$element,
		$attrs,
		$sourceStart
	);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Proxy: forward the node removal to the serializer unchanged.
 */
public function removeNode( Element $element, $sourceStart ) {
	$this->serializer->removeNode(
		$element,
		$sourceStart
	);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Move all children of $element under $newParent, and insert $newParent
 * under $element.
 *
 * When $element has an open p-wrapper, the whole request is redirected to
 * the p-wrapper, so that e.g.
 *   <blockquote><mw:p-wrap>...</mw:p-wrap></blockquote>
 * becomes
 *   <blockquote><mw:p-wrap><i>...</i></mw:p-wrap></blockquote>
 *
 * The formatting element should not be the parent of the p-wrap. Without
 * this special case, the insertElement() of the <i> would be diverted
 * into the p-wrapper, causing infinite recursion (T178632).
 */
public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
	$oldParentNode = $element->userData;
	$openPWrapper = $oldParentNode->snData->childPElement;
	if ( $openPWrapper ) {
		// Reparent under the p-wrapper instead
		$this->reparentChildren( $openPWrapper, $newParent, $sourceStart );
		return;
	}

	// Detach the children before inserting the new parent under the old one
	$movedChildren = $oldParentNode->children;
	$oldParentNode->children = [];
	$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );

	$newParentNode = $newParent->userData;
	$newParentId = $newParentNode->id;
	foreach ( $movedChildren as $child ) {
		// Only object children carry a parentId to update
		if ( !is_object( $child ) ) {
			continue;
		}
		$this->trace( "reparent <{$child->name}>" );
		$child->parentId = $newParentId;
	}
	$newParentNode->children = $movedChildren;
}
|
2021-12-21 03:26:38 +00:00
|
|
|
|
|
|
|
|
/**
 * Helper function to match the Parser::TOC_PLACEHOLDER.
 *
 * The element matches when it is a <meta> whose "property" attribute is
 * exactly 'mw:PageProp/toc'. Any other attributes are ignored, since
 * Parsoid's version of this placeholder might include additional ones.
 *
 * Declared static because it reads no instance state and is invoked as
 * self::isTableOfContentsMarker().
 *
 * @param Element $element
 * @return bool If the given element is a Parser::TOC_PLACEHOLDER
 */
private static function isTableOfContentsMarker( Element $element ): bool {
	// Keep this in sync with Parser::TOC_PLACEHOLDER
	return (
		$element->htmlName === 'meta' &&
		isset( $element->attrs['property'] ) &&
		$element->attrs['property'] === 'mw:PageProp/toc'
	);
}
|
2017-02-17 04:10:15 +00:00
|
|
|
}
|