wiki.techinc.nl/includes/tidy/RemexCompatMunger.php
Thiemo Mättig 91a920fd85 Remove auto-generated "Constructor" documentation on constructors
Having such comments is worse than not having them. They add zero
information. But you must read the text to understand there is
nothing you don't already know from the class and the method name.

This is similar to I994d11e. Even more trivial, because this here is
about comments that don't say anything but "constructor".

Change-Id: I474dcdb5997bea3aafd11c0760ee072dfaff124c
2017-07-21 12:19:30 +02:00

472 lines
15 KiB
PHP

<?php
namespace MediaWiki\Tidy;
use RemexHtml\HTMLData;
use RemexHtml\Serializer\Serializer;
use RemexHtml\Serializer\SerializerNode;
use RemexHtml\Tokenizer\Attributes;
use RemexHtml\Tokenizer\PlainAttributes;
use RemexHtml\TreeBuilder\TreeBuilder;
use RemexHtml\TreeBuilder\TreeHandler;
use RemexHtml\TreeBuilder\Element;
/**
* @internal
*/
class RemexCompatMunger implements TreeHandler {
private static $onlyInlineElements = [
"a" => true,
"abbr" => true,
"acronym" => true,
"applet" => true,
"b" => true,
"basefont" => true,
"bdo" => true,
"big" => true,
"br" => true,
"button" => true,
"cite" => true,
"code" => true,
"dfn" => true,
"em" => true,
"font" => true,
"i" => true,
"iframe" => true,
"img" => true,
"input" => true,
"kbd" => true,
"label" => true,
"legend" => true,
"map" => true,
"object" => true,
"param" => true,
"q" => true,
"rb" => true,
"rbc" => true,
"rp" => true,
"rt" => true,
"rtc" => true,
"ruby" => true,
"s" => true,
"samp" => true,
"select" => true,
"small" => true,
"span" => true,
"strike" => true,
"strong" => true,
"sub" => true,
"sup" => true,
"textarea" => true,
"tt" => true,
"u" => true,
"var" => true,
];
private static $formattingElements = [
'a' => true,
'b' => true,
'big' => true,
'code' => true,
'em' => true,
'font' => true,
'i' => true,
'nobr' => true,
's' => true,
'small' => true,
'strike' => true,
'strong' => true,
'tt' => true,
'u' => true,
];
/**
* @param Serializer $serializer
*/
public function __construct( Serializer $serializer ) {
$this->serializer = $serializer;
}
public function startDocument( $fragmentNamespace, $fragmentName ) {
$this->serializer->startDocument( $fragmentNamespace, $fragmentName );
$root = $this->serializer->getRootNode();
$root->snData = new RemexMungerData;
$root->snData->needsPWrapping = true;
}
public function endDocument( $pos ) {
$this->serializer->endDocument( $pos );
}
private function getParentForInsert( $preposition, $refElement ) {
if ( $preposition === TreeBuilder::ROOT ) {
return [ $this->serializer->getRootNode(), null ];
} elseif ( $preposition === TreeBuilder::BEFORE ) {
$refNode = $refElement->userData;
return [ $this->serializer->getParentNode( $refNode ), $refNode ];
} else {
$refNode = $refElement->userData;
$refData = $refNode->snData;
if ( $refData->currentCloneElement ) {
// Follow a chain of clone links if necessary
$origRefData = $refData;
while ( $refData->currentCloneElement ) {
$refElement = $refData->currentCloneElement;
$refNode = $refElement->userData;
$refData = $refNode->snData;
}
// Cache the end of the chain in the requested element
$origRefData->currentCloneElement = $refElement;
} elseif ( $refData->childPElement ) {
$refElement = $refData->childPElement;
$refNode = $refElement->userData;
}
return [ $refNode, $refNode ];
}
}
/**
* Insert a p-wrapper
*
* @param SerializerNode $parent
* @param integer $sourceStart
* @return SerializerNode
*/
private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
$pWrap = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
$this->serializer->insertElement( TreeBuilder::UNDER, $parent, $pWrap, false,
$sourceStart, 0 );
$data = new RemexMungerData;
$data->isPWrapper = true;
$data->wrapBaseNode = $parent;
$pWrap->userData->snData = $data;
$parent->snData->childPElement = $pWrap;
return $pWrap->userData;
}
public function characters( $preposition, $refElement, $text, $start, $length,
$sourceStart, $sourceLength
) {
$isBlank = strspn( $text, "\t\n\f\r ", $start, $length ) === $length;
list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
$parentData = $parent->snData;
if ( $preposition === TreeBuilder::UNDER ) {
if ( $parentData->needsPWrapping && !$isBlank ) {
// Add a p-wrapper for bare text under body/blockquote
$refNode = $this->insertPWrapper( $refNode, $sourceStart );
$parent = $refNode;
$parentData = $parent->snData;
} elseif ( $parentData->isSplittable && !$parentData->ancestorPNode ) {
// The parent is splittable and in block mode, so split the tag stack
$refNode = $this->splitTagStack( $refNode, true, $sourceStart );
$parent = $refNode;
$parentData = $parent->snData;
}
}
if ( !$isBlank ) {
// Non-whitespace characters detected
$parentData->nonblankNodeCount++;
}
$this->serializer->characters( $preposition, $refNode, $text, $start,
$length, $sourceStart, $sourceLength );
}
/**
* Insert or reparent an element. Create p-wrappers or split the tag stack
* as necessary.
*
* Consider the following insertion locations. The parent may be:
*
* - A: A body or blockquote (!!needsPWrapping)
* - B: A p-wrapper (!!isPWrapper)
* - C: A descendant of a p-wrapper (!!ancestorPNode)
* - CS: With splittable formatting elements in the stack region up to
* the p-wrapper
* - CU: With one or more unsplittable elements in the stack region up
* to the p-wrapper
* - D: Not a descendant of a p-wrapper (!ancestorNode)
* - DS: With splittable formatting elements in the stack region up to
* the body or blockquote
* - DU: With one or more unsplittable elements in the stack region up
* to the body or blockquote
*
* And consider that we may insert two types of element:
* - b: block
* - i: inline
*
* We handle the insertion as follows:
*
* - A/i: Create a p-wrapper, insert under it
* - A/b: Insert as normal
* - B/i: Insert as normal
* - B/b: Close the p-wrapper, insert under the body/blockquote (wrap
* base) instead)
* - C/i: Insert as normal
* - CS/b: Split the tag stack, insert the block under cloned formatting
* elements which have the wrap base (the parent of the p-wrap) as
* their ultimate parent.
* - CU/b: Disable the p-wrap, by reparenting the currently open child
* of the p-wrap under the p-wrap's parent. Then insert the block as
* normal.
* - D/b: Insert as normal
* - DS/i: Split the tag stack, creating a new p-wrapper as the ultimate
* parent of the formatting elements thus cloned. The parent of the
* p-wrapper is the body or blockquote.
* - DU/i: Insert as normal
*
* FIXME: fostering ($preposition == BEFORE) is mostly done by inserting as
* normal, the full algorithm is not followed.
*
* @param integer $preposition
* @param Element|SerializerNode|null $refElement
* @param Element $element
* @param bool $void
* @param integer $sourceStart
* @param integer $sourceLength
*/
public function insertElement( $preposition, $refElement, Element $element, $void,
$sourceStart, $sourceLength
) {
list( $parent, $newRef ) = $this->getParentForInsert( $preposition, $refElement );
$parentData = $parent->snData;
$parentNs = $parent->namespace;
$parentName = $parent->name;
$elementName = $element->htmlName;
$inline = isset( self::$onlyInlineElements[$elementName] );
$under = $preposition === TreeBuilder::UNDER;
if ( $under && $parentData->isPWrapper && !$inline ) {
// [B/b] The element is non-inline and the parent is a p-wrapper,
// close the parent and insert into its parent instead
$newParent = $this->serializer->getParentNode( $parent );
$parent = $newParent;
$parentData = $parent->snData;
$pElement = $parentData->childPElement;
$parentData->childPElement = null;
$newRef = $refElement->userData;
$this->endTag( $pElement, $sourceStart, 0 );
} elseif ( $under && $parentData->isSplittable
&& (bool)$parentData->ancestorPNode !== $inline
) {
// [CS/b, DS/i] The parent is splittable and the current element is
// inline in block context, or if the current element is a block
// under a p-wrapper, split the tag stack.
$newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
$parent = $newRef;
$parentData = $parent->snData;
} elseif ( $under && $parentData->needsPWrapping && $inline ) {
// [A/i] If the element is inline and we are in body/blockquote,
// we need to create a p-wrapper
$newRef = $this->insertPWrapper( $newRef, $sourceStart );
$parent = $newRef;
$parentData = $parent->snData;
} elseif ( $parentData->ancestorPNode && !$inline ) {
// [CU/b] If the element is non-inline and (despite attempting to
// split above) there is still an ancestor p-wrap, disable that
// p-wrap
$this->disablePWrapper( $parent, $sourceStart );
}
// else [A/b, B/i, C/i, D/b, DU/i] insert as normal
// An element with element children is a non-blank element
$parentData->nonblankNodeCount++;
// Insert the element downstream and so initialise its userData
$this->serializer->insertElement( $preposition, $newRef,
$element, $void, $sourceStart, $sourceLength );
// Initialise snData
if ( !$element->userData->snData ) {
$elementData = $element->userData->snData = new RemexMungerData;
} else {
$elementData = $element->userData->snData;
}
if ( ( $parentData->isPWrapper || $parentData->isSplittable )
&& isset( self::$formattingElements[$elementName] )
) {
$elementData->isSplittable = true;
}
if ( $parentData->isPWrapper ) {
$elementData->ancestorPNode = $parent;
} elseif ( $parentData->ancestorPNode ) {
$elementData->ancestorPNode = $parentData->ancestorPNode;
}
if ( $parentData->wrapBaseNode ) {
$elementData->wrapBaseNode = $parentData->wrapBaseNode;
} elseif ( $parentData->needsPWrapping ) {
$elementData->wrapBaseNode = $parent;
}
if ( $elementName === 'body'
|| $elementName === 'blockquote'
|| $elementName === 'html'
) {
$elementData->needsPWrapping = true;
}
}
/**
* Clone nodes in a stack range and return the new parent
*
* @param SerializerNode $parentNode
* @param bool $inline
* @param integer $pos The source position
* @return SerializerNode
*/
private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
$parentData = $parentNode->snData;
$wrapBase = $parentData->wrapBaseNode;
$pWrap = $parentData->ancestorPNode;
if ( !$pWrap ) {
$cloneEnd = $wrapBase;
} else {
$cloneEnd = $parentData->ancestorPNode;
}
$serializer = $this->serializer;
$node = $parentNode;
$root = $serializer->getRootNode();
$nodes = [];
$removableNodes = [];
$haveContent = false;
while ( $node !== $cloneEnd ) {
$nextParent = $serializer->getParentNode( $node );
if ( $nextParent === $root ) {
throw new \Exception( 'Did not find end of clone range' );
}
$nodes[] = $node;
if ( $node->snData->nonblankNodeCount === 0 ) {
$removableNodes[] = $node;
$nextParent->snData->nonblankNodeCount--;
}
$node = $nextParent;
}
if ( $inline ) {
$pWrap = $this->insertPWrapper( $wrapBase, $pos );
$node = $pWrap;
} else {
if ( $pWrap ) {
// End the p-wrap which was open, cancel the diversion
$wrapBase->snData->childPElement = null;
}
$pWrap = null;
$node = $wrapBase;
}
for ( $i = count( $nodes ) - 1; $i >= 0; $i-- ) {
$oldNode = $nodes[$i];
$oldData = $oldNode->snData;
$nodeParent = $node;
$element = new Element( $oldNode->namespace, $oldNode->name, $oldNode->attrs );
$this->serializer->insertElement( TreeBuilder::UNDER, $nodeParent,
$element, false, $pos, 0 );
$oldData->currentCloneElement = $element;
$newNode = $element->userData;
$newData = $newNode->snData = new RemexMungerData;
if ( $pWrap ) {
$newData->ancestorPNode = $pWrap;
}
$newData->isSplittable = true;
$newData->wrapBaseNode = $wrapBase;
$newData->isPWrapper = $oldData->isPWrapper;
$nodeParent->snData->nonblankNodeCount++;
$node = $newNode;
}
foreach ( $removableNodes as $rNode ) {
$fakeElement = new Element( $rNode->namespace, $rNode->name, $rNode->attrs );
$fakeElement->userData = $rNode;
$this->serializer->removeNode( $fakeElement, $pos );
}
return $node;
}
/**
* Find the ancestor of $node which is a child of a p-wrapper, and
* reparent that node so that it is placed after the end of the p-wrapper
*/
private function disablePWrapper( SerializerNode $node, $sourceStart ) {
$nodeData = $node->snData;
$pWrapNode = $nodeData->ancestorPNode;
$newParent = $this->serializer->getParentNode( $pWrapNode );
if ( $pWrapNode !== $this->serializer->getLastChild( $newParent ) ) {
// Fostering or something? Abort!
return;
}
$nextParent = $node;
do {
$victim = $nextParent;
$victim->snData->ancestorPNode = null;
$nextParent = $this->serializer->getParentNode( $victim );
} while ( $nextParent !== $pWrapNode );
// Make a fake Element to use in a reparenting operation
$victimElement = new Element( $victim->namespace, $victim->name, $victim->attrs );
$victimElement->userData = $victim;
// Reparent
$this->serializer->insertElement( TreeBuilder::UNDER, $newParent, $victimElement,
false, $sourceStart, 0 );
// Decrement nonblank node count
$pWrapNode->snData->nonblankNodeCount--;
// Cancel the diversion so that no more elements are inserted under this p-wrap
$newParent->snData->childPElement = null;
}
public function endTag( Element $element, $sourceStart, $sourceLength ) {
$data = $element->userData->snData;
if ( $data->childPElement ) {
$this->endTag( $data->childPElement, $sourceStart, 0 );
}
$this->serializer->endTag( $element, $sourceStart, $sourceLength );
$element->userData->snData = null;
$element->userData = null;
}
public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
$this->serializer->doctype( $name, $public, $system, $quirks,
$sourceStart, $sourceLength );
}
public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
$this->serializer->comment( $preposition, $refNode, $text,
$sourceStart, $sourceLength );
}
public function error( $text, $pos ) {
$this->serializer->error( $text, $pos );
}
public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
$this->serializer->mergeAttributes( $element, $attrs, $sourceStart );
}
public function removeNode( Element $element, $sourceStart ) {
$this->serializer->removeNode( $element, $sourceStart );
}
public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
$self = $element->userData;
$children = $self->children;
$self->children = [];
$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );
$newParentNode = $newParent->userData;
$newParentId = $newParentNode->id;
foreach ( $children as $child ) {
if ( is_object( $child ) ) {
$child->parentId = $newParentId;
}
}
$newParentNode->children = $children;
}
}