wiki.techinc.nl/includes/tidy/RemexCompatFormatter.php
Arlo Breault c44a3958a3 Don't apply French spacing in raw text elements
This also means we don't need to take special care for French spacing in
attributes, since it's no longer applied there.

Adds a test that captures this change.

Note that the test "Nowiki and french spacing" wonders whether this
escaping should be applied to nowiki content.

Bug: T255007
Change-Id: Ic8965e81882d7cf024bdced437f684064a30ac86
2021-02-16 19:26:29 -05:00

86 lines
2.2 KiB
PHP

<?php
namespace MediaWiki\Tidy;
use RemexHtml\HTMLData;
use RemexHtml\Serializer\HtmlFormatter;
use RemexHtml\Serializer\SerializerNode;
/**
 * HTML serializer formatter used by MediaWiki's Remex-based tidy code.
 *
 * Builds on RemexHtml's HtmlFormatter and adjusts its output:
 *  - U+00A0 is written as "&#160;" in both text and attribute values,
 *    while "&" is removed from both escape tables;
 *  - an optional "textProcessor" callback is run over character data that
 *    is not inside an HTML raw text element;
 *  - nodes flagged as p-wrappers become a real <p> only when they hold
 *    non-blank content;
 *  - attribute-less, whitespace-only <li>, <p> and <tr> elements are tagged
 *    with class="mw-empty-elt";
 *  - HTML void elements are closed with " />".
 *
 * @internal
 */
class RemexCompatFormatter extends HtmlFormatter {
	/**
	 * Element names that get class="mw-empty-elt" when they are serialized
	 * with no attributes and nothing but whitespace inside.
	 *
	 * @var array<string,bool>
	 */
	private static $markedEmptyElements = [
		'li' => true,
		'p' => true,
		'tr' => true,
	];

	/**
	 * Optional callback applied to escaped text outside raw text elements;
	 * null disables the extra processing.
	 * NOTE(review): presumably the hook used for French spacing -- confirm
	 * against the code that constructs this formatter.
	 *
	 * @var ?callable
	 */
	private $textProcessor;

	/**
	 * @param array $options Forwarded unchanged to the parent constructor.
	 *   The optional 'textProcessor' key may hold a callable that takes the
	 *   text and returns the replacement text.
	 */
	public function __construct( $options = [] ) {
		parent::__construct( $options );

		$this->textProcessor = $options['textProcessor'] ?? null;

		// Write non-breaking spaces as a numeric character reference.
		$nbsp = "\u{00A0}";
		$this->textEscapes[$nbsp] = '&#160;';
		$this->attributeEscapes[$nbsp] = '&#160;';

		// Ampersands are not escaped by this formatter.
		unset( $this->textEscapes['&'], $this->attributeEscapes['&'] );
	}

	/**
	 * Produce the document preamble, which for this formatter is empty.
	 *
	 * @param string|null $fragmentNamespace Unused.
	 * @param string|null $fragmentName Unused.
	 * @return string Always the empty string.
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		return '';
	}

	/**
	 * Serialize a run of character data.
	 *
	 * The parent formatter does the slicing and escaping. The result is then
	 * handed to the text processor, unless none is configured or the parent
	 * node is an HTML raw text element.
	 *
	 * @param SerializerNode $parent Node that contains the text.
	 * @param string $text Buffer holding the text.
	 * @param int $start Offset of the run within $text.
	 * @param int $length Length of the run.
	 * @return string Text to emit.
	 */
	public function characters( SerializerNode $parent, $text, $start, $length ) {
		$escaped = parent::characters( $parent, $text, $start, $length );

		if ( $this->textProcessor === null ) {
			return $escaped;
		}

		$insideRawText = $parent->namespace === HTMLData::NS_HTML
			&& isset( $this->rawTextElements[$parent->name] );
		if ( $insideRawText ) {
			// Raw text content is passed through without further processing.
			return $escaped;
		}

		return call_user_func( $this->textProcessor, $escaped );
	}

	/**
	 * Serialize an element together with its already-serialized contents.
	 *
	 * @param SerializerNode $parent Parent of the element (not used here).
	 * @param SerializerNode $node Element being serialized.
	 * @param string $contents Serialized children of $node.
	 * @return string Markup for the element.
	 */
	public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
		$info = $node->snData;
		if ( $info && $info->isPWrapper ) {
			// A p-wrapper only turns into a paragraph when it holds
			// non-blank nodes; otherwise its contents are emitted bare.
			return $info->nonblankNodeCount ? "<p>$contents</p>" : $contents;
		}

		$tag = $node->name;
		$attributes = $node->attrs;

		if ( isset( self::$markedEmptyElements[$tag] )
			&& $attributes->count() === 0
			&& strspn( $contents, "\t\n\f\r " ) === strlen( $contents )
		) {
			// Flag the empty element with a class instead of emitting it as is.
			return "<{$tag} class=\"mw-empty-elt\">$contents</{$tag}>";
		}

		$markup = '<' . $tag;
		foreach ( $attributes->getValues() as $key => $value ) {
			$markup .= ' ' . $key . '="' . strtr( $value, $this->attributeEscapes ) . '"';
		}

		$isHtml = $node->namespace === HTMLData::NS_HTML;
		if ( $isHtml && isset( $this->voidElements[$tag] ) ) {
			// Void elements have no contents and no end tag.
			return $markup . ' />';
		}

		// For elements listed in prefixLfElements, contents that begin with a
		// newline get one more newline in front.
		// NOTE(review): presumably because an HTML parser drops the first
		// newline after such a start tag -- confirm against the parent class.
		$extraLf = $isHtml
			&& ( $contents[0] ?? '' ) === "\n"
			&& isset( $this->prefixLfElements[$tag] );

		return $markup . '>' . ( $extraLf ? "\n" : '' ) . $contents . '</' . $tag . '>';
	}
}