wiki.techinc.nl/includes/tidy/RemexDriver.php
C. Scott Ananian b1f53045d7 Bump wikimedia/remex-html to 2.3.2 and drop 2.3.1
This is a bug fix release of RemexHtml, required by the latest version
of Parsoid.

RemexHtml migrated to a new namespace in 2.3.2.  Since we don't
support aliases in our phan configuration in core, update all uses to
the new namespace to satisfy phan.

Depends-On: I30f01f4a2a5479bb82c9b952ffa68a478215828a
Depends-On: Iedf446635ee2112cfe637d8ebcf8092f0976bd17
Change-Id: I74fc929e4a66b28bfb1800ff0cd751c86e4a9f50
2021-08-08 18:07:29 -04:00

88 lines
2.5 KiB
PHP

<?php
namespace MediaWiki\Tidy;
use MediaWiki\Config\ServiceOptions;
use Wikimedia\RemexHtml\HTMLData;
use Wikimedia\RemexHtml\Serializer\Serializer;
use Wikimedia\RemexHtml\Serializer\SerializerWithTracer;
use Wikimedia\RemexHtml\Tokenizer\Tokenizer;
use Wikimedia\RemexHtml\TreeBuilder\Dispatcher;
use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder;
use Wikimedia\RemexHtml\TreeBuilder\TreeMutationTracer;
/**
 * Tidy driver that feeds text through a RemexHtml pipeline:
 * Tokenizer -> Dispatcher -> TreeBuilder -> (optional tracer / munger) -> Serializer.
 */
class RemexDriver extends TidyDriverBase {
	/** @var mixed 'treeMutationTrace' config value; tested for truthiness in tidy() */
	private $treeMutationTrace;

	/** @var mixed 'serializerTrace' config value; tested for truthiness in tidy() */
	private $serializerTrace;

	/** @var mixed 'mungerTrace' config value; handed to RemexCompatMunger as its second argument */
	private $mungerTrace;

	/** @var mixed 'pwrap' config value; when truthy, a RemexCompatMunger is placed in front of the serializer */
	private $pwrap;

	/** @internal */
	public const CONSTRUCTOR_OPTIONS = [
		'TidyConfig',
	];

	/**
	 * Read the driver configuration and fill in defaults for any missing keys.
	 *
	 * Defaults: treeMutationTrace, serializerTrace and mungerTrace are false;
	 * pwrap is true.
	 *
	 * @param ServiceOptions|array $options Passing an array is deprecated.
	 */
	public function __construct( $options ) {
		if ( !is_array( $options ) ) {
			$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
			$config = $options->get( 'TidyConfig' );
		} else {
			// Legacy call style: the array itself is the tidy configuration.
			wfDeprecated( __METHOD__ . " with array argument", '1.36' );
			$config = $options;
		}

		// Array union keeps any key the caller supplied and only adds the missing ones.
		$defaults = [
			'treeMutationTrace' => false,
			'serializerTrace' => false,
			'mungerTrace' => false,
			'pwrap' => true
		];
		$config += $defaults;

		$this->pwrap = $config['pwrap'];
		$this->mungerTrace = $config['mungerTrace'];
		$this->serializerTrace = $config['serializerTrace'];
		$this->treeMutationTrace = $config['treeMutationTrace'];

		parent::__construct( $config );
	}

	/** @inheritDoc */
	public function tidy( $text, ?callable $textProcessor = null ) {
		// Shared sink for every tracer enabled below.
		$logTrace = static function ( $msg ) {
			wfDebug( "RemexHtml: $msg" );
		};

		$formatter = new RemexCompatFormatter( [ 'textProcessor' => $textProcessor ] );

		// The serializer is the end of the pipeline; its result is what we return.
		$serializer = $this->serializerTrace
			? new SerializerWithTracer( $formatter, null, $logTrace )
			: new Serializer( $formatter );

		// Build the handler chain from the serializer outwards.
		$handler = $this->pwrap
			? new RemexCompatMunger( $serializer, $this->mungerTrace )
			: $serializer;
		if ( $this->treeMutationTrace ) {
			$handler = new TreeMutationTracer( $handler, $logTrace );
		}

		$treeBuilder = new TreeBuilder( $handler, [
			'ignoreErrors' => true,
			'ignoreNulls' => true,
		] );

		$tokenizer = new Tokenizer( new Dispatcher( $treeBuilder ), $text, [
			'ignoreErrors' => true,
			'ignoreCharRefs' => true,
			'ignoreNulls' => true,
			'skipPreprocess' => true,
		] );

		// Run the tokenizer with an HTML-namespace "body" fragment context.
		$tokenizer->execute( [
			'fragmentNamespace' => HTMLData::NS_HTML,
			'fragmentName' => 'body'
		] );

		return $serializer->getResult();
	}
}