This is a bug fix release of RemexHtml, required by the latest version of Parsoid. RemexHtml migrated to a new namespace in 2.3.2. Since we don't support aliases in our phan configuration in core, update all uses to the new namespace to satisfy phan. Depends-On: I30f01f4a2a5479bb82c9b952ffa68a478215828a Depends-On: Iedf446635ee2112cfe637d8ebcf8092f0976bd17 Change-Id: I74fc929e4a66b28bfb1800ff0cd751c86e4a9f50
88 lines
2.5 KiB
PHP
88 lines
2.5 KiB
PHP
<?php
|
|
|
|
namespace MediaWiki\Tidy;
|
|
|
|
use MediaWiki\Config\ServiceOptions;
|
|
use Wikimedia\RemexHtml\HTMLData;
|
|
use Wikimedia\RemexHtml\Serializer\Serializer;
|
|
use Wikimedia\RemexHtml\Serializer\SerializerWithTracer;
|
|
use Wikimedia\RemexHtml\Tokenizer\Tokenizer;
|
|
use Wikimedia\RemexHtml\TreeBuilder\Dispatcher;
|
|
use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder;
|
|
use Wikimedia\RemexHtml\TreeBuilder\TreeMutationTracer;
|
|
|
|
class RemexDriver extends TidyDriverBase {
	/** @var mixed Value of the 'treeMutationTrace' config key (false when not configured) */
	private $treeMutationTrace;

	/** @var mixed Value of the 'serializerTrace' config key (false when not configured) */
	private $serializerTrace;

	/** @var mixed Value of the 'mungerTrace' config key (false when not configured) */
	private $mungerTrace;

	/** @var mixed Value of the 'pwrap' config key (true when not configured) */
	private $pwrap;

	/** @internal */
	public const CONSTRUCTOR_OPTIONS = [ 'TidyConfig' ];

	/**
	 * Read the tidy configuration, fill in defaults for the keys this driver
	 * uses, and hand the resulting array to the parent constructor.
	 *
	 * @param ServiceOptions|array $options Either a ServiceOptions object that
	 *   provides 'TidyConfig', or (deprecated since 1.36) the configuration
	 *   array itself.
	 */
	public function __construct( $options ) {
		if ( !is_array( $options ) ) {
			$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
			$config = $options->get( 'TidyConfig' );
		} else {
			// Legacy call style: the array is used directly as the configuration.
			wfDeprecated( __METHOD__ . " with array argument", '1.36' );
			$config = $options;
		}

		// Array union keeps every key the caller supplied and only adds the
		// defaults for keys that are missing.
		$config = $config + [
			'treeMutationTrace' => false,
			'serializerTrace' => false,
			'mungerTrace' => false,
			'pwrap' => true
		];

		$this->pwrap = $config['pwrap'];
		$this->mungerTrace = $config['mungerTrace'];
		$this->serializerTrace = $config['serializerTrace'];
		$this->treeMutationTrace = $config['treeMutationTrace'];

		parent::__construct( $config );
	}

	/**
	 * @inheritDoc
	 *
	 * Tokenizes $text as an HTML fragment inside a "body" element and returns
	 * whatever the serializer produced.
	 */
	public function tidy( $text, ?callable $textProcessor = null ) {
		// Shared sink for the optional tracers; every message goes to the debug log.
		$logTrace = static function ( $msg ) {
			wfDebug( "RemexHtml: $msg" );
		};

		$compatFormatter = new RemexCompatFormatter( [ 'textProcessor' => $textProcessor ] );

		// The serializer is kept in its own variable because the result is read
		// from it at the end, regardless of what gets wrapped around it.
		$serializer = $this->serializerTrace
			? new SerializerWithTracer( $compatFormatter, null, $logTrace )
			: new Serializer( $compatFormatter );

		// Each optional stage wraps the previous handler; when a stage is
		// disabled the previous handler is passed through unchanged.
		$mungedHandler = $this->pwrap
			? new RemexCompatMunger( $serializer, $this->mungerTrace )
			: $serializer;
		$treeHandler = $this->treeMutationTrace
			? new TreeMutationTracer( $mungedHandler, $logTrace )
			: $mungedHandler;

		$treeBuilderOptions = [
			'ignoreErrors' => true,
			'ignoreNulls' => true,
		];
		$tokenizerOptions = [
			'ignoreErrors' => true,
			'ignoreCharRefs' => true,
			'ignoreNulls' => true,
			'skipPreprocess' => true,
		];

		$treeBuilder = new TreeBuilder( $treeHandler, $treeBuilderOptions );
		$tokenizer = new Tokenizer( new Dispatcher( $treeBuilder ), $text, $tokenizerOptions );

		$tokenizer->execute( [
			'fragmentNamespace' => HTMLData::NS_HTML,
			'fragmentName' => 'body'
		] );

		return $serializer->getResult();
	}
}
|