Add Html5Depurate tidy driver

Also document input format for MWTidy::tidy().

Change-Id: I77071d3db0524695c2baf9a4670ca2455438c83d
This commit is contained in:
Tim Starling 2015-09-03 14:46:48 +10:00 committed by Ori.livneh
parent 7ef1e5fb30
commit e9d523b9bd
3 changed files with 51 additions and 1 deletion

View file

@ -760,6 +760,7 @@ $wgAutoloadLocalClasses = array(
'MediaWiki\\Logger\\Monolog\\WikiProcessor' => __DIR__ . '/includes/debug/logger/monolog/WikiProcessor.php',
'MediaWiki\\Logger\\NullSpi' => __DIR__ . '/includes/debug/logger/NullSpi.php',
'MediaWiki\\Logger\\Spi' => __DIR__ . '/includes/debug/logger/Spi.php',
'MediaWiki\\Tidy\\Html5Depurate' => __DIR__ . '/includes/tidy/Html5Depurate.php',
'MediaWiki\\Tidy\\RaggettBase' => __DIR__ . '/includes/tidy/RaggettBase.php',
'MediaWiki\\Tidy\\RaggettExternal' => __DIR__ . '/includes/tidy/RaggettExternal.php',
'MediaWiki\\Tidy\\RaggettInternalHHVM' => __DIR__ . '/includes/tidy/RaggettInternalHHVM.php',

View file

@ -38,7 +38,8 @@ class MWTidy {
* If tidy isn't able to correct the markup, the original will be
* returned in all its glory with a warning comment appended.
*
* @param string $text Hideous HTML input
* @param string $text HTML input fragment. This should not contain a
* <body> or <html> tag.
* @return string Corrected HTML output
*/
public static function tidy( $text ) {
@ -110,6 +111,9 @@ class MWTidy {
case 'RaggettExternal':
self::$instance = new MediaWiki\Tidy\RaggettExternal( $config );
break;
case 'Html5Depurate':
self::$instance = new MediaWiki\Tidy\Html5Depurate( $config );
break;
default:
throw new MWException( "Invalid tidy driver: \"{$config['driver']}\"" );
}

View file

@ -0,0 +1,45 @@
<?php
namespace MediaWiki\Tidy;
use MWHttpRequest;
use Exception;
/**
 * Tidy driver that sends HTML to an Html5Depurate HTTP service for cleanup.
 *
 * Configuration keys read by this class (defaults are filled in by the
 * constructor):
 *  - url: endpoint the wrapped document is POSTed to
 *  - timeout: forwarded as the request's 'timeout' option
 *  - connectTimeout: forwarded as the request's 'connectTimeout' option
 */
class Html5Depurate extends TidyDriverBase {
	/**
	 * @param array $config Driver configuration. Keys supplied here take
	 *   precedence over the defaults listed below.
	 */
	public function __construct( array $config ) {
		// Array union: the caller's values win, missing keys get the defaults.
		parent::__construct( $config + array(
			'url' => 'http://localhost:4339/document',
			'timeout' => 10,
			'connectTimeout' => 0.5,
		) );
	}

	/**
	 * Clean up an HTML fragment by round-tripping it through the service.
	 *
	 * The fragment is wrapped in a minimal HTML document, POSTed as the
	 * 'text' field, and the content between the first "<body>" and the last
	 * "</body>" of the response is returned.
	 *
	 * @param string $text HTML fragment to clean up
	 * @return string The extracted body content, or the original $text with
	 *   a warning comment appended when no body could be located in the
	 *   response
	 * @throws Exception If the request fails or the service answers with an
	 *   HTTP status other than 200
	 */
	public function tidy( $text ) {
		$wrappedtext = '<!DOCTYPE html><html>' .
			'<body>' . $text . '</body></html>';

		// NOTE(review): $this->config is presumably populated by the parent
		// constructor from the array passed in __construct() - confirm.
		$req = MWHttpRequest::factory( $this->config['url'],
			array(
				'method' => 'POST',
				'timeout' => $this->config['timeout'],
				'connectTimeout' => $this->config['connectTimeout'],
				'postData' => array(
					'text' => $wrappedtext
				)
			) );

		$status = $req->execute();
		if ( !$status->isOK() ) {
			throw new Exception( "Error contacting depurate service: " . $status->getWikiText() );
		}

		// This point is only reached with an OK request status, so the status
		// text alone does not explain the failure; report the HTTP code too.
		$httpCode = $req->getStatus();
		if ( $httpCode !== 200 ) {
			throw new Exception( "Depurate returned error (HTTP $httpCode): " .
				$status->getWikiText() );
		}

		$result = $req->getContent();

		// Only the literal tags "<body>" and "</body>" are recognised. Using
		// the first opener and the last closer keeps any such text that occurs
		// inside the content itself.
		$startBody = strpos( $result, "<body>" );
		$endBody = strrpos( $result, "</body>" );
		if ( $startBody !== false && $endBody !== false && $endBody > $startBody ) {
			$startBody += strlen( "<body>" );
			return substr( $result, $startBody, $endBody - $startBody );
		}

		// Best effort: hand back the input unchanged, flagged with a comment.
		return $text . "\n<!-- Html5Depurate returned an invalid result -->";
	}
}