wiki.techinc.nl/tests/parser/ParserTestResultNormalizer.php
Tim Starling a387fee397 Split up testHelpers.inc, break off fuzz testing
* Split up testHelpers.inc into one class per file, with the file named
  after the class per the usual convention. Put them in tests/parser
  since they are all parser-related, even though a couple are reused by
  other unit tests.
* Also rename parserTest.inc and parserTestsParserHook.php to follow the
  usual convention, and split off ParserTestResultNormalizer
* Move fuzz testing out to its own maintenance script. It's really not
  helpful to have fuzz testing, which is designed to run forever,
  exposed as a PHPUnit test.
* Increased fuzz test memory limit, and increased the memory headroom for
  getMemoryBreakdown(), since HHVM's ReflectionClass has an internal
  cache which uses quite a lot of memory.
* Temporarily switched a couple of ParserTest methods from private to
  public to support fuzz testing from a separate class -- I plan on
  replacing this interface in a subsequent commit.

Change-Id: Ib1a07e109ec1005bff2751b78eb4de35f2dfc472
2016-09-08 09:06:45 +10:00

87 lines
2.5 KiB
PHP

<?php
/**
* @file
* @ingroup Testing
*/
/**
 * Normalizes an HTML-like fragment for comparison purposes.
 *
 * The fragment is wrapped in <html><body>...</body></html>, parsed with an
 * XML parser, modified by the requested normalization methods and then
 * serialized back to XML. Input that is not well-formed XML is returned
 * untouched.
 */
class ParserTestResultNormalizer {
	/** @var DOMDocument Document holding the wrapped input */
	protected $doc;

	/** @var DOMXPath XPath evaluator bound to $doc */
	protected $xpath;

	/** @var bool True when the input could not be parsed as XML */
	protected $invalid = false;

	/** @var DOMNode|null The wrapping <body> element; null if parsing failed */
	protected $body;

	/**
	 * Parse $text, apply the named normalization methods in order and
	 * return the serialized result.
	 *
	 * @param string $text Fragment to normalize
	 * @param string[] $funcs Names of methods of this class to invoke,
	 *   e.g. 'removeTbody' or 'trimWhitespace'
	 * @return string The normalized XML serialization, or $text unchanged
	 *   if it could not be parsed as XML
	 */
	public static function normalize( $text, $funcs ) {
		$norm = new self( $text );
		if ( $norm->invalid ) {
			return $text;
		}
		foreach ( $funcs as $func ) {
			$norm->$func();
		}
		return $norm->serialize();
	}

	/**
	 * @param string $text Fragment to parse; it is wrapped in html/body
	 *   elements before being handed to the XML parser
	 */
	protected function __construct( $text ) {
		$this->doc = new DOMDocument( '1.0', 'utf-8' );

		// Note: parsing a supposedly XHTML document with an XML parser is not
		// guaranteed to give accurate results. For example, it may introduce
		// differences in the number of line breaks in <pre> tags.

		// Have libxml collect parse errors internally instead of raising PHP
		// warnings; a failed parse is reported through $this->invalid.
		$previousSetting = libxml_use_internal_errors( true );
		if ( !$this->doc->loadXML( '<html><body>' . $text . '</body></html>' ) ) {
			$this->invalid = true;
		}
		libxml_clear_errors();
		libxml_use_internal_errors( $previousSetting );

		$this->xpath = new DOMXPath( $this->doc );
		$this->body = $this->xpath->query( '//body' )->item( 0 );
	}

	/**
	 * Replace every <tbody> element with its own children, so that the
	 * children become direct children of the former parent of the <tbody>.
	 */
	protected function removeTbody() {
		foreach ( $this->xpath->query( '//tbody' ) as $tbody ) {
			$parent = $tbody->parentNode;
			// Hoist the children one by one, preserving their order
			while ( $tbody->firstChild ) {
				$child = $tbody->firstChild;
				$tbody->removeChild( $child );
				$parent->insertBefore( $child, $tbody );
			}
			$parent->removeChild( $tbody );
		}
	}

	/**
	 * The point of this function is to produce a normalized DOM in which
	 * Tidy's output matches the output of html5depurate. Tidy both trims
	 * and pretty-prints, so this requires fairly aggressive treatment.
	 *
	 * In particular, note that Tidy converts <pre>x</pre> to <pre>\nx\n</pre>,
	 * which theoretically affects display since the second line break is not
	 * ignored by compliant HTML parsers.
	 *
	 * Text nodes directly inside <pre> lose at most one line break from each
	 * end; all other text nodes are trimmed with trim(). Text nodes that end
	 * up empty are removed from the document.
	 */
	protected function trimWhitespace() {
		foreach ( $this->xpath->query( '//text()' ) as $child ) {
			if ( strtolower( $child->parentNode->nodeName ) === 'pre' ) {
				// Just trim one line break from the start and end.
				// Compare exactly one character: a substr_compare() call
				// without a length would compare the whole remaining string
				// and only match when the node is exactly "\n".
				if ( substr( $child->data, 0, 1 ) === "\n" ) {
					$child->data = substr( $child->data, 1 );
				}
				if ( substr( $child->data, -1 ) === "\n" ) {
					$child->data = substr( $child->data, 0, -1 );
				}
			} else {
				// Trim all whitespace
				$child->data = trim( $child->data );
			}
			if ( $child->data === '' ) {
				$child->parentNode->removeChild( $child );
			}
		}
	}

	/**
	 * Serialize the XML DOM for comparison purposes. This does not generate HTML.
	 *
	 * @return string Serialization of the <body> element with the body tags
	 *   themselves stripped
	 */
	protected function serialize() {
		// An empty body is serialized as the self-closed tag <body/>, so that
		// form has to be stripped as well to yield an empty string.
		return strtr( $this->doc->saveXML( $this->body ),
			[ '<body>' => '', '</body>' => '', '<body/>' => '' ] );
	}
}