wiki.techinc.nl/tests/parser/fuzzTest.php
Tim Starling 20d06b34bb Safer autoloading with respect to file-scope code
Many files were in the autoloader despite having potentially harmful
file-scope code.

* Exclude all CommandLineInc maintenance scripts from the autoloader.
* Introduce a "NO_AUTOLOAD" tag which excludes the file containing it
  from the autoloader. Use it on CommandLineInc.php and a few
  suspicious-looking files without classes in case they are refactored
  to add classes in the future.
* Add a test which parses all non-PSR4 class files and confirms that
  they do not contain dangerous file-scope code. It's slow (15s) but
  its results were enlightening.
* Several maintenance scripts define constants in the file scope,
  intending to modify the behaviour of MediaWiki. Either move the
  define() to a later setup function, or protect with NO_AUTOLOAD.
* Use require_once consistently with Maintenance.php and
  doMaintenance.php, per the original convention which is supposed to
  allow one maintenance script to use the class of another maintenance
  script. Using require breaks autoloading of these maintenance class
  files.
* When Maintenance.php is included, check if MediaWiki has already
  started, and if so, return early. Revert the fix for T250003 which
  is incompatible with this safety measure. Hopefully it was superseded
  by splitting out the class file.
* In runScript.php add a redundant PHP_SAPI check since it does some
  things in file-scope code before any other check will be run.
* Change the if(false) class_alias(...) to something more hackish and
  more compatible with the new test.
* Some site-related scripts found Maintenance.php in a non-standard way.
  Use the standard way.
* fileOpPerfTest.php called error_reporting(). Probably debugging code
  left in; removed.
* Moved mediawiki.compress.7z registration from the class file to the
  caller.

Change-Id: I1b1be90343a5ab678df6f1b1bdd03319dcf6537f
2021-01-11 11:59:36 +11:00

199 lines
4.9 KiB
PHP

<?php
use Wikimedia\ScopedCallback;
require_once __DIR__ . '/../../maintenance/Maintenance.php';
/**
 * Maintenance script that repeatedly feeds randomly assembled wikitext to the
 * parser and reports every input that makes it throw.
 *
 * The random input is built from fragments of a dictionary, which is extracted
 * from the input sections of one or more parser test files. Every run is
 * seeded, so a failure can be reproduced by passing the reported seed via
 * --seed.
 */
class ParserFuzzTest extends Maintenance {
	/** @var ParserTestRunner Runner used for setup/teardown and to obtain the parser; created in execute() */
	private $parserTest;

	/** @var int Upper bound for the randomly chosen target length of a test input, in bytes */
	private $maxFuzzTestLength = 300;

	/** @var int Memory budget in MiB; the run stops once reported usage reaches 100% of it */
	private $memoryLimit = 100;

	/** @var int Seed of the most recently generated test; incremented before each test */
	private $seed;

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Run a fuzz test on the parser, until it segfaults ' .
			'or throws an exception' );
		$this->addOption( 'file', 'Use the specified file as a dictionary, ' .
			' or leave blank to use parserTests.txt', false, true, true );
		$this->addOption( 'seed', 'Start the fuzz test from the specified seed', false, true );
	}

	public function finalSetup() {
		// Make RequestContext::resetMain() happy
		define( 'MW_PARSER_TEST', 1 );

		self::requireTestsAutoloader();
		TestSetup::applyInitialConfig();
	}

	/**
	 * Read the command line options, create the test runner and start fuzzing.
	 */
	public function execute() {
		$files = $this->getOption( 'file', [ __DIR__ . '/parserTests.txt' ] );
		// fuzzTest() pre-increments the seed, so start one below the requested value.
		$this->seed = intval( $this->getOption( 'seed', 1 ) ) - 1;
		$this->parserTest = new ParserTestRunner(
			new MultiTestRecorder(),
			[]
		);
		$this->fuzzTest( $files );
	}

	/**
	 * Run a fuzz test series
	 * Draw input from a set of test files
	 *
	 * Loops until the memory budget ($memoryLimit) is exhausted. Parser
	 * failures are printed together with the seed and the offending input,
	 * and the series then continues with the next seed.
	 *
	 * @param array $filenames Paths of the parser test files to build the dictionary from
	 */
	public function fuzzTest( $filenames ) {
		$dict = $this->getFuzzInput( $filenames );
		$dictSize = strlen( $dict );
		if ( $dictSize === 0 ) {
			// With an empty dictionary the generator below could never reach
			// its target length and would spin forever.
			$this->fatalError( 'No fuzz input could be extracted from the given file(s)' );
		}
		$logMaxLength = log( $this->maxFuzzTestLength );

		// NOTE(review): presumably each call returns a scoped teardown handle that
		// wraps the previous one; it is only held here, never consumed explicitly.
		$teardown = $this->parserTest->staticSetup();
		$teardown = $this->parserTest->setupDatabase( $teardown );
		$teardown = $this->parserTest->setupUploads( $teardown );

		$fakeTest = [
			'test' => '',
			'desc' => '',
			'input' => '',
			'result' => '',
			'options' => '',
			'config' => ''
		];

		// Allow PHP twice the budget so that usage can be reported before PHP itself aborts.
		ini_set( 'memory_limit', (string)( $this->memoryLimit * 1048576 * 2 ) );

		$numTotal = 0;
		$numSuccess = 0;
		$user = new User;
		$opts = ParserOptions::newFromUser( $user );
		$title = Title::makeTitle( NS_MAIN, 'Parser_test' );

		while ( true ) {
			// Generate test input
			mt_srand( ++$this->seed );
			$totalLength = mt_rand( 1, $this->maxFuzzTestLength );
			$input = '';

			// Append dictionary fragments ("hairs") until the target length is
			// reached; the last fragment may overshoot it.
			while ( strlen( $input ) < $totalLength ) {
				// Fragment length is log-uniform in [1, maxFuzzTestLength], capped at the dictionary size.
				$logHairLength = mt_rand( 0, 1000000 ) / 1000000 * $logMaxLength;
				$hairLength = min( intval( exp( $logHairLength ) ), $dictSize );
				$offset = mt_rand( 0, $dictSize - $hairLength );
				$input .= substr( $dict, $offset, $hairLength );
			}

			$perTestTeardown = $this->parserTest->perTestSetup( $fakeTest );
			$parser = $this->parserTest->getParser();

			// Run the test
			$exception = null;
			try {
				$parser->parse( $input, $title, $opts );
			} catch ( Throwable $e ) {
				// Throwable rather than Exception: an Error (e.g. TypeError) raised
				// by the parser is a finding too and must not end the run before
				// the seed and input have been reported.
				$exception = $e;
			}

			if ( $exception !== null ) {
				echo "Test failed with seed {$this->seed}\n";
				echo "Input:\n";
				printf( "string(%d) \"%s\"\n\n", strlen( $input ), $input );
				echo "$exception\n";
			} else {
				$numSuccess++;
			}
			$numTotal++;
			ScopedCallback::consume( $perTestTeardown );

			// Progress report and memory check every 100 tests
			if ( $numTotal % 100 == 0 ) {
				$usage = intval( memory_get_usage( true ) / $this->memoryLimit / 1048576 * 100 );
				echo "{$this->seed}: $numSuccess/$numTotal (mem: $usage%)\n";
				if ( $usage >= 100 ) {
					echo "Out of memory:\n";
					$memStats = $this->getMemoryBreakdown();
					foreach ( $memStats as $name => $size ) {
						echo "$name: $size\n";
					}
					return;
				}
			}
		}
	}

	/**
	 * Get a memory usage breakdown
	 *
	 * Estimates, via guessVarSize(), the size of every global variable, of
	 * the static properties and method-level static variables of every
	 * declared class, and of the static variables of every user-defined
	 * function.
	 *
	 * @return array Map of name => estimated size, sorted by ascending size
	 */
	private function getMemoryBreakdown() {
		$memStats = [];

		foreach ( $GLOBALS as $name => $value ) {
			$memStats['$' . $name] = $this->guessVarSize( $value );
		}

		$classes = get_declared_classes();
		foreach ( $classes as $class ) {
			$rc = new ReflectionClass( $class );
			$props = $rc->getStaticProperties();
			$memStats[$class] = $this->guessVarSize( $props );
			$methods = $rc->getMethods();
			foreach ( $methods as $method ) {
				$memStats[$class] += $this->guessVarSize( $method->getStaticVariables() );
			}
		}

		$functions = get_defined_functions();
		foreach ( $functions['user'] as $function ) {
			$rf = new ReflectionFunction( $function );
			$memStats["$function()"] = $this->guessVarSize( $rf->getStaticVariables() );
		}

		asort( $memStats );

		return $memStats;
	}

	/**
	 * Estimate the size of the input variable
	 *
	 * The estimate is the byte length of the serialized value. Values whose
	 * serialization throws an Exception count as 0.
	 *
	 * @param mixed $var
	 * @return int
	 */
	public function guessVarSize( $var ) {
		$length = 0;
		Wikimedia\suppressWarnings();
		try {
			$length = strlen( serialize( $var ) );
		} catch ( Exception $e ) {
			// Not serializable; leave the estimate at 0.
		} finally {
			// Always pair the suppressWarnings() call above, even when serialize() throws.
			Wikimedia\restoreWarnings();
		}

		return $length;
	}

	/**
	 * Get an input dictionary from a set of parser test files
	 *
	 * Collects the body of every "!! input" / "!! wikitext" section that is
	 * followed by a result/html section, each terminated by a newline.
	 *
	 * @param array $filenames
	 * @return string
	 */
	public function getFuzzInput( $filenames ) {
		$dict = '';

		foreach ( $filenames as $filename ) {
			$contents = file_get_contents( $filename );
			if ( $contents === false ) {
				$this->fatalError( "Unable to read fuzz input file: $filename" );
			}

			preg_match_all(
				'/!!\s*(input|wikitext)\n(.*?)\n!!\s*(result|html|html\/\*|html\/php)/s',
				$contents,
				$matches
			);

			// Group 1 is only the section keyword ("input"/"wikitext");
			// the wikitext itself is captured by group 2.
			foreach ( $matches[2] as $match ) {
				$dict .= $match . "\n";
			}
		}

		return $dict;
	}
}
// Name the class this script provides, then include the file referenced by
// RUN_MAINTENANCE_IF_MAIN (a constant defined outside this file).
// NOTE(review): presumably that file runs the script only when this file is
// the entry point — confirm against Maintenance.php.
$maintClass = ParserFuzzTest::class;
require_once RUN_MAINTENANCE_IF_MAIN;