* Revert back to my parser pseudo-branch again. Note: if you feel like reverting this, reverting Parser.php alone to r40010 will work just fine.

* Merged replaceFreeExternalLinks() with doMagicLinks(). Makes a lot of sense, very similar operations, doesn't break any parser tests. Stops free links from interacting with other parser stages, the same way ISBN links don't. 
* The pass order change fixes Brion's complaint in r39980. Early link expansion, triggered by having more than 1000 links in the page, was outputting URLs which were destroyed by RFEL. Added parser test.
* Fixed an unrelated bug in LinkHolderArray::replace(): if a link to a redirect appears in two separate RLH calls, the second and subsequent calls do not add the mw-redirect class. Caused by an unmigrated LinkCache fetch.
* Added a parser test for a pass interaction bug that the pass order change fixes.
* The fuzzer told me to tell you that free external links in non-caption image parameters, which are and have always been invisible, are now not registered either.
* Miscellaneous supporting updates to the test infrastructure.
This commit is contained in:
Tim Starling 2008-08-26 14:37:15 +00:00
parent b88b852ad5
commit 350b498b9f
14 changed files with 655 additions and 742 deletions

View file

@ -94,6 +94,7 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
* HTML entities like &nbsp; now work (are not escaped) in edit summaries.
* (bug 13815) In the comment for page moves, use the colon-separator message
instead of a hardcoded colon.
* Allow <gallery> to accept image names without an Image: prefix
=== Bug fixes in 1.14 ===

View file

@ -3335,6 +3335,12 @@ $wgParserConf = array(
#'preprocessorClass' => 'Preprocessor_Hash',
);
/**
* LinkHolderArray batch size
* For debugging
*/
$wgLinkHolderBatchSize = 1000;
/**
* Hooks that are used for outputting exceptions. Format is:
* $wgExceptionHooks[] = $funcname

View file

@ -83,7 +83,7 @@ class MWException extends Exception {
function getHTML() {
global $wgShowExceptionDetails;
if( $wgShowExceptionDetails ) {
return '<p>' . htmlspecialchars( $this->getMessage() ) .
return '<p>' . nl2br( htmlspecialchars( $this->getMessage() ) ) .
'</p><p>Backtrace:</p><p>' . nl2br( htmlspecialchars( $this->getTraceAsString() ) ) .
"</p>\n";
} else {

View file

@ -649,12 +649,18 @@ class MessageCache {
return $message;
}
global $wgParser;
global $wgParser, $wgParserConf;
if ( !$this->mParser && isset( $wgParser ) ) {
# Do some initialisation so that we don't have to do it twice
$wgParser->firstCallInit();
# Clone it and store it
$this->mParser = clone $wgParser;
$class = $wgParserConf['class'];
if ( $class == 'Parser_DiffTest' ) {
# Uncloneable
$this->mParser = new $class( $wgParserConf );
} else {
$this->mParser = clone $wgParser;
}
#wfDebug( __METHOD__ . ": following contents triggered transform: $message\n" );
}
if ( $this->mParser ) {

View file

@ -410,6 +410,12 @@ class Title {
global $wgInterwikiCache, $wgContLang;
$fname = 'Title::getInterwikiLink';
if ( count( Title::$interwikiCache ) >= self::CACHE_MAX ) {
// Don't use infinite memory
reset( Title::$interwikiCache );
unset( Title::$interwikiCache[ key( Title::$interwikiCache ) ] );
}
$key = $wgContLang->lc( $key );
$k = wfMemcKey( 'interwiki', $key );

View file

@ -1,8 +1,6 @@
<?php
class LinkHolderArray {
var $batchSize = 1000;
var $internals = array(), $interwikis = array();
var $size = 0;
var $parent;
@ -11,6 +9,15 @@ class LinkHolderArray {
$this->parent = $parent;
}
/**
 * Drop every property held by this object when it is destroyed, to reduce
 * memory usage and so reduce the impact of circular references
 */
function __destruct() {
	$fieldNames = array_keys( get_object_vars( $this ) );
	foreach ( $fieldNames as $fieldName ) {
		unset( $this->$fieldName );
	}
}
/**
* Merge another LinkHolderArray into this one
*/
@ -30,7 +37,8 @@ class LinkHolderArray {
* Returns true if the memory requirements of this object are getting large
*/
function isBig() {
return $this->size > $this->batchSize;
global $wgLinkHolderBatchSize;
return $this->size > $wgLinkHolderBatchSize;
}
/**
@ -145,7 +153,7 @@ class LinkHolderArray {
if ( $title->isAlwaysKnown() ) {
$colours[$pdbk] = '';
} elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) {
$colours[$pdbk] = '';
$colours[$pdbk] = $sk->getLinkColour( $title, $threshold );
$output->addLink( $title, $id );
} elseif ( $linkCache->isBadLink( $pdbk ) ) {
$colours[$pdbk] = 'new';
@ -180,6 +188,9 @@ class LinkHolderArray {
$pdbk = $title->getPrefixedDBkey();
$linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect );
$output->addLink( $title, $s->page_id );
# FIXME: convoluted data flow
# The redirect status and length is passed to getLinkColour via the LinkCache
# Use formal parameters instead
$colours[$pdbk] = $sk->getLinkColour( $title, $threshold );
//add id to the extension todolist
$linkcolour_ids[$s->page_id] = $pdbk;
@ -274,6 +285,9 @@ class LinkHolderArray {
$entry['pdbk'] = $varPdbk;
// set pdbk and colour
# FIXME: convoluted data flow
# The redirect status and length is passed to getLinkColour via the LinkCache
# Use formal parameters instead
$colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold );
$linkcolour_ids[$s->page_id] = $pdbk;
}

File diff suppressed because it is too large Load diff

View file

@ -69,9 +69,22 @@ class Parser_DiffTest
$lastResult = $currentResult;
}
if ( $mismatch ) {
throw new MWException( "Parser_DiffTest: results mismatch on call to $name\n" .
'Arguments: ' . $this->formatArray( $args ) . "\n" .
'Results: ' . $this->formatArray( $results ) . "\n" );
if ( count( $results ) == 2 ) {
$resultsList = array();
foreach ( $this->parsers as $i => $parser ) {
$resultsList[] = var_export( $results[$i], true );
}
$diff = wfDiff( $resultsList[0], $resultsList[1] );
} else {
$diff = '[too many parsers]';
}
$msg = "Parser_DiffTest: results mismatch on call to $name\n";
if ( !$this->shortOutput ) {
$msg .= 'Arguments: ' . $this->formatArray( $args ) . "\n";
}
$msg .= 'Results: ' . $this->formatArray( $results ) . "\n" .
"Diff: $diff\n";
throw new MWException( $msg );
}
return $lastResult;
}

View file

@ -176,6 +176,15 @@ class Language {
}
}
/**
 * Reduce memory usage by unsetting each property of this object
 * when it is destroyed
 */
function __destruct() {
	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $propertyName ) {
		unset( $this->$propertyName );
	}
}
/**
* Hook which will be called if this is the content language.
* Descendants can use this to register hook functions or modify globals

View file

@ -435,8 +435,9 @@ class LanguageConverter {
if ($isTitle) return $this->convertTitle($text);
$plang = $this->getPreferredVariant();
$tarray = explode($this->mMarkup['end'], $text);
$tarray = StringUtils::explode($this->mMarkup['end'], $text);
$text = '';
$lastDelim = false;
foreach($tarray as $txt) {
$marked = explode($this->mMarkup['begin'], $txt, 2);
@ -452,8 +453,17 @@ class LanguageConverter {
$text .= $crule->getDisplay();
$this->applyManualConv($crule);
$lastDelim = false;
} else {
// Reinsert the }- which wasn't part of anything
$text .= $this->mMarkup['end'];
$lastDelim = true;
}
}
if ( $lastDelim ) {
// Remove the last delimiter (wasn't real)
$text = substr( $text, 0, -strlen( $this->mMarkup['end'] ) );
}
return $text;
}

View file

@ -26,7 +26,7 @@
/** */
$options = array( 'quick', 'color', 'quiet', 'help', 'show-output', 'record' );
$optionsWithArgs = array( 'regex' );
$optionsWithArgs = array( 'regex', 'seed' );
require_once( 'commandLine.inc' );
require_once( "$IP/maintenance/parserTestsParserHook.php" );
@ -62,6 +62,10 @@ class ParserTest {
*/
private $oldTablePrefix;
private $maxFuzzTestLength = 300;
private $fuzzSeed = 0;
private $memoryLimit = 50;
/**
* Sets terminal colorization and diff/quick modes depending on OS and
* command-line options (--color and --quick).
@ -117,6 +121,10 @@ class ParserTest {
}
$this->keepUploads = isset( $options['keep-uploads'] );
if ( isset( $options['seed'] ) ) {
$this->fuzzSeed = intval( $options['seed'] ) - 1;
}
$this->hooks = array();
$this->functionHooks = array();
}
@ -133,6 +141,119 @@ class ParserTest {
}
}
/**
 * Run a fuzz test series
 * Draw input from a set of test files
 *
 * Each iteration builds a random wikitext string out of chunks of the
 * dictionary returned by getFuzzInput(), parses it with a fresh parser and
 * reports any exception together with the seed that produced the input.
 * The loop has no normal exit: it runs until the process is stopped or
 * until memory usage exceeds 90% of $this->memoryLimit, at which point a
 * memory breakdown is printed and abort() is called.
 *
 * Returns immediately, with a message, if no dictionary text could be
 * extracted from the given files.
 *
 * @param $filenames Array of parser test file names to draw input from
 */
function fuzzTest( $filenames ) {
	$dict = $this->getFuzzInput( $filenames );
	$dictSize = strlen( $dict );
	if ( $dictSize == 0 ) {
		// With an empty dictionary every chunk has length zero, so the
		// input-building loop below would never reach its target length.
		echo "No fuzz input found in the given test files\n";
		return;
	}
	$logMaxLength = log( $this->maxFuzzTestLength );
	$this->setupDatabase();
	// memoryLimit is expressed in megabytes
	ini_set( 'memory_limit', $this->memoryLimit * 1048576 );

	$numTotal = 0;
	$numSuccess = 0;
	$user = new User;
	$opts = ParserOptions::newFromUser( $user );
	$title = Title::makeTitle( NS_MAIN, 'Parser_test' );

	while ( true ) {
		// Generate test input
		// Reseeding with a fresh, incrementing seed on every iteration makes
		// each input reproducible from the seed printed on failure.
		mt_srand( ++$this->fuzzSeed );
		$totalLength = mt_rand( 1, $this->maxFuzzTestLength );
		$input = '';
		while ( strlen( $input ) < $totalLength ) {
			// Chunk length is exp() of a uniform value in [0, log(max)],
			// i.e. between 1 and maxFuzzTestLength, capped at the
			// dictionary size.
			$logHairLength = mt_rand( 0, 1000000 ) / 1000000 * $logMaxLength;
			$hairLength = min( intval( exp( $logHairLength ) ), $dictSize );
			$offset = mt_rand( 0, $dictSize - $hairLength );
			$input .= substr( $dict, $offset, $hairLength );
		}

		$this->setupGlobals();
		$parser = $this->getParser();

		// Run the test
		try {
			$parser->parse( $input, $title, $opts );
			$fail = false;
		} catch ( Exception $exception ) {
			$fail = true;
		}

		if ( $fail ) {
			echo "Test failed with seed {$this->fuzzSeed}\n";
			echo "Input:\n";
			var_dump( $input );
			echo "\n\n";
			echo "$exception\n";
		} else {
			$numSuccess++;
		}
		$numTotal++;
		$this->teardownGlobals();
		$parser->__destruct();

		if ( $numTotal % 100 == 0 ) {
			// Real memory usage as a percentage of the configured limit
			$usage = intval( memory_get_usage( true ) / $this->memoryLimit / 1048576 * 100 );
			echo "{$this->fuzzSeed}: $numSuccess/$numTotal (mem: $usage%)\n";
			if ( $usage > 90 ) {
				echo "Out of memory:\n";
				$memStats = $this->getMemoryBreakdown();
				// Separate loop variable so the percentage in $usage is not overwritten
				foreach ( $memStats as $name => $bytes ) {
					echo "$name: $bytes\n";
				}
				$this->abort();
			}
		}
	}
}
/**
 * Build the fuzz dictionary from a set of parser test files
 *
 * Extracts the text between each "!! input" line and the following
 * "!! result" line and concatenates the sections, each followed by a newline.
 *
 * @param $filenames Array of parser test file names
 * @return string The concatenated input sections
 */
function getFuzzInput( $filenames ) {
	$sectionPattern = '/!!\s*input\n(.*?)\n!!\s*result/s';
	$pieces = array();
	foreach ( $filenames as $filename ) {
		$found = array();
		preg_match_all( $sectionPattern, file_get_contents( $filename ), $found );
		foreach ( $found[1] as $section ) {
			$pieces[] = $section . "\n";
		}
	}
	return implode( '', $pieces );
}
/**
 * Get a memory usage breakdown
 *
 * Sizes are approximated by the length of the serialized form of:
 *  - each global variable (key '$name'),
 *  - each declared class's static properties plus the static variables of
 *    its methods (key is the class name),
 *  - each user-defined function's static variables (key 'name()').
 *
 * A value that cannot be serialized contributes 0 instead of aborting the
 * whole report.
 *
 * @return array Map of label => approximate size in bytes, sorted ascending
 *   by size with keys preserved
 */
function getMemoryBreakdown() {
	// Gather everything to measure first: label => list of values
	$targets = array();
	foreach ( $GLOBALS as $name => $value ) {
		$targets['$' . $name] = array( $value );
	}

	foreach ( get_declared_classes() as $class ) {
		$rc = new ReflectionClass( $class );
		$parts = array( $rc->getStaticProperties() );
		foreach ( $rc->getMethods() as $method ) {
			$parts[] = $method->getStaticVariables();
		}
		$targets[$class] = $parts;
	}

	$functions = get_defined_functions();
	foreach ( $functions['user'] as $function ) {
		$rf = new ReflectionFunction( $function );
		$targets["$function()"] = array( $rf->getStaticVariables() );
	}

	$memStats = array();
	foreach ( $targets as $label => $parts ) {
		$size = 0;
		foreach ( $parts as $part ) {
			try {
				$size += strlen( serialize( $part ) );
			} catch ( Exception $e ) {
				// serialize() refuses some values (e.g. closures); skip them
				// so the remaining entries can still be reported
			}
		}
		$memStats[$label] = $size;
	}

	asort( $memStats );
	return $memStats;
}
/**
 * Stop the test run immediately
 *
 * Terminates the script with a non-zero exit status. The previous body
 * called itself unconditionally, so the run only ended once the recursion
 * exhausted the stack or the memory limit.
 */
function abort() {
	exit( 1 );
}
/**
* Run a series of tests listed in the given text files.
* Each test consists of a brief description, wikitext input,
@ -266,6 +387,24 @@ class ParserTest {
return $ok;
}
/**
 * Build a parser for a test run
 *
 * Instantiates the class named by $wgParserConf['class'], registers the tag
 * hooks and function hooks stored on this object, then runs the
 * 'ParserTestParser' hook with the new parser.
 *
 * @return object The configured parser instance
 */
function getParser() {
	global $wgParserConf;

	$parserClass = $wgParserConf['class'];
	$parser = new $parserClass( $wgParserConf );

	foreach ( $this->hooks as $tagName => $handler ) {
		$parser->setHook( $tagName, $handler );
	}

	foreach ( $this->functionHooks as $hookName => $spec ) {
		// Each entry is a ( callback, flags ) pair
		list( $handler, $hookFlags ) = $spec;
		$parser->setFunctionHook( $hookName, $handler, $hookFlags );
	}

	wfRunHooks( 'ParserTestParser', array( &$parser ) );

	return $parser;
}
/**
* Run a given wikitext input through a freshly-constructed wiki parser,
* and compare the output against the expected results.
@ -276,7 +415,6 @@ class ParserTest {
* @return bool
*/
private function runTest( $desc, $input, $result, $opts ) {
global $wgParserConf;
if( $this->showProgress ) {
$this->showTesting( $desc );
}
@ -300,18 +438,7 @@ class ParserTest {
}
$noxml = (bool)preg_match( '~\\b noxml \\b~x', $opts );
$class = $wgParserConf['class'];
$parser = new $class( $wgParserConf );
foreach( $this->hooks as $tag => $callback ) {
$parser->setHook( $tag, $callback );
}
foreach( $this->functionHooks as $tag => $bits ) {
list( $callback, $flags ) = $bits;
$parser->setFunctionHook( $tag, $callback, $flags );
}
wfRunHooks( 'ParserTestParser', array( &$parser ) );
$parser = $this->getParser();
$title =& Title::makeTitle( NS_MAIN, $titleText );
$matches = array();
@ -387,6 +514,8 @@ class ParserTest {
self::getOptionValue( '/variant=([a-z]+(?:-[a-z]+)?)/', $opts, false );
$maxtoclevel =
self::getOptionValue( '/wgMaxTocLevel=(\d+)/', $opts, 999 );
$linkHolderBatchSize =
self::getOptionValue( '/wgLinkHolderBatchSize=(\d+)/', $opts, 1000 );
$settings = array(
'wgServer' => 'http://localhost',
@ -432,6 +561,7 @@ class ParserTest {
) ),
'wgDefaultExternalStore' => array(),
'wgForeignFileRepos' => array(),
'wgLinkHolderBatchSize' => $linkHolderBatchSize,
);
$this->savedGlobals = array();
foreach( $settings as $var => $val ) {
@ -441,6 +571,7 @@ class ParserTest {
$langObj = Language::factory( $lang );
$GLOBALS['wgLang'] = $langObj;
$GLOBALS['wgContLang'] = $langObj;
$GLOBALS['wgMemc'] = new FakeMemCachedClient;
//$GLOBALS['wgMessageCache'] = new MessageCache( new BagOStuff(), false, 0, $GLOBALS['wgDBname'] );
@ -551,10 +682,10 @@ class ParserTest {
# Hack: insert a few Wikipedia in-project interwiki prefixes,
# for testing inter-language links
$db->insert( 'interwiki', array(
array( 'iw_prefix' => 'Wikipedia',
array( 'iw_prefix' => 'wikipedia',
'iw_url' => 'http://en.wikipedia.org/wiki/$1',
'iw_local' => 0 ),
array( 'iw_prefix' => 'MeatBall',
array( 'iw_prefix' => 'meatball',
'iw_url' => 'http://www.usemod.com/cgi-bin/mb.pl?$1',
'iw_local' => 0 ),
array( 'iw_prefix' => 'zh',
@ -621,11 +752,12 @@ class ParserTest {
return;
}
/*
$tables = $this->listTables();
$db = wfGetDB( DB_MASTER );
foreach ( $tables as $table ) {
$db->query( "DROP TABLE `parsertest_$table`" );
}
}*/
}
/**
@ -645,6 +777,10 @@ class ParserTest {
}
wfDebug( "Creating upload directory $dir\n" );
if ( file_exists( $dir ) ) {
wfDebug( "Already exists!\n" );
return $dir;
}
mkdir( $dir );
mkdir( $dir . '/3' );
mkdir( $dir . '/3/3a' );
@ -658,6 +794,8 @@ class ParserTest {
*/
private function teardownGlobals() {
RepoGroup::destroySingleton();
LinkCache::singleton()->clear();
$GLOBALS['wgLang']->__destruct();
foreach( $this->savedGlobals as $var => $val ) {
$GLOBALS[$var] = $val;
}

View file

@ -28,22 +28,21 @@ require('parserTests.inc');
if( isset( $options['help'] ) ) {
echo <<<ENDS
MediaWiki $wgVersion parser test suite
Usage: php parserTests.php [--quick] [--quiet] [--show-output]
[--color[=(yes|no)]]
[--regex=<expression>] [--file=<testfile>]
[--record] [--compare]
[--help]
Usage: php parserTests.php [options...]
Options:
--quick Suppress diff output of failed tests
--quiet Suppress notification of passed tests (shows only failed tests)
--show-output Show expected and actual output
--color Override terminal detection and force color output on or off
--color[=yes|no] Override terminal detection and force color output on or off
use wgCommandLineDarkBg = true; if your term is dark
--regex Only run tests whose descriptions which match given regex
--file Run test cases from a custom file instead of parserTests.txt
--file=<testfile> Run test cases from a custom file instead of parserTests.txt
--record Record tests in database
--compare Compare with recorded results, without updating the database.
--keep-uploads Re-use the same upload directory for each test, don't delete it
--fuzz Do a fuzz test instead of a normal test
--seed <n> Start the fuzz test from the specified seed
--help Show this help message
@ -67,7 +66,10 @@ if( isset( $options['file'] ) ) {
# Print out software version to assist with locating regressions
$version = SpecialVersion::getVersion();
echo( "This is MediaWiki version {$version}.\n\n" );
$ok = $tester->runTestsFromFiles( $files );
exit ($ok ? 0 : -1);
if ( isset( $options['fuzz'] ) ) {
$tester->fuzzTest( $files );
} else {
$ok = $tester->runTestsFromFiles( $files );
exit ($ok ? 0 : -1);
}

View file

@ -7066,6 +7066,29 @@ Line two
!! end
!! test
Interwiki links trounced by replaceExternalLinks after early LinkHolderArray expansion
!! options
wgLinkHolderBatchSize=0
!! input
[[meatball:1]]
[[meatball:2]]
[[meatball:3]]
!! result
<p><a href="http://www.usemod.com/cgi-bin/mb.pl?1" class="extiw" title="meatball:1">meatball:1</a>
<a href="http://www.usemod.com/cgi-bin/mb.pl?2" class="extiw" title="meatball:2">meatball:2</a>
<a href="http://www.usemod.com/cgi-bin/mb.pl?3" class="extiw" title="meatball:3">meatball:3</a>
</p>
!! end
!! test
Free external link invading image caption
!! input
[[Image:Foobar.jpg|thumb|http://x|hello]]
!! result
<div class="thumb tright"><div class="thumbinner" style="width:182px;"><a href="/wiki/Image:Foobar.jpg" class="image" title="hello"><img alt="hello" src="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg" width="180" height="20" border="0" class="thumbimage" /></a> <div class="thumbcaption"><div class="magnify"><a href="/wiki/Image:Foobar.jpg" class="internal" title="Enlarge"><img src="/skins/common/images/magnify-clip.png" width="15" height="11" alt="" /></a></div>hello</div></div></div>
!! end
#
#

View file

@ -21,24 +21,27 @@ function wfParserTestStaticParserHookSetup( &$parser ) {
return true;
}
function wfParserTestStaticParserHookHook( $in, $argv ) {
static $buf = null;
function wfParserTestStaticParserHookHook( $in, $argv, $parser ) {
if ( ! count( $argv ) ) {
$buf = $in;
$parser->static_tag_buf = $in;
return '';
} else if ( count( $argv ) === 1 && $argv['action'] === 'flush' && $in === null ) {
} else if ( count( $argv ) === 1 && isset( $argv['action'] )
&& $argv['action'] === 'flush' && $in === null )
{
// Clear the buffer, we probably don't need to
$tmp = $buf;
$buf = null;
if ( isset( $parser->static_tag_buf ) ) {
$tmp = $parser->static_tag_buf;
} else {
$tmp = '';
}
$parser->static_tag_buf = null;
return $tmp;
} else
// wtf?
die(
return
"\nCall this extension as <statictag>string</statictag> or as" .
" <statictag action=flush/>, not in any other way.\n" .
"text: " . var_export( $in, true ) . "\n" .
"argv: " . var_export( $argv, true ) . "\n"
);
"argv: " . var_export( $argv, true ) . "\n";
}