Generated node count limit

To prevent large template DOM caches from sending servers into swap,
throw an exception when more than a configurable number of DOM elements
($wgMaxGeneratedPPNodeCount) are parsed. Unfortunately, it wasn't
possible to return a normal error message, because doing so broke the
pre-save transform (PST) and extractSections, and corrupted the article
text. It's safer to refuse to save the edit, and we don't have a decent
way to do that short of throwing an exception.

Ideally, we would like to have an upstream patch that hooks libxml to
allocate memory from PHP's request pool, so that a fatal error would be
raised instead of the server swapping.

Change-Id: I4cb4f6fd313e1e0940b56cc5e586afd1bea9267a
This commit is contained in:
Tim Starling 2012-09-15 14:51:58 -07:00
parent 3f090da982
commit 2caa7829fc
4 changed files with 34 additions and 4 deletions

View file

@ -3218,10 +3218,18 @@ $wgParserConf = array(
$wgMaxTocLevel = 999;
/**
* A complexity limit on template expansion
* A complexity limit on template expansion: the maximum number of nodes visited
* by PPFrame::expand()
*/
$wgMaxPPNodeCount = 1000000;
/**
* A complexity limit on template expansion: the maximum number of nodes
* generated by Preprocessor::preprocessToObj()
*/
$wgMaxGeneratedPPNodeCount = 1000000;
/**
* Maximum recursion depth for templates within templates.
* The current parser adds two levels to the PHP call stack for each template,

View file

@ -163,7 +163,8 @@ class Parser {
var $mLinkHolders;
var $mLinkID;
var $mIncludeSizes, $mPPNodeCount, $mHighestExpansionDepth, $mDefaultSort;
var $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
var $mDefaultSort;
var $mTplExpandCache; # empty-frame expansion cache
var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
var $mExpensiveFunctionCount; # number of expensive parser function calls
@ -306,6 +307,7 @@ class Parser {
'arg' => 0,
);
$this->mPPNodeCount = 0;
$this->mGeneratedPPNodeCount = 0;
$this->mHighestExpansionDepth = 0;
$this->mDefaultSort = false;
$this->mHeadings = array();
@ -482,7 +484,9 @@ class Parser {
$PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$this->mOptions->getExpensiveParserFunctionLimit()}\n";
$limitReport =
"NewPP limit report\n" .
"Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
"Preprocessor visited node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
"Preprocessor generated node count: " .
"{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" .
"Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
"Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n".
"Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n".

View file

@ -94,6 +94,11 @@ class ParserOptions {
* Maximum number of nodes touched by PPFrame::expand()
*/
var $mMaxPPNodeCount;
/**
* Maximum number of nodes generated by Preprocessor::preprocessToObj()
*/
var $mMaxGeneratedPPNodeCount;
/**
* Maximum recursion depth in PPFrame::expand()
@ -219,6 +224,7 @@ class ParserOptions {
function getTargetLanguage() { return $this->mTargetLanguage; }
function getMaxIncludeSize() { return $this->mMaxIncludeSize; }
function getMaxPPNodeCount() { return $this->mMaxPPNodeCount; }
function getMaxGeneratedPPNodeCount() { return $this->mMaxGeneratedPPNodeCount; }
function getMaxPPExpandDepth() { return $this->mMaxPPExpandDepth; }
function getMaxTemplateDepth() { return $this->mMaxTemplateDepth; }
/* @since 1.20 */
@ -307,6 +313,7 @@ class ParserOptions {
function setTargetLanguage( $x ) { return wfSetVar( $this->mTargetLanguage, $x, true ); }
function setMaxIncludeSize( $x ) { return wfSetVar( $this->mMaxIncludeSize, $x ); }
function setMaxPPNodeCount( $x ) { return wfSetVar( $this->mMaxPPNodeCount, $x ); }
function setMaxGeneratedPPNodeCount( $x ) { return wfSetVar( $this->mMaxGeneratedPPNodeCount, $x ); }
function setMaxTemplateDepth( $x ) { return wfSetVar( $this->mMaxTemplateDepth, $x ); }
/* @since 1.20 */
function setExpensiveParserFunctionLimit( $x ) { return wfSetVar( $this->mExpensiveParserFunctionLimit, $x ); }
@ -404,7 +411,8 @@ class ParserOptions {
global $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages,
$wgAllowExternalImagesFrom, $wgEnableImageWhitelist, $wgAllowSpecialInclusion,
$wgMaxArticleSize, $wgMaxPPNodeCount, $wgMaxTemplateDepth, $wgMaxPPExpandDepth,
$wgCleanSignatures, $wgExternalLinkTarget, $wgExpensiveParserFunctionLimit;
$wgCleanSignatures, $wgExternalLinkTarget, $wgExpensiveParserFunctionLimit,
$wgMaxGeneratedPPNodeCount;
wfProfileIn( __METHOD__ );
@ -416,6 +424,7 @@ class ParserOptions {
$this->mAllowSpecialInclusion = $wgAllowSpecialInclusion;
$this->mMaxIncludeSize = $wgMaxArticleSize * 1024;
$this->mMaxPPNodeCount = $wgMaxPPNodeCount;
$this->mMaxGeneratedPPNodeCount = $wgMaxGeneratedPPNodeCount;
$this->mMaxPPExpandDepth = $wgMaxPPExpandDepth;
$this->mMaxTemplateDepth = $wgMaxTemplateDepth;
$this->mExpensiveParserFunctionLimit = $wgExpensiveParserFunctionLimit;

View file

@ -162,6 +162,15 @@ class Preprocessor_DOM implements Preprocessor {
}
}
// Fail if the number of elements exceeds acceptable limits
// Do not attempt to generate the DOM
$this->parser->mGeneratedPPNodeCount += substr_count( $xml, '<' );
$max = $this->parser->mOptions->getMaxGeneratedPPNodeCount();
if ( $this->parser->mGeneratedPPNodeCount > $max ) {
throw new MWException( __METHOD__.': generated node count limit exceeded' );
}
wfProfileIn( __METHOD__.'-loadXML' );
$dom = new DOMDocument;
wfSuppressWarnings();