Consolidate common Preprocessor caching code

* Consolidate nearly-identical caching code in Preprocessor_DOM and
  Preprocessor_Hash by making Preprocessor an abstract class rather than an
  interface and by implementing Preprocessor::cacheSetTree() and
  Preprocessor::cacheGetTree().
* Cache trees for wikitext blobs that have length equal or greater to
  PreprocessorCacheThreshold. Previously they needed to be greater than
  PreprocessorCacheThreshold, so this changes the requirement by one character.
  I did it because it seems more natural.
* Modernize the code to use singleton service objects rather than globals.

We spend a lot of time in the Preprocessor, so it would be nice for this code
to be well-factored and clear.

Change-Id: Ib71c29f14a28445a505e12c774a24ad964330b95
This commit is contained in:
Ori Livneh 2015-10-08 13:54:15 -07:00 committed by Tim Starling
parent 8b8f6d15e0
commit 1559be9f7a
3 changed files with 88 additions and 70 deletions

View file

@ -21,23 +21,87 @@
* @ingroup Parser
*/
use MediaWiki\Logger\LoggerFactory;
/**
* @ingroup Parser
*/
interface Preprocessor {
abstract class Preprocessor {
const CACHE_VERSION = 1;
/**
* Create a new preprocessor object based on an initialised Parser object
* Store a document tree in the cache.
*
* @param Parser $parser
* @param string $text
* @param int $flags
*/
public function __construct( $parser );
protected function cacheSetTree( $text, $flags, $tree ) {
$config = RequestContext::getMain()->getConfig();
$length = strlen( $text );
$threshold = $config->get( 'PreprocessorCacheThreshold' );
if ( $threshold === false || $length < $threshold || $length > 1e6 ) {
return false;
}
$key = wfMemcKey(
defined( 'self::CACHE_PREFIX' ) ? self::CACHE_PREFIX : __CLASS__,
md5( $text ), $flags );
$value = sprintf( "%08d", self::CACHE_VERSION ) . $tree;
$cache = ObjectCache::getInstance( $config->get( 'MainCacheType' ) );
$cache->set( $key, $value, 86400 );
LoggerFactory::getInstance( 'Preprocessor' )
->info( "Cached preprocessor output (key: $key)" );
}
/**
* Attempt to load a precomputed document tree for some given wikitext
* from the cache.
*
* @param string $text
* @param int $flags
* @return PPNode_Hash_Tree|bool
*/
protected function cacheGetTree( $text, $flags ) {
$config = RequestContext::getMain()->getConfig();
$length = strlen( $text );
$threshold = $config->get( 'PreprocessorCacheThreshold' );
if ( $threshold === false || $length < $threshold || $length > 1e6 ) {
return false;
}
$cache = ObjectCache::getInstance( $config->get( 'MainCacheType' ) );
$key = wfMemcKey(
defined( 'self::CACHE_PREFIX' ) ? self::CACHE_PREFIX : __CLASS__,
md5( $text ), $flags );
$value = $cache->get( $key );
if ( !$value ) {
return false;
}
$version = intval( substr( $value, 0, 8 ) );
if ( $version !== self::CACHE_VERSION ) {
return false;
}
LoggerFactory::getInstance( 'Preprocessor' )
->info( "Loaded preprocessor output from cache (key: $key)" );
return substr( $value, 8 );
}
/**
* Create a new top-level frame for expansion of a page
*
* @return PPFrame
*/
public function newFrame();
abstract public function newFrame();
/**
* Create a new custom frame for programmatic use of parameter replacement
@ -47,7 +111,7 @@ interface Preprocessor {
*
* @return PPFrame
*/
public function newCustomFrame( $args );
abstract public function newCustomFrame( $args );
/**
* Create a new custom node for programmatic use of parameter replacement
@ -55,7 +119,7 @@ interface Preprocessor {
*
* @param array $values
*/
public function newPartNodeArray( $values );
abstract public function newPartNodeArray( $values );
/**
* Preprocess text to a PPNode
@ -65,7 +129,7 @@ interface Preprocessor {
*
* @return PPNode
*/
public function preprocessToObj( $text, $flags = 0 );
abstract public function preprocessToObj( $text, $flags = 0 );
}
/**

View file

@ -25,7 +25,7 @@
* @ingroup Parser
*/
// @codingStandardsIgnoreStart Squiz.Classes.ValidClassName.NotCamelCaps
class Preprocessor_DOM implements Preprocessor {
class Preprocessor_DOM extends Preprocessor {
// @codingStandardsIgnoreEnd
/**
@ -35,7 +35,7 @@ class Preprocessor_DOM implements Preprocessor {
public $memoryLimit;
const CACHE_VERSION = 1;
const CACHE_PREFIX = 'preprocess-xml';
public function __construct( $parser ) {
$this->parser = $parser;
@ -148,30 +148,11 @@ class Preprocessor_DOM implements Preprocessor {
* @return PPNode_DOM
*/
public function preprocessToObj( $text, $flags = 0 ) {
global $wgMemc, $wgPreprocessorCacheThreshold;
$xml = false;
$cacheable = ( $wgPreprocessorCacheThreshold !== false
&& strlen( $text ) > $wgPreprocessorCacheThreshold );
if ( $cacheable ) {
$cacheKey = wfMemcKey( 'preprocess-xml', md5( $text ), $flags );
$cacheValue = $wgMemc->get( $cacheKey );
if ( $cacheValue ) {
$version = substr( $cacheValue, 0, 8 );
if ( intval( $version ) == self::CACHE_VERSION ) {
$xml = substr( $cacheValue, 8 );
// From the cache
wfDebugLog( "Preprocessor", "Loaded preprocessor XML from memcached (key $cacheKey)" );
}
}
if ( $xml === false ) {
$xml = $this->preprocessToXml( $text, $flags );
$cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $xml;
$wgMemc->set( $cacheKey, $cacheValue, 86400 );
wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" );
}
} else {
$xml = $this->cacheGetTree( $text, $flags );
if ( $xml === false ) {
$xml = $this->preprocessToXml( $text, $flags );
$this->cacheSetTree( $text, $flags, $xml );
}
// Fail if the number of elements exceeds acceptable limits
@ -179,8 +160,7 @@ class Preprocessor_DOM implements Preprocessor {
$this->parser->mGeneratedPPNodeCount += substr_count( $xml, '<' );
$max = $this->parser->mOptions->getMaxGeneratedPPNodeCount();
if ( $this->parser->mGeneratedPPNodeCount > $max ) {
if ( $cacheable ) {
}
// if ( $cacheable ) { ... }
throw new MWException( __METHOD__ . ': generated node count limit exceeded' );
}
@ -199,8 +179,7 @@ class Preprocessor_DOM implements Preprocessor {
$obj = new PPNode_DOM( $dom->documentElement );
}
if ( $cacheable ) {
}
// if ( $cacheable ) { ... }
if ( !$result ) {
throw new MWException( __METHOD__ . ' generated invalid XML' );

View file

@ -28,7 +28,7 @@
* @ingroup Parser
*/
// @codingStandardsIgnoreStart Squiz.Classes.ValidClassName.NotCamelCaps
class Preprocessor_Hash implements Preprocessor {
class Preprocessor_Hash extends Preprocessor {
// @codingStandardsIgnoreEnd
/**
@ -36,7 +36,7 @@ class Preprocessor_Hash implements Preprocessor {
*/
public $parser;
const CACHE_VERSION = 1;
const CACHE_PREFIX = 'preprocess-hash';
public function __construct( $parser ) {
$this->parser = $parser;
@ -88,6 +88,7 @@ class Preprocessor_Hash implements Preprocessor {
return $node;
}
/**
* Preprocess some wikitext and return the document tree.
* This is the ghost of Parser::replace_variables().
@ -112,25 +113,9 @@ class Preprocessor_Hash implements Preprocessor {
* @return PPNode_Hash_Tree
*/
public function preprocessToObj( $text, $flags = 0 ) {
// Check cache.
global $wgMemc, $wgPreprocessorCacheThreshold;
$cacheable = $wgPreprocessorCacheThreshold !== false
&& strlen( $text ) > $wgPreprocessorCacheThreshold;
if ( $cacheable ) {
$cacheKey = wfMemcKey( 'preprocess-hash', md5( $text ), $flags );
$cacheValue = $wgMemc->get( $cacheKey );
if ( $cacheValue ) {
$version = substr( $cacheValue, 0, 8 );
if ( intval( $version ) == self::CACHE_VERSION ) {
$hash = unserialize( substr( $cacheValue, 8 ) );
// From the cache
wfDebugLog( "Preprocessor",
"Loaded preprocessor hash from memcached (key $cacheKey)" );
return $hash;
}
}
$tree = $this->cacheGetTree( $text, $flags );
if ( $tree !== false ) {
return unserialize( $tree );
}
$rules = array(
@ -629,13 +614,11 @@ class Preprocessor_Hash implements Preprocessor {
$lastNode = $node;
}
if ( !$node ) {
if ( $cacheable ) {
}
// if ( $cacheable ) { ... }
throw new MWException( __METHOD__ . ': eqpos not found' );
}
if ( $node->name !== 'equals' ) {
if ( $cacheable ) {
}
// if ( $cacheable ) { ... }
throw new MWException( __METHOD__ . ': eqpos is not equals' );
}
$equalsNode = $node;
@ -732,15 +715,7 @@ class Preprocessor_Hash implements Preprocessor {
$rootNode->lastChild = $stack->rootAccum->lastNode;
// Cache
if ( $cacheable ) {
$cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . serialize( $rootNode );
// T111289: Cache values should not exceed 1 Mb, but they do.
if ( strlen( $cacheValue ) <= 1e6 ) {
$wgMemc->set( $cacheKey, $cacheValue, 86400 );
wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" );
}
}
$this->cacheSetTree( $text, $flags, serialize( $rootNode ) );
return $rootNode;
}