Avoid unnecessary parsing in RefreshLinksJob

* This reuses the parser cache when possible
* Clarified the return value of CacheTime::getCacheTime()
* A few documentation tweaks

Change-Id: I80b7c6404b3f8c48b53c3bba96115dbf94d80873
This commit is contained in:
Aaron Schulz 2013-11-27 22:43:00 -08:00
parent e7c8c43d23
commit 48a77e1d83
3 changed files with 37 additions and 14 deletions

View file

@ -241,7 +241,9 @@ abstract class Job {
/**
* @see JobQueue::deduplicateRootJob()
* @param string $key A key that identifies the task
* @return array
* @return array Map of:
* - rootJobSignature : hash (e.g. SHA1) that identifies the task
* - rootJobTimestamp : TS_MW timestamp of this instance of the task
* @since 1.21
*/
public static function newRootJobParams( $key ) {

View file

@ -119,21 +119,39 @@ class RefreshLinksJob extends Job {
wfGetLB()->waitFor( $this->params['masterPos'] );
}
$revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
if ( !$revision ) {
$this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
return false; // XXX: what if it was just deleted?
$parserOutput = false;
// If page_touched changed after this root job (with a good slave lag skew factor),
// then it is likely that any views of the pages already resulted in re-parses which
// are now in cache. This can be reused to avoid expensive parsing in some cases.
if ( isset( $this->params['rootJobTimestamp'] ) ) {
$page = WikiPage::factory( $title );
$skewedTimestamp = wfTimestamp( TS_UNIX, $this->params['rootJobTimestamp'] ) + 5;
if ( $page->getTouched() > wfTimestamp( TS_MW, $skewedTimestamp ) ) {
$parserOptions = $page->makeParserOptions( 'canonical' );
$parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
if ( $parserOutput->getCacheTime() <= $skewedTimestamp ) {
$parserOutput = false; // too stale
}
}
}
// Fetch the current revision and parse it if necessary...
if ( $parserOutput == false ) {
$revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
if ( !$revision ) {
$this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
return false; // XXX: what if it was just deleted?
}
$content = $revision->getContent( Revision::RAW );
if ( !$content ) {
// If there is no content, pretend the content is empty
$content = $revision->getContentHandler()->makeEmptyContent();
$content = $revision->getContent( Revision::RAW );
if ( !$content ) {
// If there is no content, pretend the content is empty
$content = $revision->getContentHandler()->makeEmptyContent();
}
// Revision ID must be passed to the parser output to get revision variables correct
$parserOutput = $content->getParserOutput( $title, $revision->getId(), null, false );
}
// Revision ID must be passed to the parser output to get revision variables correct
$parserOutput = $content->getParserOutput( $title, $revision->getId(), null, false );
$updates = $content->getSecondaryDataUpdates( $title, null, false, $parserOutput );
DataUpdate::runUpdates( $updates );

View file

@ -37,14 +37,17 @@ class CacheTime {
$mCacheExpiry = null, # Seconds after which the object should expire, use 0 for uncachable. Used in ParserCache.
$mContainsOldMagic; # Boolean variable indicating if the input contained variables like {{CURRENTDAY}}
function getCacheTime() { return $this->mCacheTime; }
/**
* @return string TS_MW timestamp
*/
function getCacheTime() { return wfTimestamp( TS_MW, $this->mCacheTime ); }
function containsOldMagic() { return $this->mContainsOldMagic; }
function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
/**
* setCacheTime() sets the timestamp expressing when the page has been rendered.
* This doesn not control expiry, see updateCacheExpiry() for that!
* This does not control expiry, see updateCacheExpiry() for that!
* @param $t string
* @return string
*/