2006-06-18 12:42:16 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Class to invalidate the HTML cache of all the pages linking to a given title.
|
|
|
|
|
* Small numbers of links will be done immediately, large numbers are pushed onto
|
|
|
|
|
* the job queue.
|
|
|
|
|
*
|
2008-04-14 07:45:50 +00:00
|
|
|
* This class is designed to work efficiently with small numbers of links, and
|
2006-06-18 12:42:16 +00:00
|
|
|
* to work reasonably well with up to ~10^5 links. Above ~10^6 links, the memory
|
|
|
|
|
* and time requirements of loading all backlinked IDs in doUpdate() might become
|
|
|
|
|
* prohibitive. The requirements measured at Wikimedia are approximately:
|
2008-04-14 07:45:50 +00:00
|
|
|
*
|
2006-06-18 12:42:16 +00:00
|
|
|
* memory: 48 bytes per row
|
|
|
|
|
* time: 16us per row for the query plus processing
|
|
|
|
|
*
|
|
|
|
|
* The reason this query is done is to support partitioning of the job
|
2008-04-14 07:45:50 +00:00
|
|
|
* by backlinked ID. The memory issue could be alleviated by doing this query in
|
2006-06-18 12:42:16 +00:00
|
|
|
* batches, but of course LIMIT with an offset is inefficient on the DB side.
|
|
|
|
|
*
|
2008-04-14 07:45:50 +00:00
|
|
|
* The class is nevertheless a vast improvement on the previous method of using
|
2006-06-18 12:42:16 +00:00
|
|
|
* Image::getLinksTo() and Title::touchArray(), which uses about 2KB of memory per
|
|
|
|
|
* link.
|
WARNING: HUGE COMMIT
Doxygen documentation update:
* Changed alls @addtogroup to @ingroup. @addtogroup adds the comment to the group description, but doesn't add the file, class, function, ... to the group like @ingroup does. See for example http://svn.wikimedia.org/doc/group__SpecialPage.html where it's impossible to see related files, classes, ... that should belong to that group.
* Added @file to file description, it seems that it should be explicitely decalred for file descriptions, otherwise doxygen will think that the comment document the first class, variabled, function, ... that is in that file.
* Removed some empty comments
* Removed some ?>
Added following groups:
* ExternalStorage
* JobQueue
* MaintenanceLanguage
One more thing: there are still a lot of warnings when generating the doc.
2008-05-20 17:13:28 +00:00
|
|
|
*
|
|
|
|
|
* @ingroup Cache
|
2006-06-18 12:42:16 +00:00
|
|
|
*/
|
|
|
|
|
class HTMLCacheUpdate
{
	/**
	 * $mTitle:  title object being linked to (must provide getNamespace() and getDBkey())
	 * $mTable:  name of the link table to read backlinks from
	 * $mPrefix: column prefix for $mTable, filled in lazily by getPrefix()
	 */
	public $mTitle, $mTable, $mPrefix;

	/**
	 * $mRowsPerJob:   copied from $wgUpdateRowsPerJob; above this many rows the work is queued
	 * $mRowsPerQuery: copied from $wgUpdateRowsPerQuery; max IDs per page_touched UPDATE
	 */
	public $mRowsPerJob, $mRowsPerQuery;

	/**
	 * @param object $titleTo The title whose backlinks should be invalidated
	 * @param string $table Link table name; must be a key known to getPrefix()
	 */
	public function __construct( $titleTo, $table ) {
		global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;

		$this->mTitle = $titleTo;
		$this->mTable = $table;
		$this->mRowsPerJob = $wgUpdateRowsPerJob;
		$this->mRowsPerQuery = $wgUpdateRowsPerQuery;
	}

	/**
	 * Load the IDs of all pages linking to the title and invalidate them,
	 * either immediately or, when there are more than $mRowsPerJob rows,
	 * by pushing jobs onto the job queue.
	 */
	public function doUpdate() {
		# Fetch the IDs
		$cond = $this->getToCondition();
		$fromField = $this->getFromField();
		$dbr = wfGetDB( DB_SLAVE );
		# insertJobs() cuts the result into contiguous ID ranges, which is only
		# correct if the rows arrive in ascending ID order, so ask for it
		# explicitly instead of relying on whichever index the DB picks.
		$res = $dbr->select( $this->mTable, $fromField, $cond, __METHOD__,
			array( 'ORDER BY' => $fromField ) );

		$numRows = $dbr->numRows( $res );
		if ( $numRows == 0 ) {
			return;
		}

		if ( $numRows > $this->mRowsPerJob ) {
			$this->insertJobs( $res );
		} else {
			$this->invalidateIDs( $res );
		}
	}

	/**
	 * Partition a result set into ID ranges and queue one HTMLCacheUpdateJob
	 * per range.
	 *
	 * @param ResultWrapper $res Rows whose first column is the linking page ID,
	 *   sorted by that ID in ascending order
	 */
	public function insertJobs( ResultWrapper $res ) {
		$numRows = $res->numRows();
		if ( $numRows <= 0 ) {
			# Nothing to partition; also avoids a division by zero below
			return;
		}

		# A zero or unset rows-per-job setting would divide by zero
		$rowsPerJob = $this->mRowsPerJob >= 1 ? $this->mRowsPerJob : 1;
		$numBatches = ceil( $numRows / $rowsPerJob );
		$realBatchSize = $numRows / $numBatches;

		# Number of rows consumed per batch. The last row read is the boundary:
		# it ends the current range (exclusive) and starts the next one. At
		# least one row must be read each time round, otherwise no boundary is
		# ever found and the generated range matches nothing.
		$rowsPerBatch = max( 1, intval( ceil( $realBatchSize - 1 ) ) );

		$start = false;
		$jobs = array();
		do {
			$id = false;
			for ( $i = 0; $i < $rowsPerBatch; $i++ ) {
				$row = $res->fetchRow();
				if ( !$row ) {
					# Result exhausted: the final job has no upper bound
					$id = false;
					break;
				}
				$id = $row[0];
			}

			$params = array(
				'table' => $this->mTable,
				'start' => $start,
				'end' => ( $id !== false ? $id - 1 : false ),
			);
			$jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );

			$start = $id;
		} while ( $start );

		Job::batchInsert( $jobs );
	}

	/**
	 * Get the column prefix used by the link table $mTable.
	 *
	 * @return string
	 * @throws MWException if $mTable is not a supported link table
	 */
	public function getPrefix() {
		static $prefixes = array(
			'pagelinks' => 'pl',
			'imagelinks' => 'il',
			'categorylinks' => 'cl',
			'templatelinks' => 'tl',
			'redirect' => 'rd',

			# Not needed
			# 'externallinks' => 'el',
			# 'langlinks' => 'll'
		);

		if ( is_null( $this->mPrefix ) ) {
			# Validate before reading, so an unknown table produces only the
			# exception and not an undefined-index notice as well
			if ( !isset( $prefixes[$this->mTable] ) ) {
				throw new MWException( "Invalid table type \"{$this->mTable}\" in " . __CLASS__ );
			}
			$this->mPrefix = $prefixes[$this->mTable];
		}
		return $this->mPrefix;
	}

	/**
	 * Get the name of the column holding the ID of the linking page.
	 *
	 * @return string
	 */
	public function getFromField() {
		return $this->getPrefix() . '_from';
	}

	/**
	 * Build the query conditions selecting the rows of $mTable that point
	 * at $mTitle.
	 *
	 * @return array
	 * @throws MWException if $mTable is not a supported link table
	 */
	public function getToCondition() {
		$prefix = $this->getPrefix();
		switch ( $this->mTable ) {
			case 'pagelinks':
			case 'templatelinks':
			case 'redirect':
				return array(
					"{$prefix}_namespace" => $this->mTitle->getNamespace(),
					"{$prefix}_title" => $this->mTitle->getDBkey()
				);
			case 'imagelinks':
				return array( 'il_to' => $this->mTitle->getDBkey() );
			case 'categorylinks':
				return array( 'cl_to' => $this->mTitle->getDBkey() );
		}
		throw new MWException( 'Invalid table type in ' . __CLASS__ );
	}

	/**
	 * Invalidate a set of IDs, right now
	 *
	 * Updates page_touched in batches of at most $mRowsPerQuery IDs and,
	 * depending on $wgUseSquid / $wgUseFileCache, issues a SquidUpdate and
	 * removes the file cache entry for each affected title.
	 *
	 * @param ResultWrapper $res Rows whose first column is a page ID
	 */
	public function invalidateIDs( ResultWrapper $res ) {
		global $wgUseFileCache, $wgUseSquid;

		if ( $res->numRows() == 0 ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		# One timestamp for the whole run, so every batch gets the same value
		$timestamp = $dbw->timestamp();
		$done = false;

		while ( !$done ) {
			# Get all IDs in this query into an array
			$ids = array();
			for ( $i = 0; $i < $this->mRowsPerQuery; $i++ ) {
				$row = $res->fetchRow();
				if ( $row ) {
					$ids[] = $row[0];
				} else {
					$done = true;
					break;
				}
			}

			if ( !count( $ids ) ) {
				break;
			}

			# Update page_touched
			$dbw->update( 'page',
				array( 'page_touched' => $timestamp ),
				array( 'page_id IN (' . $dbw->makeList( $ids ) . ')' ),
				__METHOD__
			);

			# Update squid
			if ( $wgUseSquid || $wgUseFileCache ) {
				$titles = Title::newFromIDs( $ids );
				if ( $wgUseSquid ) {
					$u = SquidUpdate::newFromTitles( $titles );
					$u->doUpdate();
				}

				# Update file cache
				if ( $wgUseFileCache ) {
					foreach ( $titles as $title ) {
						$cm = new HTMLFileCache( $title );
						# Best effort: errors from unlink() are deliberately suppressed
						@unlink( $cm->fileCacheName() );
					}
				}
			}
		}
	}
}
|
|
|
|
|
|
2007-04-04 05:22:37 +00:00
|
|
|
/**
|
|
|
|
|
* @todo document (e.g. one-sentence top-level class description).
|
WARNING: HUGE COMMIT
Doxygen documentation update:
* Changed alls @addtogroup to @ingroup. @addtogroup adds the comment to the group description, but doesn't add the file, class, function, ... to the group like @ingroup does. See for example http://svn.wikimedia.org/doc/group__SpecialPage.html where it's impossible to see related files, classes, ... that should belong to that group.
* Added @file to file description, it seems that it should be explicitely decalred for file descriptions, otherwise doxygen will think that the comment document the first class, variabled, function, ... that is in that file.
* Removed some empty comments
* Removed some ?>
Added following groups:
* ExternalStorage
* JobQueue
* MaintenanceLanguage
One more thing: there are still a lot of warnings when generating the doc.
2008-05-20 17:13:28 +00:00
|
|
|
* @ingroup JobQueue
|
2007-04-04 05:22:37 +00:00
|
|
|
*/
|
2006-06-18 12:42:16 +00:00
|
|
|
class HTMLCacheUpdateJob extends Job {
	/**
	 * $table: link table name, passed on to HTMLCacheUpdate
	 * $start: lowest linking page ID handled by this job, or false for no lower bound
	 * $end:   highest linking page ID handled by this job, or false for no upper bound
	 */
	public $table, $start, $end;

	/**
	 * Construct a job
	 * @param Title $title The title linked to
	 * @param array $params Job parameters (table, start and end page_ids)
	 * @param integer $id job_id
	 */
	public function __construct( $title, $params, $id = 0 ) {
		parent::__construct( 'htmlCacheUpdate', $title, $params, $id );
		$this->table = $params['table'];
		$this->start = $params['start'];
		$this->end = $params['end'];
	}

	/**
	 * Select the linking page IDs that fall inside this job's ID range and
	 * invalidate them via HTMLCacheUpdate::invalidateIDs().
	 *
	 * @return bool Always true
	 */
	public function run() {
		# $this->title is not assigned in this class; presumably the parent
		# constructor sets it - confirm against Job
		$update = new HTMLCacheUpdate( $this->title, $this->table );

		$fromField = $update->getFromField();
		$conds = $update->getToCondition();
		# The bounds are concatenated into raw SQL, so force them to integers
		# rather than trusting whatever was stored in the job parameters.
		# NOTE(review): both tests are truthiness checks, so a bound of 0 is
		# treated the same as false (unbounded).
		if ( $this->start ) {
			$conds[] = "$fromField >= " . intval( $this->start );
		}
		if ( $this->end ) {
			$conds[] = "$fromField <= " . intval( $this->end );
		}

		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select( $this->table, $fromField, $conds, __METHOD__ );
		$update->invalidateIDs( $res );

		return true;
	}
}
|