wiki.techinc.nl/maintenance/refreshLinks.inc
Nick Jenkins f88c771756 The war on redundant ampersand usage!
* Convert "$dbw =& wfGetDB( DB_MASTER );" --> "$dbw = wfGetDB( DB_MASTER );"
* convert "$skin =& $wgUser->getSkin();" --> "$skin = $wgUser->getSkin();"

For the time being have not changed the function definitions of wfGetDB() or User::getSkin() [i.e. they are still both return-by-ref], so as to ensure the interface does not change for extensions [some of which may still be trying to run on PHP4 environments]. However presumably at some point this can be changed too.

Also includes tiny tweak to newlines in parserTests - will show 1 rather than 2 newlines between the "Reading tests from" strings when in quiet mode.
2007-01-22 23:50:42 +00:00

130 lines
2.9 KiB
PHP

<?php
/**
* Helpers for rebuilding the link tables from the command line.
*
* Defines refreshLinks(), fixLinksFromArticle() and
* deleteLinksFromNonexistent().
*
* @addtogroup Maintenance
*/
/**
* Progress is printed, and wfWaitForSlaves() is called, each time the
* running page counter / page_id in refreshLinks() is a multiple of this value.
*/
define( "REPORTING_INTERVAL", 100 );
# Use this definition instead to report (and wait for slaves) on every page:
#define( "REPORTING_INTERVAL", 1 );
/**
 * Rebuild link table entries by calling fixLinksFromArticle() on a set of pages.
 *
 * Two modes:
 *  - $newOnly true: only pages with page_is_new = 1 and page_id greater than
 *    $start are processed ($end is not used in this mode).
 *  - $newOnly false: every id from $start up to and including $end is
 *    processed, whether or not a page with that id exists.
 *
 * Before processing, math rendering is switched to source, all parser tag
 * hooks are removed, and image resizing and Tidy are disabled globally.
 *
 * @param $start   page_id to start from (converted to an integer)
 * @param $newOnly restrict the run to pages flagged as new
 * @param $maxLag  passed through to wfWaitForSlaves() at every reporting interval
 * @param $end     last page_id for the full run; when empty, max(page_id) is used
 */
function refreshLinks( $start, $newOnly = false, $maxLag = false, $end = 0 ) {
	global $wgUser, $wgParser, $wgUseImageResize, $wgUseTidy;

	$caller = 'refreshLinks';
	$db = wfGetDB( DB_SLAVE );
	$start = (int)$start;

	# Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway)
	$wgUser->setOption( 'math', MW_MATH_SOURCE );

	# Don't generate extension images (e.g. Timeline)
	$wgParser->mTagHooks = array();

	# Don't generate thumbnail images, and skip Tidy as well
	$wgUseImageResize = false;
	$wgUseTidy = false;

	if ( !$newOnly ) {
		# Full run: walk the whole id range, one id at a time
		print "Refreshing link table.\n";
		if ( !$end ) {
			$end = $db->selectField( 'page', 'max(page_id)', false );
		}
		print( "Starting from page_id $start of $end.\n" );

		$pageId = $start;
		while ( $pageId <= $end ) {
			if ( $pageId % REPORTING_INTERVAL == 0 ) {
				print "$pageId\n";
				wfWaitForSlaves( $maxLag );
			}
			fixLinksFromArticle( $pageId );
			$pageId++;
		}
		return;
	}

	# New-pages run: only touch pages still flagged as new
	print "Refreshing links from ";
	$conds = array(
		'page_is_new' => 1,
		"page_id > $start" );
	$result = $db->select( 'page', array( 'page_id' ), $conds, $caller );
	$total = $db->numRows( $result );
	print "$total new articles...\n";

	$done = 0;
	while ( $row = $db->fetchObject( $result ) ) {
		$done++;
		if ( $done % REPORTING_INTERVAL == 0 ) {
			print "$done\n";
			wfWaitForSlaves( $maxLag );
		}
		fixLinksFromArticle( $row->page_id );
	}
}
/**
 * Re-parse the revision returned by Revision::newFromTitle() for one page
 * and run a LinksUpdate from the resulting parser output.
 *
 * Side effects visible here: the global $wgTitle is overwritten with the
 * title for $id (null when Title::newFromID() finds nothing), and the
 * LinkCache singleton is cleared on every call, even when nothing else is done.
 *
 * Returns without touching the link tables when the id has no title or the
 * title has no revision.
 *
 * @param $id page_id of the page to refresh
 */
function fixLinksFromArticle( $id ) {
	global $wgTitle, $wgParser;

	$wgTitle = Title::newFromID( $id );
	$dbw = wfGetDB( DB_MASTER );

	$linkCache =& LinkCache::singleton();
	$linkCache->clear();

	if ( is_null( $wgTitle ) ) {
		return;
	}

	# Look the revision up before opening the transaction: returning early
	# after begin() would leave a transaction open on the master connection.
	$revision = Revision::newFromTitle( $wgTitle );
	if ( !$revision ) {
		return;
	}

	$dbw->begin();

	$options = new ParserOptions;
	$parserOutput = $wgParser->parse( $revision->getText(), $wgTitle, $options, true, true, $revision->getId() );
	$update = new LinksUpdate( $wgTitle, $parserOutput, false );
	$update->doUpdate();

	$dbw->immediateCommit();
}
/**
 * Remove link table rows whose source page no longer exists.
 *
 * For each of pagelinks, imagelinks, categorylinks, templatelinks and
 * externallinks, a multi-table DELETE is issued that LEFT JOINs the page
 * table on the table's *_from field and deletes the rows that have no
 * matching page_id. Progress and the affected row count are printed.
 *
 * @param $maxLag passed to wfWaitForSlaves(), which is called once before
 *                any deletion starts
 */
function deleteLinksFromNonexistent( $maxLag = 0 ) {
	$fname = 'deleteLinksFromNonexistent';

	wfWaitForSlaves( $maxLag );

	# DB_MASTER rather than DB_WRITE, to match how the rest of this file
	# asks for the master connection.
	$dbw = wfGetDB( DB_MASTER );

	# Link table name => field holding the page_id of the linking page
	$linksTables = array(
		'pagelinks' => 'pl_from',
		'imagelinks' => 'il_from',
		'categorylinks' => 'cl_from',
		'templatelinks' => 'tl_from',
		'externallinks' => 'el_from',
	);

	$page = $dbw->tableName( 'page' );

	foreach ( $linksTables as $table => $field ) {
		# Check the connection before each table and block, retrying every
		# ten seconds, until ping() succeeds again.
		if ( !$dbw->ping() ) {
			print "DB disconnected, reconnecting...";
			while ( !$dbw->ping() ) {
				print ".";
				sleep(10);
			}
			print "\n";
		}

		$pTable = $dbw->tableName( $table );
		$sql = "DELETE $pTable FROM $pTable LEFT JOIN $page ON page_id=$field WHERE page_id IS NULL";

		print "Deleting $table from non-existent articles...";
		$dbw->query( $sql, $fname );
		print " fixed " .$dbw->affectedRows() . " row(s)\n";
	}
}
?>