Recommit of r45431 with these changes:
* Removed backspace characters in output
* Small code update
* Removed 'disabled' comment from refreshLinks.php
This commit is contained in:
parent
35383a74e3
commit
05c146c191
2 changed files with 84 additions and 36 deletions
|
|
@ -136,41 +136,90 @@ function fixLinksFromArticle( $id ) {
|
|||
$dbw->immediateCommit();
|
||||
}
|
||||
|
||||
/**
 * Removes rows from the pagelinks, imagelinks, categorylinks, templatelinks
 * and externallinks tables whose source page id has no matching row in the
 * page table.
 *
 * The orphaned page ids are read from the DB_SLAVE connection and deleted
 * on the DB_MASTER connection in batches through deleteBatch().
 *
 * @param $maxLag Passed to wfWaitForSlaves() once, before any work starts
 * @param $batchSize The size of deletion batches. Anything that does not
 *        evaluate to a positive integer (e.g. null when the command-line
 *        script forwards an unset 'batch-size' option) falls back to 100.
 *
 * @author Merlijn van Deen <valhallasw@arctus.nl>
 */
function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
	$fname = 'deleteLinksFromNonexistent';

	// $batchSize is used as a modulo divisor below; a null/0/negative value
	// would make "$counter % $batchSize" a division by zero.
	$batchSize = intval( $batchSize );
	if ( $batchSize < 1 ) {
		$batchSize = 100;
	}

	wfWaitForSlaves( $maxLag );

	$dbw = wfGetDB( DB_MASTER );
	$dbr = wfGetDB( DB_SLAVE );
	// NOTE(review): presumably switches the read connection to unbuffered
	// result sets so a large id list is not held in memory; it is never
	// switched back here -- confirm no later reader depends on buffering.
	$dbr->bufferResults( false );

	$linksTables = array( // table name => page_id field
		'pagelinks' => 'pl_from',
		'imagelinks' => 'il_from',
		'categorylinks' => 'cl_from',
		'templatelinks' => 'tl_from',
		'externallinks' => 'el_from',
	);

	$readPage = $dbr->tableName( 'page' );

	foreach ( $linksTables as $table => $field ) {
		$readLinks = $dbr->tableName( $table );

		print "Retrieving illegal entries from $table... ";

		$sql = "SELECT DISTINCT( $field ) FROM $readLinks LEFT JOIN $readPage ON $field=page_id WHERE page_id IS NULL;";
		$results = $dbr->query( $sql, $fname . ':' . $readLinks );

		// Do not ask the result for numRows() up front: with buffering off
		// the row count is not reliable until every row has been fetched
		// (see the PHP mysql/mysqli num_rows documentation), which would
		// make a "numRows() > 0" guard skip the deletion entirely.
		// Count the rows while iterating instead.
		$counter = 0;
		$list = array();

		foreach ( $results as $row ) {
			$counter++;
			$list[] = $row->$field;
			if ( ( $counter % $batchSize ) == 0 ) {
				print $counter . "..";
				deleteBatch( $dbw, $table, $field, $list );
				$list = array();
			}
		}

		// Flush the last, partially filled batch; deleteBatch() returns
		// immediately when the list is empty.
		deleteBatch( $dbw, $table, $field, $list );

		print $counter . " illegal " . $field . "s removed.\n";
	}
}
|
||||
|
||||
?>
|
||||
/**
 * Deletes a batch of items from a table.
 * Runs the query: DELETE FROM <$table> WHERE <$field> IN (<$list>)
 *
 * Returns without touching the database when $list is empty. Before the
 * query is issued the connection is pinged; while the ping fails, it is
 * retried every 10 seconds and progress dots are printed.
 *
 * @param $dbw Database Database object to run the DELETE query on
 * @param $table table to work on; will be converted via $dbw->tableName.
 * @param $field column to search in
 * @param $list values to remove. Array with SQL-safe (!) values: they are
 *        joined into the statement verbatim, without quoting or escaping.
 *
 * @author Merlijn van Deen <valhallasw@arctus.nl>
 */
function deleteBatch($dbw, $table, $field, $list) {
	if ( count( $list ) == 0 ) {
		return;
	}

	$masterLinks = $dbw->tableName( $table );
	// Label the query with the actual table name. The "$" was missing here,
	// so every batch was labelled with the literal text "masterLinks".
	$fname = "deleteBatch:$masterLinks";

	if ( !$dbw->ping() ) {
		print "\nDB disconnected, reconnecting...";
		// Block until ping() reports the connection as usable again.
		while ( !$dbw->ping() ) {
			print ".";
			sleep( 10 );
		}
		print "\n";
	}

	$sql = "DELETE FROM $masterLinks WHERE $field IN (" . join( ",", $list ) . ");";
	$dbw->query( $sql, $fname );
}
|
||||
|
|
|
|||
|
|
@ -18,14 +18,16 @@ Usage:
|
|||
[--new-only] [--redirects-only]
|
||||
php refreshLinks.php [<start>] [-e <end>] [-m <maxlag>] --old-redirects-only
|
||||
|
||||
--help : This help message
|
||||
--dfn-only : Delete links from nonexistent articles only
|
||||
--new-only : Only affect articles with just a single edit
|
||||
--redirects-only : Only fix redirects, not all links
|
||||
--old-redirects-only : Only fix redirects with no redirect table entry
|
||||
-m <number> : Maximum replication lag
|
||||
<start> : First page id to refresh
|
||||
-e <number> : Last page id to refresh
|
||||
--help : This help message
|
||||
--dfn-only : Delete links from nonexistent articles only
|
||||
--batch-size <number> : The delete batch size when removing links from
|
||||
nonexistent articles (default 100)
|
||||
--new-only : Only affect articles with just a single edit
|
||||
--redirects-only : Only fix redirects, not all links
|
||||
--old-redirects-only : Only fix redirects with no redirect table entry
|
||||
-m <number> : Maximum replication lag
|
||||
<start> : First page id to refresh
|
||||
-e <number> : Last page id to refresh
|
||||
|
||||
TEXT;
|
||||
exit(0);
|
||||
|
|
@ -42,12 +44,9 @@ if ( !$options['dfn-only'] ) {
|
|||
|
||||
refreshLinks( $start, $options['new-only'], $options['m'], $options['e'], $options['redirects-only'], $options['old-redirects-only'] );
|
||||
}
|
||||
// this bit's bad for replication: disabling temporarily
|
||||
// --brion 2005-07-16
|
||||
//deleteLinksFromNonexistent();
|
||||
|
||||
deleteLinksFromNonexistent($options['m'], $options['batch-size']);
|
||||
|
||||
if ( $options['globals'] ) {
|
||||
print_r( $GLOBALS );
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue