Fixes for TitleCleanup subclasses:
* (r56711) Don't use unbufferred queries unless someone is holding a gun to your head, they cause no end of trouble. Use batched queries instead. Should fix the bug reported on bug 20741 comment 13. * Fixed a bug in cleanupTitles.php which caused it to fail to convert spaces in page_title to underscores, possibly caused by r6355. * Made cleanupWatchlist.php respect "--fix" as documented * Made runTable accept a parameter array instead of an ever-growing formal parameter list * Renamed processPage() to more accurate processRow(). Removed abstract function definition since the name of the function can be overridden. * Made a few things public instead of protected for easier testing via eval.php * Fixed missing newlines in progress messages in cleanupCaps.php
This commit is contained in:
parent
87469c3e1d
commit
77086dc2ef
5 changed files with 121 additions and 49 deletions
|
|
@ -45,12 +45,14 @@ class CapsCleanup extends TableCleanup {
|
|||
if( $wgCapitalLinks )
|
||||
$this->error( "\$wgCapitalLinks is on -- no need for caps links cleanup.", true );
|
||||
|
||||
$this->runTable( $this->targetTable,
|
||||
'WHERE page_namespace=' . $this->namespace,
|
||||
array( &$this, 'processPage' ) );
|
||||
$this->runTable( array(
|
||||
'table' => 'page',
|
||||
'conds' => array( 'page_namespace' => $this->namespace ),
|
||||
'index' => 'page_id',
|
||||
'callback' => 'processRow' ) );
|
||||
}
|
||||
|
||||
protected function processPage( $row ) {
|
||||
protected function processRow( $row ) {
|
||||
global $wgContLang;
|
||||
|
||||
$current = Title::makeTitle( $row->page_namespace, $row->page_title );
|
||||
|
|
@ -58,23 +60,23 @@ class CapsCleanup extends TableCleanup {
|
|||
$upper = $row->page_title;
|
||||
$lower = $wgContLang->lcfirst( $row->page_title );
|
||||
if( $upper == $lower ) {
|
||||
$this->output( "\"$display\" already lowercase." );
|
||||
$this->output( "\"$display\" already lowercase.\n" );
|
||||
return $this->progress( 0 );
|
||||
}
|
||||
|
||||
$target = Title::makeTitle( $row->page_namespace, $lower );
|
||||
$targetDisplay = $target->getPrefixedText();
|
||||
if( $target->exists() ) {
|
||||
$this->output( "\"$display\" skipped; \"$targetDisplay\" already exists" );
|
||||
$this->output( "\"$display\" skipped; \"$targetDisplay\" already exists\n" );
|
||||
return $this->progress( 0 );
|
||||
}
|
||||
|
||||
if( $this->dryrun ) {
|
||||
$this->output( "\"$display\" -> \"$targetDisplay\": DRY RUN, NOT MOVED" );
|
||||
$this->output( "\"$display\" -> \"$targetDisplay\": DRY RUN, NOT MOVED\n" );
|
||||
$ok = true;
|
||||
} else {
|
||||
$ok = $current->moveTo( $target, false, 'Converting page titles to lowercase' );
|
||||
$this->output( "\"$display\" -> \"$targetDisplay\": $ok" );
|
||||
$this->output( "\"$display\" -> \"$targetDisplay\": $ok\n" );
|
||||
}
|
||||
if( $ok === true ) {
|
||||
$this->progress( 1 );
|
||||
|
|
@ -82,7 +84,7 @@ class CapsCleanup extends TableCleanup {
|
|||
$talk = $target->getTalkPage();
|
||||
$row->page_namespace = $talk->getNamespace();
|
||||
if( $talk->exists() ) {
|
||||
return $this->processPage( $row );
|
||||
return $this->processRow( $row );
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -31,13 +31,19 @@
|
|||
require_once( dirname(__FILE__) . '/cleanupTable.inc' );
|
||||
|
||||
class ImageCleanup extends TableCleanup {
|
||||
protected $targetTable = 'image';
|
||||
protected $defaultParams = array(
|
||||
'table' => 'image',
|
||||
'conds' => array(),
|
||||
'index' => 'img_name',
|
||||
'callback' => 'processRow',
|
||||
);
|
||||
|
||||
public function __construct() {
|
||||
parent::__construct();
|
||||
$this->mDescription = "Script to clean up broken, unparseable upload filenames";
|
||||
}
|
||||
|
||||
protected function processPage( $row ) {
|
||||
protected function processRow( $row ) {
|
||||
global $wgContLang;
|
||||
|
||||
$source = $row->img_name;
|
||||
|
|
|
|||
|
|
@ -23,10 +23,18 @@
|
|||
|
||||
require_once( dirname(__FILE__) . '/Maintenance.php' );
|
||||
|
||||
abstract class TableCleanup extends Maintenance {
|
||||
protected $targetTable = 'page';
|
||||
class TableCleanup extends Maintenance {
|
||||
protected $defaultParams = array(
|
||||
'table' => 'page',
|
||||
'conds' => array(),
|
||||
'index' => 'page_id',
|
||||
'callback' => 'processRow',
|
||||
);
|
||||
|
||||
protected $dryrun = false;
|
||||
protected $maxLag = 10; # if slaves are lagged more than 10 secs, wait
|
||||
public $batchSize = 100;
|
||||
public $reportInterval = 100;
|
||||
|
||||
public function __construct() {
|
||||
parent::__construct();
|
||||
|
|
@ -42,9 +50,7 @@ abstract class TableCleanup extends Maintenance {
|
|||
} else {
|
||||
$this->output( "Checking and fixing bad titles...\n" );
|
||||
}
|
||||
$this->runTable( $this->targetTable,
|
||||
'', //'WHERE page_namespace=0',
|
||||
array( $this, 'processPage' ) );
|
||||
$this->runTable( $this->defaultParams );
|
||||
}
|
||||
|
||||
protected function init( $count, $table ) {
|
||||
|
|
@ -58,7 +64,7 @@ abstract class TableCleanup extends Maintenance {
|
|||
protected function progress( $updated ) {
|
||||
$this->updated += $updated;
|
||||
$this->processed++;
|
||||
if( $this->processed % 100 != 0 ) {
|
||||
if( $this->processed % $this->reportInterval != 0 ) {
|
||||
return;
|
||||
}
|
||||
$portion = $this->processed / $this->count;
|
||||
|
|
@ -85,34 +91,73 @@ abstract class TableCleanup extends Maintenance {
|
|||
flush();
|
||||
}
|
||||
|
||||
protected function runTable( $table, $where, $callback ) {
|
||||
public function runTable( $params ) {
|
||||
$dbr = wfGetDB( DB_SLAVE );
|
||||
|
||||
$count = $dbr->selectField( $table, 'count(*)', '', __METHOD__ );
|
||||
$this->init( $count, $table );
|
||||
$this->output( "Processing $table..." );
|
||||
|
||||
// Unbuffered queries, avoids OOM
|
||||
$dbr->bufferResults( false );
|
||||
|
||||
$tableName = $dbr->tableName( $table );
|
||||
$sql = "SELECT * FROM $tableName $where";
|
||||
$result = $dbr->query( $sql, __METHOD__ );
|
||||
|
||||
foreach( $result as $row ) {
|
||||
call_user_func( $callback, $row );
|
||||
if ( array_diff( array_keys( $params ),
|
||||
array( 'table', 'conds', 'index', 'callback' ) ) )
|
||||
{
|
||||
throw new MWException( __METHOD__.': Missing parameter ' . implode( ', ', $params ) );
|
||||
}
|
||||
|
||||
|
||||
$table = $params['table'];
|
||||
$count = $dbr->selectField( $table, 'count(*)', $params['conds'], __METHOD__ );
|
||||
$this->init( $count, $table );
|
||||
$this->output( "Processing $table...\n" );
|
||||
|
||||
|
||||
$index = (array)$params['index'];
|
||||
$indexConds = array();
|
||||
$options = array(
|
||||
'ORDER BY' => implode( ',', $index ),
|
||||
'LIMIT' => $this->batchSize
|
||||
);
|
||||
$callback = array( $this, $params['callback'] );
|
||||
|
||||
while ( true ) {
|
||||
$conds = array_merge( $params['conds'], $indexConds );
|
||||
$res = $dbr->select( $table, '*', $conds, __METHOD__, $options );
|
||||
if ( !$res->numRows() ) {
|
||||
// Done
|
||||
break;
|
||||
}
|
||||
|
||||
foreach ( $res as $row ) {
|
||||
call_user_func( $callback, $row );
|
||||
}
|
||||
|
||||
if ( $res->numRows() < $this->batchSize ) {
|
||||
// Done
|
||||
break;
|
||||
}
|
||||
|
||||
// Update the conditions to select the next batch.
|
||||
// Construct a condition string by starting with the least significant part
|
||||
// of the index, and adding more significant parts progressively to the left
|
||||
// of the string.
|
||||
$nextCond = '';
|
||||
foreach ( array_reverse( $index ) as $field ) {
|
||||
$encValue = $dbr->addQuotes( $row->$field );
|
||||
if ( $nextCond === '' ) {
|
||||
$nextCond = "$field > $encValue";
|
||||
} else {
|
||||
$nextCond = "$field > $encValue OR ($field = $encValue AND ($nextCond))";
|
||||
}
|
||||
}
|
||||
$indexConds = array( $nextCond );
|
||||
}
|
||||
|
||||
$this->output( "Finished $table... $this->updated of $this->processed rows updated\n" );
|
||||
|
||||
$result->free();
|
||||
|
||||
$dbr->bufferResults( true );
|
||||
}
|
||||
|
||||
protected function hexChar( $matches ) {
|
||||
return sprintf( "\\x%02x", ord( $matches[1] ) );
|
||||
}
|
||||
|
||||
abstract protected function processPage( $row );
|
||||
}
|
||||
|
||||
class TableCleanupTest extends TableCleanup {
|
||||
function processRow( $row ) {
|
||||
$this->progress( mt_rand( 0, 1 ) );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -36,15 +36,16 @@ class TitleCleanup extends TableCleanup {
|
|||
$this->mDescription = "Script to clean up broken, unparseable titles";
|
||||
}
|
||||
|
||||
protected function processPage( $row ) {
|
||||
$current = Title::makeTitle( $row->page_namespace, $row->page_title );
|
||||
$display = $current->getPrefixedText();
|
||||
|
||||
protected function processRow( $row ) {
|
||||
$display = Title::makeName( $row->page_namespace, $row->page_title );
|
||||
$verified = UtfNormal::cleanUp( $display );
|
||||
|
||||
$title = Title::newFromText( $verified );
|
||||
|
||||
if( !is_null( $title ) && $title->equals( $current ) && $title->canExist() ) {
|
||||
if( !is_null( $title )
|
||||
&& $title->canExist()
|
||||
&& $title->getNamespace() == $row->page_namespace
|
||||
&& $title->getDBkey() === $row->page_title )
|
||||
{
|
||||
return $this->progress( 0 ); // all is fine
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,13 +31,27 @@
|
|||
require_once( dirname(__FILE__) . '/cleanupTable.inc' );
|
||||
|
||||
class WatchlistCleanup extends TableCleanup {
|
||||
protected $targetTable = 'watchlist';
|
||||
protected $defaultParams = array(
|
||||
'table' => 'watchlist',
|
||||
'index' => array( 'wl_user', 'wl_namespace', 'wl_title' ),
|
||||
'conds' => array(),
|
||||
'callback' => 'processRow'
|
||||
);
|
||||
|
||||
public function __construct() {
|
||||
parent::__construct();
|
||||
$this->mDescription = "Script to remove broken, unparseable titles in the Watchlist";
|
||||
$this->addOption( 'fix', 'Actually remove entries; without will only report.' );
|
||||
}
|
||||
|
||||
protected function processPage( $row ) {
|
||||
function execute() {
|
||||
if ( !$this->hasOption( 'fix' ) ) {
|
||||
$this->output( "Dry run only: use --fix to enable updates\n" );
|
||||
}
|
||||
parent::execute();
|
||||
}
|
||||
|
||||
protected function processRow( $row ) {
|
||||
$current = Title::makeTitle( $row->wl_namespace, $row->wl_title );
|
||||
$display = $current->getPrefixedText();
|
||||
$verified = UtfNormal::cleanUp( $display );
|
||||
|
|
@ -45,14 +59,15 @@ class WatchlistCleanup extends TableCleanup {
|
|||
|
||||
if( $row->wl_user == 0 || is_null( $title ) || !$title->equals( $current ) ) {
|
||||
$this->output( "invalid watch by {$row->wl_user} for ({$row->wl_namespace}, \"{$row->wl_title}\")\n" );
|
||||
$this->removeWatch( $row );
|
||||
return $this->progress( 1 );
|
||||
$updated = $this->removeWatch( $row );
|
||||
$this->progress( $updated );
|
||||
return;
|
||||
}
|
||||
$this->progress( 0 );
|
||||
}
|
||||
|
||||
private function removeWatch( $row ) {
|
||||
if( !$this->dryrun ) {
|
||||
if( !$this->dryrun && $this->hasOption( 'fix' ) ) {
|
||||
$dbw = wfGetDB( DB_MASTER );
|
||||
$dbw->delete( 'watchlist', array(
|
||||
'wl_user' => $row->wl_user,
|
||||
|
|
@ -60,6 +75,9 @@ class WatchlistCleanup extends TableCleanup {
|
|||
'wl_title' => $row->wl_title ),
|
||||
__METHOD__ );
|
||||
$this->output( "- removed\n" );
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue