Fixes for TableCleanup subclasses:

* (r56711) Don't use unbuffered queries unless someone is holding a gun to your head; they cause no end of trouble. Use batched queries instead. Should fix the bug reported in bug 20741 comment 13.
* Fixed a bug in cleanupTitles.php which caused it to fail to convert spaces in page_title to underscores, possibly caused by r6355.
* Made cleanupWatchlist.php respect "--fix" as documented
* Made runTable accept a parameter array instead of an ever-growing formal parameter list
* Renamed processPage() to the more accurate processRow(). Removed the abstract function definition since the name of the function can be overridden.
* Made a few things public instead of protected for easier testing via eval.php
* Fixed missing newlines in progress messages in cleanupCaps.php
This commit is contained in:
Tim Starling 2009-09-24 04:19:25 +00:00
parent 87469c3e1d
commit 77086dc2ef
5 changed files with 121 additions and 49 deletions

View file

@ -45,12 +45,14 @@ class CapsCleanup extends TableCleanup {
if( $wgCapitalLinks )
$this->error( "\$wgCapitalLinks is on -- no need for caps links cleanup.", true );
$this->runTable( $this->targetTable,
'WHERE page_namespace=' . $this->namespace,
array( &$this, 'processPage' ) );
$this->runTable( array(
'table' => 'page',
'conds' => array( 'page_namespace' => $this->namespace ),
'index' => 'page_id',
'callback' => 'processRow' ) );
}
protected function processPage( $row ) {
protected function processRow( $row ) {
global $wgContLang;
$current = Title::makeTitle( $row->page_namespace, $row->page_title );
@ -58,23 +60,23 @@ class CapsCleanup extends TableCleanup {
$upper = $row->page_title;
$lower = $wgContLang->lcfirst( $row->page_title );
if( $upper == $lower ) {
$this->output( "\"$display\" already lowercase." );
$this->output( "\"$display\" already lowercase.\n" );
return $this->progress( 0 );
}
$target = Title::makeTitle( $row->page_namespace, $lower );
$targetDisplay = $target->getPrefixedText();
if( $target->exists() ) {
$this->output( "\"$display\" skipped; \"$targetDisplay\" already exists" );
$this->output( "\"$display\" skipped; \"$targetDisplay\" already exists\n" );
return $this->progress( 0 );
}
if( $this->dryrun ) {
$this->output( "\"$display\" -> \"$targetDisplay\": DRY RUN, NOT MOVED" );
$this->output( "\"$display\" -> \"$targetDisplay\": DRY RUN, NOT MOVED\n" );
$ok = true;
} else {
$ok = $current->moveTo( $target, false, 'Converting page titles to lowercase' );
$this->output( "\"$display\" -> \"$targetDisplay\": $ok" );
$this->output( "\"$display\" -> \"$targetDisplay\": $ok\n" );
}
if( $ok === true ) {
$this->progress( 1 );
@ -82,7 +84,7 @@ class CapsCleanup extends TableCleanup {
$talk = $target->getTalkPage();
$row->page_namespace = $talk->getNamespace();
if( $talk->exists() ) {
return $this->processPage( $row );
return $this->processRow( $row );
}
}
} else {

View file

@ -31,13 +31,19 @@
require_once( dirname(__FILE__) . '/cleanupTable.inc' );
class ImageCleanup extends TableCleanup {
protected $targetTable = 'image';
protected $defaultParams = array(
'table' => 'image',
'conds' => array(),
'index' => 'img_name',
'callback' => 'processRow',
);
public function __construct() {
parent::__construct();
$this->mDescription = "Script to clean up broken, unparseable upload filenames";
}
protected function processPage( $row ) {
protected function processRow( $row ) {
global $wgContLang;
$source = $row->img_name;

View file

@ -23,10 +23,18 @@
require_once( dirname(__FILE__) . '/Maintenance.php' );
abstract class TableCleanup extends Maintenance {
protected $targetTable = 'page';
class TableCleanup extends Maintenance {
protected $defaultParams = array(
'table' => 'page',
'conds' => array(),
'index' => 'page_id',
'callback' => 'processRow',
);
protected $dryrun = false;
protected $maxLag = 10; # if slaves are lagged more than 10 secs, wait
public $batchSize = 100;
public $reportInterval = 100;
public function __construct() {
parent::__construct();
@ -42,9 +50,7 @@ abstract class TableCleanup extends Maintenance {
} else {
$this->output( "Checking and fixing bad titles...\n" );
}
$this->runTable( $this->targetTable,
'', //'WHERE page_namespace=0',
array( $this, 'processPage' ) );
$this->runTable( $this->defaultParams );
}
protected function init( $count, $table ) {
@ -58,7 +64,7 @@ abstract class TableCleanup extends Maintenance {
protected function progress( $updated ) {
$this->updated += $updated;
$this->processed++;
if( $this->processed % 100 != 0 ) {
if( $this->processed % $this->reportInterval != 0 ) {
return;
}
$portion = $this->processed / $this->count;
@ -85,34 +91,73 @@ abstract class TableCleanup extends Maintenance {
flush();
}
/**
 * Run a callback method over every row of a table that matches the given
 * conditions, reading the table in batches of $this->batchSize rows.
 *
 * Batches are ordered by the index field(s) and each following batch is
 * selected with a strict "greater than the last row seen" condition, so the
 * index fields taken together need to be unique per row; otherwise rows
 * sharing the index value of the last row of a batch are skipped.
 *
 * @param $params Array with exactly these keys:
 *   - table:    name of the table to scan
 *   - conds:    array of conditions restricting the rows that are processed
 *   - index:    field name, or array of field names (most significant
 *               first), used for ordering and paging
 *   - callback: name of the method on $this that is called with each row
 * @throws MWException if a required key is missing or an unknown key is given
 */
public function runTable( $params ) {
	$dbr = wfGetDB( DB_SLAVE );

	// Validate the parameter array by key name. array_diff() returns the
	// entries of its FIRST argument that are absent from the second, so the
	// required list must come first to detect missing keys.
	$required = array( 'table', 'conds', 'index', 'callback' );
	$given = array_keys( $params );
	$missing = array_diff( $required, $given );
	if ( $missing ) {
		throw new MWException( __METHOD__ . ': Missing parameter ' . implode( ', ', $missing ) );
	}
	$unknown = array_diff( $given, $required );
	if ( $unknown ) {
		throw new MWException( __METHOD__ . ': Unknown parameter ' . implode( ', ', $unknown ) );
	}

	$table = $params['table'];
	$count = $dbr->selectField( $table, 'count(*)', $params['conds'], __METHOD__ );
	$this->init( $count, $table );
	$this->output( "Processing $table...\n" );

	// A single field name is accepted as shorthand for a one-element index.
	$index = (array)$params['index'];
	$indexConds = array();
	$options = array(
		'ORDER BY' => implode( ',', $index ),
		'LIMIT' => $this->batchSize
	);
	$callback = array( $this, $params['callback'] );

	while ( true ) {
		$conds = array_merge( $params['conds'], $indexConds );
		$res = $dbr->select( $table, '*', $conds, __METHOD__, $options );
		if ( !$res->numRows() ) {
			// Done
			break;
		}

		foreach ( $res as $row ) {
			call_user_func( $callback, $row );
		}

		if ( $res->numRows() < $this->batchSize ) {
			// Short batch: nothing left after it
			break;
		}

		// Update the conditions to select the next batch. $row still holds
		// the last row of the batch just processed.
		// Construct a condition string by starting with the least significant part
		// of the index, and adding more significant parts progressively to the left
		// of the string.
		$nextCond = '';
		foreach ( array_reverse( $index ) as $field ) {
			$encValue = $dbr->addQuotes( $row->$field );
			if ( $nextCond === '' ) {
				$nextCond = "$field > $encValue";
			} else {
				$nextCond = "$field > $encValue OR ($field = $encValue AND ($nextCond))";
			}
		}
		$indexConds = array( $nextCond );
	}

	$this->output( "Finished $table... $this->updated of $this->processed rows updated\n" );
}
/**
 * Render the first byte of $matches[1] as a lowercase, two-digit "\xNN"
 * escape sequence.
 *
 * @param $matches Array whose element 1 is the string to escape
 *   (presumably a regex match array passed by a replace callback; confirm
 *   against the caller)
 * @return String
 */
protected function hexChar( $matches ) {
	$byte = ord( $matches[1] );
	$escaped = sprintf( "\\x%02x", $byte );
	return $escaped;
}
abstract protected function processPage( $row );
}
/**
 * Minimal TableCleanup subclass whose row callback changes nothing and
 * only reports progress, randomly counting each row as updated or not.
 */
class TableCleanupTest extends TableCleanup {
	/**
	 * Row callback: record the row as processed with a random "updated"
	 * count of 0 or 1.
	 *
	 * @param $row Row object from the scanned table (unused)
	 */
	function processRow( $row ) {
		$updated = mt_rand( 0, 1 );
		$this->progress( $updated );
	}
}

View file

@ -36,15 +36,16 @@ class TitleCleanup extends TableCleanup {
$this->mDescription = "Script to clean up broken, unparseable titles";
}
protected function processPage( $row ) {
$current = Title::makeTitle( $row->page_namespace, $row->page_title );
$display = $current->getPrefixedText();
protected function processRow( $row ) {
$display = Title::makeName( $row->page_namespace, $row->page_title );
$verified = UtfNormal::cleanUp( $display );
$title = Title::newFromText( $verified );
if( !is_null( $title ) && $title->equals( $current ) && $title->canExist() ) {
if( !is_null( $title )
&& $title->canExist()
&& $title->getNamespace() == $row->page_namespace
&& $title->getDBkey() === $row->page_title )
{
return $this->progress( 0 ); // all is fine
}

View file

@ -31,13 +31,27 @@
require_once( dirname(__FILE__) . '/cleanupTable.inc' );
class WatchlistCleanup extends TableCleanup {
protected $targetTable = 'watchlist';
protected $defaultParams = array(
'table' => 'watchlist',
'index' => array( 'wl_user', 'wl_namespace', 'wl_title' ),
'conds' => array(),
'callback' => 'processRow'
);
public function __construct() {
parent::__construct();
$this->mDescription = "Script to remove broken, unparseable titles in the Watchlist";
$this->addOption( 'fix', 'Actually remove entries; without will only report.' );
}
protected function processPage( $row ) {
/**
 * Print a dry-run notice when --fix was not given, then hand over to the
 * parent implementation.
 */
function execute() {
	$fixRequested = $this->hasOption( 'fix' );
	if ( $fixRequested === false || !$fixRequested ) {
		$this->output( "Dry run only: use --fix to enable updates\n" );
	}
	parent::execute();
}
protected function processRow( $row ) {
$current = Title::makeTitle( $row->wl_namespace, $row->wl_title );
$display = $current->getPrefixedText();
$verified = UtfNormal::cleanUp( $display );
@ -45,14 +59,15 @@ class WatchlistCleanup extends TableCleanup {
if( $row->wl_user == 0 || is_null( $title ) || !$title->equals( $current ) ) {
$this->output( "invalid watch by {$row->wl_user} for ({$row->wl_namespace}, \"{$row->wl_title}\")\n" );
$this->removeWatch( $row );
return $this->progress( 1 );
$updated = $this->removeWatch( $row );
$this->progress( $updated );
return;
}
$this->progress( 0 );
}
private function removeWatch( $row ) {
if( !$this->dryrun ) {
if( !$this->dryrun && $this->hasOption( 'fix' ) ) {
$dbw = wfGetDB( DB_MASTER );
$dbw->delete( 'watchlist', array(
'wl_user' => $row->wl_user,
@ -60,6 +75,9 @@ class WatchlistCleanup extends TableCleanup {
'wl_title' => $row->wl_title ),
__METHOD__ );
$this->output( "- removed\n" );
return 1;
} else {
return 0;
}
}
}