wiki.techinc.nl/maintenance/findMissingActors.php
Tim Starling d6727856fd Support new block schema
Support migration stages when reading and writing blocks.

I tried to set it up for an easy next stage, in which support for the
old schema is removed. I tried to avoid factoring out shared code
between the two schemas, so that the old schema cases can simply be
deleted without the need to revert unnecessary abstractions.

However, I added HideUserUtils to factor out ipb_deleted queries. Code
review showed that this was already quite complex, with multiple
approaches to the problem, so it benefits from refactoring even without
the schema abstraction.

HideUserUtils is a service rather than a standalone class to support
unit tests, since unit tests do not allow global config access. When
the migration stage config is removed, it will be a service with no
constructor parameters -- an unnecessary abstraction which should
ideally be resolved at that time.

When interpreting result rows, it is possible to share code by using
field aliases. But when constructing WHERE conditions, the actual field
names need to be used, so the migration is more intrusive in
ApiQueryBlocks and SpecialBlockList, where complex conditions are used.

Bug: T346293
Bug: T51504
Bug: T349883
Change-Id: I408acf7a57b0100fe18c455fc13141277a598925
2023-11-29 13:31:42 +11:00

320 lines
9.6 KiB
PHP

<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Maintenance
*/
use MediaWiki\MainConfigNames;
use MediaWiki\User\ActorNormalization;
use MediaWiki\User\UserFactory;
use MediaWiki\User\UserNameUtils;
use MediaWiki\User\UserRigorOptions;
use Wikimedia\Rdbms\LBFactory;
use Wikimedia\Rdbms\LoadBalancer;
require_once __DIR__ . '/Maintenance.php';
/**
* Maintenance script for finding and replacing invalid actor IDs, see T261325 and T307738.
*
* @ingroup Maintenance
*/
class FindMissingActors extends Maintenance {

	/**
	 * @var UserFactory|null Unset until initializeServices() has run.
	 */
	private $userFactory;

	/**
	 * @var UserNameUtils|null Unset until initializeServices() has run.
	 */
	private $userNameUtils;

	/**
	 * @var LoadBalancer|null Unset until initializeServices() has run.
	 */
	private $loadBalancer;

	/**
	 * @var LBFactory|null Unset until initializeServices() has run.
	 */
	private $lbFactory;

	/**
	 * @var ActorNormalization|null Unset until initializeServices() has run.
	 */
	private $actorNormalization;

	/** @var array|null Lazily built field map, see getTables(). */
	private $tables;

	/**
	 * Declare the command line options and the default batch size (1000).
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Find and fix invalid actor IDs.' );
		$this->addOption( 'field', 'The name of a database field to process',
			true, true );
		$this->addOption( 'type', 'Which type of invalid actors to find or fix, '
			. 'missing or broken (with empty actor_name which can\'t be associated '
			. 'with an existing user).',
			false, true );
		$this->addOption( 'skip', 'A comma-separated list of actor IDs to skip.',
			false, true );
		$this->addOption( 'overwrite-with', 'Replace invalid actors with this user. '
			. 'Typically, this would be "Unknown user", but it could be any reserved '
			. 'system user (per $wgReservedUsernames) or locally registered user. '
			. 'If not given, invalid actors will only be listed, not fixed. '
			. 'You will be prompted for confirmation before data is written. ',
			false, true );

		$this->setBatchSize( 1000 );
	}

	/**
	 * Resolve the services this script depends on.
	 *
	 * For each service the first non-null candidate wins, in this order:
	 * the argument passed in, the instance already stored on this object,
	 * the instance provided by the service container.
	 *
	 * @param UserFactory|null $userFactory
	 * @param UserNameUtils|null $userNameUtils
	 * @param LoadBalancer|null $loadBalancer
	 * @param LBFactory|null $lbFactory
	 * @param ActorNormalization|null $actorNormalization
	 */
	public function initializeServices(
		?UserFactory $userFactory = null,
		?UserNameUtils $userNameUtils = null,
		?LoadBalancer $loadBalancer = null,
		?LBFactory $lbFactory = null,
		?ActorNormalization $actorNormalization = null
	) {
		$services = $this->getServiceContainer();

		$this->userFactory = $userFactory ?? $this->userFactory ?? $services->getUserFactory();
		$this->userNameUtils = $userNameUtils ?? $this->userNameUtils ?? $services->getUserNameUtils();
		$this->loadBalancer = $loadBalancer ?? $this->loadBalancer ?? $services->getDBLoadBalancer();
		$this->lbFactory = $lbFactory ?? $this->lbFactory ?? $services->getDBLoadBalancerFactory();
		$this->actorNormalization = $actorNormalization ?? $this->actorNormalization ??
			$services->getActorNormalization();
	}

	/**
	 * Build (once) and return the map of supported --field values.
	 *
	 * Each key is a field name accepted by --field; each value is a triple
	 * [ table name, actor field, field used to identify a row ].
	 * The block tables are only included for the schemas that the configured
	 * BlockTargetMigrationStage writes to.
	 *
	 * @return array
	 */
	private function getTables() {
		if ( !$this->tables ) {
			$tables = [
				'ar_actor' => [ 'archive', 'ar_actor', 'ar_id' ],
				'img_actor' => [ 'image', 'img_actor', 'img_name' ],
				'oi_actor' => [ 'oldimage', 'oi_actor', 'oi_archive_name' ], // no index on oi_archive_name!
				'fa_actor' => [ 'filearchive', 'fa_actor', 'fa_id' ],
				'rc_actor' => [ 'recentchanges', 'rc_actor', 'rc_id' ],
				'log_actor' => [ 'logging', 'log_actor', 'log_id' ],
				'rev_actor' => [ 'revision', 'rev_actor', 'rev_id' ],
			];

			$stage = $this->getServiceContainer()->getMainConfig()
				->get( MainConfigNames::BlockTargetMigrationStage );
			if ( $stage & SCHEMA_COMPAT_WRITE_OLD ) {
				// Old block schema is still being written to.
				$tables['ipb_by_actor'] = [ 'ipblocks', 'ipb_by_actor', 'ipb_id' ]; // no index on ipb_by_actor!
			}
			if ( $stage & SCHEMA_COMPAT_WRITE_NEW ) {
				// New block schema is being written to.
				$tables['bl_by_actor'] = [ 'block', 'bl_by_actor', 'bl_id' ]; // no index on bl_by_actor!
			}

			$this->tables = $tables;
		}

		return $this->tables;
	}

	/**
	 * Look up the table information for a --field value.
	 *
	 * @param string $field
	 * @return array|null The [ table, actor field, ID field ] triple from getTables(),
	 *  or null if $field is not a supported field name.
	 */
	private function getTableInfo( $field ) {
		$tables = $this->getTables();
		return $tables[$field] ?? null;
	}

	/**
	 * Returns the actor ID of the user specified with the --overwrite-with option,
	 * or null if --overwrite-with is not set.
	 *
	 * Existing users and reserved system users are supported.
	 * If the user does not have an actor ID yet, one will be assigned.
	 *
	 * Reports a fatal error if the name is not a valid user name, if it is a
	 * usable name that is not registered locally, or if no actor ID could be
	 * acquired. The code relies on fatalError() not returning.
	 *
	 * @return int|null
	 */
	private function getNewActorId() {
		$name = $this->getOption( 'overwrite-with' );

		if ( $name === null ) {
			return null;
		}

		$user = $this->userFactory->newFromName( $name );

		if ( !$user ) {
			$this->fatalError( "Not a valid user name: '$name'" );
		}

		$name = $this->userNameUtils->getCanonical( $name, UserRigorOptions::RIGOR_NONE );

		if ( $user->isRegistered() ) {
			$this->output( "Using existing user: '$user'\n" );
		} elseif ( !$this->userNameUtils->isValid( $name ) ) {
			$this->fatalError( "Not a valid user name: '$name'" );
		} elseif ( !$this->userNameUtils->isUsable( $name ) ) {
			// Valid but not usable: treated as a reserved system user.
			$this->output( "Using system user: '$name'\n" );
		} else {
			// Valid and usable, but not registered on this wiki.
			$this->fatalError( "Unknown user: '$name'" );
		}

		$dbw = $this->loadBalancer->getConnectionRef( DB_PRIMARY );
		$actorId = $this->actorNormalization->acquireActorId( $user, $dbw );

		if ( !$actorId ) {
			$this->fatalError( "Failed to acquire an actor ID for user '$user'" );
		}

		$this->output( "Replacement actor ID is $actorId.\n" );
		return $actorId;
	}

	/**
	 * Script entry point.
	 *
	 * Validates --field and --type, lists one batch of rows with invalid actor
	 * IDs, and, if --overwrite-with was given and invalid rows were found,
	 * replaces the actor in those rows after the operator typed "yes".
	 */
	public function execute() {
		$this->initializeServices();

		$field = $this->getOption( 'field' );
		if ( !$this->getTableInfo( $field ) ) {
			$this->fatalError( "Unknown field: $field.\n" );
		}

		$type = $this->getOption( 'type', 'missing' );
		if ( $type !== 'missing' && $type !== 'broken' ) {
			$this->fatalError( "Unknown type: $type.\n" );
		}

		$skip = $this->parseIntList( $this->getOption( 'skip', '' ) );
		// Resolved before searching, so an unusable --overwrite-with name fails early.
		$overwrite = $this->getNewActorId();

		$bad = $this->findBadActors( $field, $type, $skip );

		if ( $bad && $overwrite ) {
			$this->output( "\n" );
			$this->output( "Do you want to OVERWRITE the listed actor IDs?\n" );
			$this->output( "Information about the invalid IDs will be lost!\n" );
			$this->output( "\n" );
			$confirm = self::readconsole( 'Type "yes" to continue: ' );

			if ( $confirm === 'yes' ) {
				// $bad is keyed by row ID; only the row IDs are needed for the update.
				$this->overwriteActorIDs( $field, array_keys( $bad ), $overwrite );
			} else {
				$this->fatalError( 'Aborted.' );
			}
		}

		$this->output( "Done.\n" );
	}

	/**
	 * Find rows that have bad actor IDs.
	 *
	 * At most one batch (see getBatchSize()) of rows is returned per call.
	 * Every row found is also printed to the script output.
	 *
	 * @param string $field the database field in which to detect bad actor IDs.
	 * @param string $type type of bad actors, missing or broken.
	 * @param int[] $skip bad actor IDs not to replace.
	 *
	 * @return array a map from row ID (the value of the table's ID field) to the
	 *  bad actor ID found in that row. The keys identify the rows in which the
	 *  actor ID needs to be replaced.
	 */
	private function findBadActors( $field, $type, $skip ) {
		[ $table, $actorField, $idField ] = $this->getTableInfo( $field );
		$this->output( "Finding invalid actor IDs in $table.$actorField...\n" );
		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA, 'vslow' );

		/*
		We are building an SQL query like this one here, performing a left join
		to detect rows in $table that lack a matching row in the actor table.

		In this example, $field is 'log_actor', so $table is 'logging',
		$actorField is 'log_actor', and $idField is 'log_id'.
		Further, $skip is [ 1, 2, 3, 4 ] and the batch size is 1000.

		SELECT log_actor, log_id
		FROM logging
		LEFT JOIN actor ON log_actor = actor_id
		WHERE actor_id IS NULL
		AND log_actor NOT IN (1, 2, 3, 4)
		LIMIT 1000;

		The example above is for $type 'missing'. For $type 'broken', the
		condition "actor_id IS NULL" is replaced by "actor_name = ''".
		*/

		$queryBuilder = $dbr->newSelectQueryBuilder()
			->select( [ $actorField, $idField ] )
			->from( $table )
			->leftJoin( 'actor', null, [ "$actorField = actor_id" ] )
			->where( $type === 'missing' ? [ 'actor_id' => null ] : [ 'actor_name' => '' ] )
			->limit( $this->getBatchSize() );

		if ( $skip ) {
			$queryBuilder->andWhere( $dbr->expr( $actorField, '!=', $skip ) );
		}

		$res = $queryBuilder->caller( __METHOD__ )->fetchResultSet();
		$count = $res->numRows();

		$bad = [];

		if ( $count ) {
			$this->output( "\t\tID\tACTOR\n" );
		}

		foreach ( $res as $row ) {
			$id = $row->$idField;
			$actor = (int)( $row->$actorField );

			$bad[$id] = $actor;
			$this->output( "\t\t$id\t$actor\n" );
		}

		$this->output( "\tFound $count invalid actor IDs.\n" );

		if ( $count >= $this->getBatchSize() ) {
			// A full batch means there may be more rows than were listed.
			$this->output( "\tBatch size reached, run again after fixing the current batch.\n" );
		}

		return $bad;
	}

	/**
	 * Overwrite the actor ID in a given set of rows.
	 *
	 * Performs a single update on the primary database and then waits for
	 * replication.
	 *
	 * @param string $field the database field in which to replace IDs.
	 * @param array $ids The row IDs of the rows in which the actor ID should be replaced
	 * @param int $overwrite The actor ID to write to the rows identified by $ids.
	 *
	 * @return int the number of affected rows as reported by the database.
	 */
	private function overwriteActorIDs( $field, array $ids, int $overwrite ) {
		[ $table, $actorField, $idField ] = $this->getTableInfo( $field );

		$count = count( $ids );
		$this->output( "OVERWRITING $count actor IDs in $table.$actorField with $overwrite...\n" );

		$dbw = $this->loadBalancer->getConnectionRef( DB_PRIMARY );

		$dbw->update( $table, [ $actorField => $overwrite ], [ $idField => $ids ], __METHOD__ );

		// From here on $count is the number of rows actually changed, not the number requested.
		$count = $dbw->affectedRows();

		$this->lbFactory->waitForReplication();
		$this->output( "\tUpdated $count rows.\n" );

		return $count;
	}
}
// Record the name of this script's class in $maintClass.
$maintClass = FindMissingActors::class;
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is not defined in this file (presumably by
// Maintenance.php, required above). The file it names is presumably what instantiates
// and runs $maintClass when this script is the entry point; confirm against Maintenance.php.
require_once RUN_MAINTENANCE_IF_MAIN;