2006-01-04 12:33:45 +00:00
|
|
|
<?php
|
|
|
|
|
/**
|
|
|
|
|
* Remove unused user accounts from the database
|
|
|
|
|
* An unused account is one which has made no edits
|
|
|
|
|
*
|
2009-08-02 19:35:17 +00:00
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
|
|
|
*
|
2012-08-09 16:06:18 +00:00
|
|
|
* @file
|
WARNING: HUGE COMMIT
Doxygen documentation update:
* Changed all @addtogroup to @ingroup. @addtogroup adds the comment to the group description, but doesn't add the file, class, function, ... to the group like @ingroup does. See for example http://svn.wikimedia.org/doc/group__SpecialPage.html where it's impossible to see related files, classes, ... that should belong to that group.
* Added @file to file description, it seems that it should be explicitly declared for file descriptions, otherwise doxygen will think that the comment documents the first class, variable, function, ... that is in that file.
* Removed some empty comments
* Removed some ?>
Added following groups:
* ExternalStorage
* JobQueue
* MaintenanceLanguage
One more thing: there are still a lot of warnings when generating the doc.
2008-05-20 17:13:28 +00:00
|
|
|
* @ingroup Maintenance
|
2006-01-04 12:33:45 +00:00
|
|
|
* @author Rob Church <robchur@gmail.com>
|
|
|
|
|
*/
|
2006-01-05 23:16:11 +00:00
|
|
|
|
Support new block schema
Support migration stages when reading and writing blocks.
I tried to set it up for an easy next stage, in which support for the
old schema is removed. I tried to avoid factoring out of shared code
between the two schemas, so that the old schema cases can simply be
deleted without the need to revert unnecessary abstractions.
However, I added HideUserUtils to factor out ipb_deleted queries. Code
review showed that this was already quite complex, with multiple
approaches to the problem, so it benefits from refactoring even without
the schema abstraction.
HideUserUtils is a service rather than a standalone class to support
unit tests, since unit tests do not allow global config access. When
the migration stage config is removed, it will be a service with no
constructor parameters -- an unnecessary abstraction which should
ideally be resolved at that time.
When interpreting result rows, it is possible to share code by using
field aliases. But when constructing WHERE conditions, the actual field
names need to be used, so the migration is more intrusive in
ApiQueryBlocks and SpecialBlockList, where complex conditions are used.
Bug: T346293
Bug: T51504
Bug: T349883
Change-Id: I408acf7a57b0100fe18c455fc13141277a598925
2023-10-27 03:34:10 +00:00
|
|
|
use MediaWiki\MainConfigNames;
|
2022-12-16 11:41:52 +00:00
|
|
|
use MediaWiki\User\ActorMigration;
|
2021-12-19 13:11:07 +00:00
|
|
|
use MediaWiki\User\UserIdentity;
|
2021-07-01 10:32:24 +00:00
|
|
|
|
2013-05-17 00:16:59 +00:00
|
|
|
require_once __DIR__ . '/Maintenance.php';
|
2006-01-04 13:02:04 +00:00
|
|
|
|
2012-08-09 16:06:18 +00:00
|
|
|
/**
|
|
|
|
|
* Maintenance script that removes unused user accounts from the database.
|
|
|
|
|
*
|
|
|
|
|
* @ingroup Maintenance
|
|
|
|
|
*/
|
2009-08-02 19:35:17 +00:00
|
|
|
class RemoveUnusedAccounts extends Maintenance {
	/**
	 * Declare the command-line options understood by this script.
	 */
	public function __construct() {
		parent::__construct();
		$this->addOption( 'delete', 'Actually delete the account' );
		$this->addOption( 'ignore-groups', 'List of comma-separated groups to exclude', false, true );
		$this->addOption( 'ignore-touched', 'Skip accounts touched in last N days', false, true );
	}

	/**
	 * List every account that looks unused and, when --delete is given,
	 * remove those accounts together with their dependent rows.
	 *
	 * An account is listed when it is not in any of the --ignore-groups
	 * groups, was last touched before the --ignore-touched cutoff (default:
	 * one day ago) and isInactiveAccount() finds no contributions for it.
	 */
	public function execute() {
		$services = $this->getServiceContainer();
		$userFactory = $services->getUserFactory();
		$userGroupManager = $services->getUserGroupManager();
		$this->output( "Remove unused accounts\n\n" );

		# Do an initial scan for inactive accounts and report the result
		$this->output( "Checking for unused user accounts...\n" );

		# Validate the options before scanning the whole user table
		$excludedGroups = $this->getExcludedGroups();
		$touched = $this->getOption( 'ignore-touched', "1" );
		if ( !ctype_digit( $touched ) ) {
			$this->fatalError( "Please put a valid positive integer on the --ignore-touched parameter." );
		}
		# Accounts touched at or after this UNIX time are skipped. Computed
		# once so that every row is compared against the same instant.
		$cutoff = time() - 86400 * (int)$touched;

		$delUser = [];
		$delActor = [];
		$dbr = $this->getDB( DB_REPLICA );
		$res = $dbr->newSelectQueryBuilder()
			->select( [ 'user_id', 'user_name', 'user_touched', 'actor_id' ] )
			->from( 'user' )
			->leftJoin( 'actor', null, 'user_id = actor_user' )
			->caller( __METHOD__ )->fetchResultSet();

		foreach ( $res as $row ) {
			# Cheapest test first: recently touched accounts need no further
			# (database backed) checks.
			if ( (int)wfTimestamp( TS_UNIX, $row->user_touched ) >= $cutoff ) {
				continue;
			}

			# Ignore the account if it's within a $excludedGroups group
			$instance = $userFactory->newFromId( $row->user_id );
			if ( $excludedGroups
				&& array_intersect( $userGroupManager->getUserEffectiveGroups( $instance ), $excludedGroups )
			) {
				continue;
			}

			# The LEFT JOIN leaves actor_id null for users without an actor row
			$actorId = $row->actor_id ?? null;
			if ( !$this->isInactiveAccount( $instance, $actorId, true ) ) {
				continue;
			}

			# Inactive; print out the name and flag it
			$delUser[] = $row->user_id;
			if ( $actorId ) {
				$delActor[] = $actorId;
			}
			$this->output( $row->user_name . "\n" );
		}
		$count = count( $delUser );
		$this->output( "...found {$count}.\n" );

		# If required, go back and delete each marked account
		if ( $count > 0 && $this->hasOption( 'delete' ) ) {
			$this->output( "\nDeleting unused accounts..." );
			$this->purgeAccounts( $delUser, $delActor );
			$this->output( "done.\n" );
		} elseif ( $count > 0 ) {
			$this->output( "\nRun the script again with --delete to remove them from the database.\n" );
		}
		$this->output( "\n" );
	}

	/**
	 * Parse the --ignore-groups option into a list of group names.
	 *
	 * Entries are trimmed and empty entries dropped, so that a value such as
	 * "sysop, bot" protects both groups instead of silently looking for a
	 * group called " bot".
	 *
	 * @return string[] Group names to exclude; empty when the option is unset
	 */
	private function getExcludedGroups(): array {
		if ( !$this->hasOption( 'ignore-groups' ) ) {
			return [];
		}
		$groups = array_map( 'trim', explode( ',', $this->getOption( 'ignore-groups' ) ) );

		return array_filter( $groups, static function ( $group ) {
			return $group !== '';
		} );
	}

	/**
	 * Delete the given users and every row that hangs off them, then refresh
	 * site_stats.ss_users.
	 *
	 * @param array $delUser IDs of the user rows to delete (non-empty)
	 * @param array $delActor Actor IDs belonging to those users; may be empty
	 *  when none of the users has an actor row
	 */
	private function purgeAccounts( array $delUser, array $delActor ): void {
		$dbw = $this->getDB( DB_PRIMARY );
		$dbw->delete( 'user', [ 'user_id' => $delUser ], __METHOD__ );

		# An empty ID list cannot be turned into a field condition (the
		# database layer rejects it), so every actor-keyed statement is
		# skipped when there are no actors to clean up.
		if ( $delActor ) {
			# Keep actor rows referenced from ipblocks
			$stage = $this->getConfig()
				->get( MainConfigNames::BlockTargetMigrationStage );
			if ( $stage & SCHEMA_COMPAT_READ_OLD ) {
				$keep = $dbw->newSelectQueryBuilder()
					->select( 'ipb_by_actor' )
					->from( 'ipblocks' )
					->where( [ 'ipb_by_actor' => $delActor ] )
					->caller( __METHOD__ )->fetchFieldValues();
			} else {
				$keep = $dbw->newSelectQueryBuilder()
					->select( 'bl_by_actor' )
					->from( 'block' )
					->where( [ 'bl_by_actor' => $delActor ] )
					->caller( __METHOD__ )->fetchFieldValues();
			}
			$del = array_diff( $delActor, $keep );
			if ( $del ) {
				$dbw->delete( 'actor', [ 'actor_id' => $del ], __METHOD__ );
			}
			if ( $keep ) {
				# Kept actors no longer point at a user row
				$dbw->update( 'actor', [ 'actor_user' => null ], [ 'actor_id' => $keep ], __METHOD__ );
			}
		}

		$dbw->delete( 'user_groups', [ 'ug_user' => $delUser ], __METHOD__ );
		$dbw->delete( 'user_former_groups', [ 'ufg_user' => $delUser ], __METHOD__ );
		$dbw->delete( 'user_properties', [ 'up_user' => $delUser ], __METHOD__ );

		if ( $delActor ) {
			$dbw->delete( 'logging', [ 'log_actor' => $delActor ], __METHOD__ );
			$dbw->delete( 'recentchanges', [ 'rc_actor' => $delActor ], __METHOD__ );
		}

		# Update the site_stats.ss_users field
		$users = $dbw->newSelectQueryBuilder()
			->select( 'COUNT(*)' )
			->from( 'user' )
			->caller( __METHOD__ )->fetchField();
		$dbw->update(
			'site_stats',
			[ 'ss_users' => $users ],
			[ 'ss_row_id' => 1 ],
			__METHOD__
		);
	}

	/**
	 * Could the specified user account be deemed inactive?
	 * (No edits, no deleted edits, no log entries, no current/old uploads)
	 *
	 * Log entries of type 'newusers' are not counted as activity.
	 *
	 * @param UserIdentity $user
	 * @param int|null $actor User's actor ID
	 * @param bool $primary Perform checking on the primary DB
	 * @return bool
	 */
	private function isInactiveAccount( $user, $actor, $primary = false ) {
		if ( $actor === null ) {
			// There's no longer a way for a user to be active in any of
			// these tables without having an actor ID. The only way to link
			// to a user row is via an actor row.
			return true;
		}

		$dbo = $this->getDB( $primary ? DB_PRIMARY : DB_REPLICA );
		// Table name => column prefix of its "<prefix>_actor" field
		$checks = [
			'archive' => 'ar',
			'image' => 'img',
			'oldimage' => 'oi',
			'filearchive' => 'fa'
			// re-add when actor migration is complete
			// 'revision' => 'rev'
		];
		$count = 0;

		$this->beginTransaction( $dbo, __METHOD__ );
		foreach ( $checks as $table => $prefix ) {
			$count += (int)$dbo->selectField(
				$table,
				'COUNT(*)',
				[ "{$prefix}_actor" => $actor ],
				__METHOD__
			);
		}

		// Delete this special case when the actor migration is complete
		$actorQuery = ActorMigration::newMigration()->getWhere( $dbo, 'rev_user', $user );
		$count += (int)$dbo->selectField(
			[ 'revision' ] + $actorQuery['tables'],
			'COUNT(*)',
			$actorQuery['conds'],
			__METHOD__,
			[],
			$actorQuery['joins']
		);

		$count += (int)$dbo->newSelectQueryBuilder()
			->select( 'COUNT(*)' )
			->from( 'logging' )
			->where( [ 'log_actor' => $actor, 'log_type != ' . $dbo->addQuotes( 'newusers' ) ] )
			->caller( __METHOD__ )->fetchField();

		$this->commitTransaction( $dbo, __METHOD__ );

		return $count == 0;
	}
}
|
2006-05-05 01:38:22 +00:00
|
|
|
|
2018-01-13 00:02:09 +00:00
|
|
|
// Name of the maintenance class defined in this file; presumably read by the
// file pulled in through RUN_MAINTENANCE_IF_MAIN below (confirm against it).
$maintClass = RemoveUnusedAccounts::class;
|
2013-05-07 23:00:15 +00:00
|
|
|
// Load the file named by the RUN_MAINTENANCE_IF_MAIN constant, which is
// defined outside this file. NOTE(review): presumably this is what actually
// runs the script when it is invoked directly; confirm.
require_once RUN_MAINTENANCE_IF_MAIN;
|