2015-01-17 01:02:10 +00:00
|
|
|
<?php
|
|
|
|
|
/**
|
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
|
|
|
*
|
|
|
|
|
* @file
|
|
|
|
|
*/
|
2022-01-06 18:44:56 +00:00
|
|
|
|
2023-11-21 21:08:14 +00:00
|
|
|
use MediaWiki\Deferred\SiteStatsUpdate;
|
2023-05-06 20:01:10 +00:00
|
|
|
use MediaWiki\HookContainer\HookRunner;
|
2022-04-10 15:34:45 +00:00
|
|
|
use MediaWiki\MainConfigNames;
|
2016-09-15 21:40:00 +00:00
|
|
|
use MediaWiki\MediaWikiServices;
|
2023-09-15 09:32:18 +00:00
|
|
|
use MediaWiki\SpecialPage\SpecialPage;
|
2023-03-01 20:33:26 +00:00
|
|
|
use MediaWiki\Title\Title;
|
2015-01-17 01:02:10 +00:00
|
|
|
|
|
|
|
|
/**
|
2022-09-23 15:10:58 +00:00
|
|
|
* Purge expired rows from the recentchanges table, or refresh the cached list of active users.
|
2015-01-17 01:02:10 +00:00
|
|
|
*
|
|
|
|
|
* @ingroup JobQueue
|
|
|
|
|
* @since 1.25
|
|
|
|
|
*/
|
|
|
|
|
class RecentChangesUpdateJob extends Job {
	/**
	 * @param Title $title Title the job is attached to
	 * @param array $params Job parameters; must contain a 'type' key.
	 *   run() accepts the values 'purge' and 'cacheUpdate'.
	 * @throws InvalidArgumentException If the 'type' parameter is missing
	 */
	public function __construct( Title $title, array $params ) {
		parent::__construct( 'recentChangesUpdate', $title, $params );

		if ( !isset( $params['type'] ) ) {
			throw new InvalidArgumentException( "Missing 'type' parameter." );
		}

		// Both work methods commit their own batches through
		// commitAndWaitForReplication(), so no explicit transaction round is requested.
		$this->executionFlags |= self::JOB_NO_EXPLICIT_TRX_ROUND;
		$this->removeDuplicates = true;
	}

	/**
	 * Create a job that purges expired rows (type 'purge').
	 *
	 * @return RecentChangesUpdateJob
	 */
	final public static function newPurgeJob() {
		return new self(
			SpecialPage::getTitleFor( 'Recentchanges' ),
			[ 'type' => 'purge' ]
		);
	}

	/**
	 * Create a job that refreshes the active users cache (type 'cacheUpdate').
	 *
	 * @return RecentChangesUpdateJob
	 * @since 1.26
	 */
	final public static function newCacheUpdateJob() {
		return new self(
			SpecialPage::getTitleFor( 'Recentchanges' ),
			[ 'type' => 'cacheUpdate' ]
		);
	}

	/**
	 * Dispatch to the work method selected by the 'type' parameter.
	 *
	 * @return bool Always true; failures are reported through exceptions
	 * @throws InvalidArgumentException If 'type' is neither 'purge' nor 'cacheUpdate'
	 */
	public function run() {
		$type = $this->params['type'];

		if ( $type === 'purge' ) {
			$this->purgeExpiredRows();
		} elseif ( $type === 'cacheUpdate' ) {
			$this->updateActiveUsers();
		} else {
			throw new InvalidArgumentException(
				"Invalid 'type' parameter '{$type}'." );
		}

		return true;
	}

	/**
	 * Delete recentchanges rows older than RCMaxAge seconds, in batches of
	 * at most UpdateRowsPerQuery rows.
	 *
	 * The RecentChangesPurgeRows hook is run with each deleted batch. The method
	 * returns without doing anything if the per-domain prune lock cannot be taken.
	 */
	protected function purgeExpiredRows() {
		$services = MediaWikiServices::getInstance();
		$config = $services->getMainConfig();
		$rcMaxAge = $config->get( MainConfigNames::RCMaxAge );
		$updateRowsPerQuery = $config->get( MainConfigNames::UpdateRowsPerQuery );

		$dbProvider = $services->getConnectionProvider();
		$dbw = $dbProvider->getPrimaryDatabase();

		$lockKey = $dbw->getDomainID() . ':recentchanges-prune';
		if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
			// already in progress
			return;
		}

		$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );
		$hookRunner = new HookRunner( $services->getHookContainer() );
		$cutoff = $dbw->timestamp( time() - $rcMaxAge );
		$rcQuery = RecentChange::getQueryInfo();

		do {
			$res = $dbw->newSelectQueryBuilder()
				->queryInfo( $rcQuery )
				->where( $dbw->expr( 'rc_timestamp', '<', $cutoff ) )
				->limit( $updateRowsPerQuery )
				->caller( __METHOD__ )
				->fetchResultSet();

			// Keep the full rows for the hook and the IDs for the DELETE
			$rcIds = [];
			$rows = [];
			foreach ( $res as $row ) {
				$rcIds[] = $row->rc_id;
				$rows[] = $row;
			}

			if ( !$rcIds ) {
				// No expired rows left
				break;
			}

			$dbw->newDeleteQueryBuilder()
				->deleteFrom( 'recentchanges' )
				->where( [ 'rc_id' => $rcIds ] )
				->caller( __METHOD__ )->execute();
			$hookRunner->onRecentChangesPurgeRows( $rows );

			// There might be more, so try waiting for replica DBs.
			// If that fails, stop here; another job will continue anyway.
			$replicated = $dbProvider->commitAndWaitForReplication(
				__METHOD__, $ticket, [ 'timeout' => 3 ]
			);
		} while ( $replicated );

		$dbw->unlock( $lockKey, __METHOD__ );
	}

	/**
	 * Refresh the 'activeusers' entries of the querycachetwo table from
	 * recentchanges, and record the refresh time in querycache_info.
	 *
	 * Users seen in the scanned time range that have no sufficiently recent
	 * cache entry are inserted; entries older than ActiveUserDays days are
	 * deleted. Unless MiserMode is set, SiteStatsUpdate::cacheUpdate() is run
	 * afterwards. The method returns without doing anything if the per-domain
	 * active users lock cannot be taken.
	 */
	protected function updateActiveUsers() {
		$services = MediaWikiServices::getInstance();
		$config = $services->getMainConfig();
		$activeUserDays = $config->get( MainConfigNames::ActiveUserDays );

		// Users with an edit within the last $activeUserDays days count as "active".
		// The same span, in seconds, caps how much history one update pulls in.
		$window = $activeUserDays * 86400;

		$dbProvider = $services->getConnectionProvider();
		$dbw = $dbProvider->getPrimaryDatabase();
		$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );

		$lockKey = $dbw->getDomainID() . '-activeusers';
		if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
			// Exclusive update (avoids duplicate entries)… it's usually fine to just
			// drop out here, if the Job is already running.
			return;
		}

		// Long-running queries expected
		$dbw->setSessionOptions( [ 'connTimeout' => 900 ] );

		$nowUnix = time();
		// Oldest UNIX time at which an edit still makes its author "active"
		$activeCutoffUnix = $nowUnix - $window;

		// Get the last-updated timestamp for the cache
		$cTime = $dbw->newSelectQueryBuilder()
			->select( 'qci_timestamp' )
			->from( 'querycache_info' )
			->where( [ 'qci_type' => 'activeusers' ] )
			->caller( __METHOD__ )->fetchField();
		$cTimeUnix = $cTime ? (int)wfTimestamp( TS_UNIX, $cTime ) : 1;

		// Pick the date range to fetch from. This normally runs from the last
		// update until the present time, but is capped to a limited window.
		// If the window is limiting, multiple runs are needed to fully populate it.
		$sTimestamp = max( $cTimeUnix, $activeCutoffUnix );
		$eTimestamp = min( $sTimestamp + $window, $nowUnix );

		// Get all the users active since the last update
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'actor_name', 'lastedittime' => 'MAX(rc_timestamp)' ] )
			->from( 'recentchanges' )
			->join( 'actor', null, 'actor_id=rc_actor' )
			->where( [
				$dbw->expr( 'actor_user', '!=', null ), // actual accounts
				$dbw->expr( 'rc_type', '!=', RC_EXTERNAL ), // no wikidata
				$dbw->expr( 'rc_log_type', '=', null )->or( 'rc_log_type', '!=', 'newusers' ),
				$dbw->expr( 'rc_timestamp', '>=', $dbw->timestamp( $sTimestamp ) ),
				$dbw->expr( 'rc_timestamp', '<=', $dbw->timestamp( $eTimestamp ) ),
			] )
			->groupBy( 'actor_name' )
			->orderBy( 'NULL' ) // avoid filesort
			->caller( __METHOD__ )->fetchResultSet();

		// Map of user name => last edit timestamp.
		// PHP turns numeric-string keys such as "12345" into int keys, so names
		// read back from this array must be cast to string before reaching the DB.
		$names = [];
		foreach ( $res as $row ) {
			$names[$row->actor_name] = $row->lastedittime;
		}

		// Find which of the recently active users are already accounted for
		if ( $names ) {
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'user_name' => 'qcc_title' ] )
				->from( 'querycachetwo' )
				->where( [
					'qcc_type' => 'activeusers',
					'qcc_namespace' => NS_USER,
					'qcc_title' => array_map( 'strval', array_keys( $names ) ),
					$dbw->expr( 'qcc_value', '>=', $activeCutoffUnix ),
				] )
				->caller( __METHOD__ )->fetchResultSet();
			// Note: In order for this to be actually consistent, we would need
			// to update these rows with the new lastedittime.
			foreach ( $res as $row ) {
				unset( $names[$row->user_name] );
			}
		}

		// Insert the users that need to be added to the list
		if ( $names ) {
			$newRows = [];
			foreach ( $names as $name => $lastEditTime ) {
				$newRows[] = [
					'qcc_type' => 'activeusers',
					'qcc_namespace' => NS_USER,
					// Cast back to string: numeric names arrive here as int keys
					'qcc_title' => (string)$name,
					'qcc_value' => (int)wfTimestamp( TS_UNIX, $lastEditTime ),
					'qcc_namespacetwo' => 0, // unused
					'qcc_titletwo' => '' // unused
				];
			}
			foreach ( array_chunk( $newRows, 500 ) as $rowBatch ) {
				$dbw->newInsertQueryBuilder()
					->insertInto( 'querycachetwo' )
					->rows( $rowBatch )
					->caller( __METHOD__ )->execute();
				$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
			}
		}

		// If a transaction was already started, it might have an old
		// snapshot, so kludge the timestamp range back as needed.
		$asOfTimestamp = min( $eTimestamp, (int)$dbw->trxTimestamp() );

		// Touch the data freshness timestamp
		$dbw->newReplaceQueryBuilder()
			->replaceInto( 'querycache_info' )
			->row( [
				'qci_type' => 'activeusers',
				'qci_timestamp' => $dbw->timestamp( $asOfTimestamp ), // not always $now
			] )
			->uniqueIndexFields( [ 'qci_type' ] )
			->caller( __METHOD__ )->execute();

		// Rotate out users that have not edited in too long (according to old data set)
		$dbw->newDeleteQueryBuilder()
			->deleteFrom( 'querycachetwo' )
			->where( [
				'qcc_type' => 'activeusers',
				$dbw->expr( 'qcc_value', '<', $activeCutoffUnix ) // TS_UNIX
			] )
			->caller( __METHOD__ )->execute();

		if ( !$config->get( MainConfigNames::MiserMode ) ) {
			SiteStatsUpdate::cacheUpdate( $dbw );
		}

		$dbw->unlock( $lockKey, __METHOD__ );
	}
}
|