2017-01-26 11:39:30 +00:00
|
|
|
<?php
|
|
|
|
|
/**
|
|
|
|
|
* Refreshes category counts.
|
|
|
|
|
*
|
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
|
|
|
*
|
|
|
|
|
* @file
|
|
|
|
|
* @ingroup Maintenance
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
require_once __DIR__ . '/Maintenance.php';
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Maintenance script that refreshes category membership counts in the category
|
|
|
|
|
* table.
|
|
|
|
|
*
|
|
|
|
|
* @ingroup Maintenance
|
|
|
|
|
*/
|
|
|
|
|
class RecountCategories extends Maintenance {
	/** @var int Exclusive lower bound on cat_id for the next batch selected by doWork() */
	private $minimumId;

	/**
	 * Registers the script description, its command-line options and the
	 * default batch size of 500.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( <<<'TEXT'
This script refreshes the category membership counts stored in the category
table. As time passes, these counts often drift from the actual number of
category members. The script identifies rows where the value in the category
table does not match the number of categorylinks rows for that category, and
updates the category table accordingly.

To fully refresh the data in the category table, you need to run this script
for all three modes. Alternatively, just one mode can be run if required.
TEXT
		);
		$this->addOption(
			'mode',
			'(REQUIRED) Which category count column to recompute: "pages", "subcats", "files" or "all".',
			true,
			true
		);
		$this->addOption(
			'begin',
			'Only recount categories with cat_id greater than the given value',
			false,
			true
		);
		$this->addOption(
			'throttle',
			'Wait this many milliseconds after each batch. Default: 0',
			false,
			true
		);
		$this->addOption(
			'skip-cleanup',
			'Skip running cleanupEmptyCategories if the "pages" or "all" mode is selected',
			false,
			false
		);

		$this->setBatchSize( 500 );
	}

	/**
	 * Validates the requested mode, recounts every selected count column batch
	 * by batch, and finally either runs CleanupEmptyCategories or tells the
	 * user how to run it (only when the "pages" column was recounted).
	 */
	public function execute() {
		$originalMode = $this->getOption( 'mode' );
		// Strict comparison: the mode is later interpolated into column names,
		// so only the exact expected strings may pass.
		if ( !in_array( $originalMode, [ 'pages', 'subcats', 'files', 'all' ], true ) ) {
			$this->fatalError( 'Please specify a valid mode: one of "pages", "subcats", "files" or "all".' );
		}

		if ( $originalMode === 'all' ) {
			$modes = [ 'pages', 'subcats', 'files' ];
		} else {
			$modes = [ $originalMode ];
		}

		// Parse the numeric options once. The throttle is clamped to zero because
		// usleep() rejects negative values.
		$begin = (int)$this->getOption( 'begin', 0 );
		$throttleMicroseconds = max( 0, (int)$this->getOption( 'throttle', 0 ) ) * 1000;

		foreach ( $modes as $mode ) {
			$this->output( "Starting to recount {$mode} counts.\n" );
			// Every mode starts over from the requested lower bound.
			$this->minimumId = $begin;

			// do the work, batch by batch
			$affectedRows = 0;
			while ( ( $result = $this->doWork( $mode ) ) !== false ) {
				$affectedRows += $result;
				if ( $throttleMicroseconds > 0 ) {
					usleep( $throttleMicroseconds );
				}
			}

			$this->output( "Updated the {$mode} counts of $affectedRows categories.\n" );
		}

		// Finished
		$this->output( "Done!\n" );
		if ( $originalMode !== 'all' ) {
			$this->output( "Now run the script using the other --mode options if you haven't already.\n" );
		}

		if ( in_array( 'pages', $modes, true ) ) {
			if ( $this->hasOption( 'skip-cleanup' ) ) {
				$this->output(
					"Also run 'php cleanupEmptyCategories.php --mode remove' to remove empty,\n" .
					"nonexistent categories from the category table.\n\n" );
			} else {
				$this->output( "Running cleanupEmptyCategories.php\n" );
				$cleanup = $this->runChild( CleanupEmptyCategories::class );
				'@phan-var CleanupEmptyCategories $cleanup';
				// Pass no options into the child because of a parameter collision between "mode", which
				// both scripts use but set to different values. We'll just use the defaults.
				$cleanup->loadParamsAndArgs( $this->mSelf, [], [] );
				// Force execution because we want to run it regardless of whether it's been run before.
				$cleanup->setForce( true );
				$cleanup->execute();
			}
		}
	}

	/**
	 * Recounts one batch of drifted categories for a single count column.
	 *
	 * Finds up to one batch of category rows above the current minimum cat_id
	 * whose cat_{$mode} value differs from the number of matching
	 * categorylinks rows, recomputes those counts on the primary database and
	 * writes them back. Advances the minimum cat_id past the batch.
	 *
	 * @param string $mode "pages", "subcats" or "files"; selects the cat_{$mode}
	 *  column. "subcats" and "files" restrict the count by cl_type, "pages"
	 *  counts every categorylinks row of the category.
	 * @return int|false Number of category rows changed by this batch, or false
	 *  when no drifted rows were found above the current minimum cat_id
	 */
	protected function doWork( $mode ) {
		$this->output( "Finding up to {$this->getBatchSize()} drifted rows " .
			"greater than cat_id {$this->minimumId}...\n" );

		$dbr = $this->getDB( DB_REPLICA, 'vslow' );
		$queryBuilder = $dbr->newSelectQueryBuilder()
			->select( 'COUNT(*)' )
			->from( 'categorylinks' )
			->where( 'cl_to = cat_title' );
		if ( $mode === 'subcats' ) {
			$queryBuilder->andWhere( [ 'cl_type' => 'subcat' ] );
		} elseif ( $mode === 'files' ) {
			$queryBuilder->andWhere( [ 'cl_type' => 'file' ] );
		}

		// Correlated subquery text; it refers to cat_title of the outer category row.
		$countingSubquery = $queryBuilder->caller( __METHOD__ )->getSQL();

		// First, let's find out which categories have drifted and need to be updated.
		// The query counts the categorylinks for each category on the replica DB,
		// but this data can't be used for updating the primary, so we don't include it
		// in the results.
		// The explicit ordering guarantees that the last ID of the batch is the
		// highest one, so that resuming after it cannot skip drifted rows.
		$idsToUpdate = $dbr->newSelectQueryBuilder()
			->select( 'cat_id' )
			->from( 'category' )
			->where( [
				$dbr->expr( 'cat_id', '>', (int)$this->minimumId ),
				"cat_{$mode} != ($countingSubquery)",
			] )
			->orderBy( 'cat_id' )
			->limit( $this->getBatchSize() )
			->caller( __METHOD__ )->fetchFieldValues();
		if ( !$idsToUpdate ) {
			return false;
		}
		$this->output( "Updating cat_{$mode} field on " .
			count( $idsToUpdate ) . " rows...\n" );

		// In the next batch, start where this query left off. The rows selected
		// in this iteration shouldn't be selected again after being updated, but
		// we still keep track of where we are up to, as extra protection against
		// infinite loops.
		$this->minimumId = (int)end( $idsToUpdate );

		// Now, on the primary, find the correct counts for these categories.
		$dbw = $this->getPrimaryDB();
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'cat_id', 'count' => "($countingSubquery)" ] )
			->from( 'category' )
			->where( [ 'cat_id' => $idsToUpdate ] )
			->caller( __METHOD__ )->fetchResultSet();

		// Update the category counts on the rows we just identified.
		// This logic is equivalent to Category::refreshCounts, except here, we
		// don't remove rows when cat_pages is zero and the category description page
		// doesn't exist - instead we print a suggestion to run
		// cleanupEmptyCategories.php.
		$affectedRows = 0;
		foreach ( $res as $row ) {
			$count = (int)$row->count;
			$dbw->newUpdateQueryBuilder()
				->update( 'category' )
				->set( [ "cat_{$mode}" => $count ] )
				->where( [
					'cat_id' => $row->cat_id,
					// Only touch rows whose stored value is still wrong.
					$dbw->expr( "cat_{$mode}", '!=', $count ),
				] )
				->caller( __METHOD__ )
				->execute();
			$affectedRows += $dbw->affectedRows();
		}

		$this->waitForReplication();

		return $affectedRows;
	}
}
|
|
|
|
|
|
2018-01-13 00:02:09 +00:00
|
|
|
// Store this script's class name in $maintClass; presumably consumed by the
// file included through RUN_MAINTENANCE_IF_MAIN - confirm against the runner.
$maintClass = RecountCategories::class;
|
2017-01-26 11:39:30 +00:00
|
|
|
// Include the file named by the RUN_MAINTENANCE_IF_MAIN constant, which is
// defined outside this file.
require_once RUN_MAINTENANCE_IF_MAIN;
|