empty() only makes sense when the expression it checks is possibly undefined, otherwise it's equivalent to a truthiness check with the additional downside of suppressing errors when it's not wanted. Replace it with simple truthiness checks, using strict comparison when that seems to help with polymorphic variables. These were caught by a bespoke phan plugin. Change-Id: Ide262162553d2da7e5388d05e8731529c44591c1
244 lines
7 KiB
PHP
244 lines
7 KiB
PHP
<?php
|
|
/**
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*/
|
|
|
|
require_once __DIR__ . '/Maintenance.php';
|
|
|
|
/**
|
|
* Populate and improve accuracy of change_tag_def statistics.
|
|
*
|
|
* @ingroup Maintenance
|
|
*/
|
|
class PopulateChangeTagDef extends LoggedUpdateMaintenance {
	/** @var Wikimedia\Rdbms\ILBFactory Assigned at the start of doDBUpdates() */
	protected $lbFactory;

	/**
	 * Register the script description, its command-line options and the
	 * default batch size (1000).
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Populate and improve accuracy of change_tag_def statistics' );
		$this->addOption( 'dry-run', 'Print debug info instead of actually deleting' );
		$this->setBatchSize( 1000 );
		$this->addOption(
			'sleep',
			'Sleep time (in seconds) between every batch, defaults to zero',
			false,
			true
		);
		$this->addOption( 'populate-only', 'Do not update change_tag_def table' );
		$this->addOption( 'set-user-tags-only', 'Only update ctd_user_defined from valid_tag table' );
	}

	/**
	 * Run the update steps selected by the command-line options.
	 *
	 * If change_tag still has a ct_tag column:
	 *  - with --set-user-tags-only, only setUserDefinedTags() runs;
	 *  - otherwise updateCountTag() runs (unless --populate-only is given),
	 *    followed by backpopulateChangeTagId() and setUserDefinedTags().
	 * If the column is absent, only updateCountTagId() runs.
	 *
	 * @return bool Always true
	 */
	protected function doDBUpdates() {
		$this->lbFactory = $this->getServiceContainer()->getDBLoadBalancerFactory();
		$this->setBatchSize( $this->getOption( 'batch-size', $this->getBatchSize() ) );

		$dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
		if ( $dbr->fieldExists( 'change_tag', 'ct_tag', __METHOD__ ) ) {
			if ( $this->hasOption( 'set-user-tags-only' ) ) {
				$this->setUserDefinedTags();
				return true;
			}
			if ( !$this->hasOption( 'populate-only' ) ) {
				$this->updateCountTag();
			}
			$this->backpopulateChangeTagId();
			$this->setUserDefinedTags();
		} else {
			$this->updateCountTagId();
		}

		// TODO: Implement
		// $this->cleanZeroCountRows();

		return true;
	}

	/**
	 * Set ctd_user_defined = 1 in change_tag_def for every tag name listed in
	 * the valid_tag table.
	 *
	 * Does nothing (apart from printing a notice) when valid_tag does not
	 * exist or is empty. With --dry-run the affected tag names are printed
	 * and no write is performed.
	 */
	private function setUserDefinedTags() {
		$dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

		$userTags = null;
		if ( $dbr->tableExists( 'valid_tag', __METHOD__ ) ) {
			$userTags = $dbr->newSelectQueryBuilder()
				->select( 'vt_tag' )
				->from( 'valid_tag' )
				->caller( __METHOD__ )->fetchFieldValues();
		}

		// Covers both "table missing" (null) and "table empty" ([]).
		if ( !$userTags ) {
			$this->output( "No user defined tags to set, moving on...\n" );
			return;
		}

		if ( $this->hasOption( 'dry-run' ) ) {
			$this->output(
				'These tags will have ctd_user_defined=1 : ' . implode( ', ', $userTags ) . "\n"
			);
			return;
		}

		$dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );

		$dbw->update(
			'change_tag_def',
			[ 'ctd_user_defined' => 1 ],
			[ 'ctd_name' => $userTags ],
			__METHOD__
		);
		$this->lbFactory->waitForReplication();
		$this->output( "Finished setting user defined tags in change_tag_def table\n" );
	}

	/**
	 * Recompute ctd_count for each tag by counting change_tag rows grouped by
	 * ct_tag_id, and write the result to the change_tag_def row with that id.
	 *
	 * Rows with a falsy ct_tag_id are skipped. With --dry-run the rows are
	 * printed instead of written.
	 */
	private function updateCountTagId() {
		$dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

		// This query can be pretty expensive, don't run it on master
		$res = $dbr->newSelectQueryBuilder()
			->select( [ 'ct_tag_id', 'hitcount' => 'count(*)' ] )
			->from( 'change_tag' )
			->groupBy( 'ct_tag_id' )
			->caller( __METHOD__ )->fetchResultSet();

		$dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
		$dryRun = $this->hasOption( 'dry-run' );

		foreach ( $res as $row ) {
			if ( !$row->ct_tag_id ) {
				continue;
			}

			if ( $dryRun ) {
				// $row is an object, so it cannot be handed to implode() directly;
				// list the two selected fields explicitly instead.
				$this->output(
					'This row will be updated: ' .
					implode( ', ', [ $row->ct_tag_id, $row->hitcount ] ) . "\n"
				);
				continue;
			}

			$dbw->update(
				'change_tag_def',
				[ 'ctd_count' => $row->hitcount ],
				[ 'ctd_id' => $row->ct_tag_id ],
				__METHOD__
			);
		}
		$this->lbFactory->waitForReplication();
	}

	/**
	 * Count change_tag rows grouped by ct_tag and upsert the totals into
	 * change_tag_def keyed on ctd_name.
	 *
	 * A new row is inserted with ctd_user_defined = 0; when a row with the
	 * same ctd_name already exists only its ctd_count is updated. Rows with a
	 * falsy ct_tag are skipped. With --dry-run the rows are printed instead of
	 * written.
	 */
	private function updateCountTag() {
		$dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

		// This query can be pretty expensive, don't run it on master
		$res = $dbr->newSelectQueryBuilder()
			->select( [ 'ct_tag', 'hitcount' => 'count(*)' ] )
			->from( 'change_tag' )
			->groupBy( 'ct_tag' )
			->caller( __METHOD__ )->fetchResultSet();

		$dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
		$dryRun = $this->hasOption( 'dry-run' );

		foreach ( $res as $row ) {
			// Hygiene check
			if ( !$row->ct_tag ) {
				continue;
			}

			if ( $dryRun ) {
				// Separate the tag name from its count so the two values stay readable.
				$this->output(
					'This row will be updated: ' . $row->ct_tag . ', ' . $row->hitcount . "\n"
				);
				continue;
			}

			$dbw->newInsertQueryBuilder()
				->insertInto( 'change_tag_def' )
				->row( [
					'ctd_name' => $row->ct_tag,
					'ctd_user_defined' => 0,
					'ctd_count' => $row->hitcount
				] )
				->onDuplicateKeyUpdate()
				->uniqueIndexFields( [ 'ctd_name' ] )
				->set( [ 'ctd_count' => $row->hitcount ] )
				->caller( __METHOD__ )->execute();
		}
		$this->lbFactory->waitForReplication();
	}

	/**
	 * Run backpopulateChangeTagPerTag() for every row of change_tag_def,
	 * in ctd_id order.
	 */
	private function backpopulateChangeTagId() {
		$dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
		$changeTagDefs = $dbr->newSelectQueryBuilder()
			->select( [ 'ctd_name', 'ctd_id' ] )
			->from( 'change_tag_def' )
			->orderBy( 'ctd_id' )
			->caller( __METHOD__ )->fetchResultSet();

		foreach ( $changeTagDefs as $row ) {
			$this->backpopulateChangeTagPerTag( $row->ctd_name, $row->ctd_id );
		}
	}

	/**
	 * Fill in ct_tag_id on change_tag rows that have the given ct_tag and a
	 * NULL ct_tag_id, working in batches of getBatchSize() rows ordered by
	 * ct_id.
	 *
	 * After each written batch the script waits for replication and then
	 * sleeps for --sleep seconds when that value is positive. With --dry-run
	 * the ids of each batch are printed and nothing is written.
	 *
	 * @param string $tagName Tag name to match against ct_tag (a ctd_name value)
	 * @param int|string $tagId Id to store in ct_tag_id (a ctd_id value)
	 */
	private function backpopulateChangeTagPerTag( $tagName, $tagId ) {
		$dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
		$dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
		$sleep = (int)$this->getOption( 'sleep', 0 );
		$dryRun = $this->hasOption( 'dry-run' );
		// Highest ct_id handled so far; only ever 0 or a value returned by the query below.
		$lastId = 0;
		$this->output( "Starting to add ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );
		while ( true ) {
			// Given that indexes might not be there, it's better to use replica
			$ids = $dbr->newSelectQueryBuilder()
				->select( 'ct_id' )
				->from( 'change_tag' )
				->where( [ 'ct_tag' => $tagName, 'ct_tag_id' => null, 'ct_id > ' . $lastId ] )
				->orderBy( 'ct_id' )
				->limit( $this->getBatchSize() )
				->caller( __METHOD__ )->fetchFieldValues();

			if ( !$ids ) {
				break;
			}
			// Advance the cursor even in dry-run mode so the loop terminates.
			$lastId = end( $ids );

			if ( $dryRun ) {
				$this->output(
					"These ids will be changed to have \"{$tagId}\" as tag id: " . implode( ', ', $ids ) . "\n"
				);
				continue;
			}

			$this->output( "Updating ct_tag_id = {$tagId} up to row ct_id = {$lastId}\n" );

			$dbw->update(
				'change_tag',
				[ 'ct_tag_id' => $tagId ],
				[ 'ct_id' => $ids ],
				__METHOD__
			);

			$this->lbFactory->waitForReplication();
			if ( $sleep > 0 ) {
				sleep( $sleep );
			}
		}

		$this->output( "Finished adding ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );
	}

	/**
	 * @return string The name of this class, used as the update key
	 */
	protected function getUpdateKey() {
		return __CLASS__;
	}
}
|
|
|
|
// Name of the maintenance class defined in this file. Presumably read by the
// runner script required below; confirm against Maintenance.php.
$maintClass = PopulateChangeTagDef::class;
|
|
// Include the file named by the RUN_MAINTENANCE_IF_MAIN constant, which is defined outside this file.
require_once RUN_MAINTENANCE_IF_MAIN;
|