2018-06-05 22:59:11 +00:00
|
|
|
<?php
|
|
|
|
|
/**
|
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
|
|
|
*/
|
|
|
|
|
|
2024-08-27 12:00:25 +00:00
|
|
|
// @codeCoverageIgnoreStart
|
2018-06-05 22:59:11 +00:00
|
|
|
require_once __DIR__ . '/Maintenance.php';
|
2024-08-27 12:00:25 +00:00
|
|
|
// @codeCoverageIgnoreEnd
|
2018-06-05 22:59:11 +00:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Populate and improve accuracy of change_tag_def statistics.
|
|
|
|
|
*
|
|
|
|
|
* @ingroup Maintenance
|
|
|
|
|
*/
|
2018-08-28 21:32:55 +00:00
|
|
|
class PopulateChangeTagDef extends LoggedUpdateMaintenance {
	/**
	 * Register the script description, its command-line options and the
	 * default batch size (1000).
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Populate and improve accuracy of change_tag_def statistics' );
		// This script never deletes rows; in dry-run mode it only skips its writes.
		$this->addOption( 'dry-run', 'Print debug info instead of actually updating the database' );
		$this->setBatchSize( 1000 );
		$this->addOption(
			'sleep',
			'Sleep time (in seconds) between every batch, defaults to zero',
			false,
			true
		);
		$this->addOption( 'populate-only', 'Do not update change_tag_def table' );
		$this->addOption( 'set-user-tags-only', 'Only update ctd_user_defined from valid_tag table' );
	}

	/**
	 * Run the update.
	 *
	 * When change_tag still has the ct_tag column, the tag counts are refreshed
	 * by name (unless --populate-only is given), ct_tag_id is back-filled and the
	 * user-defined flag is set; with --set-user-tags-only only the last step runs.
	 * When the ct_tag column is gone, only the counts are refreshed, by tag id.
	 *
	 * @return bool Always true
	 */
	protected function doDBUpdates() {
		$this->setBatchSize( $this->getOption( 'batch-size', $this->getBatchSize() ) );

		$dbw = $this->getDB( DB_PRIMARY );
		$hasTagNameColumn = $dbw->fieldExists( 'change_tag', 'ct_tag', __METHOD__ );

		if ( $hasTagNameColumn ) {
			if ( $this->hasOption( 'set-user-tags-only' ) ) {
				$this->setUserDefinedTags();
				return true;
			}
			if ( !$this->hasOption( 'populate-only' ) ) {
				$this->updateCountTag();
			}
			$this->backpopulateChangeTagId();
			$this->setUserDefinedTags();
		} else {
			$this->updateCountTagId();
		}

		// TODO: Implement
		// $this->cleanZeroCountRows();

		return true;
	}

	/**
	 * Set ctd_user_defined = 1 in change_tag_def for every tag name listed in
	 * the valid_tag table.
	 *
	 * Does nothing (apart from printing a notice) when valid_tag does not exist
	 * or yields no tags. In dry-run mode the affected tag names are printed and
	 * nothing is written.
	 */
	private function setUserDefinedTags() {
		$dbw = $this->getDB( DB_PRIMARY );

		$userTags = null;
		if ( $dbw->tableExists( 'valid_tag', __METHOD__ ) ) {
			$userTags = $dbw->newSelectQueryBuilder()
				->select( 'vt_tag' )
				->from( 'valid_tag' )
				->caller( __METHOD__ )
				->fetchFieldValues();
		}

		if ( !$userTags ) {
			$this->output( "No user defined tags to set, moving on...\n" );
			return;
		}

		if ( $this->hasOption( 'dry-run' ) ) {
			$this->output(
				'These tags will have ctd_user_defined=1 : ' . implode( ', ', $userTags ) . "\n"
			);
			return;
		}

		$dbw->newUpdateQueryBuilder()
			->update( 'change_tag_def' )
			->set( [ 'ctd_user_defined' => 1 ] )
			->where( [ 'ctd_name' => $userTags ] )
			->caller( __METHOD__ )
			->execute();
		$this->waitForReplication();
		$this->output( "Finished setting user defined tags in change_tag_def table\n" );
	}

	/**
	 * Recompute ctd_count for every tag from change_tag, grouping by ct_tag_id.
	 *
	 * Rows with an empty ct_tag_id are skipped. In dry-run mode each row that
	 * would be written is printed instead.
	 */
	private function updateCountTagId() {
		$dbr = $this->getReplicaDB();

		// This query can be pretty expensive, don't run it on the primary
		$res = $dbr->newSelectQueryBuilder()
			->select( [ 'ct_tag_id', 'hitcount' => 'count(*)' ] )
			->from( 'change_tag' )
			->groupBy( 'ct_tag_id' )
			->caller( __METHOD__ )
			->fetchResultSet();

		$dbw = $this->getPrimaryDB();
		$dryRun = $this->hasOption( 'dry-run' );

		foreach ( $res as $row ) {
			if ( !$row->ct_tag_id ) {
				continue;
			}

			if ( $dryRun ) {
				// $row is an object, so it cannot be handed to implode() directly
				// (TypeError on PHP 8); print the two selected fields explicitly.
				$this->output(
					'This row will be updated: ' . $row->ct_tag_id . ', ' . $row->hitcount . "\n"
				);
				continue;
			}

			$dbw->newUpdateQueryBuilder()
				->update( 'change_tag_def' )
				->set( [ 'ctd_count' => $row->hitcount ] )
				->where( [ 'ctd_id' => $row->ct_tag_id ] )
				->caller( __METHOD__ )
				->execute();
		}
		$this->waitForReplication();
	}

	/**
	 * Recompute ctd_count for every tag from change_tag, grouping by the tag
	 * name (ct_tag), and create the change_tag_def row when it is missing.
	 *
	 * Newly inserted rows get ctd_user_defined = 0; for existing rows (matched
	 * on ctd_name) only ctd_count is overwritten. In dry-run mode each row that
	 * would be written is printed instead.
	 */
	private function updateCountTag() {
		$dbr = $this->getReplicaDB();

		// This query can be pretty expensive, don't run it on the primary
		$res = $dbr->newSelectQueryBuilder()
			->select( [ 'ct_tag', 'hitcount' => 'count(*)' ] )
			->from( 'change_tag' )
			->groupBy( 'ct_tag' )
			->caller( __METHOD__ )
			->fetchResultSet();

		$dbw = $this->getPrimaryDB();
		$dryRun = $this->hasOption( 'dry-run' );

		foreach ( $res as $row ) {
			// Hygiene check
			if ( !$row->ct_tag ) {
				continue;
			}

			if ( $dryRun ) {
				// Separate name and count so that e.g. "foo" / 12 is not printed as "foo12".
				$this->output(
					'This row will be updated: ' . $row->ct_tag . ', ' . $row->hitcount . "\n"
				);
				continue;
			}

			$dbw->newInsertQueryBuilder()
				->insertInto( 'change_tag_def' )
				->row( [
					'ctd_name' => $row->ct_tag,
					'ctd_user_defined' => 0,
					'ctd_count' => $row->hitcount
				] )
				->onDuplicateKeyUpdate()
				->uniqueIndexFields( [ 'ctd_name' ] )
				->set( [ 'ctd_count' => $row->hitcount ] )
				->caller( __METHOD__ )
				->execute();
		}
		$this->waitForReplication();
	}

	/**
	 * Back-fill change_tag.ct_tag_id for every tag defined in change_tag_def,
	 * processing the definitions in ctd_id order, one tag at a time.
	 */
	private function backpopulateChangeTagId() {
		$dbr = $this->getReplicaDB();
		$changeTagDefs = $dbr->newSelectQueryBuilder()
			->select( [ 'ctd_name', 'ctd_id' ] )
			->from( 'change_tag_def' )
			->orderBy( 'ctd_id' )
			->caller( __METHOD__ )
			->fetchResultSet();

		foreach ( $changeTagDefs as $row ) {
			$this->backpopulateChangeTagPerTag( $row->ctd_name, $row->ctd_id );
		}
	}

	/**
	 * Set ct_tag_id on all change_tag rows of one tag that do not have it yet.
	 *
	 * Rows are selected in batches of getBatchSize() ordered by ct_id. After
	 * each written batch the script waits for replication and then sleeps for
	 * the number of seconds given by --sleep (if any). In dry-run mode the
	 * ids of each batch are printed and nothing is written.
	 *
	 * @param string $tagName Value of ct_tag / ctd_name to look for
	 * @param int|string $tagId Value of ctd_id to store in ct_tag_id
	 */
	private function backpopulateChangeTagPerTag( $tagName, $tagId ) {
		$dbr = $this->getReplicaDB();
		$dbw = $this->getPrimaryDB();
		$sleep = (int)$this->getOption( 'sleep', 0 );
		$dryRun = $this->hasOption( 'dry-run' );
		$lastId = 0;
		$this->output( "Starting to add ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );

		while ( true ) {
			// Given that indexes might not be there, it's better to use replica
			$ids = $dbr->newSelectQueryBuilder()
				->select( 'ct_id' )
				->from( 'change_tag' )
				->where( [ 'ct_tag' => $tagName, 'ct_tag_id' => null, $dbr->expr( 'ct_id', '>', $lastId ) ] )
				->orderBy( 'ct_id' )
				->limit( $this->getBatchSize() )
				->caller( __METHOD__ )
				->fetchFieldValues();

			if ( !$ids ) {
				break;
			}

			// Advance the cursor before anything else: in dry-run mode the rows keep
			// ct_tag_id = NULL, so the ct_id cursor is what guarantees progress.
			$lastId = end( $ids );

			if ( $dryRun ) {
				$this->output(
					"These ids will be changed to have \"{$tagId}\" as tag id: " . implode( ', ', $ids ) . "\n"
				);
				continue;
			}

			$this->output( "Updating ct_tag_id = {$tagId} up to row ct_id = {$lastId}\n" );

			$dbw->newUpdateQueryBuilder()
				->update( 'change_tag' )
				->set( [ 'ct_tag_id' => $tagId ] )
				->where( [ 'ct_id' => $ids ] )
				->caller( __METHOD__ )
				->execute();

			$this->waitForReplication();
			if ( $sleep > 0 ) {
				sleep( $sleep );
			}
		}

		$this->output( "Finished adding ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );
	}

	/**
	 * @return string The name of this class, used as the update key
	 */
	protected function getUpdateKey() {
		return __CLASS__;
	}
}
|
|
|
|
|
|
2024-08-27 12:00:25 +00:00
|
|
|
// @codeCoverageIgnoreStart
|
2018-06-05 22:59:11 +00:00
|
|
|
// Name of the maintenance class declared in this file.
// NOTE(review): presumably read by the file required below - confirm.
$maintClass = PopulateChangeTagDef::class;
|
|
|
|
|
// NOTE(review): judging by the constant's name, this runs the script only when
// this file is the entry point - confirm against Maintenance.php.
require_once RUN_MAINTENANCE_IF_MAIN;
|
2024-08-27 12:00:25 +00:00
|
|
|
// @codeCoverageIgnoreEnd
|