2022-03-10 14:30:15 +00:00
|
|
|
<?php
|
|
|
|
|
|
2024-08-27 12:00:25 +00:00
|
|
|
// @codeCoverageIgnoreStart
|
2022-03-10 14:30:15 +00:00
|
|
|
require_once __DIR__ . '/Maintenance.php';
|
2024-08-27 12:00:25 +00:00
|
|
|
// @codeCoverageIgnoreEnd
|
2022-03-10 14:30:15 +00:00
|
|
|
|
2023-09-18 13:56:39 +00:00
|
|
|
use MediaWiki\Title\TitleValue;
|
|
|
|
|
|
2022-03-10 14:30:15 +00:00
|
|
|
/**
 * Maintenance script that populates normalization column in links tables.
 *
 * For the links table named by --table, every row whose target-id column is
 * still NULL or 0 is given the ID returned by
 * LinkTargetLookup::acquireLinkTargetId() for that row's namespace and title.
 * Work is split into ranges of page IDs (one range per batch size) and, within
 * a range, into one UPDATE per distinct namespace/title pair.
 *
 * @ingroup Maintenance
 * @since 1.39
 */
class MigrateLinksTable extends LoggedUpdateMaintenance {
	/**
	 * Registers the script description, the required --table option, the
	 * optional --sleep option and a default batch size of 1000.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription(
			'Populates normalization column in links tables.'
		);
		$this->addOption(
			'table',
			'Table name. Like pagelinks.',
			true,
			true
		);
		$this->addOption(
			'sleep',
			'Sleep time (in seconds) between every batch. Default: 0',
			false,
			true
		);
		$this->setBatchSize( 1000 );
	}

	/**
	 * Builds the update key from the class name and the --table option, so
	 * each links table gets its own key.
	 *
	 * @return string
	 */
	protected function getUpdateKey() {
		return __CLASS__ . $this->getOption( 'table', '' );
	}

	/**
	 * Validates the schema preconditions and then backfills the target-id
	 * column of the requested table, one page-ID range at a time.
	 *
	 * @return bool True when the migration finished or is not needed at all
	 *  (old columns already dropped, empty page table); false when it could
	 *  not run yet (unknown table, missing target column, missing linktarget
	 *  table) and has to be retried later.
	 */
	protected function doDBUpdates() {
		$dbw = $this->getDB( DB_PRIMARY );
		$mapping = \MediaWiki\Linker\LinksMigration::$mapping;
		$table = $this->getOption( 'table', '' );
		if ( !isset( $mapping[$table] ) ) {
			$this->output( "Mapping for this table doesn't exist yet.\n" );
			return false;
		}

		$targetColumn = $mapping[$table]['target_id'];
		if ( !$dbw->fieldExists( $table, $mapping[$table]['title'], __METHOD__ ) ) {
			// The old title column is gone, so there is nothing left to migrate from.
			$this->output( "Old fields don't exist. There is no need to run this script\n" );
			return true;
		}
		if ( !$dbw->fieldExists( $table, $targetColumn, __METHOD__ ) ) {
			$this->output( "Run update.php to create the $targetColumn column.\n" );
			return false;
		}
		if ( !$dbw->tableExists( 'linktarget', __METHOD__ ) ) {
			$this->output( "Run update.php to create the linktarget table.\n" );
			// Nothing was migrated; report failure (like the missing-column case
			// above) so that this update is not treated as completed.
			return false;
		}

		$this->output( "Populating the $targetColumn column\n" );
		$updated = 0;

		$highestPageRow = $dbw->newSelectQueryBuilder()
			->select( 'page_id' )
			->from( 'page' )
			->limit( 1 )
			->caller( __METHOD__ )
			->orderBy( 'page_id', 'DESC' )
			->fetchResultSet()->fetchRow();
		if ( !$highestPageRow ) {
			$this->output( "Page table is empty.\n" );
			return true;
		}
		// Cast so that the loop condition below is a plain integer comparison.
		$highestPageId = (int)$highestPageRow[0];

		$batchSize = $this->getBatchSize();
		$pageId = 0;
		while ( $pageId <= $highestPageId ) {
			// Given the indexes and the structure of links tables,
			// we need to split the update into batches of pages.
			// Otherwise the queries will take a really long time in production and cause read-only.
			$updated += $this->handlePageBatch( $pageId, $mapping, $table );
			$pageId += $batchSize;
		}

		$this->output( "Completed normalization of $table, $updated rows updated.\n" );

		return true;
	}

	/**
	 * Backfills the target-id column for all rows of $table whose source page
	 * ID lies in [$lowPageId, $lowPageId + batch size - 1].
	 *
	 * Repeatedly picks one not-yet-migrated row in the range, obtains the link
	 * target ID for its namespace/title and updates every not-yet-migrated row
	 * in the range that has the same namespace/title, until no such row is left.
	 *
	 * @param int $lowPageId Lowest page ID of the range (inclusive)
	 * @param array $mapping Column mapping, keyed by table name; the 'target_id',
	 *  'page_id', 'ns' and 'title' entries of $mapping[$table] are used
	 * @param string $table Name of the links table to update
	 * @return int Number of rows updated in this range
	 */
	private function handlePageBatch( $lowPageId, $mapping, $table ) {
		$batchSize = $this->getBatchSize();
		$targetColumn = $mapping[$table]['target_id'];
		$pageIdColumn = $mapping[$table]['page_id'];
		$nsColumn = $mapping[$table]['ns'];
		$titleColumn = $mapping[$table]['title'];
		// range is inclusive, let's subtract one.
		$highPageId = $lowPageId + $batchSize - 1;
		$dbw = $this->getPrimaryDB();
		// Loop-invariant values, resolved once per page range.
		$linkTargetLookup = $this->getServiceContainer()->getLinkTargetLookup();
		$sleep = (int)$this->getOption( 'sleep', 0 );
		$updated = 0;

		while ( true ) {
			// Find any row in the range that still lacks a target ID.
			$res = $dbw->newSelectQueryBuilder()
				->select( [ $nsColumn, $titleColumn ] )
				->from( $table )
				->where( [
					$targetColumn => [ null, 0 ],
					$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
					$dbw->expr( $pageIdColumn, '<=', $highPageId ),
				] )
				->limit( 1 )
				->caller( __METHOD__ )
				->fetchResultSet();
			if ( !$res->numRows() ) {
				break;
			}
			$row = $res->fetchRow();
			$ns = $row[$nsColumn];
			$titleString = $row[$titleColumn];
			$title = new TitleValue( (int)$ns, $titleString );
			$this->output( "Starting backfill of $ns:$titleString " .
				"title on pages between $lowPageId and $highPageId\n" );
			$id = $linkTargetLookup->acquireLinkTargetId( $title, $dbw );
			// Update every unmigrated row in the range pointing at the same target.
			$dbw->newUpdateQueryBuilder()
				->update( $table )
				->set( [ $targetColumn => $id ] )
				->where( [
					$targetColumn => [ null, 0 ],
					$nsColumn => $ns,
					$titleColumn => $titleString,
					$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
					$dbw->expr( $pageIdColumn, '<=', $highPageId ),
				] )
				->caller( __METHOD__ )->execute();
			$updatedInThisBatch = $dbw->affectedRows();
			$updated += $updatedInThisBatch;
			$this->output( "Updated $updatedInThisBatch rows\n" );
			// Sleep between batches for replication to catch up
			$this->waitForReplication();
			if ( $sleep > 0 ) {
				sleep( $sleep );
			}
		}
		return $updated;
	}

}
|
|
|
|
|
|
2024-08-27 12:00:25 +00:00
|
|
|
// @codeCoverageIgnoreStart
|
2022-03-10 14:30:15 +00:00
|
|
|
// Name of the maintenance class defined in this file. Presumably read by the
// file included through RUN_MAINTENANCE_IF_MAIN below - confirm against Maintenance.php.
$maintClass = MigrateLinksTable::class;
|
|
|
|
|
// Includes the file named by the RUN_MAINTENANCE_IF_MAIN constant. Presumably it
// runs $maintClass only when this script is the entry point - confirm against Maintenance.php.
require_once RUN_MAINTENANCE_IF_MAIN;
|
2024-08-27 12:00:25 +00:00
|
|
|
// @codeCoverageIgnoreEnd
|