This adds a method to LinkFilter to build the query conditions necessary to properly use it, and adjusts code to use it.

This also takes the opportunity to clean up the calculation of el_index: IPs are handled more sensibly and IDNs are canonicalized. In addition, unusual edge cases for invalid hosts like "http://.example.com" and corresponding searches like "http://*..example.com" are now handled consistently instead of being treated as if the extra dot were omitted, while an explicit specification of the DNS root like "http://example.com./" is canonicalized to the usual implicit specification.

Note that this patch will break link searches for links where the host is an IP or IDN until refreshExternallinksIndex.php is run.

Bug: T59176
Bug: T130482
Change-Id: I84d224ef23de22dfe179009ec3a11fd0e4b5f56d
120 lines
3.4 KiB
PHP
120 lines
3.4 KiB
PHP
<?php
|
|
/**
|
|
* Refresh the externallinks table el_index and el_index_60 from el_to
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*
|
|
* @file
|
|
* @ingroup Maintenance
|
|
*/
|
|
|
|
require_once __DIR__ . '/Maintenance.php';
|
|
|
|
/**
|
|
* Maintenance script that refreshes the externallinks table el_index and
|
|
* el_index_60 from el_to
|
|
*
|
|
* @ingroup Maintenance
|
|
* @since 1.33
|
|
*/
|
|
class RefreshExternallinksIndex extends LoggedUpdateMaintenance {
	/**
	 * Register the script description and set the default batch size to 10000.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription(
			'Refresh the externallinks table el_index and el_index_60 from el_to' );
		$this->setBatchSize( 10000 );
	}

	/**
	 * Build the key identifying this update.
	 *
	 * The key is made of the class name, LinkFilter::VERSION and a "+IDN" or
	 * "-IDN" suffix taken from LinkFilter::supportsIDN(), so it differs whenever
	 * either of those values differs.
	 *
	 * @return string
	 */
	protected function getUpdateKey() {
		return static::class
			. ' v' . LinkFilter::VERSION
			. ( LinkFilter::supportsIDN() ? '+' : '-' ) . 'IDN';
	}

	/**
	 * Message for the case where the update is skipped.
	 *
	 * @return string
	 */
	protected function updateSkippedMessage() {
		return 'externallinks table indexes up to date';
	}

	/**
	 * Recompute el_index and el_index_60 for every externallinks row.
	 *
	 * Rows are processed in el_id ranges of at most the configured batch size.
	 * For each row the indexes are recomputed from el_to with
	 * LinkFilter::makeIndexes():
	 *  - if no index can be produced, the row is deleted;
	 *  - if the stored el_index is already one of the new indexes, the row is
	 *    left alone;
	 *  - otherwise el_index and el_index_60 are rewritten.
	 *
	 * @return bool False if the externallinks table does not exist, true otherwise
	 */
	protected function doDBUpdates() {
		$dbw = $this->getDB( DB_MASTER );
		if ( !$dbw->tableExists( 'externallinks' ) ) {
			$this->error( "externallinks table does not exist" );
			return false;
		}
		$this->output( "Updating externallinks table index fields\n" );

		// Range of ids to walk. MIN()/MAX() yield NULL on an empty table, in
		// which case the loop condition below is false and no batch is run.
		$minmax = $dbw->selectRow(
			'externallinks',
			[ 'min' => 'MIN(el_id)', 'max' => 'MAX(el_id)' ],
			'',
			__METHOD__
		);

		$updated = 0;
		$deleted = 0;
		// Batches cover the half-open range ( $start, $end ], so begin just
		// below the smallest id.
		$start = $minmax->min - 1;
		$last = $minmax->max;
		while ( $start < $last ) {
			$end = min( $start + $this->mBatchSize, $last );
			$this->output( "el_id $start - $end of $last\n" );
			$res = $dbw->select( 'externallinks', [ 'el_id', 'el_to', 'el_index' ],
				[
					"el_id > $start",
					"el_id <= $end",
				],
				__METHOD__,
				[ 'ORDER BY' => 'el_id' ]
			);
			foreach ( $res as $row ) {
				$newIndexes = LinkFilter::makeIndexes( $row->el_to );
				if ( !$newIndexes ) {
					// No index can be built for this el_to: drop the row.
					$dbw->delete( 'externallinks', [ 'el_id' => $row->el_id ], __METHOD__ );
					$deleted++;
					continue;
				}
				if ( in_array( $row->el_index, $newIndexes, true ) ) {
					// Stored index is already current.
					continue;
				}

				$newIndex = $this->pickIndexForRow( (string)$row->el_index, $newIndexes );
				$dbw->update( 'externallinks',
					[
						'el_index' => $newIndex,
						'el_index_60' => substr( $newIndex, 0, 60 ),
					],
					[ 'el_id' => $row->el_id ],
					__METHOD__
				);
				$updated++;
			}
			wfWaitForSlaves();
			$start = $end;
		}
		$this->output( "Done, $updated rows updated, $deleted deleted.\n" );

		return true;
	}

	/**
	 * Choose the el_index value to store for a row whose stored index is stale.
	 *
	 * With a single candidate that candidate is used. With several candidates
	 * the row keeps the scheme of its current index and takes everything from
	 * the first ':' onwards from the first candidate.
	 *
	 * If the stored index has no scheme before a ':' (for example because it is
	 * empty), or the first candidate contains no ':', there is nothing sensible
	 * to splice; the first candidate is then used unchanged rather than writing
	 * an index without a scheme.
	 *
	 * @param string $oldIndex The row's current el_index
	 * @param string[] $newIndexes Non-empty list returned by LinkFilter::makeIndexes()
	 * @return string
	 */
	private function pickIndexForRow( $oldIndex, array $newIndexes ) {
		if ( count( $newIndexes ) === 1 ) {
			return $newIndexes[0];
		}

		$oldSchemeEnd = strpos( $oldIndex, ':' );
		$newSchemeEnd = strpos( $newIndexes[0], ':' );
		if ( $oldSchemeEnd === false || $oldSchemeEnd === 0 || $newSchemeEnd === false ) {
			return $newIndexes[0];
		}

		// Assume the scheme is the only difference between the different $newIndexes.
		// Keep this row's scheme, assuming there's another row with the other scheme.
		return substr( $oldIndex, 0, $oldSchemeEnd ) .
			substr( $newIndexes[0], $newSchemeEnd );
	}
}
|
|
|
|
// Name of the maintenance class declared in this file.
$maintClass = 'RefreshExternallinksIndex';

// RUN_MAINTENANCE_IF_MAIN is not defined in this file; presumably it is
// provided by Maintenance.php and points at the script runner -- confirm.
require_once RUN_MAINTENANCE_IF_MAIN;
|