Add new index to make updateCollation.php painless
We want to update categories in order, to minimize disruption to users. The previous indexes required a filesort to do this, which exploded things on large wikis. See the bug for details. Bug: T58041. Change-Id: Iee6cd997ff87a313a46fda19d8ab063d0fed8ce8
This commit is contained in:
parent
f62f84f175
commit
eec016ece6
7 changed files with 54 additions and 13 deletions
|
|
@ -182,7 +182,6 @@ class MysqlUpdater extends DatabaseUpdater {
|
|||
[ 'dropIndex', 'iwlinks', 'iwl_prefix', 'patch-kill-iwl_prefix.sql' ],
|
||||
[ 'addField', 'categorylinks', 'cl_collation', 'patch-categorylinks-better-collation.sql' ],
|
||||
[ 'doClFieldsUpdate' ],
|
||||
[ 'doCollationUpdate' ],
|
||||
[ 'addTable', 'module_deps', 'patch-module_deps.sql' ],
|
||||
[ 'dropIndex', 'archive', 'ar_page_revid', 'patch-archive_kill_ar_page_revid.sql' ],
|
||||
[ 'addIndex', 'archive', 'ar_revid', 'patch-archive_ar_revid.sql' ],
|
||||
|
|
@ -280,6 +279,10 @@ class MysqlUpdater extends DatabaseUpdater {
|
|||
[ 'dropTable', 'msg_resource' ],
|
||||
[ 'addTable', 'bot_passwords', 'patch-bot_passwords.sql' ],
|
||||
[ 'addField', 'watchlist', 'wl_id', 'patch-watchlist-wl_id.sql' ],
|
||||
[ 'dropIndex', 'categorylinks', 'cl_collation', 'patch-kill-cl_collation_index.sql' ],
|
||||
[ 'addIndex', 'categorylinks', 'cl_collation_ext',
|
||||
'patch-add-cl_collation_ext_index.sql' ],
|
||||
[ 'doCollationUpdate' ],
|
||||
];
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -65,7 +65,6 @@ class SqliteUpdater extends DatabaseUpdater {
|
|||
[ 'addField', 'interwiki', 'iw_api', 'patch-iw_api_and_wikiid.sql' ],
|
||||
[ 'dropIndex', 'iwlinks', 'iwl_prefix', 'patch-kill-iwl_prefix.sql' ],
|
||||
[ 'addField', 'categorylinks', 'cl_collation', 'patch-categorylinks-better-collation.sql' ],
|
||||
[ 'doCollationUpdate' ],
|
||||
[ 'addTable', 'module_deps', 'patch-module_deps.sql' ],
|
||||
[ 'dropIndex', 'archive', 'ar_page_revid', 'patch-archive_kill_ar_page_revid.sql' ],
|
||||
[ 'addIndex', 'archive', 'ar_revid', 'patch-archive_ar_revid.sql' ],
|
||||
|
|
@ -149,6 +148,10 @@ class SqliteUpdater extends DatabaseUpdater {
|
|||
[ 'dropTable', 'msg_resource' ],
|
||||
[ 'addTable', 'bot_passwords', 'patch-bot_passwords.sql' ],
|
||||
[ 'addField', 'watchlist', 'wl_id', 'patch-watchlist-wl_id.sql' ],
|
||||
[ 'dropIndex', 'categorylinks', 'cl_collation', 'patch-kill-cl_collation_index.sql' ],
|
||||
[ 'addIndex', 'categorylinks', 'cl_collation_ext',
|
||||
'patch-add-cl_collation_ext_index.sql' ],
|
||||
[ 'doCollationUpdate' ],
|
||||
];
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,2 @@
|
|||
-- @since 1.27
|
||||
CREATE INDEX /*i*/cl_collation_ext ON /*_*/categorylinks (cl_collation, cl_to, cl_type, cl_from);
|
||||
|
|
@ -13,7 +13,7 @@ ALTER TABLE /*$wgDBprefix*/categorylinks
|
|||
ADD COLUMN cl_sortkey_prefix varchar(255) binary NOT NULL default '',
|
||||
ADD COLUMN cl_collation varbinary(32) NOT NULL default '',
|
||||
ADD COLUMN cl_type ENUM('page', 'subcat', 'file') NOT NULL default 'page',
|
||||
ADD INDEX (cl_collation),
|
||||
-- rm'd in 1.27 ADD INDEX (cl_collation),
|
||||
DROP INDEX cl_sortkey,
|
||||
ADD INDEX cl_sortkey (cl_to, cl_type, cl_sortkey, cl_from);
|
||||
INSERT IGNORE INTO /*$wgDBprefix*/updatelog (ul_key) VALUES ('cl_fields_update');
|
||||
|
|
|
|||
7
maintenance/archives/patch-kill-cl_collation_index.sql
Normal file
7
maintenance/archives/patch-kill-cl_collation_index.sql
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
--
|
||||
-- Kill cl_collation index.
|
||||
-- @since 1.27
|
||||
--
|
||||
|
||||
DROP INDEX /*i*/cl_collation ON /*_*/categorylinks;
|
||||
|
||||
|
|
@ -619,8 +619,8 @@ CREATE INDEX /*i*/cl_sortkey ON /*_*/categorylinks (cl_to,cl_type,cl_sortkey,cl_
|
|||
-- Used by the API (and some extensions)
|
||||
CREATE INDEX /*i*/cl_timestamp ON /*_*/categorylinks (cl_to,cl_timestamp);
|
||||
|
||||
-- FIXME: Not used, delete this
|
||||
CREATE INDEX /*i*/cl_collation ON /*_*/categorylinks (cl_collation);
|
||||
-- Used when updating collation (e.g. updateCollation.php)
|
||||
CREATE INDEX /*i*/cl_collation_ext ON /*_*/categorylinks (cl_collation, cl_to, cl_type, cl_from);
|
||||
|
||||
--
|
||||
-- Track all existing categories. Something is a category if 1) it has an en-
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ require_once __DIR__ . '/Maintenance.php';
|
|||
* @ingroup Maintenance
|
||||
*/
|
||||
class UpdateCollation extends Maintenance {
|
||||
const BATCH_SIZE = 10000; // Number of rows to process in one batch
|
||||
const BATCH_SIZE = 100; // Number of rows to process in one batch
|
||||
const SYNC_INTERVAL = 20; // Wait for slaves after this many batches
|
||||
|
||||
public $sizeHistogram = [];
|
||||
|
|
@ -85,10 +85,18 @@ TEXT
|
|||
// but this will raise an exception, breaking all category pages
|
||||
$collation->getFirstLetter( 'MediaWiki' );
|
||||
|
||||
// Locally at least, (my local is a rather old version of mysql)
|
||||
// mysql seems to filesort if there is both an equality
|
||||
// (but not for an inequality) condition on cl_collation in the
|
||||
// WHERE and it is also the first item in the ORDER BY.
|
||||
if ( $this->hasOption( 'previous-collation' ) ) {
|
||||
$orderBy = 'cl_to, cl_type, cl_from';
|
||||
} else {
|
||||
$orderBy = 'cl_collation, cl_to, cl_type, cl_from';
|
||||
}
|
||||
$options = [
|
||||
'LIMIT' => self::BATCH_SIZE,
|
||||
'ORDER BY' => 'cl_from, cl_to',
|
||||
'STRAIGHT_JOIN',
|
||||
'ORDER BY' => $orderBy,
|
||||
];
|
||||
|
||||
if ( $force || $dryRun ) {
|
||||
|
|
@ -124,16 +132,24 @@ TEXT
|
|||
}
|
||||
$this->output( "Fixing collation for $count rows.\n" );
|
||||
}
|
||||
|
||||
$count = 0;
|
||||
$batchCount = 0;
|
||||
$batchConds = [];
|
||||
do {
|
||||
$this->output( "Selecting next " . self::BATCH_SIZE . " rows..." );
|
||||
|
||||
// cl_type must be selected as a number for proper paging because
|
||||
// enums suck.
|
||||
if ( $dbw->getType() === 'mysql' ) {
|
||||
$clType = 'cl_type+0 AS "cl_type_numeric"';
|
||||
} else {
|
||||
$clType = 'cl_type';
|
||||
}
|
||||
$res = $dbw->select(
|
||||
[ 'categorylinks', 'page' ],
|
||||
[ 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
|
||||
'cl_sortkey', 'page_namespace', 'page_title'
|
||||
'cl_sortkey', $clType,
|
||||
'page_namespace', 'page_title'
|
||||
],
|
||||
array_merge( $collationConds, $batchConds, [ 'cl_from = page_id' ] ),
|
||||
__METHOD__,
|
||||
|
|
@ -217,18 +233,28 @@ TEXT
|
|||
|
||||
/**
|
||||
* Return an SQL expression selecting rows which sort above the given row,
|
||||
* assuming an ordering of cl_from, cl_to
|
||||
* assuming an ordering of cl_collation, cl_to, cl_type, cl_from
|
||||
* @param stdClass $row
|
||||
* @param DatabaseBase $dbw
|
||||
* @return string
|
||||
*/
|
||||
function getBatchCondition( $row, $dbw ) {
|
||||
$fields = [ 'cl_from', 'cl_to' ];
|
||||
if ( $this->hasOption( 'previous-collation' ) ) {
|
||||
$fields = [ 'cl_to', 'cl_type', 'cl_from' ];
|
||||
} else {
|
||||
$fields = [ 'cl_collation', 'cl_to', 'cl_type', 'cl_from' ];
|
||||
}
|
||||
$first = true;
|
||||
$cond = false;
|
||||
$prefix = false;
|
||||
foreach ( $fields as $field ) {
|
||||
$encValue = $dbw->addQuotes( $row->$field );
|
||||
if ( $dbw->getType() === 'mysql' && $field === 'cl_type' ) {
|
||||
// Range conditions with enums are weird in mysql
|
||||
// This must be a numeric literal, or it won't work.
|
||||
$encValue = intval( $row->cl_type_numeric );
|
||||
} else {
|
||||
$encValue = $dbw->addQuotes( $row->$field );
|
||||
}
|
||||
$inequality = "$field > $encValue";
|
||||
$equality = "$field = $encValue";
|
||||
if ( $first ) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue