wiki.techinc.nl/maintenance/updateCollation.php
Aryeh Gregor dcd5d260d4 Further categorylinks schema changes
Per review by Tim, I made two changes:

1) Fix cl_sortkey to be varbinary(255).

2) Expand cl_collation to varbinary(32), and change $wgCollationVersion
to $wgCategoryCollation, to account for the variety of collations we
might have.  tinyint is too small.  I could have gone with int, but
that's annoyingly inscrutable in practice, as we all know from namespace
fields.

To make the upgrade easier for non-trunk users, I updated the old patch
file to incorporate the new changes, using the updatelog table so that
people upgrading from 1.16 won't have to do two alters on categorylinks.
I didn't test the upgrade-from-1.16 code path yet, so if anyone tests
that and it seems not to break, commenting to that effect would be
appreciated.

Also removed wfDeprecated() from archive().  Do *not* add this to
functions that are still actively used in core.  If you think this
function is so terrible that it really mustn't be used, remove callers
yourself, don't pester every single developer with messages in the hope
that someone else will do it for you.
2010-09-03 20:52:08 +00:00

109 lines
2.9 KiB
PHP

<?php
/**
* @file
* @ingroup Maintenance
* @author Aryeh Gregor (Simetrical)
*/
#$optionsWithArgs = array( 'begin', 'max-slave-lag' );
require_once( dirname( __FILE__ ) . '/Maintenance.php' );
/**
 * Maintenance script that regenerates cl_sortkey (and refreshes
 * cl_sortkey_prefix, cl_collation and cl_type) for every categorylinks row
 * whose cl_collation differs from $wgCategoryCollation.
 */
class UpdateCollation extends Maintenance {
	/** Maximum number of categorylinks rows selected and updated per transaction. */
	const BATCH_SIZE = 1000;

	/**
	 * Sets the script description, interpolating the currently configured
	 * $wgCategoryCollation into the text.
	 */
	public function __construct() {
		parent::__construct();

		global $wgCategoryCollation;
		$this->mDescription = <<<TEXT
This script will find all rows in the categorylinks table whose collation is
out-of-date (cl_collation != '$wgCategoryCollation') and repopulate cl_sortkey
using the page title and cl_sortkey_prefix. If everything's collation is
up-to-date, it will do nothing.
TEXT;

		#$this->addOption( 'force', 'Run on all rows, even if the collation is supposed to be up-to-date.' );
	}

	/**
	 * Rewrites out-of-date categorylinks rows in batches of BATCH_SIZE.
	 *
	 * Each batch is selected (joined against page), updated row by row inside
	 * one transaction, and committed. Updated rows get the current collation
	 * and therefore drop out of the next batch's selection; the loop stops as
	 * soon as a batch comes back smaller than BATCH_SIZE.
	 */
	public function execute() {
		global $wgCategoryCollation, $wgContLang;

		$dbw = wfGetDB( DB_MASTER );

		# Loop-invariant condition matching every row that is not yet on the
		# current collation; shared by the count query and the batch query.
		$outdatedCond = 'cl_collation != ' . $dbw->addQuotes( $wgCategoryCollation );

		$count = $dbw->selectField(
			'categorylinks',
			'COUNT(*)',
			$outdatedCond,
			__METHOD__
		);

		if ( $count == 0 ) {
			$this->output( "Collations up-to-date.\n" );
			return;
		}
		$this->output( "Fixing collation for $count rows.\n" );

		# From here on $count tracks the number of rows processed so far.
		$count = 0;
		do {
			$res = $dbw->select(
				array( 'categorylinks', 'page' ),
				array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
					'cl_sortkey', 'page_namespace', 'page_title'
				),
				array(
					$outdatedCond,
					'cl_from = page_id'
				),
				__METHOD__,
				array( 'LIMIT' => self::BATCH_SIZE )
			);

			$dbw->begin();
			foreach ( $res as $row ) {
				$title = Title::newFromRow( $row );

				# cl_collation is a string column, so it must not be compared
				# loosely against the integer 0: the result of such a
				# comparison for '' and for collation names differs between
				# PHP versions. An empty (or '0') value marks an old-style row.
				if ( !$row->cl_collation ) {
					# This is an old-style row, so the sortkey needs to be
					# converted. Strict comparison keeps numeric-looking
					# sortkeys ("1e3" vs. "1000") from being mistaken for
					# the title.
					if ( $row->cl_sortkey === $title->getText()
						|| $row->cl_sortkey === $title->getPrefixedText() ) {
						$prefix = '';
					} else {
						# Custom sortkey, use it as a prefix
						$prefix = $row->cl_sortkey;
					}
				} else {
					$prefix = $row->cl_sortkey_prefix;
				}

				# cl_type will be wrong for lots of pages if cl_collation is 0,
				# so let's update it while we're here.
				$namespace = $title->getNamespace();
				if ( $namespace == NS_CATEGORY ) {
					$type = 'subcat';
				} elseif ( $namespace == NS_FILE ) {
					$type = 'file';
				} else {
					$type = 'page';
				}

				$dbw->update(
					'categorylinks',
					array(
						'cl_sortkey' => $wgContLang->convertToSortkey(
							$title->getCategorySortkey( $prefix ) ),
						'cl_sortkey_prefix' => $prefix,
						'cl_collation' => $wgCategoryCollation,
						'cl_type' => $type,
						# Assign the column to itself so the stored timestamp
						# is carried over unchanged by this UPDATE.
						'cl_timestamp = cl_timestamp',
					),
					array( 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ),
					__METHOD__
				);
			}
			$dbw->commit();

			$count += $res->numRows();
			$this->output( "$count done.\n" );
		} while ( $res->numRows() == self::BATCH_SIZE );
	}
}
# Name of the maintenance class declared in this file, followed by the
# include of whatever file the DO_MAINTENANCE constant points at
# (presumably defined by Maintenance.php, which runs the class — confirm).
$maintClass = 'UpdateCollation';
require_once DO_MAINTENANCE;