CleanupTitles: Turn "Talk:Project:Foo" into "Project talk:Foo"

Do this rather than moving the page to a broken title. This is a common kind of
invalid title, as it arises whenever a new namespace or namespace alias is added.

Bug: T196088
Change-Id: Ie7d32858a5267615995ff0eb8074edf4248c03d8
This commit is contained in:
Pppery 2024-07-30 12:46:10 -04:00
parent 09d38b1557
commit 48e90e8ec5
2 changed files with 46 additions and 20 deletions

View file

@ -129,40 +129,57 @@ class TitleCleanup extends TableCleanup {
$legalizedUnprefixed = '(space)';
}
$ns = (int)$row->page_namespace;
// Move all broken pages to the main namespace so they can be found together
if ( $ns !== 0 ) {
$namespaceInfo = $this->getServiceContainer()->getNamespaceInfo();
$namespaceName = $namespaceInfo->getCanonicalName( $ns );
if ( $namespaceName === false ) {
$namespaceName = "NS$ns"; // Fallback for unknown namespaces
}
$legalizedUnprefixed = "$namespaceName:$legalizedUnprefixed";
}
$legalized = $this->prefix . $legalizedUnprefixed;
$title = Title::newFromText( $legalized );
$title = null;
// Try to move "Talk:Project:Foo" -> "Project talk:Foo"
if ( $ns === 1 ) {
$subjectTitle = Title::newFromText( $legalizedUnprefixed );
if ( $subjectTitle && !$subjectTitle->isTalkPage() ) {
$talkTitle = $subjectTitle->getTalkPageIfDefined();
if ( $talkTitle !== null && !$talkTitle->exists() ) {
$ns = $talkTitle->getNamespace();
$title = $talkTitle;
}
}
}
if ( $title === null ) {
// Either this is not a talk page, or the talk-page move attempted above was not possible.
// Move any other broken pages to the main namespace so they can be found together.
if ( $ns !== 0 ) {
$namespaceInfo = $this->getServiceContainer()->getNamespaceInfo();
$namespaceName = $namespaceInfo->getCanonicalName( $ns );
if ( $namespaceName === false ) {
$namespaceName = "NS$ns"; // Fallback for unknown namespaces
}
$ns = 0;
$legalizedUnprefixed = "$namespaceName:$legalizedUnprefixed";
}
$title = Title::newFromText( $this->prefix . $legalizedUnprefixed );
}
if ( $title === null ) {
// It's still not a valid title, so try again with a much smaller
// allowed character set. This will mangle any titles with non-ASCII
// characters, but otherwise we would fall back to the Broken/id:foo
// failsafe below, which is worse.
$legalizedUnprefixed = preg_replace_callback( '!([^A-Za-z0-9_\\-:])!',
$legalizedUnprefixed = preg_replace_callback( '!([^A-Za-z0-9_:\\-])!',
[ $this, 'hexChar' ],
$legalizedUnprefixed
);
$legalized = $this->prefix . $legalizedUnprefixed;
$title = Title::newFromText( $legalized );
$title = Title::newFromText( $this->prefix . $legalizedUnprefixed );
}
if ( $title === null ) {
// Oh well, we tried
$clean = $this->prefix . 'id:' . $row->page_id;
$legalized = $this->prefix . $legalizedUnprefixed;
$this->output( "Couldn't legalize; form '$legalized' still invalid; using '$clean'\n" );
$title = Title::newFromText( $clean );
} elseif ( $title->exists() ) {
$clean = $this->prefix . 'id:' . $row->page_id;
$this->output( "Legalized for '$legalized' exists; using '$clean'\n" );
$conflict = $title->getDBKey();
$this->output( "Legalized for '$conflict' exists; using '$clean'\n" );
$title = Title::newFromText( $clean );
}
@ -177,14 +194,14 @@ class TitleCleanup extends TableCleanup {
$dest = $title->getDBkey();
if ( $this->dryrun ) {
$this->output( "DRY RUN: would rename $row->page_id ($row->page_namespace," .
"'$row->page_title') to (0,'$dest')\n" );
"'$row->page_title') to ($ns,'$dest')\n" );
} else {
$this->output( "renaming $row->page_id ($row->page_namespace," .
"'$row->page_title') to ($row->page_namespace,'$dest')\n" );
$this->getPrimaryDB()
->newUpdateQueryBuilder()
->update( 'page' )
->set( [ 'page_title' => $dest, 'page_namespace' => 0 ] )
->set( [ 'page_title' => $dest, 'page_namespace' => $ns ] )
->where( [ 'page_id' => $row->page_id ] )
->caller( __METHOD__ )->execute();
}

View file

@ -11,6 +11,7 @@ class CleanupTitlesTest extends MaintenanceBaseTestCase {
// Add some existing pages to test normalization clashes
$this->insertPage( 'User talk:195.175.37.8' );
$this->insertPage( 'User talk:195.175.37.10' );
$this->insertPage( 'Project talk:Existing' );
// Create an interwiki link to test titles with interwiki prefixes
$this->getDb()->newInsertQueryBuilder()
@ -40,8 +41,6 @@ class CleanupTitlesTest extends MaintenanceBaseTestCase {
yield [ 0, 'Project:Foo', 4, 'Foo', null ];
# Interwiki title encoded as mainspace
yield [ 0, 'custom:Foo', 0, 'Broken/custom:Foo', null ];
# Talk page of illegal title
yield [ 1, 'custom:Foo', 0, 'Broken/Talk:custom:Foo', null ];
# Unknown namespace
yield [ 9999, 'Foo', 0, 'Broken/NS9999:Foo', null ];
# Illegal characters
@ -56,7 +55,9 @@ class CleanupTitlesTest extends MaintenanceBaseTestCase {
# the namespace prefix, and then clashes with another title when fully normalized
yield [ 0, 'User talk:195.175.037.10', 0, 'Broken/User_talk:195.175.37.10', null ];
# Non-ascii characters (and otherwise invalid)
yield [ 1, 'Project:Википедия', 0, 'Broken/Talk:Project:Википедия', null ];
# The point of this is to make sure it escapes the invalid < character without also
# escaping the non-ASCII characters in the other parts of the title
yield [ 0, '<Википедия', 0, 'Broken/\x3cВикипедия', null ];
# Non-ASCII characters (and otherwise invalid in a way that removing characters not in Title::legalChars()
# doesn't cure)
# This output is not ideal, and is just a failsafe to avoid "Broken/id:" titles
@ -67,6 +68,14 @@ class CleanupTitlesTest extends MaintenanceBaseTestCase {
yield [ 0, "Media:Foo", 0, "Broken/Media:Foo", null ];
# With prefix
yield [ 0, '<', 0, 'Prefix/\x3c', 'Prefix' ];
# Incorrectly encoded talk page of namespace
yield [ 1, 'Project:Foo', 5, 'Foo', null ];
# Incorrectly encoded talk page of a special page
yield [ 1, 'Special:Foo', 0, 'Broken/Talk:Special:Foo', null ];
# Incorrectly encoded talk page of an interwiki title
yield [ 1, 'custom:Foo', 0, 'Broken/Talk:custom:Foo', null ];
# Incorrectly encoded talk page whose correctly named talk page already exists
yield [ 1, 'Project:Existing', 0, 'Broken/Talk:Project:Existing', null ];
}
/**