Redo WhatLinksHere query and add a *_from_namespace field to link tables

* Also tweaked the query so MySQL avoids doing a page_name
  index scan when it should start with the link table index
* Added population script (triggered by update.php)
* Also removed uniqueness from some indexes where it is redundant
* Renamed two confusing variables

Bug: 60618
Change-Id: Icca99b6ae0ef76cb77695faf82c615516191da36
This commit is contained in:
Aaron Schulz 2014-03-06 15:01:35 -08:00 committed by Ori Livneh
parent b87966f6bb
commit b8c038f678
17 changed files with 224 additions and 55 deletions

View file

@ -263,6 +263,9 @@ changes to languages because of Bugzilla reports.
* Removed global function xmlsafe() from Export.php. (moved to OAIRepo extension)
* Removed Title::userCanRead(). (deprecated since 1.19)
* Removed maintenance script importTextFile.php. Use edit.php script instead.
* A *_from_namespace field has been added to the templatelinks, pagelinks,
and imagelinks tables. Run update.php to apply this change to the schema.
==== Renamed classes ====
* CLDRPluralRuleConverter_Expression to CLDRPluralRuleConverterExpression

View file

@ -1149,6 +1149,7 @@ $wgAutoloadLocalClasses = array(
'FixExtLinksProtocolRelative' => 'maintenance/fixExtLinksProtocolRelative.php',
'LoggedUpdateMaintenance' => 'maintenance/Maintenance.php',
'Maintenance' => 'maintenance/Maintenance.php',
'PopulateBacklinkNamespace' => 'maintenance/populateBacklinkNamespace.php',
'PopulateCategory' => 'maintenance/populateCategory.php',
'PopulateImageSha1' => 'maintenance/populateImageSha1.php',
'PopulateFilearchiveSha1' => 'maintenance/populateFilearchiveSha1.php',

View file

@ -7124,6 +7124,14 @@ $wgHKDFAlgorithm = 'sha256';
*/
$wgPageLanguageUseDB = false;
/**
* Enable use of the *_from_namespace fields of the pagelinks, templatelinks, and
* imagelinks tables (and of rd_namespace in the redirect join on Special:WhatLinksHere).
* Set this only if the fields are fully populated. This may be removed in 1.25.
* @var bool
* @since 1.24
*/
$wgUseLinkNamespaceDBFields = false;
/**
* For really cool vim folding this needs to be at the end:
* vim: foldmarker=@{,@} foldmethod=marker

View file

@ -3814,13 +3814,31 @@ class Title {
$log->addRelations( 'pr_id', $logRelationsValues, $logId );
}
// Update *_from_namespace fields as needed
if ( $this->getNamespace() != $nt->getNamespace() ) {
$dbw->update( 'pagelinks',
array( 'pl_from_namespace' => $nt->getNamespace() ),
array( 'pl_from' => $pageid ),
__METHOD__
);
$dbw->update( 'templatelinks',
array( 'tl_from_namespace' => $nt->getNamespace() ),
array( 'tl_from' => $pageid ),
__METHOD__
);
$dbw->update( 'imagelinks',
array( 'il_from_namespace' => $nt->getNamespace() ),
array( 'il_from' => $pageid ),
__METHOD__
);
}
# Update watchlists
$oldnamespace = MWNamespace::getSubject( $this->getNamespace() );
$newnamespace = MWNamespace::getSubject( $nt->getNamespace() );
$oldtitle = $this->getDBkey();
$newtitle = $nt->getDBkey();
if ( $oldnamespace != $newnamespace || $oldtitle != $newtitle ) {
$oldsnamespace = MWNamespace::getSubject( $this->getNamespace() );
$newsnamespace = MWNamespace::getSubject( $nt->getNamespace() );
if ( $oldsnamespace != $newsnamespace || $oldtitle != $newtitle ) {
WatchedItem::duplicateEntries( $this, $nt );
}

View file

@ -358,6 +358,7 @@ class LinksUpdate extends SqlDataUpdate {
foreach ( $diffs as $dbk => $id ) {
$arr[] = array(
'pl_from' => $this->mId,
'pl_from_namespace' => $this->mTitle->getNamespace(),
'pl_namespace' => $ns,
'pl_title' => $dbk
);
@ -379,6 +380,7 @@ class LinksUpdate extends SqlDataUpdate {
foreach ( $diffs as $dbk => $id ) {
$arr[] = array(
'tl_from' => $this->mId,
'tl_from_namespace' => $this->mTitle->getNamespace(),
'tl_namespace' => $ns,
'tl_title' => $dbk
);
@ -400,6 +402,7 @@ class LinksUpdate extends SqlDataUpdate {
foreach ( $diffs as $iname => $dummy ) {
$arr[] = array(
'il_from' => $this->mId,
'il_from_namespace' => $this->mTitle->getNamespace(),
'il_to' => $iname
);
}

View file

@ -72,6 +72,7 @@ abstract class DatabaseUpdater {
'PopulateImageSha1',
'FixExtLinksProtocolRelative',
'PopulateFilearchiveSha1',
'PopulateBacklinkNamespace'
);
/**

View file

@ -256,6 +256,9 @@ class MysqlUpdater extends DatabaseUpdater {
array( 'dropField', 'recentchanges', 'rc_cur_time', 'patch-drop-rc_cur_time.sql' ),
array( 'addIndex', 'watchlist', 'wl_user_notificationtimestamp', 'patch-watchlist-user-notificationtimestamp-index.sql' ),
array( 'addField', 'page', 'page_lang', 'patch-page_lang.sql' ),
array( 'addField', 'pagelinks', 'pl_from_namespace', 'patch-pl_from_namespace.sql' ),
array( 'addField', 'templatelinks', 'tl_from_namespace', 'patch-tl_from_namespace.sql' ),
array( 'addField', 'imagelinks', 'il_from_namespace', 'patch-il_from_namespace.sql' ),
);
}

View file

@ -415,6 +415,9 @@ class PostgresUpdater extends DatabaseUpdater {
array( 'addPgIndex', 'page_props', 'pp_propname_sortkey_page',
'( pp_propname, pp_sortkey, pp_page ) WHERE ( pp_sortkey IS NOT NULL )' ),
array( 'addPgField', 'page', 'page_lang', 'TEXT default NULL' ),
array( 'addPgField', 'pagelinks', 'pl_from_namespace', 'INTEGER NOT NULL DEFAULT 0' ),
array( 'addPgField', 'templatelinks', 'tl_from_namespace', 'INTEGER NOT NULL DEFAULT 0' ),
array( 'addPgField', 'imagelinks', 'il_from_namespace', 'INTEGER NOT NULL DEFAULT 0' ),
);
}

View file

@ -134,6 +134,9 @@ class SqliteUpdater extends DatabaseUpdater {
array( 'dropField', 'recentchanges', 'rc_cur_time', 'patch-drop-rc_cur_time.sql' ),
array( 'addIndex', 'watchlist', 'wl_user_notificationtimestamp', 'patch-watchlist-user-notificationtimestamp-index.sql' ),
array( 'addField', 'page', 'page_lang', 'patch-page-page_lang.sql' ),
array( 'addField', 'pagelinks', 'pl_from_namespace', 'patch-pl_from_namespace.sql' ),
array( 'addField', 'templatelinks', 'tl_from_namespace', 'patch-tl_from_namespace.sql' ),
array( 'addField', 'imagelinks', 'il_from_namespace', 'patch-il_from_namespace.sql' ),
);
}

View file

@ -452,6 +452,7 @@ class SpecialMergeHistory extends SpecialPage {
$dbw->insert( 'pagelinks',
array(
	'pl_from' => $this->mDestID,
	// Columns of pagelinks carry the "pl_" prefix; the source-namespace
	// column is pl_from_namespace (a "pf_" key names no existing column).
	'pl_from_namespace' => $destTitle->getNamespace(),
	'pl_namespace' => $destTitle->getNamespace(),
	'pl_title' => $destTitle->getDBkey() ),
__METHOD__

View file

@ -101,10 +101,10 @@ class SpecialWhatLinksHere extends IncludableSpecialPage {
* @param int $back Display from this article ID at backwards scrolling (default: 0)
*/
function showIndirectLinks( $level, $target, $limit, $from = 0, $back = 0 ) {
global $wgMaxRedirectLinksRetrieved;
global $wgMaxRedirectLinksRetrieved, $wgUseLinkNamespaceDBFields;
$out = $this->getOutput();
$dbr = wfGetDB( DB_SLAVE );
$options = array();
$hidelinks = $this->opts->getValue( 'hidelinks' );
$hideredirs = $this->opts->getValue( 'hideredirs' );
@ -113,77 +113,85 @@ class SpecialWhatLinksHere extends IncludableSpecialPage {
$fetchlinks = ( !$hidelinks || !$hideredirs );
// Make the query
$plConds = array(
'page_id=pl_from',
// Build query conds in concert for all three tables...
$conds['pagelinks'] = array(
'pl_namespace' => $target->getNamespace(),
'pl_title' => $target->getDBkey(),
);
if ( $hideredirs ) {
$plConds['rd_from'] = null;
} elseif ( $hidelinks ) {
$plConds[] = 'rd_from is NOT NULL';
}
$tlConds = array(
'page_id=tl_from',
$conds['templatelinks'] = array(
'tl_namespace' => $target->getNamespace(),
'tl_title' => $target->getDBkey(),
);
$ilConds = array(
'page_id=il_from',
$conds['imagelinks'] = array(
'il_to' => $target->getDBkey(),
);
$namespace = $this->opts->getValue( 'namespace' );
if ( is_int( $namespace ) ) {
$plConds['page_namespace'] = $namespace;
$tlConds['page_namespace'] = $namespace;
$ilConds['page_namespace'] = $namespace;
if ( $wgUseLinkNamespaceDBFields ) {
$conds['pagelinks']['pl_from_namespace'] = $namespace;
$conds['templatelinks']['tl_from_namespace'] = $namespace;
$conds['imagelinks']['il_from_namespace'] = $namespace;
} else {
$conds['pagelinks']['page_namespace'] = $namespace;
$conds['templatelinks']['page_namespace'] = $namespace;
$conds['imagelinks']['page_namespace'] = $namespace;
}
}
if ( $from ) {
$tlConds[] = "tl_from >= $from";
$plConds[] = "pl_from >= $from";
$ilConds[] = "il_from >= $from";
$conds['templatelinks'][] = "tl_from >= $from";
$conds['pagelinks'][] = "pl_from >= $from";
$conds['imagelinks'][] = "il_from >= $from";
}
// Read an extra row as an at-end check
$queryLimit = $limit + 1;
if ( $hideredirs ) {
$conds['pagelinks']['rd_from'] = null;
} elseif ( $hidelinks ) {
$conds['pagelinks'][] = 'rd_from is NOT NULL';
}
$options['LIMIT'] = $queryLimit;
$fields = array( 'page_id', 'page_namespace', 'page_title', 'rd_from' );
$joinConds = array( 'redirect' => array( 'LEFT JOIN', array(
'rd_from = page_id',
'rd_namespace' => $target->getNamespace(),
'rd_title' => $target->getDBkey(),
'rd_interwiki = ' . $dbr->addQuotes( '' ) . ' OR rd_interwiki IS NULL'
) ) );
/**
 * Build and run the backlink query for a single link table.
 *
 * The link table is first narrowed down (ordered by $fromCol and limited) in a
 * sub-query that is LEFT JOINed against redirect; the sub-query result is then
 * joined to page to fetch the page fields.
 *
 * @param object $dbr Database handle; must provide addQuotes(), selectSqlText() and select()
 * @param string $table Link table to query; used as the key into $conds
 * @param string $fromCol Source page ID column of $table (e.g. 'pl_from')
 * @return mixed Whatever $dbr->select() returns for the outer query
 */
$queryFunc = function( $dbr, $table, $fromCol ) use ( $conds, $target, $limit ) {
	global $wgUseLinkNamespaceDBFields;

	// Read an extra row as an at-end check
	$queryLimit = $limit + 1;

	$on = array(
		"rd_from = $fromCol",
		'rd_title' => $target->getDBkey(),
		'rd_interwiki = ' . $dbr->addQuotes( '' ) . ' OR rd_interwiki IS NULL'
	);
	// NOTE(review): while the flag is off, redirect rows are matched on title
	// only (any rd_namespace) -- confirm this is intended during migration.
	if ( $wgUseLinkNamespaceDBFields ) { // migration check
		$on['rd_namespace'] = $target->getNamespace();
	}

	$subTables = array( $table );
	$subJoins = array();
	// When the namespace filter is applied without the *_from_namespace
	// fields, $conds[$table] filters on page_namespace. That column belongs
	// to page, so page must take part in the sub-query; otherwise the
	// condition refers to a column none of the selected tables has.
	if ( isset( $conds[$table]['page_namespace'] ) ) {
		$subTables[] = 'page';
		$subJoins['page'] = array( 'INNER JOIN', "$fromCol = page_id" );
	}
	$subTables[] = 'redirect';
	$subJoins['redirect'] = array( 'LEFT JOIN', $on );

	// Inner LIMIT is 2X in case of stale backlinks with no page
	$subQuery = $dbr->selectSqlText(
		$subTables,
		array( $fromCol, 'rd_from' ),
		$conds[$table],
		__CLASS__ . '::showIndirectLinks',
		array( 'ORDER BY' => $fromCol, 'LIMIT' => 2 * $queryLimit ),
		$subJoins
	);

	return $dbr->select(
		array( 'page', 'temp_backlink_range' => "($subQuery)" ),
		array( 'page_id', 'page_namespace', 'page_title', 'rd_from' ),
		array(),
		__CLASS__ . '::showIndirectLinks',
		array( 'ORDER BY' => 'page_id', 'LIMIT' => $queryLimit ),
		array( 'page' => array( 'INNER JOIN', "$fromCol = page_id" ) )
	);
};
if ( $fetchlinks ) {
$options['ORDER BY'] = 'pl_from';
$plRes = $dbr->select( array( 'pagelinks', 'page', 'redirect' ), $fields,
$plConds, __METHOD__, $options,
$joinConds
);
$plRes = $queryFunc( $dbr, 'pagelinks', 'pl_from' );
}
if ( !$hidetrans ) {
$options['ORDER BY'] = 'tl_from';
$tlRes = $dbr->select( array( 'templatelinks', 'page', 'redirect' ), $fields,
$tlConds, __METHOD__, $options,
$joinConds
);
$tlRes = $queryFunc( $dbr, 'templatelinks', 'tl_from' );
}
if ( !$hideimages ) {
$options['ORDER BY'] = 'il_from';
$ilRes = $dbr->select( array( 'imagelinks', 'page', 'redirect' ), $fields,
$ilConds, __METHOD__, $options,
$joinConds
);
$ilRes = $queryFunc( $dbr, 'imagelinks', 'il_from' );
}
if ( ( !$fetchlinks || !$plRes->numRows() )

View file

@ -0,0 +1,4 @@
-- Add the il_from_namespace column to imagelinks (NOT NULL, default 0).
ALTER TABLE /*_*/imagelinks
ADD COLUMN il_from_namespace int NOT NULL default 0;
-- Index imagelinks by target file, then il_from_namespace, then source page ID.
CREATE INDEX /*i*/il_backlinks_namespace ON /*_*/imagelinks (il_to,il_from_namespace,il_from);

View file

@ -0,0 +1,4 @@
-- Add the pl_from_namespace column to pagelinks (NOT NULL, default 0).
ALTER TABLE /*_*/pagelinks
ADD COLUMN pl_from_namespace int NOT NULL default 0;
-- Index pagelinks by target namespace/title, then pl_from_namespace, then source page ID.
CREATE INDEX /*i*/pl_backlinks_namespace ON /*_*/pagelinks (pl_namespace,pl_title,pl_from_namespace,pl_from);

View file

@ -0,0 +1,4 @@
-- Add the tl_from_namespace column to templatelinks (NOT NULL, default 0).
ALTER TABLE /*_*/templatelinks
ADD COLUMN tl_from_namespace int NOT NULL default 0;
-- Index templatelinks by target namespace/title, then tl_from_namespace, then source page ID.
CREATE INDEX /*i*/tl_backlinks_namespace ON /*_*/templatelinks (tl_namespace,tl_title,tl_from_namespace,tl_from);

View file

@ -0,0 +1,93 @@
<?php
/**
* Optional upgrade script to populate *_from_namespace fields
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Maintenance
*/
require_once __DIR__ . '/Maintenance.php';
/**
* Maintenance script to populate *_from_namespace fields
*
* @ingroup Maintenance
*/
class PopulateBacklinkNamespace extends LoggedUpdateMaintenance {
	/**
	 * Batch size used when the inherited mBatchSize is unset or not positive.
	 * NOTE(review): mBatchSize comes from the parent class, which is not
	 * visible here -- confirm whether a default is already set elsewhere.
	 */
	const FALLBACK_BATCH_SIZE = 200;

	public function __construct() {
		parent::__construct();
		$this->mDescription = "Populate the *_from_namespace fields";
	}

	/**
	 * @return string Key identifying this update
	 */
	protected function getUpdateKey() {
		return 'populate *_from_namespace';
	}

	/**
	 * @return string Message for the case where the update is skipped
	 */
	protected function updateSkippedMessage() {
		return '*_from_namespace column of backlink tables already populated.';
	}

	/**
	 * Copy page_namespace of every page into the *_from_namespace field of
	 * the pagelinks, templatelinks and imagelinks rows originating from it.
	 *
	 * Pages are walked in page_id order, one batch of IDs at a time, waiting
	 * for slaves after each batch.
	 *
	 * @return bool False if the page table yields no starting page_id, true otherwise
	 */
	public function doDBUpdates() {
		$db = $this->getDB( DB_MASTER );

		$this->output( "Updating *_from_namespace fields in links tables.\n" );

		$start = $db->selectField( 'page', 'MIN(page_id)', false, __METHOD__ );
		if ( !$start ) {
			$this->output( "Nothing to do.\n" );

			return false;
		}
		$start = (int)$start;
		$end = (int)$db->selectField( 'page', 'MAX(page_id)', false, __METHOD__ );

		// A batch size below 1 would keep the window from ever advancing
		$batchSize = (int)$this->mBatchSize;
		if ( $batchSize < 1 ) {
			$batchSize = self::FALLBACK_BATCH_SIZE;
		}

		// Link table => column prefix
		$linkTables = array(
			'pagelinks' => 'pl',
			'templatelinks' => 'tl',
			'imagelinks' => 'il',
		);

		// BETWEEN is inclusive on both ends, so each window starts right after
		// the previous one ends; no page_id is processed twice.
		for ( $blockStart = $start; $blockStart <= $end; $blockStart += $batchSize ) {
			$blockEnd = $blockStart + $batchSize - 1;
			$this->output( "...doing page_id from $blockStart to $blockEnd\n" );
			$cond = "page_id BETWEEN $blockStart AND $blockEnd";
			$res = $db->select( 'page', array( 'page_id', 'page_namespace' ), $cond, __METHOD__ );
			foreach ( $res as $row ) {
				foreach ( $linkTables as $table => $prefix ) {
					$db->update( $table,
						array( "{$prefix}_from_namespace" => $row->page_namespace ),
						array( "{$prefix}_from" => $row->page_id ),
						__METHOD__
					);
				}
			}
			wfWaitForSlaves();
		}

		return true;
	}
}
$maintClass = "PopulateBacklinkNamespace";
require_once RUN_MAINTENANCE_IF_MAIN;

View file

@ -206,6 +206,7 @@ CREATE INDEX redirect_ns_title ON redirect (rd_namespace,rd_title,rd_from);
CREATE TABLE pagelinks (
pl_from INTEGER NOT NULL REFERENCES page(page_id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
pl_from_namespace INTEGER NOT NULL DEFAULT 0,
pl_namespace SMALLINT NOT NULL,
pl_title TEXT NOT NULL
);
@ -214,6 +215,7 @@ CREATE INDEX pagelinks_title ON pagelinks (pl_title);
CREATE TABLE templatelinks (
tl_from INTEGER NOT NULL REFERENCES page(page_id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
tl_from_namespace INTEGER NOT NULL DEFAULT 0,
tl_namespace SMALLINT NOT NULL,
tl_title TEXT NOT NULL
);
@ -222,6 +224,7 @@ CREATE INDEX templatelinks_from ON templatelinks (tl_from);
CREATE TABLE imagelinks (
il_from INTEGER NOT NULL REFERENCES page(page_id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
il_from_namespace INTEGER NOT NULL DEFAULT 0,
il_to TEXT NOT NULL
);
CREATE UNIQUE INDEX il_from ON imagelinks (il_to,il_from);

View file

@ -473,6 +473,8 @@ CREATE INDEX /*i*/ar_revid ON /*_*/archive (ar_rev_id);
CREATE TABLE /*_*/pagelinks (
-- Key to the page_id of the page containing the link.
pl_from int unsigned NOT NULL default 0,
-- Namespace for this page
pl_from_namespace int NOT NULL default 0,
-- Key to page_namespace/page_title of the target page.
-- The target page may or may not exist, and due to renames
@ -483,7 +485,8 @@ CREATE TABLE /*_*/pagelinks (
) /*$wgDBTableOptions*/;
CREATE UNIQUE INDEX /*i*/pl_from ON /*_*/pagelinks (pl_from,pl_namespace,pl_title);
CREATE UNIQUE INDEX /*i*/pl_namespace ON /*_*/pagelinks (pl_namespace,pl_title,pl_from);
CREATE INDEX /*i*/pl_namespace ON /*_*/pagelinks (pl_namespace,pl_title,pl_from);
CREATE INDEX /*i*/pl_backlinks_namespace ON /*_*/pagelinks (pl_namespace,pl_title,pl_from_namespace,pl_from);
--
@ -492,6 +495,8 @@ CREATE UNIQUE INDEX /*i*/pl_namespace ON /*_*/pagelinks (pl_namespace,pl_title,p
CREATE TABLE /*_*/templatelinks (
-- Key to the page_id of the page containing the link.
tl_from int unsigned NOT NULL default 0,
-- Namespace for this page
tl_from_namespace int NOT NULL default 0,
-- Key to page_namespace/page_title of the target page.
-- The target page may or may not exist, and due to renames
@ -502,7 +507,8 @@ CREATE TABLE /*_*/templatelinks (
) /*$wgDBTableOptions*/;
CREATE UNIQUE INDEX /*i*/tl_from ON /*_*/templatelinks (tl_from,tl_namespace,tl_title);
CREATE UNIQUE INDEX /*i*/tl_namespace ON /*_*/templatelinks (tl_namespace,tl_title,tl_from);
CREATE INDEX /*i*/tl_namespace ON /*_*/templatelinks (tl_namespace,tl_title,tl_from);
CREATE INDEX /*i*/tl_backlinks_namespace ON /*_*/templatelinks (tl_namespace,tl_title,tl_from_namespace,tl_from);
--
@ -513,6 +519,8 @@ CREATE UNIQUE INDEX /*i*/tl_namespace ON /*_*/templatelinks (tl_namespace,tl_tit
CREATE TABLE /*_*/imagelinks (
-- Key to page_id of the page containing the image / media link.
il_from int unsigned NOT NULL default 0,
-- Namespace for this page
il_from_namespace int NOT NULL default 0,
-- Filename of target image.
-- This is also the page_title of the file's description page;
@ -521,7 +529,8 @@ CREATE TABLE /*_*/imagelinks (
) /*$wgDBTableOptions*/;
CREATE UNIQUE INDEX /*i*/il_from ON /*_*/imagelinks (il_from,il_to);
CREATE UNIQUE INDEX /*i*/il_to ON /*_*/imagelinks (il_to,il_from);
CREATE INDEX /*i*/il_to ON /*_*/imagelinks (il_to,il_from);
CREATE INDEX /*i*/il_backlinks_namespace ON /*_*/imagelinks (il_to,il_from_namespace,il_from);
--