refreshLinks.php: allow refreshing by categories, tracking or not
Needed for selective updates of pages using a particular feature. Intended to be run in production, so needs to scale. Bug: T149723 Change-Id: If20fb1f91de8d4227def5b07d6d52b91161ed3fd
This commit is contained in:
parent
c06e055f4f
commit
13054a4c70
6 changed files with 239 additions and 104 deletions
|
|
@ -206,6 +206,8 @@ changes to languages because of Phabricator reports.
|
|||
* Article::doEditContent() was marked as deprecated, to be removed in 1.30
|
||||
or later.
|
||||
* ContentHandler::runLegacyHooks() was removed.
|
||||
* refreshLinks.php now can be limited to a particular category with --category=...
|
||||
or a tracking category with --tracking-category=...
|
||||
|
||||
== Compatibility ==
|
||||
|
||||
|
|
|
|||
|
|
@ -1459,6 +1459,7 @@ $wgAutoloadLocalClasses = [
|
|||
'TitlePrefixSearch' => __DIR__ . '/includes/PrefixSearch.php',
|
||||
'TitleValue' => __DIR__ . '/includes/title/TitleValue.php',
|
||||
'TrackBlobs' => __DIR__ . '/maintenance/storage/trackBlobs.php',
|
||||
'TrackingCategories' => __DIR__ . '/includes/TrackingCategories.php',
|
||||
'TraditionalImageGallery' => __DIR__ . '/includes/gallery/TraditionalImageGallery.php',
|
||||
'TransactionProfiler' => __DIR__ . '/includes/libs/rdbms/TransactionProfiler.php',
|
||||
'TransformParameterError' => __DIR__ . '/includes/media/MediaTransformOutput.php',
|
||||
|
|
|
|||
130
includes/TrackingCategories.php
Normal file
130
includes/TrackingCategories.php
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
<?php
|
||||
/**
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
* @file
|
||||
* @ingroup Categories
|
||||
*/
|
||||
|
||||
/**
|
||||
* This class performs some operations related to tracking categories, such as creating
|
||||
* a list of all such categories.
|
||||
*/
|
||||
class TrackingCategories {
	/** @var Config Source of the 'TrackingCategories' and 'EnableMagicLinks' settings */
	private $config;

	/**
	 * Tracking categories that exist in core
	 *
	 * @var string[] Message keys
	 */
	private static $coreTrackingCategories = [
		'index-category',
		'noindex-category',
		'duplicate-args-category',
		'expensive-parserfunction-category',
		'post-expand-template-argument-category',
		'post-expand-template-inclusion-category',
		'hidden-category-category',
		'broken-file-category',
		'node-count-exceeded-category',
		'expansion-depth-exceeded-category',
		'restricted-displaytitle-ignored',
		'deprecated-self-close-category',
	];

	/**
	 * @param Config $config
	 */
	public function __construct( Config $config ) {
		$this->config = $config;
	}

	/**
	 * Read the global and extract title objects from the corresponding messages
	 *
	 * The message keys come from the core list, the 'TrackingCategories'
	 * extension attribute, the deprecated 'TrackingCategories' setting and
	 * the magic link types that are enabled.
	 *
	 * @return array[] Map of message key => [ 'msg' => Title, 'cats' => Title[] ].
	 *   'msg' is the title of the message in the MediaWiki namespace, 'cats' the
	 *   category titles the message resolves to (empty if the category is
	 *   disabled or no valid title could be built).
	 */
	public function getTrackingCategories() {
		$categories = array_merge(
			self::$coreTrackingCategories,
			ExtensionRegistry::getInstance()->getAttribute( 'TrackingCategories' ),
			$this->config->get( 'TrackingCategories' ) // deprecated
		);

		// Only show magic link tracking categories if they are enabled
		$enableMagicLinks = $this->config->get( 'EnableMagicLinks' );
		$magicLinkMessages = [
			'ISBN' => 'magiclink-tracking-isbn',
			'RFC' => 'magiclink-tracking-rfc',
			'PMID' => 'magiclink-tracking-pmid',
		];
		foreach ( $magicLinkMessages as $type => $magicMsg ) {
			if ( $enableMagicLinks[$type] ) {
				$categories[] = $magicMsg;
			}
		}

		$trackingCategories = [];
		foreach ( $categories as $catMsg ) {
			/*
			 * Check if the tracking category varies by namespace
			 * Otherwise only pages in the current namespace will be displayed
			 * If it does vary, show pages considering all namespaces
			 */
			$msgObj = wfMessage( $catMsg )->inContentLanguage();
			$allCats = [];
			$catMsgTitle = Title::makeTitleSafe( NS_MEDIAWIKI, $catMsg );
			if ( !$catMsgTitle ) {
				continue;
			}

			// Match things like {{NAMESPACE}} and {{NAMESPACENUMBER}}.
			// False positives are ok, this is just an efficiency shortcut
			if ( strpos( $msgObj->plain(), '{{' ) !== false ) {
				// Evaluate the message once per namespace to collect every variant
				foreach ( MWNamespace::getValidNamespaces() as $namesp ) {
					$tempTitle = Title::makeTitleSafe( $namesp, $catMsg );
					if ( !$tempTitle ) {
						continue;
					}
					$catTitle = self::makeCategoryTitle( $msgObj->title( $tempTitle )->text() );
					if ( $catTitle ) {
						$allCats[] = $catTitle;
					}
				}
			} else {
				$catTitle = self::makeCategoryTitle( $msgObj->text() );
				if ( $catTitle ) {
					$allCats[] = $catTitle;
				}
			}

			$trackingCategories[$catMsg] = [
				'cats' => $allCats,
				'msg' => $catMsgTitle,
			];
		}

		return $trackingCategories;
	}

	/**
	 * Build the category title for the text of a tracking category message
	 *
	 * @param string $catName Category name produced by the message
	 * @return Title|null Null if the category is disabled ("-") or no title
	 *   could be made from the name
	 */
	private static function makeCategoryTitle( $catName ) {
		# Allow tracking categories to be disabled by setting them to "-"
		if ( $catName === '-' ) {
			return null;
		}

		return Title::makeTitleSafe( NS_CATEGORY, $catName );
	}
}
|
||||
|
|
@ -696,6 +696,8 @@ class ParserOutput extends CacheTime {
|
|||
* to TrackingCategories::$coreTrackingCategories, and extensions
|
||||
* should add to "TrackingCategories" in their extension.json.
|
||||
*
|
||||
* @todo Migrate some code to TrackingCategories
|
||||
*
|
||||
* @param string $msg Message key
|
||||
* @param Title $title title of the page which is being tracked
|
||||
* @return bool Whether the addition was successful
|
||||
|
|
|
|||
|
|
@ -36,26 +36,6 @@ class SpecialTrackingCategories extends SpecialPage {
|
|||
parent::__construct( 'TrackingCategories' );
|
||||
}
|
||||
|
||||
/**
|
||||
* Tracking categories that exist in core
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private static $coreTrackingCategories = [
|
||||
'index-category',
|
||||
'noindex-category',
|
||||
'duplicate-args-category',
|
||||
'expensive-parserfunction-category',
|
||||
'post-expand-template-argument-category',
|
||||
'post-expand-template-inclusion-category',
|
||||
'hidden-category-category',
|
||||
'broken-file-category',
|
||||
'node-count-exceeded-category',
|
||||
'expansion-depth-exceeded-category',
|
||||
'restricted-displaytitle-ignored',
|
||||
'deprecated-self-close-category',
|
||||
];
|
||||
|
||||
function execute( $par ) {
|
||||
$this->setHeaders();
|
||||
$this->outputHeader();
|
||||
|
|
@ -76,10 +56,11 @@ class SpecialTrackingCategories extends SpecialPage {
|
|||
</tr></thead>"
|
||||
);
|
||||
|
||||
$trackingCategories = $this->prepareTrackingCategoriesData();
|
||||
$trackingCategories = new TrackingCategories( $this->getConfig() );
|
||||
$categoryList = $trackingCategories->getTrackingCategories();
|
||||
|
||||
$batch = new LinkBatch();
|
||||
foreach ( $trackingCategories as $catMsg => $data ) {
|
||||
foreach ( $categoryList as $catMsg => $data ) {
|
||||
$batch->addObj( $data['msg'] );
|
||||
foreach ( $data['cats'] as $catTitle ) {
|
||||
$batch->addObj( $catTitle );
|
||||
|
|
@ -87,11 +68,11 @@ class SpecialTrackingCategories extends SpecialPage {
|
|||
}
|
||||
$batch->execute();
|
||||
|
||||
Hooks::run( 'SpecialTrackingCategories::preprocess', [ $this, $trackingCategories ] );
|
||||
Hooks::run( 'SpecialTrackingCategories::preprocess', [ $this, $categoryList ] );
|
||||
|
||||
$linkRenderer = $this->getLinkRenderer();
|
||||
|
||||
foreach ( $trackingCategories as $catMsg => $data ) {
|
||||
foreach ( $categoryList as $catMsg => $data ) {
|
||||
$allMsgs = [];
|
||||
$catDesc = $catMsg . '-desc';
|
||||
|
||||
|
|
@ -143,80 +124,6 @@ class SpecialTrackingCategories extends SpecialPage {
|
|||
$this->getOutput()->addHTML( Html::closeElement( 'table' ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the global and extract title objects from the corresponding messages
|
||||
* @return array Array( 'msg' => Title, 'cats' => Title[] )
|
||||
*/
|
||||
private function prepareTrackingCategoriesData() {
|
||||
$categories = array_merge(
|
||||
self::$coreTrackingCategories,
|
||||
ExtensionRegistry::getInstance()->getAttribute( 'TrackingCategories' ),
|
||||
$this->getConfig()->get( 'TrackingCategories' ) // deprecated
|
||||
);
|
||||
|
||||
// Only show magic link tracking categories if they are enabled
|
||||
$enableMagicLinks = $this->getConfig()->get( 'EnableMagicLinks' );
|
||||
if ( $enableMagicLinks['ISBN'] ) {
|
||||
$categories[] = 'magiclink-tracking-isbn';
|
||||
}
|
||||
if ( $enableMagicLinks['RFC'] ) {
|
||||
$categories[] = 'magiclink-tracking-rfc';
|
||||
}
|
||||
if ( $enableMagicLinks['PMID'] ) {
|
||||
$categories[] = 'magiclink-tracking-pmid';
|
||||
}
|
||||
|
||||
$trackingCategories = [];
|
||||
foreach ( $categories as $catMsg ) {
|
||||
/*
|
||||
* Check if the tracking category varies by namespace
|
||||
* Otherwise only pages in the current namespace will be displayed
|
||||
* If it does vary, show pages considering all namespaces
|
||||
*/
|
||||
$msgObj = $this->msg( $catMsg )->inContentLanguage();
|
||||
$allCats = [];
|
||||
$catMsgTitle = Title::makeTitleSafe( NS_MEDIAWIKI, $catMsg );
|
||||
if ( !$catMsgTitle ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Match things like {{NAMESPACE}} and {{NAMESPACENUMBER}}.
|
||||
// False positives are ok, this is just an efficiency shortcut
|
||||
if ( strpos( $msgObj->plain(), '{{' ) !== false ) {
|
||||
$ns = MWNamespace::getValidNamespaces();
|
||||
foreach ( $ns as $namesp ) {
|
||||
$tempTitle = Title::makeTitleSafe( $namesp, $catMsg );
|
||||
if ( !$tempTitle ) {
|
||||
continue;
|
||||
}
|
||||
$catName = $msgObj->title( $tempTitle )->text();
|
||||
# Allow tracking categories to be disabled by setting them to "-"
|
||||
if ( $catName !== '-' ) {
|
||||
$catTitle = Title::makeTitleSafe( NS_CATEGORY, $catName );
|
||||
if ( $catTitle ) {
|
||||
$allCats[] = $catTitle;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
$catName = $msgObj->text();
|
||||
# Allow tracking categories to be disabled by setting them to "-"
|
||||
if ( $catName !== '-' ) {
|
||||
$catTitle = Title::makeTitleSafe( NS_CATEGORY, $catName );
|
||||
if ( $catTitle ) {
|
||||
$allCats[] = $catTitle;
|
||||
}
|
||||
}
|
||||
}
|
||||
$trackingCategories[$catMsg] = [
|
||||
'cats' => $allCats,
|
||||
'msg' => $catMsgTitle,
|
||||
];
|
||||
}
|
||||
|
||||
return $trackingCategories;
|
||||
}
|
||||
|
||||
protected function getGroupName() {
|
||||
return 'pages';
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@ require_once __DIR__ . '/Maintenance.php';
|
|||
* @ingroup Maintenance
|
||||
*/
|
||||
class RefreshLinks extends Maintenance {
|
||||
const REPORTING_INTERVAL = 100;
|
||||
|
||||
/** @var int|bool */
|
||||
protected $namespace = false;
|
||||
|
||||
|
|
@ -43,6 +45,8 @@ class RefreshLinks extends Maintenance {
|
|||
$this->addOption( 'dfn-chunk-size', 'Maximum number of existent IDs to check per ' .
|
||||
'query, default 100000', false, true );
|
||||
$this->addOption( 'namespace', 'Only fix pages in this namespace', false, true );
|
||||
$this->addOption( 'category', 'Only fix pages in this category', false, true );
|
||||
$this->addOption( 'tracking-category', 'Only fix pages in this tracking category', false, true );
|
||||
$this->addArg( 'start', 'Page_id to start from, default 1', false );
|
||||
$this->setBatchSize( 100 );
|
||||
}
|
||||
|
|
@ -61,7 +65,15 @@ class RefreshLinks extends Maintenance {
|
|||
} else {
|
||||
$this->namespace = (int)$ns;
|
||||
}
|
||||
if ( !$this->hasOption( 'dfn-only' ) ) {
|
||||
if ( ( $category = $this->getOption( 'category', false ) ) !== false ) {
|
||||
$title = Title::makeTitleSafe( NS_CATEGORY, $category );
|
||||
if ( !$title ) {
|
||||
$this->error( "'$category' is an invalid category name!\n", true );
|
||||
}
|
||||
$this->refreshCategory( $category );
|
||||
} elseif ( ( $category = $this->getOption( 'tracking-category', false ) ) !== false ) {
|
||||
$this->refreshTrackingCategory( $category );
|
||||
} elseif ( !$this->hasOption( 'dfn-only' ) ) {
|
||||
$new = $this->getOption( 'new-only', false );
|
||||
$redir = $this->getOption( 'redirects-only', false );
|
||||
$oldRedir = $this->getOption( 'old-redirects-only', false );
|
||||
|
|
@ -89,7 +101,6 @@ class RefreshLinks extends Maintenance {
|
|||
private function doRefreshLinks( $start, $newOnly = false,
|
||||
$end = null, $redirectsOnly = false, $oldRedirectsOnly = false
|
||||
) {
|
||||
$reportingInterval = 100;
|
||||
$dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
|
||||
|
||||
if ( $start === null ) {
|
||||
|
|
@ -124,7 +135,7 @@ class RefreshLinks extends Maintenance {
|
|||
$i = 0;
|
||||
|
||||
foreach ( $res as $row ) {
|
||||
if ( !( ++$i % $reportingInterval ) ) {
|
||||
if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
|
||||
$this->output( "$i\n" );
|
||||
wfWaitForSlaves();
|
||||
}
|
||||
|
|
@ -145,7 +156,7 @@ class RefreshLinks extends Maintenance {
|
|||
|
||||
$i = 0;
|
||||
foreach ( $res as $row ) {
|
||||
if ( !( ++$i % $reportingInterval ) ) {
|
||||
if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
|
||||
$this->output( "$i\n" );
|
||||
wfWaitForSlaves();
|
||||
}
|
||||
|
|
@ -166,7 +177,7 @@ class RefreshLinks extends Maintenance {
|
|||
|
||||
for ( $id = $start; $id <= $end; $id++ ) {
|
||||
|
||||
if ( !( $id % $reportingInterval ) ) {
|
||||
if ( !( $id % self::REPORTING_INTERVAL ) ) {
|
||||
$this->output( "$id\n" );
|
||||
wfWaitForSlaves();
|
||||
}
|
||||
|
|
@ -179,7 +190,7 @@ class RefreshLinks extends Maintenance {
|
|||
|
||||
for ( $id = $start; $id <= $end; $id++ ) {
|
||||
|
||||
if ( !( $id % $reportingInterval ) ) {
|
||||
if ( !( $id % self::REPORTING_INTERVAL ) ) {
|
||||
$this->output( "$id\n" );
|
||||
wfWaitForSlaves();
|
||||
}
|
||||
|
|
@ -379,6 +390,7 @@ class RefreshLinks extends Maintenance {
|
|||
* @param string $var Field name
|
||||
* @param mixed $start First value to include or null
|
||||
* @param mixed $end Last value to include or null
|
||||
* @return string
|
||||
*/
|
||||
private static function intervalCond( IDatabase $db, $var, $start, $end ) {
|
||||
if ( $start === null && $end === null ) {
|
||||
|
|
@ -391,6 +403,87 @@ class RefreshLinks extends Maintenance {
|
|||
return "$var BETWEEN {$db->addQuotes( $start )} AND {$db->addQuotes( $end )}";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Refreshes links for pages in a tracking category
|
||||
*
|
||||
* @param string $category Category key
|
||||
*/
|
||||
private function refreshTrackingCategory( $category ) {
	// A tracking category key can resolve to several category titles
	$titles = $this->getPossibleCategories( $category );

	if ( !$titles ) {
		// Report on stderr but don't bail out; there is nothing to refresh
		$this->error( "Tracking category '$category' is disabled\n" );
		return;
	}

	foreach ( $titles as $title ) {
		$this->refreshCategory( $title );
	}
}
|
||||
|
||||
/**
 * Refreshes links for all pages in a category
 *
 * Members are fetched in batches of $this->mBatchSize, ordered by
 * ( cl_timestamp, cl_from ); each batch continues after the last row
 * handled by the previous one.
 *
 * @param Title|string $category Category title, or a category name as
 *   given to --category
 */
private function refreshCategory( $category ) {
	// execute() passes the raw --category string, so plain names have to be
	// accepted here as well rather than failing on a Title type hint.
	if ( !( $category instanceof Title ) ) {
		$name = $category;
		$category = Title::makeTitleSafe( NS_CATEGORY, $name );
		if ( !$category ) {
			$this->error( "'$name' is an invalid category name!\n", true );
			return;
		}
	}

	$this->output( "Refreshing pages in category '{$category->getText()}'...\n" );

	$dbr = $this->getDB( DB_REPLICA );
	$conds = [
		'page_id=cl_from',
		'cl_to' => $category->getDBkey(),
	];
	if ( $this->namespace !== false ) {
		$conds['page_namespace'] = $this->namespace;
	}

	$i = 0;
	// Position of the last processed row; '' and 0 sort before any real row
	$timestamp = '';
	$lastId = 0;
	do {
		// Keep the raw and the quoted timestamp apart so the value taken
		// from the last row is never confused with its SQL literal.
		$quotedTimestamp = $dbr->addQuotes( $timestamp );
		$finalConds = $conds;
		$finalConds[] = "(cl_timestamp > $quotedTimestamp OR " .
			"(cl_timestamp = $quotedTimestamp AND cl_from > $lastId))";
		$res = $dbr->select( [ 'page', 'categorylinks' ],
			[ 'page_id', 'cl_timestamp' ],
			$finalConds,
			__METHOD__,
			[
				'ORDER BY' => [ 'cl_timestamp', 'cl_from' ],
				'LIMIT' => $this->mBatchSize,
			]
		);

		foreach ( $res as $row ) {
			if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
				$this->output( "$i\n" );
				wfWaitForSlaves();
			}
			// Cast: the ID is interpolated into the next batch's condition
			$lastId = (int)$row->page_id;
			$timestamp = $row->cl_timestamp;
			self::fixLinksFromArticle( $lastId );
		}

		// A short batch means the category has been exhausted
	} while ( $res->numRows() == $this->mBatchSize );
}
|
||||
|
||||
/**
|
||||
* Returns a list of possible categories for a given tracking category key
|
||||
*
|
||||
* @param string $categoryKey
|
||||
* @return Title[]
|
||||
*/
|
||||
private function getPossibleCategories( $categoryKey ) {
	$registry = new TrackingCategories( $this->getConfig() );
	// Map of message key => [ 'msg' => ..., 'cats' => ... ]
	$byKey = $registry->getTrackingCategories();

	if ( !isset( $byKey[$categoryKey] ) ) {
		$this->error( "Unknown tracking category {$categoryKey}\n", true );
	} else {
		return $byKey[$categoryKey]['cats'];
	}
}
|
||||
}
|
||||
|
||||
$maintClass = 'RefreshLinks';
|
||||
|
|
|
|||
Loading…
Reference in a new issue