And start using them instead of wfGetDB(), LB/LBF connection methods or worse, $this->getDB(). $this->getDB() reuses the database object regardless of whether you're calling a replica or primary, leading to returning a replica on a primary and other way around. Bug: T330641 Change-Id: I9e2cf85ca277022284fc26b9f37db57bd12aaa81
180 lines
5.3 KiB
PHP
180 lines
5.3 KiB
PHP
<?php
|
|
/**
|
|
* Send purge requests for listed pages to CDN
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*
|
|
* @file
|
|
* @ingroup Maintenance
|
|
*/
|
|
|
|
use MediaWiki\Title\Title;
|
|
|
|
require_once __DIR__ . '/Maintenance.php';
|
|
|
|
/**
|
|
* Maintenance script that sends purge requests for listed pages to CDN.
|
|
*
|
|
* @ingroup Maintenance
|
|
*/
|
|
class PurgeList extends Maintenance {
|
|
/** @var string|null */
|
|
private $namespaceId;
|
|
/** @var bool */
|
|
private $allNamespaces;
|
|
/** @var bool */
|
|
private $doDbTouch;
|
|
/** @var int */
|
|
private $delay;
|
|
|
|
public function __construct() {
|
|
parent::__construct();
|
|
$this->addDescription( "Send purge requests for listed pages to CDN.\n"
|
|
. "By default this expects a list of URLs or page names from STDIN. "
|
|
. "To query the database for input, use --namespace or --all-namespaces instead."
|
|
);
|
|
$this->addOption( 'namespace', 'Purge pages with this namespace number', false, true );
|
|
$this->addOption( 'all-namespaces', 'Purge pages in all namespaces', false, false );
|
|
$this->addOption( 'db-touch',
|
|
"Update the page.page_touched database field.\n"
|
|
. "This is only considered when purging by title, not when purging by namespace or URL.",
|
|
false,
|
|
false
|
|
);
|
|
$this->addOption( 'delay', 'Number of seconds to delay between each purge', false, true );
|
|
$this->addOption( 'verbose', 'Show more output', false, false, 'v' );
|
|
$this->setBatchSize( 100 );
|
|
}
|
|
|
|
public function execute() {
|
|
$this->namespaceId = $this->getOption( 'namespace' );
|
|
$this->allNamespaces = $this->hasOption( 'all-namespaces' );
|
|
$this->doDbTouch = $this->hasOption( 'db-touch' );
|
|
$this->delay = intval( $this->getOption( 'delay', '0' ) );
|
|
|
|
if ( $this->allNamespaces ) {
|
|
$this->purgeNamespace( false );
|
|
} elseif ( $this->namespaceId !== null ) {
|
|
$this->purgeNamespace( intval( $this->namespaceId ) );
|
|
} else {
|
|
$this->doPurge();
|
|
}
|
|
$this->output( "Done!\n" );
|
|
}
|
|
|
|
/**
|
|
* Purge URL coming from stdin
|
|
*/
|
|
private function doPurge() {
|
|
$stdin = $this->getStdin();
|
|
$urls = [];
|
|
$htmlCacheUpdater = $this->getServiceContainer()->getHtmlCacheUpdater();
|
|
|
|
while ( !feof( $stdin ) ) {
|
|
$page = trim( fgets( $stdin ) );
|
|
if ( preg_match( '%^https?://%', $page ) ) {
|
|
$urls[] = $page;
|
|
} elseif ( $page !== '' ) {
|
|
$title = Title::newFromText( $page );
|
|
if ( $title ) {
|
|
$newUrls = $htmlCacheUpdater->getUrls( $title );
|
|
|
|
foreach ( $newUrls as $url ) {
|
|
$this->output( "$url\n" );
|
|
}
|
|
|
|
$urls = array_merge( $urls, $newUrls );
|
|
|
|
if ( $this->doDbTouch ) {
|
|
$title->invalidateCache();
|
|
}
|
|
} else {
|
|
$this->output( "(Invalid title '$page')\n" );
|
|
}
|
|
}
|
|
}
|
|
$this->output( "Purging " . count( $urls ) . " urls\n" );
|
|
$this->sendPurgeRequest( $urls );
|
|
}
|
|
|
|
/**
|
|
* Purge a namespace or all pages
|
|
*
|
|
* @param int|bool $namespace
|
|
*/
|
|
private function purgeNamespace( $namespace = false ) {
|
|
if ( $this->doDbTouch ) {
|
|
// NOTE: If support for this is added in the future,
|
|
// it MUST NOT be allowed when $wgMiserMode is enabled.
|
|
// Change this to a check and error about instead! (T263957)
|
|
$this->fatalError( 'The --db-touch option is not supported when purging by namespace.' );
|
|
}
|
|
|
|
$dbr = $this->getReplicaDB();
|
|
$htmlCacheUpdater = $this->getServiceContainer()->getHtmlCacheUpdater();
|
|
$startId = 0;
|
|
if ( $namespace === false ) {
|
|
$conds = [];
|
|
} else {
|
|
$conds = [ 'page_namespace' => $namespace ];
|
|
}
|
|
while ( true ) {
|
|
$res = $dbr->newSelectQueryBuilder()
|
|
->select( [ 'page_id', 'page_namespace', 'page_title' ] )
|
|
->from( 'page' )
|
|
->where( $conds )
|
|
->andWhere( $dbr->expr( 'page_id', '>', $startId ) )
|
|
->orderBy( 'page_id' )
|
|
->limit( $this->getBatchSize() )
|
|
->caller( __METHOD__ )->fetchResultSet();
|
|
if ( !$res->numRows() ) {
|
|
break;
|
|
}
|
|
$urls = [];
|
|
foreach ( $res as $row ) {
|
|
$title = Title::makeTitle( $row->page_namespace, $row->page_title );
|
|
$urls = array_merge( $urls, $htmlCacheUpdater->getUrls( $title ) );
|
|
$startId = $row->page_id;
|
|
}
|
|
$this->sendPurgeRequest( $urls );
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Helper to purge an array of $urls
|
|
* @param array $urls List of URLS to purge from CDNs
|
|
*/
|
|
private function sendPurgeRequest( $urls ) {
|
|
$hcu = $this->getServiceContainer()->getHtmlCacheUpdater();
|
|
if ( $this->delay > 0 ) {
|
|
foreach ( $urls as $url ) {
|
|
if ( $this->hasOption( 'verbose' ) ) {
|
|
$this->output( $url . "\n" );
|
|
}
|
|
$hcu->purgeUrls( $url, $hcu::PURGE_NAIVE );
|
|
sleep( $this->delay );
|
|
}
|
|
} else {
|
|
if ( $this->hasOption( 'verbose' ) ) {
|
|
$this->output( implode( "\n", $urls ) . "\n" );
|
|
}
|
|
$hcu->purgeUrls( $urls, $hcu::PURGE_NAIVE );
|
|
}
|
|
}
|
|
}
|
|
|
|
$maintClass = PurgeList::class;
|
|
require_once RUN_MAINTENANCE_IF_MAIN;
|