This does not include a `use` of MediaWiki\Maintenance\Maintenance, on the assumption that the maintenance scripts are going into the same namespace. Change-Id: I488f95b537ce86eb5e463be7bce3653610dd13d9
147 lines
4.1 KiB
PHP
147 lines
4.1 KiB
PHP
<?php
|
|
// phpcs:disable MediaWiki.Files.ClassMatchesFilename.NotMatch
|
|
use MediaWiki\Content\TextContent;
|
|
use MediaWiki\Language\Language;
|
|
use MediaWiki\Page\WikiPageFactory;
|
|
use MediaWiki\Revision\RevisionRecord;
|
|
use MediaWiki\Title\Title;
|
|
use MediaWiki\WikiMap\WikiMap;
|
|
use Wikimedia\Rdbms\IExpression;
|
|
use Wikimedia\Rdbms\LikeValue;
|
|
|
|
// @codeCoverageIgnoreStart
// Load the sibling Maintenance.php before the script class below is declared.
require_once __DIR__ . '/Maintenance.php';
// @codeCoverageIgnoreEnd
|
|
|
|
/**
 * Search pages for a given regex
 *
 * For every page selected by the optional --prefix filters, the page text is
 * tested against the regex. By default each matching line is printed as
 * "title:line number:line"; with --pages-with-matches only the title of each
 * page containing a match is printed.
 *
 * @ingroup Maintenance
 */
class GrepPages extends Maintenance {
	/** @var Language Content language, used to resolve namespace names in prefixes */
	private $contLang;

	/** @var WikiPageFactory Factory used to build a page object for each selected row */
	private $wikiPageFactory;

	/**
	 * Declare the script description, its options and the positional regex argument.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Search the source text of pages for lines matching ' .
			'a given regex, and print the lines.' );
		$this->addOption( 'prefix',
			'Title prefix. Can be specified more than once. ' .
			'Use e.g. --prefix=Talk: to search an entire namespace.',
			false, true, false, true );
		$this->addOption( 'show-wiki', 'Add the wiki ID to the output' );
		$this->addOption( 'pages-with-matches',
			'Suppress normal output; instead print the title of each page ' .
			'from which output would normally have been printed.',
			false, false, 'l' );
		$this->addArg( 'regex', 'The regex to search for' );
	}

	/**
	 * Fetch the services this script relies on from the service container.
	 */
	private function init() {
		$services = $this->getServiceContainer();
		$this->contLang = $services->getContentLanguage();
		$this->wikiPageFactory = $services->getWikiPageFactory();
	}

	/**
	 * Run the search and print the matches to standard output.
	 *
	 * Pages without content, or whose content is not a TextContent, are
	 * reported through error() and skipped. An invalid regex aborts the run
	 * before any page is read.
	 */
	public function execute() {
		$this->init();

		$showWiki = $this->getOption( 'show-wiki' );
		$wikiId = WikiMap::getCurrentWikiId();
		$prefix = $this->getOption( 'prefix' );
		$regex = $this->getArg( 0 );
		$titleOnly = $this->hasOption( 'pages-with-matches' );

		// A pattern that starts with "/" is used as given (it is taken to carry
		// its own delimiters); anything else is wrapped in {...} delimiters.
		if ( ( $regex[0] ?? '' ) === '/' ) {
			$delimRegex = $regex;
		} else {
			$delimRegex = '{' . $regex . '}';
		}

		// Fail once, up front, instead of letting preg_match() fail on every
		// page or line and report "no matches" for a pattern that never compiled.
		if ( !StringUtils::isValidPCRERegex( $delimRegex ) ) {
			$this->fatalError( "Invalid regex: $regex" );
		}

		// Every output line optionally starts with the wiki ID and a tab.
		$outputPrefix = $showWiki ? "$wikiId\t" : '';

		foreach ( $this->findPages( $prefix ) as $page ) {
			$content = $page->getContent( RevisionRecord::RAW );
			$titleText = $page->getTitle()->getPrefixedDBkey();
			if ( !$content ) {
				$this->error( "Page has no content: $titleText" );
				continue;
			}
			if ( !$content instanceof TextContent ) {
				$this->error( "Page has a non-text content model: $titleText" );
				continue;
			}

			$text = $content->getText();

			if ( $titleOnly ) {
				// Title-only mode matches against the whole text at once.
				if ( preg_match( $delimRegex, $text ) ) {
					echo "$outputPrefix$titleText\n";
				}
				continue;
			}

			foreach ( StringUtils::explode( "\n", $text ) as $index => $line ) {
				if ( preg_match( $delimRegex, $line ) ) {
					// Keys are zero-based; report one-based line numbers.
					$lineNum = $index + 1;
					echo "$outputPrefix$titleText:$lineNum:$line\n";
				}
			}
		}
	}

	/**
	 * Lazily iterate over the pages whose titles start with one of the prefixes.
	 *
	 * Rows are read from the replica database in batches of 200, ordered by
	 * page_id so that the "page_id > last seen ID" continuation condition
	 * cannot skip rows.
	 *
	 * @param string[]|null $prefixes Title prefixes, each optionally starting
	 *   with a namespace name and a colon. Spaces are treated as underscores.
	 *   A prefix whose text before the colon is not a known namespace name is
	 *   treated as a main-namespace title prefix. Null selects every page.
	 * @return Generator Yields the page object that
	 *   WikiPageFactory::newFromTitle() returns for each selected row
	 */
	public function findPages( $prefixes = null ) {
		$dbr = $this->getReplicaDB();
		$orConds = [];
		foreach ( $prefixes ?? [] as $prefix ) {
			// page_title holds DB keys, which use underscores rather than spaces.
			$prefix = strtr( $prefix, ' ', '_' );

			$ns = NS_MAIN;
			$prefixDBkey = $prefix;
			$colonPos = strpos( $prefix, ':' );
			if ( $colonPos !== false ) {
				$nsIndex = $this->contLang->getNsIndex( substr( $prefix, 0, $colonPos ) );
				// getNsIndex() returns false when the text is not a namespace
				// name; in that case keep the whole prefix as a main-namespace
				// title rather than comparing page_namespace against false.
				if ( $nsIndex !== false ) {
					$ns = $nsIndex;
					$prefixDBkey = substr( $prefix, $colonPos + 1 );
				}
			}

			$prefixExpr = $dbr->expr( 'page_namespace', '=', $ns );
			// An empty title part (e.g. "Talk:") selects the entire namespace.
			if ( $prefixDBkey !== '' ) {
				$prefixExpr = $prefixExpr->and(
					'page_title',
					IExpression::LIKE,
					new LikeValue( $prefixDBkey, $dbr->anyString() )
				);
			}
			$orConds[] = $prefixExpr;
		}

		$lastId = 0;
		do {
			$res = $dbr->newSelectQueryBuilder()
				->queryInfo( WikiPage::getQueryInfo() )
				->where( $orConds ? $dbr->orExpr( $orConds ) : [] )
				->andWhere( $dbr->expr( 'page_id', '>', $lastId ) )
				// Without an explicit order the batch may come back in any
				// order, so the last row's ID would not be a safe cursor.
				->orderBy( 'page_id' )
				->limit( 200 )
				->caller( __METHOD__ )
				->fetchResultSet();
			foreach ( $res as $row ) {
				$title = Title::newFromRow( $row );
				yield $this->wikiPageFactory->newFromTitle( $title );
				$lastId = (int)$row->page_id;
			}
			// An empty batch means the previous one was the last.
		} while ( $res->numRows() );
	}
}
|
|
|
|
// @codeCoverageIgnoreStart
// Name this script's class, then include the file that the
// RUN_MAINTENANCE_IF_MAIN constant (defined outside this file) points to.
$maintClass = GrepPages::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd