wiki.techinc.nl/maintenance/prewarmParsoidParserCache.php

158 lines
4.4 KiB
PHP
Raw Normal View History

<?php
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Page\PageLookup;
use MediaWiki\Parser\Parsoid\ParsoidOutputAccess;
use MediaWiki\Revision\RevisionLookup;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;
use Wikimedia\Rdbms\SelectQueryBuilder;
require_once __DIR__ . '/Maintenance.php';
/**
* Maintenance script for populating parser cache with parsoid output.
*
* @since 1.41
*
* @license GPL-2.0-or-later
* @author Richika Rana
*/
class PrewarmParsoidParserCache extends Maintenance {
	/** @var int Flag bits OR-ed into the options passed to getParserOutput(); 0 unless --force is given */
	private int $forceParse = 0;

	/** @var ParsoidOutputAccess|null Fetched on first use by getParsoidOutputAccess() */
	private ?ParsoidOutputAccess $parsoidOutputAccess = null;

	/** @var PageLookup|null Fetched on first use by getPageLookup() */
	private ?PageLookup $pageLookup = null;

	/** @var RevisionLookup|null Fetched on first use by getRevisionLookup() */
	private ?RevisionLookup $revisionLookup = null;

	/**
	 * Declare the script description, its options and the default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription(
			'Populate parser cache with parsoid output. By default, script attempt to run ' .
			'for supported content model pages (in a specified batch if provided)'
		);
		$this->addOption(
			'force',
			'Re-parse pages even if the cached entry seems up to date',
			false,
			false
		);
		$this->addOption( 'start-from', 'Start from this page ID', false, true );
		$this->setBatchSize( 100 );
	}

	/**
	 * Get the page lookup service, fetching it from the service container only once.
	 *
	 * @return PageLookup
	 */
	private function getPageLookup(): PageLookup {
		$this->pageLookup ??= MediaWikiServices::getInstance()->getPageStore();
		return $this->pageLookup;
	}

	/**
	 * Get the revision lookup service, fetching it from the service container only once.
	 *
	 * @return RevisionLookup
	 */
	private function getRevisionLookup(): RevisionLookup {
		$this->revisionLookup ??= MediaWikiServices::getInstance()->getRevisionLookup();
		return $this->revisionLookup;
	}

	/**
	 * Get the Parsoid output access service, fetching it from the service container only once.
	 *
	 * @return ParsoidOutputAccess
	 */
	private function getParsoidOutputAccess(): ParsoidOutputAccess {
		$this->parsoidOutputAccess ??= MediaWikiServices::getInstance()->getParsoidOutputAccess();
		return $this->parsoidOutputAccess;
	}

	/**
	 * Build the base query: page IDs from the `page` table in ascending order, read
	 * from a replica. Callers add the lower bound and the limit.
	 *
	 * @return SelectQueryBuilder
	 */
	private function getQueryBuilder(): SelectQueryBuilder {
		$dbr = $this->getDB( DB_REPLICA );
		return $dbr->newSelectQueryBuilder()
			->select( [ 'page_id' ] )
			->from( 'page' )
			->caller( __METHOD__ )
			->orderBy( 'page_id', SelectQueryBuilder::SORT_ASC );
	}

	/**
	 * Request Parsoid output for one revision of a page, using anonymous parser options.
	 *
	 * @param PageIdentity $page
	 * @param RevisionRecord $revision
	 * @return mixed Whatever ParsoidOutputAccess::getParserOutput() returns; execute()
	 *  relies only on its isOK() method (presumably a Status - confirm against that API).
	 */
	private function parse(
		PageIdentity $page,
		RevisionRecord $revision
	) {
		// Go through the getter rather than the raw property so this method does not
		// depend on some earlier call having initialised the service.
		return $this->getParsoidOutputAccess()->getParserOutput(
			$page,
			ParserOptions::newFromAnon(),
			$revision,
			$this->forceParse | ParsoidOutputAccess::OPT_LOG_LINT_DATA
		);
	}

	/**
	 * Populate parser cache with parsoid output.
	 *
	 * Walks the `page` table in ascending page ID order, one batch at a time, starting
	 * at --start-from (0 when omitted). Pages that cannot be loaded, have no loadable
	 * latest revision, or use an unsupported content model are reported and skipped.
	 *
	 * @return bool Always true once every batch has been processed.
	 */
	public function execute() {
		if ( $this->hasOption( 'force' ) ) {
			// If --force is supplied, force a parse for supported pages or supported
			// pages in the specified batch.
			$this->forceParse = ParsoidOutputAccess::OPT_FORCE_PARSE;
		}

		// The int cast also makes the value safe to concatenate into the WHERE clause below.
		$startFrom = (int)$this->getOption( 'start-from' );
		$batchSize = $this->getBatchSize();

		$pageLookup = $this->getPageLookup();
		$revisionLookup = $this->getRevisionLookup();
		$parsoidOutputAccess = $this->getParsoidOutputAccess();

		$this->output( "\nWarming parsoid parser cache with Parsoid output...\n\n" );
		while ( true ) {
			$result = $this->getQueryBuilder()
				->where( 'page_id >= ' . $startFrom )
				->limit( $batchSize )
				->fetchResultSet();

			if ( !$result->numRows() ) {
				break;
			}

			// Nominal range only: it assumes consecutive IDs, while the rows actually
			// fetched are the next $batchSize existing pages at or above $startFrom.
			$currentBatch = $startFrom + ( $batchSize - 1 );
			$this->output( "\n\nBatch: $startFrom - $currentBatch\n----\n" );

			// Look through pages by pageId and populate the parserCache
			foreach ( $result as $row ) {
				$pageId = (int)$row->page_id;
				// Advance the cursor first so a skipped page is never fetched again.
				$startFrom = $pageId + 1;

				$page = $pageLookup->getPageById( $pageId );
				if ( $page === null ) {
					$this->output( "\n[Skipped] Page ID: $pageId not found.\n" );
					continue;
				}

				$revision = $revisionLookup->getRevisionById( $page->getLatest() );
				if ( $revision === null ) {
					// The lookup may come back empty; skip the page instead of
					// calling getSlot() on null.
					$this->output( "[Skipped] Latest revision not found for page ID: $pageId.\n" );
					continue;
				}

				$mainSlot = $revision->getSlot( SlotRecord::MAIN );

				// POA will write a dummy output to PC, but we don't want that here. Just skip!
				if ( !$parsoidOutputAccess->supportsContentModel( $mainSlot->getModel() ) ) {
					$this->output(
						'[Skipped] Content model "' .
						$mainSlot->getModel() .
						"\" not supported for page ID: $pageId.\n"
					);
					continue;
				}

				$status = $this->parse( $page, $revision );
				if ( !$status->isOK() ) {
					$this->output(
						__METHOD__ .
						": Error parsing page ID: $pageId or writing to parser cache\n"
					);
					continue;
				}

				$this->output( "[Done] Page ID: $pageId ✔️\n" );
			}

			$this->waitForReplication();
		}

		$this->output( "\nDone pre-warming parsoid parser cache...\n" );
		return true;
	}
}
// Script bootstrap: publish this class name in $maintClass, then include the file
// named by RUN_MAINTENANCE_IF_MAIN. That constant is not defined in this file;
// presumably it comes from the Maintenance.php include above and the included file
// reads $maintClass to run the script - confirm before reordering or renaming.
$maintClass = PrewarmParsoidParserCache::class;
require_once RUN_MAINTENANCE_IF_MAIN;