addDescription(
			'Populate parser cache with parsoid output. By default, script attempt to run' .
			'for supported content model pages (in a specified batch if provided)'
		);
		$this->addOption( 'force', 'Re-parse pages even if the cached entry seems up to date', false, false );
		$this->addOption( 'start-from', 'Start from this page ID', false, true );
		$this->setBatchSize( 100 );
	}

	/**
	 * Fetch the page store from the service container, remember it on
	 * $this->pageLookup and return it.
	 *
	 * @return PageLookup
	 */
	private function getPageLookup(): PageLookup {
		$this->pageLookup = MediaWikiServices::getInstance()->getPageStore();
		return $this->pageLookup;
	}

	/**
	 * Fetch the revision lookup from the service container, remember it on
	 * $this->revisionLookup and return it.
	 *
	 * @return RevisionLookup
	 */
	private function getRevisionLookup(): RevisionLookup {
		$this->revisionLookup = MediaWikiServices::getInstance()->getRevisionLookup();
		return $this->revisionLookup;
	}

	/**
	 * Fetch the ParsoidOutputAccess service from the service container,
	 * remember it on $this->parsoidOutputAccess and return it.
	 *
	 * @return ParsoidOutputAccess
	 */
	private function getParsoidOutputAccess(): ParsoidOutputAccess {
		$this->parsoidOutputAccess = MediaWikiServices::getInstance()->getParsoidOutputAccess();
		return $this->parsoidOutputAccess;
	}

	/**
	 * Build the base query used to walk the page table: selects page_id
	 * from `page` on a replica connection, ordered by page_id ascending.
	 * Callers add their own WHERE condition and LIMIT.
	 *
	 * @return SelectQueryBuilder
	 */
	private function getQueryBuilder(): SelectQueryBuilder {
		$dbr = $this->getDB( DB_REPLICA );

		return $dbr->newSelectQueryBuilder()
			->select( [ 'page_id' ] )
			->from( 'page' )
			->caller( __METHOD__ )
			->orderBy( 'page_id', SelectQueryBuilder::SORT_ASC );
	}

	/**
	 * Ask ParsoidOutputAccess for the parser output of the given revision,
	 * using anonymous parser options. Lint data logging is always requested;
	 * the force-parse flag is OR-ed in when --force was supplied.
	 *
	 * @param PageIdentity $page
	 * @param RevisionRecord $revision
	 * @return mixed Whatever ParsoidOutputAccess::getParserOutput() returns;
	 *   execute() treats it as a status object and calls isOK() on it.
	 */
	private function parse( PageIdentity $page, RevisionRecord $revision ) {
		// Go through the getter (like every other service access in this class)
		// so this method does not depend on a caller having populated
		// $this->parsoidOutputAccess beforehand.
		return $this->getParsoidOutputAccess()->getParserOutput(
			$page,
			ParserOptions::newFromAnon(),
			$revision,
			$this->forceParse | ParsoidOutputAccess::OPT_LOG_LINT_DATA
		);
	}

	/**
	 * Populate parser cache with parsoid output.
	 *
	 * Walks the page table in ascending page_id order, one batch at a time,
	 * starting at --start-from (0 when not given). Pages that cannot be
	 * loaded, whose latest revision cannot be loaded, or whose main slot
	 * content model is not supported are reported and skipped.
	 *
	 * @return bool Always true.
	 */
	public function execute() {
		$force = $this->getOption( 'force' );
		$startFrom = $this->getOption( 'start-from' );

		if ( $force !== null ) {
			// If --force is supplied, force a parse for supported pages or supported
			// pages in the specified batch.
			$this->forceParse = ParsoidOutputAccess::OPT_FORCE_PARSE;
		}

		// The cast makes the value safe to concatenate into the WHERE clause below.
		$startFrom = (int)$startFrom;

		$this->output( "\nWarming parsoid parser cache with Parsoid output...\n\n" );
		while ( true ) {
			$query = $this->getQueryBuilder()->where( 'page_id >= ' . $startFrom )
				->limit( $this->getBatchSize() );

			$result = $query->fetchResultSet();

			if ( !$result->numRows() ) {
				break;
			}

			// Nominal upper bound only: derived from the batch size, not from the
			// page IDs actually returned by the query.
			$currentBatch = $startFrom + ( $this->getBatchSize() - 1 );
			$this->output( "\n\nBatch: $startFrom - $currentBatch\n----\n" );

			// Look through pages by pageId and populate the parserCache
			foreach ( $result as $row ) {
				$page = $this->getPageLookup()->getPageById( $row->page_id );
				// Advance the cursor before any `continue` so the next batch
				// always resumes after the last row seen.
				$startFrom = ( (int)$row->page_id + 1 );

				if ( $page === null ) {
					$this->output( "\n[Skipped] Page ID: $row->page_id not found.\n" );
					continue;
				}

				$latestRevision = $page->getLatest();
				$revision = $this->getRevisionLookup()->getRevisionById( $latestRevision );
				if ( $revision === null ) {
					// The revision lookup found nothing for the page's latest
					// revision ID; skip the page instead of failing on a null
					// dereference and aborting the whole run.
					$this->output(
						"\n[Skipped] Latest revision $latestRevision of page ID: $row->page_id not found.\n"
					);
					continue;
				}

				$mainSlot = $revision->getSlot( SlotRecord::MAIN );

				// POA will write a dummy output to PC, but we don't want that here. Just skip!
				if ( !$this->getParsoidOutputAccess()->supportsContentModel( $mainSlot->getModel() ) ) {
					$this->output(
						'[Skipped] Content model "' .
						$mainSlot->getModel() .
						"\" not supported for page ID: $row->page_id.\n"
					);
					continue;
				}

				$status = $this->parse( $page, $revision );
				if ( !$status->isOK() ) {
					$this->output(
						__METHOD__ .
						": Error parsing page ID: $row->page_id or writing to parser cache\n"
					);
					continue;
				}

				$this->output( "[Done] Page ID: $row->page_id ✔️\n" );
			}
			$this->waitForReplication();
		}

		$this->output( "\nDone pre-warming parsoid parser cache...\n" );

		return true;
	}
}

$maintClass = PrewarmParsoidParserCache::class;
require_once RUN_MAINTENANCE_IF_MAIN;