assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		$this->config = $config;
		$this->repoGroup = $repoGroup;
		$this->badFileLookup = $badFileLookup;
		$this->hookContainer = $hookContainer;
		$this->contentTransformer = $contentTransformer;
		$this->readOnlyMode = $readOnlyMode;
		$this->linkBatchFactory = $linkBatchFactory;

		$this->hookRunner = new HookRunner( $hookContainer );

		$this->parserFactory = $parserFactory;
		$this->previousPageConfig = null; // ensure we initialize parser options
	}

	/**
	 * Validate the given handler parameters against the file's media handler
	 * and fill in the 'width' and 'targetlang' entries needed to transform
	 * the file.
	 *
	 * @param IPageConfig $pageConfig
	 * @param File $file
	 * @param array $hp Handler parameters; entries rejected by the handler are dropped
	 * @return array The completed parameters, or an empty array when the file
	 *   has no media handler
	 */
	private function makeTransformOptions( IPageConfig $pageConfig, $file, array $hp ): array {
		// Validate the input parameters like Parser::makeImage()
		$handler = $file->getHandler();
		if ( !$handler ) {
			return []; // will get iconThumb()
		}
		foreach ( $hp as $name => $value ) {
			if ( !$handler->validateParam( $name, $value ) ) {
				unset( $hp[$name] );
			}
		}

		// This part is similar to Linker::makeImageLink(). If there is no width,
		// set one based on the source file size.
		$page = $hp['page'] ?? 0;
		if ( !isset( $hp['width'] ) ) {
			if ( isset( $hp['height'] ) && $file->isVectorized() ) {
				// If it's a vector image, and user only specifies height
				// we don't want it to be limited by its "normal" width.
				$hp['width'] = $this->config->get( MainConfigNames::SVGMaxSize );
			} else {
				$hp['width'] = $file->getWidth( $page );
			}

			// We don't need to fill in a default thumbnail width here, since
			// that is done by Parsoid. Parsoid always sets the width parameter
			// for thumbnails.
		}

		// Parser::makeImage() always sets this
		$hp['targetlang'] = LanguageCode::bcp47ToInternal(
			$pageConfig->getPageLanguageBcp47()
		);

		return $hp;
	}

	/** @inheritDoc */
	public function getPageInfo( $pageConfigOrTitle, array $titles ): array {
		// Resolve the title that is handed to the GetLinkColours hook below.
		if ( $pageConfigOrTitle instanceof IPageConfig ) {
			$context_title = Title::newFromLinkTarget(
				$pageConfigOrTitle->getLinkTarget()
			);
		} elseif ( is_string( $pageConfigOrTitle ) ) {
			// Temporary, deprecated.
			$context_title = Title::newFromTextThrow( $pageConfigOrTitle );
		} elseif ( $pageConfigOrTitle instanceof ParsoidLinkTarget ) {
			$context_title = Title::newFromLinkTarget( $pageConfigOrTitle );
		} else {
			throw new UnreachableException( "Bad type for argument 1" );
		}

		$titleObjs = [];
		$pagemap = [];
		$classes = [];
		$ret = [];
		foreach ( $titles as $name ) {
			$t = Title::newFromText( $name );
			// Filter out invalid titles. Title::newFromText in core (not our bespoke
			// version in src/Utils/Title.php) can return null for invalid titles.
			if ( !$t ) {
				// FIXME: This is a bandaid to patch up the fact that Env::makeTitle treats
				// this as a valid title, but Title::newFromText treats it as invalid.
				// T237535
				// This matches what ApiQuery::outputGeneralPageInfo() would
				// return for an invalid title.
				$ret[$name] = [
					'pageId' => -1,
					'revId' => -1,
					'invalid' => true,
					'invalidreason' => 'The requested page title is invalid',
				];
			} else {
				$titleObjs[$name] = $t;
			}
		}

		$linkBatch = $this->linkBatchFactory->newLinkBatch( $titleObjs );
		$linkBatch->setCaller( __METHOD__ );
		$linkBatch->execute();

		foreach ( $titleObjs as $obj ) {
			$pdbk = $obj->getPrefixedDBkey();
			$pagemap[$obj->getArticleID()] = $pdbk;
			$classes[$pdbk] = $obj->isRedirect() ? 'mw-redirect' : '';
		}
		$this->hookRunner->onGetLinkColours(
			# $classes is passed by reference and mutated
			$pagemap,
			$classes,
			$context_title
		);

		foreach ( $titleObjs as $name => $obj ) {
			/** @var Title $obj */
			$pdbk = $obj->getPrefixedDBkey();
			// Split the whitespace-separated class string into a list,
			// dropping empty entries.
			$c = preg_split(
				'/\s+/', $classes[$pdbk] ?? '', -1, PREG_SPLIT_NO_EMPTY
			);
			$ret[$name] = [
				'pageId' => $obj->getArticleID(),
				'revId' => $obj->getLatestRevID(),
				'missing' => !$obj->exists(),
				'known' => $obj->isKnown(),
				'redirect' => $obj->isRedirect(),
				'linkclasses' => $c, # See ApiQueryInfo::getLinkClasses() in core
			];
		}

		return $ret;
	}

	/** @inheritDoc */
	public function getFileInfo( IPageConfig $pageConfig, array $files ): array {
		$page = Title::newFromLinkTarget( $pageConfig->getLinkTarget() );

		// Collect every requested file name so they can be looked up in a
		// single findFiles() call.
		$keys = [];
		foreach ( $files as $f ) {
			$keys[] = $f[0];
		}
		$fileObjs = $this->repoGroup->findFiles( $keys );

		$ret = [];
		foreach ( $files as $f ) {
			$filename = $f[0];
			$dims = $f[1];

			/** @var File $file */
			$file = $fileObjs[$filename] ?? null;
			if ( !$file ) {
				// Keep the result list positionally aligned with $files.
				$ret[] = null;
				continue;
			}

			// See Linker::makeImageLink; 'page' is a key in $handlerParams
			// core uses 'false' as the default then casts to (int) => 0
			$pageNum = $dims['page'] ?? 0;

			$result = [
				'width' => $file->getWidth( $pageNum ),
				'height' => $file->getHeight( $pageNum ),
				'size' => $file->getSize(),
				'mediatype' => $file->getMediaType(),
				'mime' => $file->getMimeType(),
				'url' => $file->getFullUrl(),
				'mustRender' => $file->mustRender(),
				'badFile' => $this->badFileLookup->isBadFile( $filename, $page ),
				'timestamp' => $file->getTimestamp(),
				'sha1' => $file->getSha1(),
			];

			$length = $file->getLength();
			if ( $length ) {
				$result['duration'] = (float)$length;
			}

			// The 'seek' dimension is passed to the transform as 'thumbtime'.
			if ( isset( $dims['seek'] ) ) {
				$dims['thumbtime'] = $dims['seek'];
			}

			$txopts = $this->makeTransformOptions( $pageConfig, $file, $dims );
			$mto = $file->transform( $txopts );
			if ( $mto ) {
				if ( $mto->isError() && $mto instanceof MediaTransformError ) {
					$result['thumberror'] = $mto->toText();
				} else {
					if ( $txopts ) {
						// Do srcset scaling
						Linker::processResponsiveImages( $file, $mto, $txopts );
						if ( count( $mto->responsiveUrls ) ) {
							$result['responsiveUrls'] = [];
							foreach ( $mto->responsiveUrls as $density => $url ) {
								$result['responsiveUrls'][$density] = $url;
							}
						}
					}

					// Proposed MediaTransformOutput serialization method for T51896 etc.
					// Note that getAPIData(['fullurl']) would return
					// wfExpandUrl(), which wouldn't respect the wiki's
					// protocol preferences -- instead it would use the
					// protocol used for the API request.
					if ( is_callable( [ $mto, 'getAPIData' ] ) ) {
						$result['thumbdata'] = $mto->getAPIData( [ 'withhash' ] );
					}

					$result['thumburl'] = $mto->getUrl();
					$result['thumbwidth'] = $mto->getWidth();
					$result['thumbheight'] = $mto->getHeight();
				}
			} else {
				$result['thumberror'] = "Presumably, invalid parameters, despite validation.";
			}

			$ret[] = $result;
		}

		return $ret;
	}

	/**
	 * Prepare MediaWiki's parser for preprocessing or extension tag parsing,
	 * clearing its state if necessary.
	 *
	 * @param IPageConfig $pageConfig
	 * @param int $outputType
	 * @return Parser
	 */
	private function prepareParser( IPageConfig $pageConfig, int $outputType ) {
		'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig

		// Clear the state only when the PageConfig changes, so that Parser's internal caches can
		// be retained. This should also provide better compatibility with extension tags.
		$clearState = $this->previousPageConfig !== $pageConfig;
		$this->previousPageConfig = $pageConfig;

		// Use the same legacy parser object for all calls to extension tag
		// processing, for greater compatibility.
		$this->parser ??= $this->parserFactory->create();
		$this->parser->startExternalParse(
			Title::newFromLinkTarget( $pageConfig->getLinkTarget() ),
			$pageConfig->getParserOptions(),
			$outputType,
			$clearState,
			$pageConfig->getRevisionId()
		);
		$this->parser->resetOutput();

		// Retain a PPFrame object between preprocess requests since it contains
		// some useful caches.
		if ( $clearState ) {
			$this->ppFrame = $this->parser->getPreprocessor()->newFrame();
		}

		return $this->parser;
	}

	/** @inheritDoc */
	public function doPst( IPageConfig $pageConfig, string $wikitext ): string {
		'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig

		// This could use prepareParser(), but it's only called once per page,
		// so it's not essential.
		$titleObj = Title::newFromLinkTarget( $pageConfig->getLinkTarget() );
		$user = $pageConfig->getParserOptions()->getUserIdentity();
		$content = ContentHandler::makeContent( $wikitext, $titleObj, CONTENT_MODEL_WIKITEXT );

		return $this->contentTransformer->preSaveTransform(
			$content,
			$titleObj,
			$user,
			$pageConfig->getParserOptions()
		)->serialize();
	}

	/** @inheritDoc */
	public function parseWikitext(
		IPageConfig $pageConfig,
		ContentMetadataCollector $metadata,
		string $wikitext
	): string {
		$parser = $this->prepareParser( $pageConfig, Parser::OT_HTML );
		$html = $parser->parseExtensionTagAsTopLevelDoc( $wikitext );

		// XXX: Ideally we will eventually have the legacy parser use our
		// ContentMetadataCollector instead of having a new ParserOutput
		// created (implicitly in ::prepareParser()/Parser::resetOutput() )
		// which we then have to manually merge.
		$out = $parser->getOutput();
		$out->setRawText( $html );
		$out->collectMetadata( $metadata ); # merges $out into $metadata

		return Parser::extractBody( $out->getRawText() );
	}

	/** @inheritDoc */
	public function preprocessWikitext(
		IPageConfig $pageConfig,
		ContentMetadataCollector $metadata,
		string $wikitext
	): string {
		$parser = $this->prepareParser( $pageConfig, Parser::OT_PREPROCESS );
		$this->hookRunner->onParserBeforePreprocess(
			# $wikitext is passed by reference and mutated
			$parser,
			$wikitext,
			$parser->getStripState()
		);
		$wikitext = $parser->replaceVariables( $wikitext, $this->ppFrame );

		// FIXME (T289545): StripState markers protect content that needs to be protected from further
		// "wikitext processing". So, where the result has strip state markers, we actually
		// need to tunnel this content through rather than unwrap and let it go through the
		// rest of the parsoid pipeline. For example, some parser functions might return HTML
		// not wikitext, and where the content might contain wikitext characters, we are now
		// going to potentially mangle that output.
		$wikitext = $parser->getStripState()->unstripBoth( $wikitext );

		// XXX: Ideally we will eventually have the legacy parser use our
		// ContentMetadataCollector instead of having a new ParserOutput
		// created (implicitly in ::prepareParser()/Parser::resetOutput() )
		// which we then have to manually merge.
		$out = $parser->getOutput();
		$out->collectMetadata( $metadata ); # merges $out into $metadata

		return $wikitext;
	}

	/** @inheritDoc */
	public function fetchTemplateSource(
		IPageConfig $pageConfig,
		$title
	): ?IPageContent {
		'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig

		if ( is_string( $title ) ) {
			$titleObj = Title::newFromTextThrow( $title );
		} else {
			$titleObj = Title::newFromLinkTarget( $title );
		}

		// Use the PageConfig to take advantage of custom template
		// fetch hooks like FlaggedRevisions, etc.
		$revRecord = $pageConfig->fetchRevisionRecordOfTemplate( $titleObj );

		return $revRecord ? new PageContent( $revRecord ) : null;
	}

	/** @inheritDoc */
	public function fetchTemplateData( IPageConfig $pageConfig, $title ): ?array {
		$ret = [];

		// The hook is keyed by prefixed title text, so normalize non-string
		// titles to that form first.
		if ( !is_string( $title ) ) {
			$titleObj = Title::newFromLinkTarget( $title );
			$title = $titleObj->getPrefixedText();
		}

		// @todo: This hook needs some clean up: T304899
		$this->hookRunner->onParserFetchTemplateData(
			[ $title ],
			$ret # value returned by reference
		);

		// Cast value to array since the hook returns this as a stdclass
		$tplData = $ret[$title] ?? null;
		if ( $tplData ) {
			// Deep convert to associative array
			$tplData = json_decode( json_encode( $tplData ), true );
		}

		return $tplData;
	}

	/** @inheritDoc */
	public function logLinterData( IPageConfig $pageConfig, array $lints ): void {
		// Nothing is sent to the hook while the wiki is read-only.
		if ( $this->readOnlyMode->isReadOnly() ) {
			return;
		}

		$revId = $pageConfig->getRevisionId();
		$title = Title::newFromLinkTarget( $pageConfig->getLinkTarget() )->getPrefixedText();
		$pageInfo = $this->getPageInfo( $pageConfig, [ $title ] );
		$latest = $pageInfo[$title]['revId'];

		// Only send the request if it is the latest revision
		if ( $revId !== null && $revId === $latest ) {
			$this->hookRunner->onParserLogLinterData(
				$title,
				$revId,
				$lints
			);
		}
	}
}