This feels like a no-brainer unless I'm missing something obvious Bug: T321882 Change-Id: Id49c3d0dd6ea4593211048850856b5b8e05a8fb3
446 lines
14 KiB
PHP
446 lines
14 KiB
PHP
<?php
|
|
/**
|
|
* Copyright (C) 2011-2022 Wikimedia Foundation and others.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*/
|
|
|
|
namespace MediaWiki\Parser\Parsoid\Config;
|
|
|
|
use ContentHandler;
|
|
use File;
|
|
use LinkBatch;
|
|
use MediaTransformError;
|
|
use MediaWiki\BadFileLookup;
|
|
use MediaWiki\Config\ServiceOptions;
|
|
use MediaWiki\Content\Transform\ContentTransformer;
|
|
use MediaWiki\HookContainer\HookContainer;
|
|
use MediaWiki\HookContainer\HookRunner;
|
|
use MediaWiki\Linker\Linker;
|
|
use MediaWiki\MainConfigNames;
|
|
use Parser;
|
|
use ParserFactory;
|
|
use ReadOnlyMode;
|
|
use RepoGroup;
|
|
use Title;
|
|
use Wikimedia\Parsoid\Config\DataAccess as IDataAccess;
|
|
use Wikimedia\Parsoid\Config\PageConfig as IPageConfig;
|
|
use Wikimedia\Parsoid\Config\PageContent as IPageContent;
|
|
use Wikimedia\Parsoid\Core\ContentMetadataCollector;
|
|
|
|
/**
|
|
* Implement Parsoid's abstract class for data access.
|
|
*
|
|
* @since 1.39
|
|
*/
|
|
class DataAccess extends IDataAccess {

	/** @var RepoGroup Used to look up File objects by name */
	private $repoGroup;

	/** @var BadFileLookup Used to flag files as "bad" in the context of a page */
	private $badFileLookup;

	/** @var HookContainer Container the HookRunner below is built from */
	private $hookContainer;

	/** @var HookRunner Runs the core hooks fired by this class */
	private $hookRunner;

	/** @var ContentTransformer Performs the pre-save transform in doPst() */
	private $contentTransformer;

	/** @var Parser Single legacy parser instance shared by all calls */
	private $parser;

	/**
	 * @var \PPFrame Preprocessor frame kept between preprocess requests;
	 *   replaced whenever prepareParser() clears the parser state.
	 */
	private $ppFrame;

	/**
	 * @var ?PageConfig PageConfig seen by the most recent prepareParser() call,
	 *   or null if prepareParser() has not been called yet.
	 */
	private $previousPageConfig;

	/**
	 * Configuration keys that the ServiceOptions object handed to the
	 * constructor is required to provide.
	 */
	public const CONSTRUCTOR_OPTIONS = [
		MainConfigNames::SVGMaxSize,
	];

	/** @var ServiceOptions */
	private $config;

	/** @var ReadOnlyMode Consulted before logging lint data */
	private $readOnlyMode;

	/**
	 * @param ServiceOptions $config MediaWiki main configuration object
	 * @param RepoGroup $repoGroup
	 * @param BadFileLookup $badFileLookup
	 * @param HookContainer $hookContainer
	 * @param ContentTransformer $contentTransformer
	 * @param ReadOnlyMode $readOnlyMode used to disable linting when the
	 *   database is read-only.
	 * @param ParserFactory $parserFactory A legacy parser factory,
	 *   for PST/preprocessing/extension handling
	 */
	public function __construct(
		ServiceOptions $config,
		RepoGroup $repoGroup,
		BadFileLookup $badFileLookup,
		HookContainer $hookContainer,
		ContentTransformer $contentTransformer,
		ReadOnlyMode $readOnlyMode,
		ParserFactory $parserFactory
	) {
		$config->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		$this->config = $config;
		$this->repoGroup = $repoGroup;
		$this->badFileLookup = $badFileLookup;
		$this->hookContainer = $hookContainer;
		$this->contentTransformer = $contentTransformer;
		$this->readOnlyMode = $readOnlyMode;

		$this->hookRunner = new HookRunner( $hookContainer );

		// Use the same legacy parser object for all calls to extension tag
		// processing, for greater compatibility.
		$this->parser = $parserFactory->create();
		$this->previousPageConfig = null; // ensure we initialize parser options
	}

	/**
	 * Build the parameter array handed to File::transform() for one file.
	 *
	 * Parameters rejected by the file's media handler are dropped, a width
	 * is filled in when the caller did not supply one, and 'targetlang' is
	 * always set to the page language.
	 *
	 * @param IPageConfig $pageConfig
	 * @param File $file
	 * @param array $hp Handler parameters requested by the caller
	 * @return array The validated parameters, or an empty array when the
	 *   file has no media handler.
	 */
	private function makeTransformOptions( IPageConfig $pageConfig, $file, array $hp ): array {
		// Validate the input parameters like Parser::makeImage()
		$handler = $file->getHandler();
		if ( !$handler ) {
			return []; // will get iconThumb()
		}
		foreach ( $hp as $name => $value ) {
			if ( !$handler->validateParam( $name, $value ) ) {
				unset( $hp[$name] );
			}
		}

		// This part is similar to Linker::makeImageLink(). If there is no width,
		// set one based on the source file size.
		$page = $hp['page'] ?? 0;
		if ( !isset( $hp['width'] ) ) {
			if ( isset( $hp['height'] ) && $file->isVectorized() ) {
				// If it's a vector image, and user only specifies height
				// we don't want it to be limited by its "normal" width.
				$hp['width'] = $this->config->get( MainConfigNames::SVGMaxSize );
			} else {
				$hp['width'] = $file->getWidth( $page );
			}

			// We don't need to fill in a default thumbnail width here, since
			// that is done by Parsoid. Parsoid always sets the width parameter
			// for thumbnails.
		}

		// Parser::makeImage() always sets this
		$hp['targetlang'] = $pageConfig->getPageLanguage();

		return $hp;
	}

	/**
	 * Collect page IDs, revision IDs, existence flags and link classes for
	 * a batch of title strings. The result is keyed by the title strings as
	 * given; invalid titles get a placeholder entry marked 'invalid'.
	 *
	 * @inheritDoc
	 */
	public function getPageInfo( IPageConfig $pageConfig, array $titles ): array {
		$titleObjs = [];
		$pagemap = [];
		$classes = [];
		$ret = [];
		foreach ( $titles as $name ) {
			$t = Title::newFromText( $name );
			// Filter out invalid titles. Title::newFromText in core (not our bespoke
			// version in src/Utils/Title.php) can return null for invalid titles.
			if ( !$t ) {
				// FIXME: This is a bandaid to patch up the fact that Env::makeTitle treats
				// this as a valid title, but Title::newFromText treats it as invalid.
				// T237535
				// This matches what ApiQuery::outputGeneralPageInfo() would
				// return for an invalid title.
				$ret[$name] = [
					'pageId' => -1,
					'revId' => -1,
					'invalid' => true,
					'invalidreason' => 'The requested page title is invalid',
				];
			} else {
				$titleObjs[$name] = $t;
			}
		}
		// Batch the lookups for all valid titles before querying them one by one.
		$linkBatch = new LinkBatch( $titleObjs );
		$linkBatch->execute();

		// Build the two arrays the GetLinkColours hook works on:
		// $pagemap maps article ID => prefixed DB key,
		// $classes maps prefixed DB key => space-separated class list.
		// NOTE(review): titles without a page presumably all report the same
		// article ID and would then share a single $pagemap slot -- confirm
		// that this is what hook handlers expect.
		foreach ( $titleObjs as $obj ) {
			$pdbk = $obj->getPrefixedDBkey();
			$pagemap[$obj->getArticleID()] = $pdbk;
			$classes[$pdbk] = $obj->isRedirect() ? 'mw-redirect' : '';
		}
		$context_title = Title::newFromText( $pageConfig->getTitle() );
		$this->hookRunner->onGetLinkColours(
			# $classes is passed by reference and mutated
			$pagemap, $classes, $context_title
		);

		foreach ( $titleObjs as $name => $obj ) {
			/** @var Title $obj */
			$pdbk = $obj->getPrefixedDBkey();
			// Turn the space-separated class string into a list without empty entries.
			$c = preg_split(
				'/\s+/', $classes[$pdbk] ?? '', -1, PREG_SPLIT_NO_EMPTY
			);
			$ret[$name] = [
				'pageId' => $obj->getArticleID(),
				'revId' => $obj->getLatestRevID(),
				'missing' => !$obj->exists(),
				'known' => $obj->isKnown(),
				'redirect' => $obj->isRedirect(),
				'linkclasses' => $c, # See ApiQueryInfo::getLinkClasses() in core
			];
		}
		return $ret;
	}

	/**
	 * Collect file metadata and thumbnail information for a batch of files.
	 *
	 * Each entry of $files is read as [ file name, requested dimensions ].
	 * The returned list has one entry per input entry, in the same order;
	 * files that cannot be found yield null.
	 *
	 * @inheritDoc
	 */
	public function getFileInfo( IPageConfig $pageConfig, array $files ): array {
		$page = Title::newFromText( $pageConfig->getTitle() );

		// Look up all files in one call.
		$keys = [];
		foreach ( $files as $f ) {
			$keys[] = $f[0];
		}
		$fileObjs = $this->repoGroup->findFiles( $keys );

		$ret = [];
		foreach ( $files as $f ) {
			$filename = $f[0];
			$dims = $f[1];

			/** @var File $file */
			$file = $fileObjs[$filename] ?? null;
			if ( !$file ) {
				$ret[] = null;
				continue;
			}

			// See Linker::makeImageLink; 'page' is a key in $handlerParams
			// core uses 'false' as the default then casts to (int) => 0
			$pageNum = $dims['page'] ?? 0;

			$result = [
				'width' => $file->getWidth( $pageNum ),
				'height' => $file->getHeight( $pageNum ),
				'size' => $file->getSize(),
				'mediatype' => $file->getMediaType(),
				'mime' => $file->getMimeType(),
				'url' => $file->getFullUrl(),
				'mustRender' => $file->mustRender(),
				'badFile' => $this->badFileLookup->isBadFile( $filename, $page ?: false ),
			];

			// Only report a duration when the file has a non-zero length.
			$length = $file->getLength();
			if ( $length ) {
				$result['duration'] = (float)$length;
			}

			// The transform is asked for 'thumbtime'; callers supply it as 'seek'.
			if ( isset( $dims['seek'] ) ) {
				$dims['thumbtime'] = $dims['seek'];
			}

			$txopts = $this->makeTransformOptions( $pageConfig, $file, $dims );
			$mto = $file->transform( $txopts );
			if ( $mto ) {
				if ( $mto->isError() && $mto instanceof MediaTransformError ) {
					$result['thumberror'] = $mto->toText();
				} else {
					if ( $txopts ) {
						// Do srcset scaling
						Linker::processResponsiveImages( $file, $mto, $txopts );
						if ( count( $mto->responsiveUrls ) ) {
							$result['responsiveUrls'] = [];
							foreach ( $mto->responsiveUrls as $density => $url ) {
								$result['responsiveUrls'][$density] = $url;
							}
						}
					}

					// Proposed MediaTransformOutput serialization method for T51896 etc.
					// Note that getAPIData(['fullurl']) would return
					// wfExpandUrl(), which wouldn't respect the wiki's
					// protocol preferences -- instead it would use the
					// protocol used for the API request.
					if ( is_callable( [ $mto, 'getAPIData' ] ) ) {
						$result['thumbdata'] = $mto->getAPIData( [ 'withhash' ] );
					}

					$result['thumburl'] = $mto->getUrl();
					$result['thumbwidth'] = $mto->getWidth();
					$result['thumbheight'] = $mto->getHeight();
				}
			} else {
				$result['thumberror'] = "Presumably, invalid parameters, despite validation.";
			}

			$ret[] = $result;
		}

		return $ret;
	}

	/**
	 * Prepare MediaWiki's parser for preprocessing or extension tag parsing,
	 * clearing its state if necessary.
	 *
	 * @param IPageConfig $pageConfig
	 * @param int $outputType One of the Parser::OT_* constants
	 * @return Parser The shared legacy parser, ready for use
	 */
	private function prepareParser( IPageConfig $pageConfig, int $outputType ) {
		'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig
		// Clear the state only when the PageConfig changes, so that Parser's internal caches can
		// be retained. This should also provide better compatibility with extension tags.
		$clearState = $this->previousPageConfig !== $pageConfig;
		$this->previousPageConfig = $pageConfig;
		$this->parser->startExternalParse(
			Title::newFromText( $pageConfig->getTitle() ), $pageConfig->getParserOptions(),
			$outputType, $clearState, $pageConfig->getRevisionId() );
		$this->parser->resetOutput();

		// Retain a PPFrame object between preprocess requests since it contains
		// some useful caches.
		if ( $clearState ) {
			$this->ppFrame = $this->parser->getPreprocessor()->newFrame();
		}
		return $this->parser;
	}

	/**
	 * Run the pre-save transform over a piece of wikitext and return the
	 * serialized result.
	 *
	 * @inheritDoc
	 */
	public function doPst( IPageConfig $pageConfig, string $wikitext ): string {
		'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig
		// This could use prepareParser(), but it's only called once per page,
		// so it's not essential.
		$titleObj = Title::newFromText( $pageConfig->getTitle() );
		$user = $pageConfig->getParserOptions()->getUserIdentity();
		$content = ContentHandler::makeContent( $wikitext, $titleObj, CONTENT_MODEL_WIKITEXT );
		return $this->contentTransformer->preSaveTransform(
			$content,
			$titleObj,
			$user,
			$pageConfig->getParserOptions()
		)->serialize();
	}

	/**
	 * Parse wikitext to HTML with the legacy parser and merge the metadata
	 * it produced into $metadata.
	 *
	 * @inheritDoc
	 */
	public function parseWikitext(
		IPageConfig $pageConfig,
		ContentMetadataCollector $metadata,
		string $wikitext
	): string {
		$parser = $this->prepareParser( $pageConfig, Parser::OT_HTML );
		$html = $parser->parseExtensionTagAsTopLevelDoc( $wikitext );
		// XXX: Ideally we will eventually have the legacy parser use our
		// ContentMetadataCollector instead of having a new ParserOutput
		// created (implicitly in ::prepareParser()/Parser::resetOutput() )
		// which we then have to manually merge.
		$out = $parser->getOutput();
		$out->setText( $html );
		$out->collectMetadata( $metadata ); # merges $out into $metadata
		return $out->getText( [ 'unwrap' => true ] ); # HTML
	}

	/**
	 * Expand wikitext with the legacy preprocessor, unstrip the result and
	 * merge the metadata produced along the way into $metadata.
	 *
	 * @inheritDoc
	 */
	public function preprocessWikitext(
		IPageConfig $pageConfig,
		ContentMetadataCollector $metadata,
		string $wikitext
	): string {
		$parser = $this->prepareParser( $pageConfig, Parser::OT_PREPROCESS );
		$this->hookRunner->onParserBeforePreprocess(
			# $wikitext is passed by reference and mutated
			$parser, $wikitext, $parser->getStripState()
		);
		$wikitext = $parser->replaceVariables( $wikitext, $this->ppFrame );
		// FIXME (T289545): StripState markers protect content that needs to be protected from
		// further "wikitext processing". So, where the result has strip state markers, we
		// actually need to tunnel this content through rather than unwrap and let it go through
		// the rest of the parsoid pipeline. For example, some parser functions might return HTML
		// not wikitext, and where the content might contain wikitext characters, we are now
		// going to potentially mangle that output.
		$wikitext = $parser->getStripState()->unstripBoth( $wikitext );

		// XXX: Ideally we will eventually have the legacy parser use our
		// ContentMetadataCollector instead of having a new ParserOutput
		// created (implicitly in ::prepareParser()/Parser::resetOutput() )
		// which we then have to manually merge.
		$out = $parser->getOutput();
		$out->collectMetadata( $metadata ); # merges $out into $metadata
		return $wikitext;
	}

	/**
	 * Fetch the content of a template page via the PageConfig.
	 *
	 * Returns null when the title is invalid or no revision is found.
	 *
	 * @inheritDoc
	 */
	public function fetchTemplateSource(
		IPageConfig $pageConfig, string $title
	): ?IPageContent {
		'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig
		$titleObj = Title::newFromText( $title );
		if ( !$titleObj ) {
			// Title::newFromText in core can return null for invalid titles
			// (see getPageInfo()). There is nothing to fetch in that case, so
			// report "no content" instead of handing null to the PageConfig.
			return null;
		}

		// Use the PageConfig to take advantage of custom template
		// fetch hooks like FlaggedRevisions, etc.
		$revRecord = $pageConfig->fetchRevisionRecordOfTemplate( $titleObj );

		return $revRecord ? new PageContent( $revRecord ) : null;
	}

	/**
	 * Fetch the template data for a title via the ParserFetchTemplateData
	 * hook, converted to nested associative arrays.
	 *
	 * @inheritDoc
	 */
	public function fetchTemplateData( IPageConfig $pageConfig, string $title ): ?array {
		$ret = [];
		// @todo: This hook needs some clean up: T304899
		$this->hookRunner->onParserFetchTemplateData(
			[ $title ],
			$ret # value returned by reference
		);

		// Cast value to array since the hook returns this as a stdclass
		$tplData = $ret[$title] ?? null;
		if ( $tplData ) {
			// Deep convert to associative array
			$tplData = json_decode( json_encode( $tplData ), true );
		}
		return $tplData;
	}

	/**
	 * Hand lint results to the ParserLogLinterData hook, unless the wiki is
	 * read-only or the linted revision is not the page's latest one.
	 *
	 * @inheritDoc
	 */
	public function logLinterData( IPageConfig $pageConfig, array $lints ): void {
		if ( $this->readOnlyMode->isReadOnly() ) {
			return;
		}

		$revId = $pageConfig->getRevisionId();
		$title = $pageConfig->getTitle();
		$pageInfo = $this->getPageInfo( $pageConfig, [ $title ] );
		$latest = $pageInfo[$title]['revId'];

		// Only send the request if it is the latest revision
		if ( $revId !== null && $revId === $latest ) {
			$this->hookRunner->onParserLogLinterData(
				$title, $revId, $lints
			);
		}
	}

}
|