This has been replaced by ::getLinkTarget(), which returns a Parsoid LinkTarget. This is identical to the core LinkTarget interface, but we can't quite alias them for technical reasons (sigh). In actual practice, LinkTargets generated by core are usually Title objects, so Title::newFromLinkTarget() is a no-op that just returns the argument after a type check. It appears that newer code uses a TitleFormatter rather than calling methods on Title, but TitleFormatter currently takes LinkTarget not a ParsoidLinkTarget. That would force us to go via TitleValue::newFromLinkTarget() which isn't a simple type check. Change-Id: I490bb38108d0202b43ea2a9b391b2e664e7d2d48
463 lines
15 KiB
PHP
463 lines
15 KiB
PHP
<?php
|
|
/**
|
|
* Copyright (C) 2011-2022 Wikimedia Foundation and others.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*/
|
|
|
|
namespace MediaWiki\Parser\Parsoid\Config;
|
|
|
|
use ContentHandler;
|
|
use File;
|
|
use LanguageCode;
|
|
use MediaTransformError;
|
|
use MediaWiki\Cache\LinkBatchFactory;
|
|
use MediaWiki\Config\ServiceOptions;
|
|
use MediaWiki\Content\Transform\ContentTransformer;
|
|
use MediaWiki\HookContainer\HookContainer;
|
|
use MediaWiki\HookContainer\HookRunner;
|
|
use MediaWiki\Linker\Linker;
|
|
use MediaWiki\MainConfigNames;
|
|
use MediaWiki\Page\File\BadFileLookup;
|
|
use MediaWiki\Title\Title;
|
|
use Parser;
|
|
use ParserFactory;
|
|
use PPFrame;
|
|
use RepoGroup;
|
|
use Wikimedia\Assert\UnreachableException;
|
|
use Wikimedia\Parsoid\Config\DataAccess as IDataAccess;
|
|
use Wikimedia\Parsoid\Config\PageConfig as IPageConfig;
|
|
use Wikimedia\Parsoid\Config\PageContent as IPageContent;
|
|
use Wikimedia\Parsoid\Core\ContentMetadataCollector;
|
|
use Wikimedia\Parsoid\Core\LinkTarget as ParsoidLinkTarget;
|
|
use Wikimedia\Rdbms\ReadOnlyMode;
|
|
|
|
/**
 * Implement Parsoid's abstract class for data access.
 *
 * Maps the Parsoid DataAccess API onto MediaWiki core services: page and
 * file metadata lookups, pre-save transform, wikitext parsing and
 * preprocessing through the legacy parser, template source and template
 * data fetching, and linter data logging.
 *
 * @since 1.39
 * @internal
 */
class DataAccess extends IDataAccess {
	/** Options that the ServiceOptions object given to the constructor must provide. */
	public const CONSTRUCTOR_OPTIONS = [
		MainConfigNames::SVGMaxSize,
	];

	/** Used to look up File objects by name in ::getFileInfo() */
	private RepoGroup $repoGroup;
	/** Used to fill in the 'badFile' flag in ::getFileInfo() */
	private BadFileLookup $badFileLookup;
	private HookContainer $hookContainer;
	/** Built from $hookContainer in the constructor */
	private HookRunner $hookRunner;
	/** Used for the pre-save transform in ::doPst() */
	private ContentTransformer $contentTransformer;
	private ParserFactory $parserFactory;
	/** Lazy-created via self::prepareParser() */
	private ?Parser $parser = null;
	/** Assigned by self::prepareParser() whenever the parser state is cleared */
	private PPFrame $ppFrame;
	/** The PageConfig seen by the most recent self::prepareParser() call */
	private ?PageConfig $previousPageConfig = null;
	private ServiceOptions $config;
	private ReadOnlyMode $readOnlyMode;
	private LinkBatchFactory $linkBatchFactory;

	/**
	 * @param ServiceOptions $config MediaWiki main configuration object
	 * @param RepoGroup $repoGroup
	 * @param BadFileLookup $badFileLookup
	 * @param HookContainer $hookContainer
	 * @param ContentTransformer $contentTransformer
	 * @param ReadOnlyMode $readOnlyMode used to disable linting when the
	 *   database is read-only.
	 * @param ParserFactory $parserFactory A legacy parser factory,
	 *   for PST/preprocessing/extension handling
	 * @param LinkBatchFactory $linkBatchFactory
	 */
	public function __construct(
		ServiceOptions $config,
		RepoGroup $repoGroup,
		BadFileLookup $badFileLookup,
		HookContainer $hookContainer,
		ContentTransformer $contentTransformer,
		ReadOnlyMode $readOnlyMode,
		ParserFactory $parserFactory,
		LinkBatchFactory $linkBatchFactory
	) {
		$config->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		$this->config = $config;
		$this->repoGroup = $repoGroup;
		$this->badFileLookup = $badFileLookup;
		$this->hookContainer = $hookContainer;
		$this->contentTransformer = $contentTransformer;
		$this->readOnlyMode = $readOnlyMode;
		$this->linkBatchFactory = $linkBatchFactory;

		$this->hookRunner = new HookRunner( $hookContainer );

		$this->parserFactory = $parserFactory;
		$this->previousPageConfig = null; // ensure we initialize parser options
	}

	/**
	 * Build the parameter array handed to File::transform() for one file.
	 *
	 * Parameters rejected by the file's media handler are dropped, a width
	 * is filled in when the caller did not supply one, and 'targetlang' is
	 * set from the page language.
	 *
	 * @param IPageConfig $pageConfig Supplies the page language (BCP 47 code)
	 * @param File $file
	 * @param array $hp Handler parameters requested by the caller
	 * @return array The adjusted handler parameters, or an empty array when
	 *   the file has no media handler
	 */
	private function makeTransformOptions( IPageConfig $pageConfig, File $file, array $hp ): array {
		// Validate the input parameters like Parser::makeImage()
		$handler = $file->getHandler();
		if ( !$handler ) {
			return []; // will get iconThumb()
		}
		foreach ( $hp as $name => $value ) {
			if ( !$handler->validateParam( $name, $value ) ) {
				unset( $hp[$name] );
			}
		}

		// This part is similar to Linker::makeImageLink(). If there is no width,
		// set one based on the source file size.
		$page = $hp['page'] ?? 0;
		if ( !isset( $hp['width'] ) ) {
			if ( isset( $hp['height'] ) && $file->isVectorized() ) {
				// If it's a vector image, and user only specifies height
				// we don't want it to be limited by its "normal" width.
				$hp['width'] = $this->config->get( MainConfigNames::SVGMaxSize );
			} else {
				$hp['width'] = $file->getWidth( $page );
			}

			// We don't need to fill in a default thumbnail width here, since
			// that is done by Parsoid. Parsoid always sets the width parameter
			// for thumbnails.
		}

		// Parser::makeImage() always sets this
		$hp['targetlang'] = LanguageCode::bcp47ToInternal(
			$pageConfig->getPageLanguageBcp47()
		);

		return $hp;
	}

	/**
	 * @inheritDoc
	 *
	 * The result is keyed by the title strings given in $titles. Titles that
	 * Title::newFromText() rejects get a stub entry with 'pageId' and 'revId'
	 * of -1 and 'invalid' set; all others get 'pageId', 'revId', 'missing',
	 * 'known', 'redirect' and 'linkclasses'.
	 */
	public function getPageInfo( $pageConfigOrTitle, array $titles ): array {
		// Resolve the context page, which is handed to the GetLinkColours hook below.
		if ( $pageConfigOrTitle instanceof IPageConfig ) {
			$contextTitle = Title::newFromLinkTarget(
				$pageConfigOrTitle->getLinkTarget()
			);
		} elseif ( is_string( $pageConfigOrTitle ) ) {
			// Temporary, deprecated.
			$contextTitle = Title::newFromTextThrow( $pageConfigOrTitle );
		} elseif ( $pageConfigOrTitle instanceof ParsoidLinkTarget ) {
			$contextTitle = Title::newFromLinkTarget( $pageConfigOrTitle );
		} else {
			throw new UnreachableException( "Bad type for argument 1" );
		}
		$titleObjs = [];
		$pagemap = [];
		$classes = [];
		$ret = [];
		foreach ( $titles as $name ) {
			$t = Title::newFromText( $name );
			// Filter out invalid titles. Title::newFromText in core (not our bespoke
			// version in src/Utils/Title.php) can return null for invalid titles.
			if ( !$t ) {
				// FIXME: This is a bandaid to patch up the fact that Env::makeTitle treats
				// this as a valid title, but Title::newFromText treats it as invalid.
				// T237535
				// This matches what ApiQuery::outputGeneralPageInfo() would
				// return for an invalid title.
				$ret[$name] = [
					'pageId' => -1,
					'revId' => -1,
					'invalid' => true,
					'invalidreason' => 'The requested page title is invalid',
				];
			} else {
				$titleObjs[$name] = $t;
			}
		}
		// Run one link batch over all valid titles before the per-title
		// accessors below are called.
		$linkBatch = $this->linkBatchFactory->newLinkBatch( $titleObjs );
		$linkBatch->setCaller( __METHOD__ );
		$linkBatch->execute();

		// NOTE(review): $pagemap is keyed by article ID. getArticleID()
		// presumably returns 0 for every page that does not exist, in which
		// case all missing titles share key 0 and only the last one survives.
		// Confirm whether any hook handler relies on missing pages being listed.
		foreach ( $titleObjs as $obj ) {
			$pdbk = $obj->getPrefixedDBkey();
			$pagemap[$obj->getArticleID()] = $pdbk;
			$classes[$pdbk] = $obj->isRedirect() ? 'mw-redirect' : '';
		}
		$this->hookRunner->onGetLinkColours(
			# $classes is passed by reference and mutated
			$pagemap, $classes, $contextTitle
		);

		foreach ( $titleObjs as $name => $obj ) {
			/** @var Title $obj */
			$pdbk = $obj->getPrefixedDBkey();
			// Split the whitespace-separated class string into a list,
			// dropping empty pieces.
			$c = preg_split(
				'/\s+/', $classes[$pdbk] ?? '', -1, PREG_SPLIT_NO_EMPTY
			);
			$ret[$name] = [
				'pageId' => $obj->getArticleID(),
				'revId' => $obj->getLatestRevID(),
				'missing' => !$obj->exists(),
				'known' => $obj->isKnown(),
				'redirect' => $obj->isRedirect(),
				'linkclasses' => $c, # See ApiQueryInfo::getLinkClasses() in core
			];
		}
		return $ret;
	}

	/**
	 * @inheritDoc
	 *
	 * Each entry of $files is read as [ filename, dims ]. The result is a
	 * list in the same order as $files, holding null for every file that
	 * RepoGroup::findFiles() did not return.
	 */
	public function getFileInfo( IPageConfig $pageConfig, array $files ): array {
		$page = Title::newFromLinkTarget( $pageConfig->getLinkTarget() );

		// Look up all requested files with a single findFiles() call.
		$keys = [];
		foreach ( $files as $f ) {
			$keys[] = $f[0];
		}
		$fileObjs = $this->repoGroup->findFiles( $keys );

		$ret = [];
		foreach ( $files as $f ) {
			$filename = $f[0];
			$dims = $f[1];

			/** @var File $file */
			$file = $fileObjs[$filename] ?? null;
			if ( !$file ) {
				$ret[] = null;
				continue;
			}

			// See Linker::makeImageLink; 'page' is a key in $handlerParams
			// core uses 'false' as the default then casts to (int) => 0
			$pageNum = $dims['page'] ?? 0;

			$result = [
				'width' => $file->getWidth( $pageNum ),
				'height' => $file->getHeight( $pageNum ),
				'size' => $file->getSize(),
				'mediatype' => $file->getMediaType(),
				'mime' => $file->getMimeType(),
				'url' => $file->getFullUrl(),
				'mustRender' => $file->mustRender(),
				'badFile' => $this->badFileLookup->isBadFile( $filename, $page ),
			];

			// 'duration' is only reported when the file has a truthy length.
			$length = $file->getLength();
			if ( $length ) {
				$result['duration'] = (float)$length;
			}

			// A 'seek' value from the caller is passed on as 'thumbtime'.
			if ( isset( $dims['seek'] ) ) {
				$dims['thumbtime'] = $dims['seek'];
			}

			$txopts = $this->makeTransformOptions( $pageConfig, $file, $dims );
			$mto = $file->transform( $txopts );
			if ( $mto ) {
				if ( $mto->isError() && $mto instanceof MediaTransformError ) {
					$result['thumberror'] = $mto->toText();
				} else {
					if ( $txopts ) {
						// Do srcset scaling
						Linker::processResponsiveImages( $file, $mto, $txopts );
						if ( count( $mto->responsiveUrls ) ) {
							$result['responsiveUrls'] = [];
							foreach ( $mto->responsiveUrls as $density => $url ) {
								$result['responsiveUrls'][$density] = $url;
							}
						}
					}

					// Proposed MediaTransformOutput serialization method for T51896 etc.
					// Note that getAPIData(['fullurl']) would return
					// wfExpandUrl(), which wouldn't respect the wiki's
					// protocol preferences -- instead it would use the
					// protocol used for the API request.
					if ( is_callable( [ $mto, 'getAPIData' ] ) ) {
						$result['thumbdata'] = $mto->getAPIData( [ 'withhash' ] );
					}

					$result['thumburl'] = $mto->getUrl();
					$result['thumbwidth'] = $mto->getWidth();
					$result['thumbheight'] = $mto->getHeight();
				}
			} else {
				$result['thumberror'] = "Presumably, invalid parameters, despite validation.";
			}

			$ret[] = $result;
		}

		return $ret;
	}

	/**
	 * Prepare MediaWiki's parser for preprocessing or extension tag parsing,
	 * clearing its state if necessary.
	 *
	 * The parser is created on first use and then reused. Its state is
	 * cleared, and a new PPFrame created, only when $pageConfig is not the
	 * same object as in the previous call.
	 *
	 * @param IPageConfig $pageConfig
	 * @param int $outputType
	 * @return Parser
	 */
	private function prepareParser( IPageConfig $pageConfig, int $outputType ): Parser {
		'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig
		// Clear the state only when the PageConfig changes, so that Parser's internal caches can
		// be retained. This should also provide better compatibility with extension tags.
		$clearState = $this->previousPageConfig !== $pageConfig;
		$this->previousPageConfig = $pageConfig;
		// Use the same legacy parser object for all calls to extension tag
		// processing, for greater compatibility.
		$this->parser ??= $this->parserFactory->create();
		$this->parser->startExternalParse(
			Title::newFromLinkTarget( $pageConfig->getLinkTarget() ),
			$pageConfig->getParserOptions(),
			$outputType, $clearState, $pageConfig->getRevisionId() );
		$this->parser->resetOutput();

		// Retain a PPFrame object between preprocess requests since it contains
		// some useful caches.
		if ( $clearState ) {
			$this->ppFrame = $this->parser->getPreprocessor()->newFrame();
		}
		return $this->parser;
	}

	/**
	 * @inheritDoc
	 *
	 * Wraps $wikitext in a wikitext Content object, runs the content
	 * transformer's pre-save transform on it and returns the serialized result.
	 */
	public function doPst( IPageConfig $pageConfig, string $wikitext ): string {
		'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig
		// This could use prepareParser(), but it's only called once per page,
		// so it's not essential.
		$titleObj = Title::newFromLinkTarget( $pageConfig->getLinkTarget() );
		$user = $pageConfig->getParserOptions()->getUserIdentity();
		$content = ContentHandler::makeContent( $wikitext, $titleObj, CONTENT_MODEL_WIKITEXT );
		return $this->contentTransformer->preSaveTransform(
			$content,
			$titleObj,
			$user,
			$pageConfig->getParserOptions()
		)->serialize();
	}

	/**
	 * @inheritDoc
	 *
	 * Parses $wikitext with the legacy parser in OT_HTML mode, merges the
	 * resulting parser output metadata into $metadata, and returns the
	 * result of Parser::extractBody() on the parser output's raw text.
	 */
	public function parseWikitext(
		IPageConfig $pageConfig,
		ContentMetadataCollector $metadata,
		string $wikitext
	): string {
		$parser = $this->prepareParser( $pageConfig, Parser::OT_HTML );
		$html = $parser->parseExtensionTagAsTopLevelDoc( $wikitext );
		// XXX: Ideally we will eventually have the legacy parser use our
		// ContentMetadataCollector instead of having a new ParserOutput
		// created (implicitly in ::prepareParser()/Parser::resetOutput() )
		// which we then have to manually merge.
		$out = $parser->getOutput();
		$out->setText( $html );
		$out->collectMetadata( $metadata ); # merges $out into $metadata
		return Parser::extractBody( $out->getRawText() );
	}

	/**
	 * @inheritDoc
	 *
	 * Runs the ParserBeforePreprocess hook, expands $wikitext with the
	 * legacy parser in OT_PREPROCESS mode using the retained PPFrame,
	 * unstrips the result, and merges the parser output metadata into
	 * $metadata.
	 */
	public function preprocessWikitext(
		IPageConfig $pageConfig,
		ContentMetadataCollector $metadata,
		string $wikitext
	): string {
		$parser = $this->prepareParser( $pageConfig, Parser::OT_PREPROCESS );
		$this->hookRunner->onParserBeforePreprocess(
			# $wikitext is passed by reference and mutated
			$parser, $wikitext, $parser->getStripState()
		);
		$wikitext = $parser->replaceVariables( $wikitext, $this->ppFrame );
		// FIXME (T289545): StripState markers protect content that need to be protected from further
		// "wikitext processing". So, where the result has strip state markers, we actually
		// need to tunnel this content through rather than unwrap and let it go through the
		// rest of the parsoid pipeline. For example, some parser functions might return HTML
		// not wikitext, and where the content might contain wikitext characters, we are now
		// going to potentially mangle that output.
		$wikitext = $parser->getStripState()->unstripBoth( $wikitext );

		// XXX: Ideally we will eventually have the legacy parser use our
		// ContentMetadataCollector instead of having a new ParserOutput
		// created (implicitly in ::prepareParser()/Parser::resetOutput() )
		// which we then have to manually merge.
		$out = $parser->getOutput();
		$out->collectMetadata( $metadata ); # merges $out into $metadata
		return $wikitext;
	}

	/**
	 * @inheritDoc
	 *
	 * $title may be a string or a link target object. Returns null when the
	 * PageConfig yields no revision record for the template.
	 */
	public function fetchTemplateSource(
		IPageConfig $pageConfig, $title
	): ?IPageContent {
		'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig
		if ( is_string( $title ) ) {
			$titleObj = Title::newFromTextThrow( $title );
		} else {
			$titleObj = Title::newFromLinkTarget( $title );
		}

		// Use the PageConfig to take advantage of custom template
		// fetch hooks like FlaggedRevisions, etc.
		$revRecord = $pageConfig->fetchRevisionRecordOfTemplate( $titleObj );

		return $revRecord ? new PageContent( $revRecord ) : null;
	}

	/**
	 * @inheritDoc
	 *
	 * $title may be a string or a link target object; the hook is always
	 * queried with the title as a string. Returns null when the hook
	 * supplied no entry for the title.
	 */
	public function fetchTemplateData( IPageConfig $pageConfig, $title ): ?array {
		$ret = [];
		if ( !is_string( $title ) ) {
			// The hook and the lookup below work on the prefixed text form.
			$titleObj = Title::newFromLinkTarget( $title );
			$title = $titleObj->getPrefixedText();
		}
		// @todo: This hook needs some clean up: T304899
		$this->hookRunner->onParserFetchTemplateData(
			[ $title ],
			$ret # value returned by reference
		);

		$tplData = $ret[$title] ?? null;
		if ( $tplData ) {
			// Cast value to array since the hook returns this as a stdclass:
			// a JSON round trip deep-converts it to an associative array.
			$tplData = json_decode( json_encode( $tplData ), true );
		}
		return $tplData;
	}

	/**
	 * @inheritDoc
	 *
	 * Does nothing in read-only mode. Otherwise the lints are passed to the
	 * ParserLogLinterData hook, but only when the PageConfig's revision ID
	 * is non-null and equals the page's latest revision ID.
	 */
	public function logLinterData( IPageConfig $pageConfig, array $lints ): void {
		if ( $this->readOnlyMode->isReadOnly() ) {
			return;
		}

		$revId = $pageConfig->getRevisionId();
		$title = Title::newFromLinkTarget(
			$pageConfig->getLinkTarget()
		)->getPrefixedText();
		// Look up the latest revision ID of the page through ::getPageInfo().
		$pageInfo = $this->getPageInfo( $pageConfig, [ $title ] );
		$latest = $pageInfo[$title]['revId'];

		// Only send the request if it is the latest revision
		if ( $revId !== null && $revId === $latest ) {
			$this->hookRunner->onParserLogLinterData(
				$title, $revId, $lints
			);
		}
	}

}
|