Copy over Parsoid's Config and ServiceWiring classes

* This is the first step of migrating Parsoid integration code into
  core and transitioning Parsoid from an extension to a pure library.

* Parsoid already has conditional code to skip loading Parsoid's
  copy of its classes, but it relies on the existence of ParsoidServices.
  Technically ParsoidServices isn't needed once Parsoid is migrated to
  core -- users can just use MediaWikiServices instead -- but we need
  to temporarily add ParsoidServices as a marker class during the
  transition.

This version of Parsoid's ServiceWiring comes from Parsoid commit
898c813fd832b3f2d7b5a37f60bd65e8368ce18f.

Bug: T302118
Change-Id: I0b388d93143a782c2c3b72e46407572e5c586e4a
This commit is contained in:
C. Scott Ananian 2022-03-25 16:34:04 -04:00
parent c11db77cb8
commit 2d66ee70a2
16 changed files with 2651 additions and 7 deletions

View file

@ -1087,13 +1087,6 @@ $wgAutoloadLocalClasses = [
'MediaWiki\\Languages\\LanguageFactory' => __DIR__ . '/includes/language/LanguageFactory.php',
'MediaWiki\\Languages\\LanguageFallback' => __DIR__ . '/includes/language/LanguageFallback.php',
'MediaWiki\\Languages\\LanguageNameUtils' => __DIR__ . '/includes/language/LanguageNameUtils.php',
'MediaWiki\\Parser\\ParserCacheFactory' => __DIR__ . '/includes/parser/ParserCacheFactory.php',
'MediaWiki\\Parser\\ParserCacheMetadata' => __DIR__ . '/includes/parser/ParserCacheMetadata.php',
'MediaWiki\\Parser\\ParserObserver' => __DIR__ . '/includes/parser/ParserObserver.php',
'MediaWiki\\Parser\\ParserOutputFlags' => __DIR__ . '/includes/parser/ParserOutputFlags.php',
'MediaWiki\\Parser\\RemexRemoveTagHandler' => __DIR__ . '/includes/parser/RemexRemoveTagHandler.php',
'MediaWiki\\Parser\\RemexStripTagHandler' => __DIR__ . '/includes/parser/RemexStripTagHandler.php',
'MediaWiki\\Parser\\RevisionOutputCache' => __DIR__ . '/includes/parser/RevisionOutputCache.php',
'MediaWiki\\ProcOpenError' => __DIR__ . '/includes/exception/ProcOpenError.php',
'MediaWiki\\ShellDisabledError' => __DIR__ . '/includes/exception/ShellDisabledError.php',
'MediaWiki\\Skin\\SkinComponent' => __DIR__ . '/includes/skins/components/SkinComponent.php',

View file

@ -4006,6 +4006,15 @@ config-schema:
See includes/tidy/RemexDriver.php for detail on configuration.
Overriding the default configuration is strongly discouraged in
production.
ParsoidSettings:
default:
useSelser: true
type: object
description: |-
Default Parsoid configuration.
Overriding the default configuration is strongly discouraged in
production.
@since 1.39
ParserEnableLegacyMediaDOM:
default: true
description: |-

View file

@ -163,6 +163,7 @@ class AutoLoader {
'MediaWiki\\Logger\Monolog\\' => __DIR__ . '/debug/logger/monolog/',
'MediaWiki\\Mail\\' => __DIR__ . '/mail/',
'MediaWiki\\Page\\' => __DIR__ . '/page/',
'MediaWiki\\Parser\\' => __DIR__ . '/parser/',
'MediaWiki\\Preferences\\' => __DIR__ . '/preferences/',
'MediaWiki\\ResourceLoader\\' => __DIR__ . '/resourceloader/',
'MediaWiki\\Search\\' => __DIR__ . '/search/',

View file

@ -3266,6 +3266,16 @@ $wgAllowImageTag = false;
$wgTidyConfig = [
];
/**
* Variable for the ParsoidSettings setting, for use in LocalSettings.php
* @see MainConfigSchema::ParsoidSettings
* @note Do not change manually, generated by maintenance/generateConfigDefaultSettings.php!
* @var array
*/
$wgParsoidSettings = [
'useSelser' => true,
];
/**
* Variable for the ParserEnableLegacyMediaDOM setting, for use in LocalSettings.php
* @see MainConfigSchema::ParserEnableLegacyMediaDOM

View file

@ -2344,6 +2344,12 @@ class MainConfigNames {
*/
public const TidyConfig = 'TidyConfig';
/**
* Name constant for the ParsoidSettings setting, for use with Config::get()
* @see MainConfigSchema::ParsoidSettings
*/
public const ParsoidSettings = 'ParsoidSettings';
/**
* Name constant for the ParserEnableLegacyMediaDOM setting, for use with Config::get()
* @see MainConfigSchema::ParserEnableLegacyMediaDOM

View file

@ -6049,6 +6049,21 @@ class MainConfigSchema {
'type' => 'object',
];
/**
* Default Parsoid configuration.
*
* Overriding the default configuration is strongly discouraged in
* production.
*
* @since 1.39
*/
public const ParsoidSettings = [
'default' => [
'useSelser' => true,
],
'type' => 'object',
];
/**
* Enable legacy media HTML structure in the output from the Parser. The
* alternative modern HTML structure that replaces it is described at

View file

@ -79,6 +79,7 @@ use MediaWiki\Page\RollbackPageFactory;
use MediaWiki\Page\UndeletePageFactory;
use MediaWiki\Page\WikiPageFactory;
use MediaWiki\Parser\ParserCacheFactory;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
use MediaWiki\Permissions\GrantsInfo;
use MediaWiki\Permissions\GrantsLocalization;
use MediaWiki\Permissions\GroupPermissionsLookup;
@ -158,6 +159,8 @@ use Wikimedia\Message\IMessageFormatterFactory;
use Wikimedia\Metrics\MetricsFactory;
use Wikimedia\NonSerializable\NonSerializableTrait;
use Wikimedia\ObjectFactory\ObjectFactory;
use Wikimedia\Parsoid\Config\DataAccess;
use Wikimedia\Parsoid\Config\SiteConfig;
use Wikimedia\Rdbms\ILoadBalancer;
use Wikimedia\Rdbms\LBFactory;
use Wikimedia\RequestTimeout\CriticalSectionProvider;
@ -1398,6 +1401,30 @@ class MediaWikiServices extends ServiceContainer {
return $this->getService( 'ParserOutputAccess' );
}
/**
* Get the service registered under the name 'ParsoidDataAccess'.
*
* @return DataAccess
* @since 1.39
*/
public function getParsoidDataAccess(): DataAccess {
return $this->getService( 'ParsoidDataAccess' );
}
/**
* Get the service registered under the name 'ParsoidPageConfigFactory'.
*
* @return PageConfigFactory
* @since 1.39
*/
public function getParsoidPageConfigFactory(): PageConfigFactory {
return $this->getService( 'ParsoidPageConfigFactory' );
}
/**
* Get the service registered under the name 'ParsoidSiteConfig'.
*
* @return SiteConfig
* @since 1.39
*/
public function getParsoidSiteConfig(): SiteConfig {
return $this->getService( 'ParsoidSiteConfig' );
}
/**
* @since 1.32
* @return PasswordFactory

View file

@ -114,6 +114,9 @@ use MediaWiki\Page\UndeletePageFactory;
use MediaWiki\Page\WikiPageFactory;
use MediaWiki\Parser\ParserCacheFactory;
use MediaWiki\Parser\ParserObserver;
use MediaWiki\Parser\Parsoid\Config\DataAccess as MWDataAccess;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory as MWPageConfigFactory;
use MediaWiki\Parser\Parsoid\Config\SiteConfig as MWSiteConfig;
use MediaWiki\Permissions\GrantsInfo;
use MediaWiki\Permissions\GrantsLocalization;
use MediaWiki\Permissions\GroupPermissionsLookup;
@ -169,6 +172,10 @@ use Wikimedia\DependencyStore\SqlModuleDependencyStore;
use Wikimedia\Message\IMessageFormatterFactory;
use Wikimedia\Metrics\MetricsFactory;
use Wikimedia\ObjectFactory\ObjectFactory;
use Wikimedia\Parsoid\Config\Api\DataAccess as ApiDataAccess;
use Wikimedia\Parsoid\Config\Api\SiteConfig as ApiSiteConfig;
use Wikimedia\Parsoid\Config\DataAccess;
use Wikimedia\Parsoid\Config\SiteConfig;
use Wikimedia\RequestTimeout\CriticalSectionProvider;
use Wikimedia\RequestTimeout\RequestTimeout;
use Wikimedia\Services\RecursiveServiceDependencyException;
@ -1278,6 +1285,53 @@ return [
);
},
'ParsoidDataAccess' => static function ( MediaWikiServices $services ): DataAccess {
	$settings = $services->getMainConfig()->get( 'ParsoidSettings' );

	// Normal case: no debug API requested, so back Parsoid with the
	// in-process MediaWiki services.
	if ( empty( $settings['debugApi'] ) ) {
		return new MWDataAccess(
			$services->getRepoGroup(),
			$services->getBadFileLookup(),
			$services->getHookContainer(),
			$services->getContentTransformer(),
			$services->getParserFactory() // *legacy* parser factory
		);
	}

	// A non-empty 'debugApi' setting selects the API-backed implementation.
	return ApiDataAccess::fromSettings( $settings );
},
'ParsoidPageConfigFactory' => static function ( MediaWikiServices $services ): MWPageConfigFactory {
	// The factory only needs revision lookup and slot role handling.
	$revisionStore = $services->getRevisionStore();
	$slotRoleRegistry = $services->getSlotRoleRegistry();

	return new MWPageConfigFactory( $revisionStore, $slotRoleRegistry );
},
// Builds the site-level configuration object handed to Parsoid.
'ParsoidSiteConfig' => static function ( MediaWikiServices $services ): SiteConfig {
$mainConfig = $services->getMainConfig();
$parsoidSettings = $mainConfig->get( 'ParsoidSettings' );
// A non-empty 'debugApi' entry in ParsoidSettings selects the API-backed
// implementation instead of the in-process one built below.
if ( !empty( $parsoidSettings['debugApi'] ) ) {
return ApiSiteConfig::fromSettings( $parsoidSettings );
}
// The arguments are positional: their order must match the MWSiteConfig
// constructor exactly.
return new MWSiteConfig(
new ServiceOptions( MWSiteConfig::CONSTRUCTOR_OPTIONS, $mainConfig ),
$parsoidSettings,
$services->getObjectFactory(),
$services->getContentLanguage(),
$services->getStatsdDataFactory(),
$services->getMagicWordFactory(),
$services->getNamespaceInfo(),
$services->getSpecialPageFactory(),
$services->getInterwikiLookup(),
$services->getUserOptionsLookup(),
$services->getLanguageFactory(),
$services->getLanguageConverterFactory(),
$services->getLanguageNameUtils(),
// These arguments are temporary and will be removed once
// better solutions are found (see the task noted on each line).
$services->getParser(), // T268776
$mainConfig, // T268777
$services->getHookContainer() // T300546
);
},
'PasswordFactory' => static function ( MediaWikiServices $services ): PasswordFactory {
$config = $services->getMainConfig();
return new PasswordFactory(

View file

@ -1807,6 +1807,12 @@ return [
],
'type' => 'object',
],
'ParsoidSettings' => [
'default' => [
'useSelser' => true,
],
'type' => 'object',
],
'ParserEnableLegacyMediaDOM' => [
'default' => true,
],

View file

@ -0,0 +1,416 @@
<?php
/**
* Copyright (C) 2011-2020 Wikimedia Foundation and others.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
namespace MediaWiki\Parser\Parsoid\Config;
use ContentHandler;
use File;
use LinkBatch;
use Linker;
use MediaTransformError;
use MediaWiki\BadFileLookup;
use MediaWiki\Content\Transform\ContentTransformer;
use MediaWiki\HookContainer\HookContainer;
use Parser;
use ParserFactory;
use RepoGroup;
use Title;
use Wikimedia\Parsoid\Config\DataAccess as IDataAccess;
use Wikimedia\Parsoid\Config\PageConfig as IPageConfig;
use Wikimedia\Parsoid\Config\PageContent as IPageContent;
use Wikimedia\Parsoid\Core\ContentMetadataCollector;
class DataAccess extends IDataAccess {
/** @var RepoGroup */
private $repoGroup;
/** @var BadFileLookup */
private $badFileLookup;
/** @var HookContainer */
private $hookContainer;
/** @var ContentTransformer */
private $contentTransformer;
/** @var Parser */
private $parser;
/** @var \PPFrame */
private $ppFrame;
/** @var ?PageConfig */
private $previousPageConfig;
/**
 * Store the injected services and create the legacy parser instance that
 * is reused for every preprocessing / extension tag call.
 *
 * @param RepoGroup $repoGroup
 * @param BadFileLookup $badFileLookup
 * @param HookContainer $hookContainer
 * @param ContentTransformer $contentTransformer
 * @param ParserFactory $parserFactory A legacy parser factory,
 *   for PST/preprocessing/extension handling
 */
public function __construct(
	RepoGroup $repoGroup,
	BadFileLookup $badFileLookup,
	HookContainer $hookContainer,
	ContentTransformer $contentTransformer,
	ParserFactory $parserFactory
) {
	// A single legacy parser object is shared by all extension tag
	// processing calls, for greater compatibility.
	$this->parser = $parserFactory->create();
	// Starting from null makes the first prepareParser() call see a
	// "changed" PageConfig, so parser options get initialized.
	$this->previousPageConfig = null;

	$this->contentTransformer = $contentTransformer;
	$this->hookContainer = $hookContainer;
	$this->badFileLookup = $badFileLookup;
	$this->repoGroup = $repoGroup;
}
/**
 * Turn the handler parameters requested by Parsoid into the options array
 * given to File::transform().
 *
 * @param IPageConfig $pageConfig Supplies the target language
 * @param File $file
 * @param array $hp Requested handler parameters
 * @return array Validated parameters, or an empty array when the file has
 *   no media handler
 */
private function makeTransformOptions( IPageConfig $pageConfig, $file, array $hp ): array {
	// Validate the input parameters like Parser::makeImage()
	$mediaHandler = $file->getHandler();
	if ( !$mediaHandler ) {
		return []; // will get iconThumb()
	}

	// Keep only the parameters the handler accepts.
	$hp = array_filter(
		$hp,
		static function ( $value, $name ) use ( $mediaHandler ) {
			return $mediaHandler->validateParam( $name, $value );
		},
		ARRAY_FILTER_USE_BOTH
	);

	// This part is similar to Linker::makeImageLink(). If there is no width,
	// set one based on the source file size.
	// We don't need to fill in a default thumbnail width here, since
	// that is done by Parsoid. Parsoid always sets the width parameter
	// for thumbnails.
	if ( !isset( $hp['width'] ) ) {
		$heightOnlyVector = isset( $hp['height'] ) && $file->isVectorized();
		if ( $heightOnlyVector ) {
			// If it's a vector image, and user only specifies height
			// we don't want it to be limited by its "normal" width.
			global $wgSVGMaxSize;
			$hp['width'] = $wgSVGMaxSize;
		} else {
			$hp['width'] = $file->getWidth( $hp['page'] ?? 0 );
		}
	}

	// Parser::makeImage() always sets this
	$hp['targetlang'] = $pageConfig->getPageLanguage();

	return $hp;
}
/** @inheritDoc */
public function getPageInfo( IPageConfig $pageConfig, array $titles ): array {
	$info = [];
	$validTitles = [];

	foreach ( $titles as $text ) {
		$parsed = Title::newFromText( $text );
		if ( $parsed ) {
			$validTitles[$text] = $parsed;
			continue;
		}
		// Title::newFromText in core (not our bespoke version in
		// src/Utils/Title.php) can return null for invalid titles.
		// FIXME: This is a bandaid to patch up the fact that Env::makeTitle treats
		// this as a valid title, but Title::newFromText treats it as invalid.
		// T237535
		// This matches what ApiQuery::outputGeneralPageInfo() would
		// return for an invalid title.
		$info[$text] = [
			'pageId' => -1,
			'revId' => -1,
			'invalid' => true,
			'invalidreason' => 'The requested page title is invalid',
		];
	}

	// Run the link batch for all valid titles before querying them one by one.
	( new LinkBatch( $validTitles ) )->execute();

	$dbKeysById = [];
	$linkClasses = [];
	foreach ( $validTitles as $titleObj ) {
		$dbKey = $titleObj->getPrefixedDBkey();
		$dbKeysById[$titleObj->getArticleID()] = $dbKey;
		$linkClasses[$dbKey] = $titleObj->isRedirect() ? 'mw-redirect' : '';
	}

	// Hook handlers may rewrite the class string of each link.
	$contextTitle = Title::newFromText( $pageConfig->getTitle() );
	$this->hookContainer->run(
		'GetLinkColours',
		[ $dbKeysById, &$linkClasses, $contextTitle ]
	);

	foreach ( $validTitles as $text => $titleObj ) {
		/** @var Title $titleObj */
		$dbKey = $titleObj->getPrefixedDBkey();
		$classList = preg_split(
			'/\s+/', $linkClasses[$dbKey] ?? '', -1, PREG_SPLIT_NO_EMPTY
		);
		$info[$text] = [
			'pageId' => $titleObj->getArticleID(),
			'revId' => $titleObj->getLatestRevID(),
			'missing' => !$titleObj->exists(),
			'known' => $titleObj->isKnown(),
			'redirect' => $titleObj->isRedirect(),
			'linkclasses' => $classList, # See ApiQueryInfo::getLinkClasses() in core
		];
	}

	return $info;
}
/**
 * @inheritDoc
 *
 * Each entry of $files is read as a pair: index 0 is the file name and
 * index 1 is the array of requested dimensions / handler parameters.
 * The result has one entry per input entry, in the same order; it is null
 * when the file could not be found.
 */
public function getFileInfo( IPageConfig $pageConfig, array $files ): array {
	// Page on which the files are used; null if the title cannot be parsed.
	$contextTitle = Title::newFromText( $pageConfig->getTitle() );

	// Look up all the files in a single call.
	$keys = [];
	foreach ( $files as $f ) {
		$keys[] = $f[0];
	}
	$fileObjs = $this->repoGroup->findFiles( $keys );

	$ret = [];
	foreach ( $files as $f ) {
		$filename = $f[0];
		$dims = $f[1];

		/** @var File $file */
		$file = $fileObjs[$filename] ?? null;
		if ( !$file ) {
			$ret[] = null;
			continue;
		}

		// See Linker::makeImageLink; 'page' is a key in $handlerParams
		// core uses 'false' as the default then casts to (int) => 0
		$pageNum = $dims['page'] ?? 0;

		$result = [
			'width' => $file->getWidth( $pageNum ),
			'height' => $file->getHeight( $pageNum ),
			'size' => $file->getSize(),
			'mediatype' => $file->getMediaType(),
			'mime' => $file->getMimeType(),
			'url' => $file->getFullUrl(),
			'mustRender' => $file->mustRender(),
			// BadFileLookup::isBadFile() takes a LinkTarget or null as the
			// context page. Passing `false` for an unparseable title would
			// violate that type, so hand over the (possibly null) Title as-is.
			'badFile' => $this->badFileLookup->isBadFile( $filename, $contextTitle ),
		];

		$length = $file->getLength();
		if ( $length ) {
			$result['duration'] = (float)$length;
		}

		// The 'seek' dimension is forwarded under the name 'thumbtime'.
		if ( isset( $dims['seek'] ) ) {
			$dims['thumbtime'] = $dims['seek'];
		}

		$txopts = $this->makeTransformOptions( $pageConfig, $file, $dims );
		$mto = $file->transform( $txopts );
		if ( $mto ) {
			if ( $mto->isError() && $mto instanceof MediaTransformError ) {
				$result['thumberror'] = $mto->toText();
			} else {
				if ( $txopts ) {
					// Do srcset scaling
					Linker::processResponsiveImages( $file, $mto, $txopts );
					if ( count( $mto->responsiveUrls ) ) {
						$result['responsiveUrls'] = [];
						foreach ( $mto->responsiveUrls as $density => $url ) {
							$result['responsiveUrls'][$density] = $url;
						}
					}
				}

				// Proposed MediaTransformOutput serialization method for T51896 etc.
				// Note that getAPIData(['fullurl']) would return
				// wfExpandUrl(), which wouldn't respect the wiki's
				// protocol preferences -- instead it would use the
				// protocol used for the API request.
				if ( is_callable( [ $mto, 'getAPIData' ] ) ) {
					$result['thumbdata'] = $mto->getAPIData( [ 'withhash' ] );
				}

				$result['thumburl'] = $mto->getUrl();
				$result['thumbwidth'] = $mto->getWidth();
				$result['thumbheight'] = $mto->getHeight();
			}
		} else {
			$result['thumberror'] = "Presumably, invalid parameters, despite validation.";
		}

		$ret[] = $result;
	}

	return $ret;
}
/**
 * Get the shared legacy parser ready for a preprocessing or extension tag
 * call on the given page, resetting its state only when the page changed.
 *
 * @param IPageConfig $pageConfig
 * @param int $outputType
 * @return Parser
 */
private function prepareParser( IPageConfig $pageConfig, int $outputType ) {
	'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig

	// State is cleared only when a different PageConfig object shows up, so
	// that Parser's internal caches can be retained between calls. This
	// should also provide better compatibility with extension tags.
	$pageChanged = $this->previousPageConfig !== $pageConfig;
	$this->previousPageConfig = $pageConfig;

	$legacyParser = $this->parser;
	$legacyParser->startExternalParse(
		Title::newFromText( $pageConfig->getTitle() ),
		$pageConfig->getParserOptions(),
		$outputType,
		$pageChanged,
		$pageConfig->getRevisionId()
	);
	$legacyParser->resetOutput();

	// The PPFrame object is kept between preprocess requests since it
	// contains some useful caches; it is only replaced on a page change.
	if ( $pageChanged ) {
		$this->ppFrame = $legacyParser->getPreprocessor()->newFrame();
	}

	return $legacyParser;
}
/** @inheritDoc */
public function doPst( IPageConfig $pageConfig, string $wikitext ): string {
	'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig

	// prepareParser() is not needed here: this runs once per page and
	// goes through the content transformer, not the shared parser.
	$pageTitle = Title::newFromText( $pageConfig->getTitle() );
	$performer = $pageConfig->getParserOptions()->getUserIdentity();
	$wikitextContent = ContentHandler::makeContent( $wikitext, $pageTitle, CONTENT_MODEL_WIKITEXT );

	$transformed = $this->contentTransformer->preSaveTransform(
		$wikitextContent,
		$pageTitle,
		$performer,
		$pageConfig->getParserOptions()
	);

	return $transformed->serialize();
}
/** @inheritDoc */
public function parseWikitext(
	IPageConfig $pageConfig,
	ContentMetadataCollector $metadata,
	string $wikitext
): string {
	$legacyParser = $this->prepareParser( $pageConfig, Parser::OT_HTML );
	$renderedHtml = $legacyParser->parseExtensionTagAsTopLevelDoc( $wikitext );

	// XXX: Ideally the legacy parser would eventually write into our
	// ContentMetadataCollector directly. For now a new ParserOutput is
	// created (implicitly in ::prepareParser()/Parser::resetOutput() )
	// and has to be merged manually.
	$parserOutput = $legacyParser->getOutput();
	$parserOutput->setText( $renderedHtml );
	$parserOutput->collectMetadata( $metadata ); # merges $parserOutput into $metadata

	$textOptions = [ 'unwrap' => true ];
	return $parserOutput->getText( $textOptions ); # HTML
}
/** @inheritDoc */
public function preprocessWikitext(
	IPageConfig $pageConfig,
	ContentMetadataCollector $metadata,
	string $wikitext
): string {
	$legacyParser = $this->prepareParser( $pageConfig, Parser::OT_PREPROCESS );

	// Hook handlers receive $wikitext by reference and may modify it.
	$this->hookContainer->run(
		'ParserBeforePreprocess',
		[ $legacyParser, &$wikitext, $legacyParser->getStripState() ]
	);

	$expanded = $legacyParser->replaceVariables( $wikitext, $this->ppFrame );

	// FIXME (T289545): StripState markers protect content that needs to be protected from
	// further "wikitext processing". So, where the result has strip state markers, we
	// actually need to tunnel this content through rather than unwrap and let it go
	// through the rest of the parsoid pipeline. For example, some parser functions might
	// return HTML not wikitext, and where the content might contain wikitext characters,
	// we are now going to potentially mangle that output.
	$expanded = $legacyParser->getStripState()->unstripBoth( $expanded );

	// XXX: Ideally the legacy parser would eventually write into our
	// ContentMetadataCollector directly. For now a new ParserOutput is
	// created (implicitly in ::prepareParser()/Parser::resetOutput() )
	// and has to be merged manually.
	$parserOutput = $legacyParser->getOutput();
	$parserOutput->collectMetadata( $metadata ); # merges $parserOutput into $metadata

	return $expanded;
}
/**
 * @inheritDoc
 *
 * Returns null both when no revision is found for the template and when
 * $title cannot be parsed into a title at all.
 */
public function fetchTemplateSource(
	IPageConfig $pageConfig, string $title
): ?IPageContent {
	'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig
	$titleObj = Title::newFromText( $title );
	if ( !$titleObj ) {
		// Title::newFromText() returns null for invalid titles (see
		// getPageInfo()). PageConfig::fetchRevisionRecordOfTemplate()
		// requires a Title object, so an invalid name is reported as
		// "no such template" instead of raising a TypeError.
		return null;
	}

	// Use the PageConfig to take advantage of custom template
	// fetch hooks like FlaggedRevisions, etc.
	$revRecord = $pageConfig->fetchRevisionRecordOfTemplate( $titleObj );

	return $revRecord ? new PageContent( $revRecord ) : null;
}
/** @inheritDoc */
public function fetchTemplateData( IPageConfig $pageConfig, string $title ): ?array {
	// @todo: Document this hook in MediaWiki / Extension:TemplateData
	$collected = [];
	$this->hookContainer->run(
		'ParserFetchTemplateData', [ [ $title ], &$collected ]
	);

	$entry = $collected[$title] ?? null;
	if ( !$entry ) {
		// Nothing usable was provided for this title.
		return $entry;
	}

	// The hook returns this as a stdclass; a JSON round trip deep-converts
	// it to an associative array.
	return json_decode( json_encode( $entry ), true );
}
/** @inheritDoc */
public function logLinterData( IPageConfig $pageConfig, array $lints ): void {
	global $wgReadOnly;
	if ( $wgReadOnly ) {
		// Nothing is reported while the wiki is read-only.
		return;
	}

	$pageTitle = $pageConfig->getTitle();
	$parsedRevId = $pageConfig->getRevisionId();
	$latestRevId = $this->getPageInfo( $pageConfig, [ $pageTitle ] )[$pageTitle]['revId'];

	// Only send the request if it is the latest revision
	if ( $parsedRevId === null || $parsedRevId !== $latestRevId ) {
		return;
	}

	// @todo: Document this hook in MediaWiki / Extension:Linter
	$this->hookContainer->run(
		'ParserLogLinterData', [ $pageTitle, $parsedRevId, $lints ]
	);
}
}

View file

@ -0,0 +1,241 @@
<?php
/**
* Copyright (C) 2011-2020 Wikimedia Foundation and others.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
namespace MediaWiki\Parser\Parsoid\Config;
use Language;
use MediaWiki\MediaWikiServices;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;
use MediaWiki\Revision\SlotRoleHandler;
use ParserOptions;
use Title;
use Wikimedia\Parsoid\Config\PageConfig as IPageConfig;
use Wikimedia\Parsoid\Config\PageContent as IPageContent;
/**
 * Page-level configuration interface for Parsoid
 *
 * Wraps a Title, an optional RevisionRecord and a ParserOptions object and
 * exposes them through Parsoid's PageConfig API.
 *
 * @todo This belongs in MediaWiki, not Parsoid. We'll move it there when we
 *  get to the point of integrating the two.
 * @todo We should probably deprecate ParserOptions somehow, using a version of
 *  this directly instead.
 */
class PageConfig extends IPageConfig {

	/** @var ParserOptions */
	private $parserOptions;

	/** @var SlotRoleHandler */
	private $slotRoleHandler;

	/** @var Title */
	private $title;

	/** @var ?RevisionRecord */
	private $revision;

	/** @var string|null Language code override; the title's language is used when null */
	private $pagelanguage;

	/** @var string|null Direction override; the page language's direction is used when null */
	private $pagelanguageDir;

	/**
	 * @param ParserOptions $parserOptions
	 * @param SlotRoleHandler $slotRoleHandler Supplies the default content model
	 * @param Title $title Title being parsed
	 * @param ?RevisionRecord $revision
	 * @param ?string $pagelanguage
	 * @param ?string $pagelanguageDir
	 */
	public function __construct(
		ParserOptions $parserOptions,
		SlotRoleHandler $slotRoleHandler, Title $title,
		?RevisionRecord $revision = null, ?string $pagelanguage = null,
		?string $pagelanguageDir = null
	) {
		$this->title = $title;
		$this->revision = $revision;
		$this->parserOptions = $parserOptions;
		$this->slotRoleHandler = $slotRoleHandler;
		$this->pagelanguage = $pagelanguage;
		$this->pagelanguageDir = $pagelanguageDir;
	}

	/**
	 * Get content model
	 * @return string
	 */
	public function getContentModel(): string {
		// @todo Check just the main slot, or all slots, or what?
		$revision = $this->getRevision();
		$mainContent = $revision ? $revision->getContent( SlotRecord::MAIN ) : null;
		if ( $mainContent ) {
			return $mainContent->getModel();
		}

		// Either there is no revision, or the page does have a content model
		// but we can't see it. In the latter case returning the default model
		// is not really correct, but we can't see the content either so it
		// won't matter much what we do here.
		return $this->slotRoleHandler->getDefaultModel( $this->title );
	}

	/**
	 * Whether the revision content uses one of the content models treated
	 * as lintable here (wikitext or 'proofread-page').
	 * @return bool
	 */
	public function hasLintableContentModel(): bool {
		// @todo Check just the main slot, or all slots, or what?
		$content = $this->getRevisionContent();
		if ( !$content ) {
			return false;
		}

		$model = $content->getModel( SlotRecord::MAIN );

		return $model === CONTENT_MODEL_WIKITEXT || $model === 'proofread-page';
	}

	/** @inheritDoc */
	public function getTitle(): string {
		return $this->title->getPrefixedText();
	}

	/** @inheritDoc */
	public function getNs(): int {
		return $this->title->getNamespace();
	}

	/** @inheritDoc */
	public function getPageId(): int {
		return $this->title->getArticleID();
	}

	/** @inheritDoc */
	public function getPageLanguage(): string {
		if ( $this->pagelanguage !== null ) {
			return $this->pagelanguage;
		}

		return $this->title->getPageLanguage()->getCode();
	}

	/**
	 * Helper function: get the Language object corresponding to
	 * PageConfig::getPageLanguage()
	 * @return Language
	 */
	private function getPageLanguageObject(): Language {
		if ( !$this->pagelanguage ) {
			return $this->title->getPageLanguage();
		}

		return MediaWikiServices::getInstance()->getLanguageFactory()
			->getLanguage( $this->pagelanguage );
	}

	/** @inheritDoc */
	public function getPageLanguageDir(): string {
		if ( $this->pagelanguageDir !== null ) {
			return $this->pagelanguageDir;
		}

		return $this->getPageLanguageObject()->getDir();
	}

	/**
	 * @return ParserOptions
	 */
	public function getParserOptions(): ParserOptions {
		return $this->parserOptions;
	}

	/**
	 * Use ParserOptions::getTemplateCallback() to fetch the correct
	 * (usually latest) RevisionRecord for the given title.
	 *
	 * @param Title $title
	 * @return ?RevisionRecord
	 */
	public function fetchRevisionRecordOfTemplate( Title $title ): ?RevisionRecord {
		// See Parser::fetchTemplateAndTitle(), but stateless
		// (Parsoid will track dependencies, etc, itself.)
		// The callback defaults to Parser::statelessFetchTemplate()
		$fetched = call_user_func(
			$this->parserOptions->getTemplateCallback(), $title, $this
		);

		return $fetched['revision-record'] ?? null;
	}

	/**
	 * @return ?RevisionRecord
	 */
	private function getRevision(): ?RevisionRecord {
		return $this->revision;
	}

	/** @inheritDoc */
	public function getRevisionId(): ?int {
		$revision = $this->getRevision();
		if ( !$revision ) {
			return null;
		}

		return $revision->getId();
	}

	/** @inheritDoc */
	public function getParentRevisionId(): ?int {
		$revision = $this->getRevision();
		if ( !$revision ) {
			return null;
		}

		return $revision->getParentId();
	}

	/** @inheritDoc */
	public function getRevisionTimestamp(): ?string {
		$revision = $this->getRevision();
		if ( !$revision ) {
			return null;
		}

		return $revision->getTimestamp();
	}

	/** @inheritDoc */
	public function getRevisionUser(): ?string {
		$revision = $this->getRevision();
		$author = $revision ? $revision->getUser() : null;
		if ( !$author ) {
			return null;
		}

		return $author->getName();
	}

	/** @inheritDoc */
	public function getRevisionUserId(): ?int {
		$revision = $this->getRevision();
		$author = $revision ? $revision->getUser() : null;
		if ( !$author ) {
			return null;
		}

		return $author->getId();
	}

	/** @inheritDoc */
	public function getRevisionSha1(): ?string {
		$revision = $this->getRevision();
		if ( !$revision ) {
			return null;
		}

		// This matches what the Parsoid/JS gets from the API
		// FIXME: Maybe we don't need to do this in the future?
		return \Wikimedia\base_convert( $revision->getSha1(), 36, 16, 40 );
	}

	/** @inheritDoc */
	public function getRevisionSize(): ?int {
		$revision = $this->getRevision();
		if ( !$revision ) {
			return null;
		}

		return $revision->getSize();
	}

	/** @inheritDoc */
	public function getRevisionContent(): ?IPageContent {
		$revision = $this->getRevision();
		if ( !$revision ) {
			return null;
		}

		return new PageContent( $revision );
	}
}

View file

@ -0,0 +1,182 @@
<?php
/**
* Copyright (C) 2011-2020 Wikimedia Foundation and others.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
namespace MediaWiki\Parser\Parsoid\Config;
use MediaWiki\Linker\LinkTarget;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\Revision\MutableRevisionRecord;
use MediaWiki\Revision\RevisionAccessException;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\RevisionStore;
use MediaWiki\Revision\SlotRecord;
use MediaWiki\Revision\SlotRoleRegistry;
use MediaWiki\User\UserIdentity;
use ParserOptions;
use Title;
use Wikimedia\Parsoid\Config\Api\PageConfig as ApiPageConfig;
use WikitextContent;
class PageConfigFactory extends \Wikimedia\Parsoid\Config\PageConfigFactory {
/** @var RevisionStore */
private $revisionStore;
/** @var SlotRoleRegistry */
private $slotRoleRegistry;
/**
* Store the services that create() relies on.
*
* @param RevisionStore $revisionStore Used by create() to look up the
*   revision record of the page.
* @param SlotRoleRegistry $slotRoleRegistry Used by create() to obtain the
*   role handler of the main slot.
*/
public function __construct(
RevisionStore $revisionStore,
SlotRoleRegistry $slotRoleRegistry
) {
$this->revisionStore = $revisionStore;
$this->slotRoleRegistry = $slotRoleRegistry;
}
/**
* Create a new PageConfig.
*
* Note that Parsoid isn't supposed to use the user context by design; all
* user-specific processing is expected to be introduced as a post-parse
* transform. The $user parameter is therefore usually null, especially
* in background job parsing, although there are corner cases during
* extension processing where a non-null $user could affect the output.
*
* @param LinkTarget $title The page represented by the PageConfig.
* @param ?UserIdentity $user User who is doing rendering (for parsing options).
* @param ?int $revisionId The revision of the page.
* @param ?string $wikitextOverride Wikitext to use instead of the
* contents of the specific $revision; used when $revision is null
* (a new page) or when we are parsing a stashed text.
* @param ?string $pagelanguageOverride
* @param ?array $parsoidSettings Used to enable the debug API if requested
* @return \Wikimedia\Parsoid\Config\PageConfig
*/
public function create(
	LinkTarget $title, ?UserIdentity $user = null, ?int $revisionId = null,
	?string $wikitextOverride = null, ?string $pagelanguageOverride = null,
	?array $parsoidSettings = null
): \Wikimedia\Parsoid\Config\PageConfig {
	$pageTitle = Title::newFromLinkTarget( $title );

	// Debug mode: hand everything over to the API-backed PageConfig.
	if ( !empty( $parsoidSettings['debugApi'] ) ) {
		$apiOptions = [
			"title" => $pageTitle->getPrefixedText(),
			"pageContent" => $wikitextOverride,
			"pageLanguage" => $pagelanguageOverride,
			"revid" => $revisionId,
			"loadData" => true,
		];
		return ApiPageConfig::fromSettings( $parsoidSettings, $apiOptions );
	}

	if ( $revisionId !== null ) {
		// A specific revision was requested. The first lookup accesses the
		// replica DB and may (or may not) fail over to the primary DB if
		// the revision isn't found.
		$revisionRecord = $this->revisionStore->getRevisionById( $revisionId );
		if ( $revisionRecord === null ) {
			// This revision really ought to exist. Check the primary DB.
			// This *could* cause two requests to the primary DB if there
			// were pending writes, but this codepath should be very rare.
			// [T259855]
			$revisionRecord = $this->revisionStore->getRevisionById(
				$revisionId, RevisionStore::READ_LATEST
			);
			$success = ( $revisionRecord === null ) ? 'failure' : 'success';
			// The retry is logged whether or not it found the revision.
			LoggerFactory::getInstance( 'Parsoid' )->error(
				"Retried revision fetch after failure: {$success}", [
					'id' => $revisionId,
					'title' => $pageTitle->getPrefixedText(),
				]
			);
			if ( $revisionRecord === null ) {
				throw new RevisionAccessException( "Can't find revision {$revisionId}" );
			}
		}
	} else {
		// No id given: use the 'latest' revision for the title.
		// Note: This initial fetch of the page context revision is
		// *not* using Parser::fetchCurrentRevisionRecordOfTitle()
		// (which usually invokes Parser::statelessFetchRevisionRecord
		// and from there RevisionStore::getKnownCurrentRevision)
		// because we don't have a Parser object to give to that callback.
		// We could create one if needed for greater compatibility.
		$current = $this->revisionStore->getKnownCurrentRevision( $pageTitle );
		// A falsy lookup result is normalised to null; that is the case
		// when no page with that title exists yet.
		$revisionRecord = $current ?: null;
	}

	// If we have a revision record, check that we are allowed to see it.
	// Mirrors the check from RevisionRecord::getContent
	if ( $revisionRecord !== null ) {
		$visibleToPublic = $revisionRecord->audienceCan(
			RevisionRecord::DELETED_TEXT, RevisionRecord::FOR_PUBLIC
		);
		if ( !$visibleToPublic ) {
			throw new RevisionAccessException( 'Not an available content version.' );
		}
	}

	if ( $wikitextOverride !== null ) {
		// PORT-FIXME deriving a child revision is not really the right
		// thing to do when a record exists; need a clone-like
		// constructor for MutableRevisionRecord
		$revisionRecord = $revisionRecord
			? MutableRevisionRecord::newFromParentRevision( $revisionRecord )
			: new MutableRevisionRecord( $pageTitle );
		// Replace the main slot with the caller-supplied wikitext.
		$overrideSlot = SlotRecord::newUnsaved(
			SlotRecord::MAIN,
			new WikitextContent( $wikitextOverride )
		);
		$revisionRecord->setSlot( $overrideSlot );
	}

	if ( $user ) {
		$parserOptions = ParserOptions::newFromUser( $user );
	} else {
		$parserOptions = ParserOptions::newFromAnon();
	}
	// Turn off some options since Parsoid/JS currently doesn't
	// do anything with this. As we proceed with closer integration,
	// we can figure out if there is any value to these limit reports.
	$parserOptions->setOption( 'enableLimitReport', false );

	$mainRoleHandler = $this->slotRoleRegistry->getRoleHandler( SlotRecord::MAIN );

	return new PageConfig(
		$parserOptions,
		$mainRoleHandler,
		$pageTitle,
		$revisionRecord,
		$pagelanguageOverride
	);
}
}

View file

@ -0,0 +1,91 @@
<?php
/**
* Copyright (C) 2011-2020 Wikimedia Foundation and others.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
namespace MediaWiki\Parser\Parsoid\Config;
use InvalidArgumentException;
use MediaWiki\Revision\RevisionRecord;
use Wikimedia\Parsoid\Config\PageContent as IPageContent;
/**
 * PageContent implementation for MediaWiki
 *
 * Thin adapter that answers Parsoid's PageContent queries from the slots
 * of a single RevisionRecord.
 *
 * @note An earlier todo asked for this class to be moved from Parsoid into
 *  MediaWiki; this copy lives in the MediaWiki\Parser\Parsoid\Config
 *  namespace.
 */
class PageContent extends IPageContent {
	/** @var RevisionRecord Revision whose slots are exposed */
	private $revision;

	/**
	 * @param RevisionRecord $rev
	 */
	public function __construct( RevisionRecord $rev ) {
		$this->revision = $rev;
	}

	/** @inheritDoc */
	public function getRoles(): array {
		return $this->revision->getSlotRoles();
	}

	/** @inheritDoc */
	public function hasRole( string $role ): bool {
		return $this->revision->hasSlot( $role );
	}

	/**
	 * Guard used by the per-role accessors: reject roles the revision
	 * has no slot for.
	 *
	 * @param string $role
	 * @throws InvalidArgumentException
	 */
	private function assertRoleExists( string $role ): void {
		if ( $this->revision->hasSlot( $role ) ) {
			return;
		}
		throw new InvalidArgumentException( "PageContent does not have role '$role'" );
	}

	/** @inheritDoc */
	public function getModel( string $role ): string {
		$this->assertRoleExists( $role );
		$slotContent = $this->revision->getContent( $role );
		return $slotContent->getModel();
	}

	/** @inheritDoc */
	public function getFormat( string $role ): string {
		$this->assertRoleExists( $role );
		$slotContent = $this->revision->getContent( $role );
		return $slotContent->getDefaultFormat();
	}

	/** @inheritDoc */
	public function getContent( string $role ): string {
		$this->assertRoleExists( $role );
		$slotContent = $this->revision->getContent( $role );
		return $slotContent->serialize();
	}

	/** @inheritDoc */
	public function getRedirectTarget(): ?string {
		// Only the 'main' slot is consulted for a redirect.
		$mainContent = $this->revision->getContent( 'main' );
		if ( !$mainContent ) {
			return null;
		}
		$redirect = $mainContent->getRedirectTarget();
		if ( !$redirect ) {
			return null;
		}
		return $redirect->getPrefixedDBkey();
	}
}

View file

@ -0,0 +1,729 @@
<?php
/**
* Copyright (C) 2011-2020 Wikimedia Foundation and others.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
// NO_PRELOAD -- anonymous class in parent
namespace MediaWiki\Parser\Parsoid\Config;
use Config;
use ExtensionRegistry;
use Language;
use LanguageConverter;
use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
use MagicWordArray;
use MagicWordFactory;
use MediaWiki\Config\ServiceOptions;
use MediaWiki\HookContainer\HookContainer;
use MediaWiki\Interwiki\InterwikiLookup;
use MediaWiki\Languages\LanguageConverterFactory;
use MediaWiki\Languages\LanguageFactory;
use MediaWiki\Languages\LanguageNameUtils;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\SpecialPage\SpecialPageFactory;
use MediaWiki\User\UserOptionsLookup;
use MutableConfig;
use MWException;
use NamespaceInfo;
use Parser;
use ParserOutput;
use PrefixingStatsdDataFactoryProxy;
use Psr\Log\LoggerInterface;
use Title;
use UnexpectedValueException;
use WikiMap;
use Wikimedia\ObjectFactory\ObjectFactory;
use Wikimedia\Parsoid\Config\SiteConfig as ISiteConfig;
use Wikimedia\Parsoid\Core\ContentMetadataCollector;
use Wikimedia\Parsoid\DOM\Document;
use Wikimedia\Parsoid\Utils\Utils;
/**
 * Site-level configuration for Parsoid
 *
 * This includes both global configuration and wiki-level configuration.
 *
 * @note An earlier todo asked for this class to be moved from Parsoid into
 *  MediaWiki; this copy lives in the MediaWiki\Parser\Parsoid\Config
 *  namespace.
 */
class SiteConfig extends ISiteConfig {

	/**
	 * Regular expression fragment for matching wikitext comments.
	 * Meant for inclusion in other regular expressions.
	 */
	protected const COMMENT_REGEXP_FRAGMENT = '<!--(?>[\s\S]*?-->)';

	/** Option names that the ServiceOptions passed to the constructor must provide. */
	public const CONSTRUCTOR_OPTIONS = [
		'GalleryOptions',
		'AllowExternalImages',
		'AllowExternalImagesFrom',
		'Server',
		'ArticlePath',
		'InterwikiMagic',
		'ExtraInterlanguageLinkPrefixes',
		'LocalInterwikis',
		'LanguageCode',
		'NamespaceAliases',
		'UrlProtocols',
		'Script',
		'ScriptPath',
		'LoadScript',
		'LocalTZoffset',
		'ThumbLimits',
		'MaxTemplateDepth',
	];

	/** @var ServiceOptions */
	private $config;

	/** @var Config */
	private $optionalConfig;

	/** @var array Parsoid-specific options array from $config */
	private $parsoidSettings;

	/** @var Language */
	private $contLang;

	/** @var StatsdDataFactoryInterface */
	private $stats;

	/** @var StatsdDataFactoryInterface|null Prefixing proxy built on first use by metrics() */
	private $prefixedStatsProxy;

	/** @var MagicWordFactory */
	private $magicWordFactory;

	/** @var NamespaceInfo */
	private $namespaceInfo;

	/** @var SpecialPageFactory */
	private $specialPageFactory;

	/** @var InterwikiLookup */
	private $interwikiLookup;

	/** @var Parser */
	private $parser;

	/** @var UserOptionsLookup */
	private $userOptionsLookup;

	/** @var ObjectFactory */
	private $objectFactory;

	/** @var LanguageFactory */
	private $languageFactory;

	/** @var LanguageConverterFactory */
	private $languageConverterFactory;

	/** @var LanguageNameUtils */
	private $languageNameUtils;

	/** @var string|null */
	private $baseUri;

	/** @var string|null */
	private $relativeLinkPrefix;

	/** @var array|null */
	private $interwikiMap;

	/** @var array|null */
	private $variants;

	/** @var array|null Populated on first use by getNonNativeExtensionTags() */
	private $extensionTags;

	/**
	 * @param ServiceOptions $config MediaWiki main configuration object
	 * @param array $parsoidSettings Parsoid-specific options array from main configuration.
	 * @param ObjectFactory $objectFactory
	 * @param Language $contentLanguage Content language.
	 * @param StatsdDataFactoryInterface $stats
	 * @param MagicWordFactory $magicWordFactory
	 * @param NamespaceInfo $namespaceInfo
	 * @param SpecialPageFactory $specialPageFactory
	 * @param InterwikiLookup $interwikiLookup
	 * @param UserOptionsLookup $userOptionsLookup
	 * @param LanguageFactory $languageFactory
	 * @param LanguageConverterFactory $languageConverterFactory
	 * @param LanguageNameUtils $languageNameUtils
	 * @param Parser $parser
	 * @param Config $optionalConfig
	 * @param HookContainer $hookContainer
	 */
	public function __construct(
		ServiceOptions $config,
		array $parsoidSettings,
		ObjectFactory $objectFactory,
		Language $contentLanguage,
		StatsdDataFactoryInterface $stats,
		MagicWordFactory $magicWordFactory,
		NamespaceInfo $namespaceInfo,
		SpecialPageFactory $specialPageFactory,
		InterwikiLookup $interwikiLookup,
		UserOptionsLookup $userOptionsLookup,
		LanguageFactory $languageFactory,
		LanguageConverterFactory $languageConverterFactory,
		LanguageNameUtils $languageNameUtils,
		// These arguments are temporary and will be removed once
		// better solutions are found.
		Parser $parser, // T268776
		Config $optionalConfig, // T268777
		HookContainer $hookContainer // T300546
	) {
		parent::__construct();

		$config->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		$this->config = $config;
		$this->optionalConfig = $optionalConfig;
		$this->parsoidSettings = $parsoidSettings;

		$this->objectFactory = $objectFactory;
		$this->contLang = $contentLanguage;
		$this->stats = $stats;
		$this->magicWordFactory = $magicWordFactory;
		$this->namespaceInfo = $namespaceInfo;
		$this->specialPageFactory = $specialPageFactory;
		$this->interwikiLookup = $interwikiLookup;
		$this->parser = $parser;
		$this->userOptionsLookup = $userOptionsLookup;
		$this->languageFactory = $languageFactory;
		$this->languageConverterFactory = $languageConverterFactory;
		$this->languageNameUtils = $languageNameUtils;

		// Override parent default
		if ( isset( $this->parsoidSettings['linting'] ) ) {
			// @todo: Add this setting to MW's DefaultSettings.php
			$this->linterEnabled = $this->parsoidSettings['linting'];
		}

		// Array union: keys present in the Parsoid settings win, the
		// existing values fill in the rest.
		if ( isset( $this->parsoidSettings['wt2htmlLimits'] ) ) {
			$this->wt2htmlLimits = $this->parsoidSettings['wt2htmlLimits'] + $this->wt2htmlLimits;
		}
		if ( isset( $this->parsoidSettings['html2wtLimits'] ) ) {
			$this->html2wtLimits = $this->parsoidSettings['html2wtLimits'] + $this->html2wtLimits;
		}

		// Register extension modules
		// TODO: inject this (T257586)
		$parsoidModules = ExtensionRegistry::getInstance()->getAttribute( 'ParsoidModules' );
		foreach ( $parsoidModules as $configOrSpec ) {
			$this->registerExtensionModule( $configOrSpec );
		}

		// This is a temporary hook and will be removed! T300546
		$hookContainer->run( 'ParsoidSiteConfigInit', [ $this ] );
	}

	/** @inheritDoc */
	public function getObjectFactory(): ObjectFactory {
		return $this->objectFactory;
	}

	/** @inheritDoc */
	public function getLogger(): LoggerInterface {
		// TODO: inject
		if ( $this->logger === null ) {
			$this->logger = LoggerFactory::getInstance( 'Parsoid' );
		}
		return $this->logger;
	}

	/**
	 * Stats factory whose keys are prefixed with the 'metricsPrefix'
	 * Parsoid setting ('Parsoid.' when unset).
	 *
	 * The proxy is cached per instance. It used to live in a function-level
	 * static, which every SiteConfig object shares, so the first instance's
	 * stats backend and prefix were silently reused by all later instances.
	 *
	 * @return StatsdDataFactoryInterface|null
	 */
	public function metrics(): ?StatsdDataFactoryInterface {
		// TODO: inject
		if ( $this->prefixedStatsProxy === null ) {
			$this->prefixedStatsProxy = new PrefixingStatsdDataFactoryProxy(
				// Our stats will also get prefixed with 'MediaWiki.'
				$this->stats,
				$this->parsoidSettings['metricsPrefix'] ?? 'Parsoid.'
			);
		}
		return $this->prefixedStatsProxy;
	}

	/**
	 * Value of the 'nativeGalleryEnabled' Parsoid setting; false when unset.
	 * @return bool
	 */
	public function nativeGalleryEnabled(): bool {
		return $this->parsoidSettings['nativeGalleryEnabled'] ?? false;
	}

	/**
	 * Value of the 'GalleryOptions' configuration option.
	 * @return array
	 */
	public function galleryOptions(): array {
		return $this->config->get( 'GalleryOptions' );
	}

	/**
	 * URL prefixes from which external images are allowed.
	 *
	 * @return array [ '' ] when 'AllowExternalImages' is set; otherwise the
	 *  'AllowExternalImagesFrom' value cast to an array, or [] when that
	 *  option is empty.
	 */
	public function allowedExternalImagePrefixes(): array {
		if ( $this->config->get( 'AllowExternalImages' ) ) {
			return [ '' ];
		} else {
			$allowFrom = $this->config->get( 'AllowExternalImagesFrom' );
			return $allowFrom ? (array)$allowFrom : [];
		}
	}

	/**
	 * Determine the article base URI and relative prefix
	 *
	 * Populates `$this->baseUri` and `$this->relativeLinkPrefix` based on
	 * `$wgServer` and `$wgArticlePath`, by splitting it at the last '/' in the
	 * path portion.
	 *
	 * @throws UnexpectedValueException if the combined URL does not end in
	 *  '$1' or cannot be parsed
	 */
	private function determineArticlePath(): void {
		$url = $this->config->get( 'Server' ) . $this->config->get( 'ArticlePath' );

		if ( substr( $url, -2 ) !== '$1' ) {
			throw new UnexpectedValueException( "Article path '$url' does not have '$1' at the end" );
		}
		$url = substr( $url, 0, -2 );

		$bits = wfParseUrl( $url );
		if ( !$bits ) {
			throw new UnexpectedValueException( "Failed to parse article path '$url'" );
		}

		if ( empty( $bits['path'] ) ) {
			$path = '/';
		} else {
			$path = wfRemoveDotSegments( $bits['path'] );
		}

		// Query and fragment go with the relative prefix; every other URL
		// component stays with the base URI.
		$relParts = [ 'query' => true, 'fragment' => true ];
		$base = array_diff_key( $bits, $relParts );
		$rel = array_intersect_key( $bits, $relParts );

		$i = strrpos( $path, '/' );
		$base['path'] = substr( $path, 0, $i + 1 );
		$rel['path'] = '.' . substr( $path, $i );

		$this->baseUri = wfAssembleUrl( $base );
		$this->relativeLinkPrefix = wfAssembleUrl( $rel );
	}

	/**
	 * Base URI for article links, computed on first use.
	 * @return string
	 */
	public function baseURI(): string {
		if ( $this->baseUri === null ) {
			$this->determineArticlePath();
		}
		return $this->baseUri;
	}

	/**
	 * Relative prefix for article links, computed on first use.
	 * @return string
	 */
	public function relativeLinkPrefix(): string {
		if ( $this->relativeLinkPrefix === null ) {
			$this->determineArticlePath();
		}
		return $this->relativeLinkPrefix;
	}

	/**
	 * This is very similar to MagicWordArray::getBaseRegex() except we
	 * don't emit the named grouping constructs, which can cause havoc
	 * when embedded in other regexps with grouping constructs.
	 *
	 * @param MagicWordArray $magicWordArray
	 * @param string $delimiter
	 * @return string
	 */
	private static function mwaToRegex(
		MagicWordArray $magicWordArray,
		string $delimiter = '/'
	): string {
		return implode( '|', $magicWordArray->getBaseRegex( false, $delimiter ) );
	}

	/**
	 * Regular expression built from the 'redirect' magic word.
	 * @return string
	 */
	public function redirectRegexp(): string {
		$redirect = self::mwaToRegex( $this->magicWordFactory->newArray( [ 'redirect' ] ), '@' );
		return "@$redirect@Su";
	}

	/**
	 * Case-insensitive regular expression matching the canonical category
	 * namespace name and the content language's aliases for NS_CATEGORY.
	 * @return string
	 */
	public function categoryRegexp(): string {
		$canon = $this->namespaceInfo->getCanonicalName( NS_CATEGORY );
		$result = [ $canon ];
		foreach ( $this->contLang->getNamespaceAliases() as $alias => $ns ) {
			if ( $ns === NS_CATEGORY && $alias !== $canon ) {
				$result[] = $alias;
			}
		}
		$category = implode( '|', array_map( function ( $v ) {
			return $this->quoteTitleRe( $v, '@' );
		}, $result ) );
		return "@(?i:$category)@";
	}

	/**
	 * Regular expression built from the double-underscore magic words,
	 * with the underscores themselves stripped.
	 * @return string
	 */
	public function bswRegexp(): string {
		$bsw = self::mwaToRegex( $this->magicWordFactory->getDoubleUnderscoreArray(), '@' );
		// Aliases for double underscore mws include the underscores
		// So, strip them since the base regexp will have included them
		// and they aren't expected at the use sites of bswRegexp
		$bsw = str_replace( '__', '', $bsw );
		return "@$bsw@Su";
	}

	/** @inheritDoc */
	public function canonicalNamespaceId( string $name ): ?int {
		$ret = $this->namespaceInfo->getCanonicalIndex( $name );
		return $ret === false ? null : $ret;
	}

	/** @inheritDoc */
	public function namespaceId( string $name ): ?int {
		$ret = $this->contLang->getNsIndex( $name );
		return $ret === false ? null : $ret;
	}

	/** @inheritDoc */
	public function namespaceName( int $ns ): ?string {
		$ret = $this->contLang->getFormattedNsText( $ns );
		// An empty name is only legitimate for the main namespace.
		return $ret === '' && $ns !== NS_MAIN ? null : $ret;
	}

	/** @inheritDoc */
	public function namespaceHasSubpages( int $ns ): bool {
		return $this->namespaceInfo->hasSubpages( $ns );
	}

	/** @inheritDoc */
	public function namespaceCase( int $ns ): string {
		return $this->namespaceInfo->isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive';
	}

	/** @inheritDoc */
	public function namespaceIsTalk( int $ns ): bool {
		return $this->namespaceInfo->isTalk( $ns );
	}

	/** @inheritDoc */
	public function ucfirst( string $str ): string {
		return $this->contLang->ucfirst( $str );
	}

	/** @inheritDoc */
	public function specialPageLocalName( string $alias ): ?string {
		$aliases = $this->specialPageFactory->resolveAlias( $alias );
		// When the alias does not resolve, hand the input back unchanged.
		return $aliases[0] !== null ? $this->specialPageFactory->getLocalNameFor( ...$aliases ) : $alias;
	}

	/**
	 * Value of the 'InterwikiMagic' configuration option.
	 * @return bool
	 */
	public function interwikiMagic(): bool {
		return $this->config->get( 'InterwikiMagic' );
	}

	/**
	 * Interwiki map keyed by prefix, built on first use and cached.
	 *
	 * Each entry has 'prefix' and 'url', plus the flags 'protorel',
	 * 'local', 'language', 'localinterwiki' and 'extralanglink' where
	 * they apply.
	 *
	 * @return array
	 */
	public function interwikiMap(): array {
		// Unfortunate that this mostly duplicates \ApiQuerySiteinfo::appendInterwikiMap()
		if ( $this->interwikiMap === null ) {
			$this->interwikiMap = [];

			$getPrefixes = $this->interwikiLookup->getAllPrefixes();
			$langNames = $this->languageNameUtils->getLanguageNames();
			$extraLangPrefixes = $this->config->get( 'ExtraInterlanguageLinkPrefixes' );
			$localInterwikis = $this->config->get( 'LocalInterwikis' );

			foreach ( $getPrefixes as $row ) {
				$prefix = $row['iw_prefix'];
				$val = [];
				$val['prefix'] = $prefix;
				// ApiQuerySiteInfo::appendInterwikiMap uses PROTO_CURRENT here,
				// but that's the 'current' protocol *of the API request*; use
				// PROTO_CANONICAL instead.
				$val['url'] = wfExpandUrl( $row['iw_url'], PROTO_CANONICAL );

				// Fix up broken interwiki hrefs that are missing a $1 placeholder
				// Just append the placeholder at the end.
				// This makes sure that the interwikiMatcher adds one match
				// group per URI, and that interwiki links work as expected.
				if ( strpos( $val['url'], '$1' ) === false ) {
					$val['url'] .= '$1';
				}

				if ( substr( $row['iw_url'], 0, 2 ) == '//' ) {
					$val['protorel'] = true;
				}
				if ( isset( $row['iw_local'] ) && $row['iw_local'] == '1' ) {
					$val['local'] = true;
				}
				if ( isset( $langNames[$prefix] ) ) {
					$val['language'] = true;
				}
				if ( in_array( $prefix, $localInterwikis, true ) ) {
					$val['localinterwiki'] = true;
				}
				if ( in_array( $prefix, $extraLangPrefixes, true ) ) {
					$val['extralanglink'] = true;

					/**
					 * ApiQuerySiteinfo adds a 'linktext' field, but Parsoid
					 * doesn't use this -- and because it uses wfMessage()
					 * it implicitly uses a MessageCache which would have to
					 * be injected here.
					 */
					// $linktext = wfMessage( "interlanguage-link-$prefix" );
					// if ( !$linktext->isDisabled() ) {
					// $val['linktext'] = $linktext->text();
					// }
				}

				$this->interwikiMap[$prefix] = $val;
			}
		}
		return $this->interwikiMap;
	}

	/**
	 * Current wiki ID as reported by WikiMap::getCurrentWikiId().
	 * @return string
	 */
	public function iwp(): string {
		return WikiMap::getCurrentWikiId();
	}

	/**
	 * Result of Title::legalChars().
	 * @return string
	 */
	public function legalTitleChars(): string {
		return Title::legalChars();
	}

	/**
	 * Regular expression for link prefixes, built from the content
	 * language's link prefix charset.
	 * @return string|null Null when the content language has no link
	 *  prefix extension.
	 */
	public function linkPrefixRegex(): ?string {
		if ( !$this->contLang->linkPrefixExtension() ) {
			return null;
		}
		return '/[' . $this->contLang->linkPrefixCharset() . ']+$/Du';
	}

	/** @inheritDoc */
	protected function linkTrail(): string {
		return $this->contLang->linkTrail();
	}

	/**
	 * Value of the 'LanguageCode' configuration option.
	 * @return string
	 */
	public function lang(): string {
		return $this->config->get( 'LanguageCode' );
	}

	/**
	 * Prefixed text of the wiki's main page title.
	 * @return string
	 */
	public function mainpage(): string {
		// @todo Perhaps should inject TitleFactory here?
		return Title::newMainPage()->getPrefixedText();
	}

	/**
	 * Responsive-references settings.
	 * @return array 'enabled' is the 'CiteResponsiveReferences' option
	 *  when the optional config has it, false otherwise; 'threshold' is
	 *  fixed at 10.
	 */
	public function responsiveReferences(): array {
		// @todo This is from the Cite extension, which shouldn't be known about by core
		// T268777
		return [
			'enabled' => $this->optionalConfig->has( 'CiteResponsiveReferences' ) ?
				$this->optionalConfig->get( 'CiteResponsiveReferences' ) : false,
			'threshold' => 10,
		];
	}

	/**
	 * Whether the content language is right-to-left.
	 * @return bool
	 */
	public function rtl(): bool {
		return $this->contLang->isRTL();
	}

	/** @inheritDoc */
	public function langConverterEnabled( string $lang ): bool {
		if ( $this->languageConverterFactory->isConversionDisabled() ) {
			return false;
		}
		if ( !in_array( $lang, LanguageConverter::$languagesWithVariants, true ) ) {
			return false;
		}
		try {
			$langObject = $this->languageFactory->getLanguage( $lang );
			$converter = $this->languageConverterFactory->getLanguageConverter( $langObject );
			return $converter->hasVariants();
		} catch ( MWException $ex ) {
			// Probably a syntactically invalid language code
			return false;
		}
	}

	/**
	 * Value of the 'Script' configuration option.
	 * @return string
	 */
	public function script(): string {
		return $this->config->get( 'Script' );
	}

	/**
	 * Value of the 'ScriptPath' configuration option.
	 * @return string
	 */
	public function scriptpath(): string {
		return $this->config->get( 'ScriptPath' );
	}

	/**
	 * Value of the 'Server' configuration option.
	 * @return string
	 */
	public function server(): string {
		return $this->config->get( 'Server' );
	}

	/** @inheritDoc */
	public function exportMetadataToHead(
		Document $document,
		ContentMetadataCollector $metadata,
		string $defaultTitle,
		string $lang
	): void {
		'@phan-var ParserOutput $metadata'; // @var ParserOutput $metadata
		// Look for a displaytitle.
		$displayTitle = $metadata->getPageProperty( 'displaytitle' ) ?:
			// Use the default title, properly escaped
			Utils::escapeHtml( $defaultTitle );
		$this->exportMetadataHelper(
			$document,
			$this->config->get( 'LoadScript' ),
			$metadata->getModules(),
			$metadata->getModuleStyles(),
			$metadata->getJsConfigVars(),
			$displayTitle,
			$lang
		);
	}

	/**
	 * Value of the 'LocalTZoffset' configuration option.
	 * @return int
	 */
	public function timezoneOffset(): int {
		return $this->config->get( 'LocalTZoffset' );
	}

	/**
	 * Language variants, built on first use and cached.
	 *
	 * @return array Map of variant code to an array with the 'base'
	 *  language code and the variant's 'fallbacks' (always an array).
	 *  Empty when language conversion is disabled.
	 */
	public function variants(): array {
		if ( $this->variants === null ) {
			$this->variants = [];

			$langNames = LanguageConverter::$languagesWithVariants;
			if ( $this->languageConverterFactory->isConversionDisabled() ) {
				// Ensure result is empty if language conversion is disabled.
				$langNames = [];
			}

			foreach ( $langNames as $langCode ) {
				$lang = $this->languageFactory->getLanguage( $langCode );
				$converter = $this->languageConverterFactory->getLanguageConverter( $lang );
				if ( !$converter->hasVariants() ) {
					continue;
				}

				$variants = $converter->getVariants();
				foreach ( $variants as $v ) {
					$fallbacks = $converter->getVariantFallbacks( $v );
					if ( !is_array( $fallbacks ) ) {
						$fallbacks = [ $fallbacks ];
					}
					$this->variants[$v] = [
						'base' => $langCode,
						'fallbacks' => $fallbacks,
					];
				}
			}
		}
		return $this->variants;
	}

	/**
	 * The 'ThumbLimits' entry selected by the default 'thumbsize' user option.
	 * @return int
	 */
	public function widthOption(): int {
		// Even though this looks like Parsoid is supporting per-user thumbsize
		// options, that is not the case, Parsoid doesn't receive user session state
		$thumbsize = $this->userOptionsLookup->getDefaultOption( 'thumbsize' );
		return $this->config->get( 'ThumbLimits' )[$thumbsize];
	}

	/** @inheritDoc */
	protected function getVariableIDs(): array {
		return $this->magicWordFactory->getVariableIDs();
	}

	/** @inheritDoc */
	protected function getFunctionSynonyms(): array {
		return $this->parser->getFunctionSynonyms();
	}

	/** @inheritDoc */
	protected function getMagicWords(): array {
		return $this->contLang->getMagicWords();
	}

	/** @inheritDoc */
	public function getMagicWordMatcher( string $id ): string {
		return $this->magicWordFactory->get( $id )->getRegexStartToEnd();
	}

	/** @inheritDoc */
	public function getParameterizedAliasMatcher( array $words ): callable {
		// PORT-FIXME: this should be combined with
		// getMediaPrefixParameterizedAliasMatcher; see PORT-FIXME comment
		// in that method.

		// Filter out timedmedia_* unless that extension is loaded, so Parsoid
		// doesn't have a hard dependency on an extension.
		if ( !ExtensionRegistry::getInstance()->isLoaded( 'TimedMediaHandler' ) ) {
			$words = preg_grep( '/^timedmedia_/', $words, PREG_GREP_INVERT );
		}
		$words = $this->magicWordFactory->newArray( $words );
		return static function ( $text ) use ( $words ) {
			$ret = $words->matchVariableStartToEnd( $text );
			if ( $ret[0] === false || $ret[1] === false ) {
				return null;
			} else {
				return [ 'k' => $ret[0], 'v' => $ret[1] ];
			}
		};
	}

	/**
	 * Fill the extension tag cache from the parser's tag list; each tag
	 * name becomes a key mapped to true.
	 */
	private function populateExtensionTags(): void {
		$this->extensionTags = array_fill_keys( $this->parser->getTags(), true );
	}

	/** @inheritDoc */
	protected function getNonNativeExtensionTags(): array {
		if ( $this->extensionTags === null ) {
			$this->populateExtensionTags();
		}
		return $this->extensionTags;
	}

	/** @inheritDoc */
	public function getMaxTemplateDepth(): int {
		return (int)$this->config->get( 'MaxTemplateDepth' );
	}

	/**
	 * Overrides the max template depth in the MediaWiki configuration.
	 *
	 * The override is written to the injected Config object when that is
	 * mutable, otherwise to the $wgMaxTemplateDepth global. The previous
	 * code tested the ServiceOptions object, which is never a
	 * MutableConfig, so the mutable branch could not be reached.
	 *
	 * @param int $depth
	 */
	public function setMaxTemplateDepth( int $depth ): void {
		// Parsoid's command-line tools let you set the max template depth
		// as a CLI argument. Since we currently invoke the legacy
		// preprocessor in some situations, we can't just override
		// ::getMaxTemplateDepth() above, we need to reset the Config
		// service.
		if ( $this->optionalConfig instanceof MutableConfig ) {
			$this->optionalConfig->set( 'MaxTemplateDepth', $depth );
		} else {
			// Fall back on global variable (hopefully we're using
			// a GlobalVarConfig and this will work)
			$GLOBALS['wgMaxTemplateDepth'] = $depth;
		}
	}

	/** @inheritDoc */
	protected function getSpecialNSAliases(): array {
		$nsAliases = [
			'Special',
			$this->quoteTitleRe( $this->contLang->getNsText( NS_SPECIAL ) )
		];
		// Array union: on a duplicate alias the content language's entry wins.
		foreach (
			$this->contLang->getNamespaceAliases() + $this->config->get( 'NamespaceAliases' )
			as $name => $ns
		) {
			if ( $ns === NS_SPECIAL ) {
				$nsAliases[] = $this->quoteTitleRe( $name );
			}
		}

		return $nsAliases;
	}

	/** @inheritDoc */
	protected function getSpecialPageAliases( string $specialPage ): array {
		return array_merge( [ $specialPage ],
			$this->contLang->getSpecialPageAliases()[$specialPage] ?? []
		);
	}

	/** @inheritDoc */
	protected function getProtocols(): array {
		return $this->config->get( 'UrlProtocols' );
	}
}

View file

@ -0,0 +1,66 @@
<?php
/**
* Copyright (C) 2011-2020 Wikimedia Foundation and others.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
declare( strict_types = 1 );
namespace MediaWiki\Parser\Parsoid;
use MediaWiki\MediaWikiServices;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
use Wikimedia\Parsoid\Config\DataAccess;
use Wikimedia\Parsoid\Config\SiteConfig;
/**
 * Thin facade whose getters forward to the MediaWikiServices instance
 * given to the constructor.
 *
 * @deprecated since 1.39. This is a marker class indicating that certain
 * code has been moved from Parsoid to core; it will be removed once the
 * transition is complete. Use MediaWikiServices instead.
 */
class ParsoidServices {

	/** @var MediaWikiServices Container that every getter delegates to */
	private $mwServices;

	/**
	 * @param MediaWikiServices $services
	 */
	public function __construct( MediaWikiServices $services ) {
		$this->mwServices = $services;
	}

	/**
	 * @return DataAccess
	 */
	public function getParsoidDataAccess(): DataAccess {
		$dataAccess = $this->mwServices->getParsoidDataAccess();
		return $dataAccess;
	}

	/**
	 * @return PageConfigFactory
	 */
	public function getParsoidPageConfigFactory(): PageConfigFactory {
		$pageConfigFactory = $this->mwServices->getParsoidPageConfigFactory();
		return $pageConfigFactory;
	}

	/**
	 * @return SiteConfig
	 */
	public function getParsoidSiteConfig(): SiteConfig {
		$siteConfig = $this->mwServices->getParsoidSiteConfig();
		return $siteConfig;
	}
}

View file

@ -0,0 +1,798 @@
<?php
namespace MediaWiki\Tests\Unit\Parser\Parsoid\Config;
use HashConfig;
use ILanguageConverter;
use Language;
use MagicWord;
use MagicWordArray;
use MagicWordFactory;
use MediaWiki\Config\ServiceOptions;
use MediaWiki\HookContainer\HookContainer;
use MediaWiki\Interwiki\InterwikiLookup;
use MediaWiki\Languages\LanguageConverterFactory;
use MediaWiki\Languages\LanguageFactory;
use MediaWiki\Languages\LanguageNameUtils;
use MediaWiki\Parser\Parsoid\Config\SiteConfig;
use MediaWiki\SpecialPage\SpecialPageFactory;
use MediaWiki\User\UserOptionsLookup;
use MediaWikiUnitTestCase;
use MessageCache;
use MWException;
use NamespaceInfo;
use NullStatsdDataFactory;
use Parser;
use UnexpectedValueException;
use Wikimedia\TestingAccessWrapper;
use ZhConverter;
/**
* @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig
* @package MediaWiki\Tests\Unit\Parser\Parsoid\Config
*/
class SiteConfigTest extends MediaWikiUnitTestCase {
private const DEFAULT_CONFIG = [
'GalleryOptions' => [],
'AllowExternalImages' => false,
'AllowExternalImagesFrom' => '',
'Server' => 'localhost',
'ArticlePath' => false,
'InterwikiMagic' => true,
'ExtraInterlanguageLinkPrefixes' => [],
'LocalInterwikis' => [],
'LanguageCode' => 'qqq',
'DisableLangConversion' => false,
'NamespaceAliases' => [],
'UrlProtocols' => [ 'http://' ],
'Script' => false,
'ScriptPath' => '/wiki',
'LoadScript' => false,
'LocalTZoffset' => null,
'ThumbLimits' => [ 4242 ],
'MaxTemplateDepth' => 42,
'LegalTitleChars' => 'abc'
];
/**
 * Pick the caller-supplied stand-in for a service class, or fall back
 * to a no-op mock of that class.
 *
 * @param string $class Class name used as the lookup key
 * @param array $overrides Map of class name to replacement object
 * @return mixed The override when the key exists (even if its value is
 *  null), otherwise the no-op mock
 */
private function createMockOrOverride( string $class, array $overrides ) {
	// array_key_exists rather than isset, so an explicit null override counts.
	return array_key_exists( $class, $overrides )
		? $overrides[$class]
		: $this->createNoOpMock( $class );
}
/**
 * Build a SiteConfig whose collaborators are no-op mocks unless an
 * override is supplied.
 *
 * TODO it might save code to have this helper always return a
 * TestingAccessWrapper?
 *
 * @param array $configOverrides Configuration options overriding default ServiceOptions config defined in
 * DEFAULT_CONFIG above. The same array also backs the optional Config object.
 * @param array $parsoidSettings
 * @param array $serviceOverrides Map of service class name to the object to inject for it
 *
 * @return SiteConfig
 */
private function createSiteConfig(
	array $configOverrides = [],
	array $parsoidSettings = [],
	array $serviceOverrides = []
): SiteConfig {
	$serviceOptions = new ServiceOptions(
		SiteConfig::CONSTRUCTOR_OPTIONS,
		array_replace( self::DEFAULT_CONFIG, $configOverrides )
	);

	// The hook container's run() is stubbed to return true.
	$hookContainer = $this->createMock( HookContainer::class );
	$hookContainer->method( 'run' )->willReturn( true );

	// Resolves one constructor dependency: override if given, else no-op mock.
	$service = function ( string $class ) use ( $serviceOverrides ) {
		return $this->createMockOrOverride( $class, $serviceOverrides );
	};

	return new SiteConfig(
		$serviceOptions,
		$parsoidSettings,
		$this->createSimpleObjectFactory(),
		$service( Language::class ),
		new NullStatsdDataFactory(),
		$service( MagicWordFactory::class ),
		$service( NamespaceInfo::class ),
		$service( SpecialPageFactory::class ),
		$service( InterwikiLookup::class ),
		$service( UserOptionsLookup::class ),
		$service( LanguageFactory::class ),
		$service( LanguageConverterFactory::class ),
		$service( LanguageNameUtils::class ),
		$service( Parser::class ),
		new HashConfig( $configOverrides ),
		$hookContainer
	);
}
/**
 * Data sets for testConfigParametersPassed().
 *
 * Each set is [ config overrides, SiteConfig method name, expected return value ].
 * The two allowedExternalImagePrefixes labels now name the AllowExternalImages
 * value they actually configure; they were previously swapped.
 *
 * @return iterable
 */
public function provideConfigParameterPassed(): iterable {
	yield 'galleryOptions' => [
		[ 'GalleryOptions' => [ 'blabla' ] ],
		'galleryOptions',
		[ 'blabla' ]
	];
	yield 'allowedExternalImagePrefixes, true' => [
		[ 'AllowExternalImages' => true ],
		'allowedExternalImagePrefixes',
		[ '' ]
	];
	yield 'allowedExternalImagePrefixes, false' => [
		[
			'AllowExternalImages' => false,
			'AllowExternalImagesFrom' => [ 'blabla' ]
		],
		'allowedExternalImagePrefixes',
		[ 'blabla' ]
	];
	yield 'interwikiMagic' => [
		[ 'InterwikiMagic' => true ],
		'interwikiMagic',
		true
	];
	yield 'lang' => [
		[ 'LanguageCode' => 'qqx' ],
		'lang',
		'qqx'
	];
	// This is a setting from Cite extension
	yield 'responsiveReferences, absent' => [
		[],
		'responsiveReferences',
		[ 'enabled' => false, 'threshold' => 10 ]
	];
	// This is a setting from Cite extension
	yield 'responsiveReferences, true' => [
		[ 'CiteResponsiveReferences' => true ],
		'responsiveReferences',
		[ 'enabled' => true, 'threshold' => 10 ]
	];
	yield 'script' => [
		[ 'Script' => 'blabla' ],
		'script',
		'blabla'
	];
	yield 'scriptpath' => [
		[ 'ScriptPath' => 'blabla' ],
		'scriptpath',
		'blabla'
	];
	yield 'server' => [
		[ 'Server' => 'blabla' ],
		'server',
		'blabla'
	];
	yield 'timezoneOffset' => [
		[ 'LocalTZoffset' => 42 ],
		'timezoneOffset',
		42
	];
	yield 'getMaxTemplateDepth' => [
		[ 'MaxTemplateDepth' => 42 ],
		'getMaxTemplateDepth',
		42
	];
	/* $wgLegalTitleChars can't be tested with this mechanism.
	yield 'legalTitleChars' => [
		[ 'LegalTitleChars' => 'blabla' ],
		'legalTitleChars',
		'blabla'
	];
	*/
	yield 'getProtocols' => [
		[ 'UrlProtocols' => [ 'blabla' ] ],
		'getProtocols',
		[ 'blabla' ]
	];
}
/**
 * Checks that a SiteConfig method returns the value derived from the
 * given configuration overrides.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::galleryOptions
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::allowedExternalImagePrefixes
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::interwikiMagic
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::lang
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::responsiveReferences
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::script
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::scriptpath
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::server
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::timezoneOffset
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::getMaxTemplateDepth
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::legalTitleChars
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::getProtocols
 * @dataProvider provideConfigParameterPassed
 * @param array $settings Configuration overrides for the SiteConfig under test
 * @param string $method Name of the SiteConfig method to invoke
 * @param mixed $expectedValue Value the method must return (compared with assertSame)
 */
public function testConfigParametersPassed(
	array $settings,
	string $method,
	$expectedValue
) {
	$siteConfig = $this->createSiteConfig( $settings );
	// Wrapped so the method can be invoked by name whatever its visibility.
	$wrapper = TestingAccessWrapper::newFromObject( $siteConfig );
	$actualValue = $wrapper->$method();
	$this->assertSame( $expectedValue, $actualValue );
}
/**
 * Data provider for testParsoidSettingPassed().
 *
 * Declared static (it never touches $this) so that it also works with
 * PHPUnit versions that require static data providers.
 *
 * @return iterable Data sets of [ settings, method name, expected value ]
 */
public static function provideParsoidSettingPassed(): iterable {
	yield 'nativeGalleryEnabled' => [
		[ 'nativeGalleryEnabled' => true ],
		'nativeGalleryEnabled',
		true
	];
}
/**
 * Checks that a value passed as the second argument of createSiteConfig()
 * is returned by the SiteConfig method of the given name.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::nativeGalleryEnabled()
 * @dataProvider provideParsoidSettingPassed
 * @param array $settings Settings handed to createSiteConfig() as its second argument
 * @param string $method Name of the SiteConfig method to invoke (without arguments)
 * @param mixed $expectedValue Value the method must return, compared with assertSame()
 */
public function testParsoidSettingPassed(
	array $settings,
	string $method,
	$expectedValue
) {
	// First argument (MediaWiki configuration) is deliberately left empty.
	$siteConfig = TestingAccessWrapper::newFromObject(
		$this->createSiteConfig( [], $settings )
	);

	$actualValue = $siteConfig->$method();

	$this->assertSame( $expectedValue, $actualValue );
}
/**
 * Data provider for testServiceMethodProxied().
 *
 * Each data set is
 * [ service class, service method, arguments, service return value,
 *   SiteConfig method, expected SiteConfig return value ].
 *
 * Declared static (it never touches $this) so that it also works with
 * PHPUnit versions that require static data providers.
 *
 * @return iterable
 */
public static function provideServiceMethodProxied(): iterable {
	yield 'canonicalNamespaceId' => [
		NamespaceInfo::class, 'getCanonicalIndex', [ 'blabla_arg' ], 42, 'canonicalNamespaceId', 42
	];
	yield 'namespaceId' => [
		Language::class, 'getNsIndex', [ 'blabla_arg' ], 42, 'namespaceId', 42
	];
	yield 'namespaceName, NS_MAIN' => [
		Language::class, 'getFormattedNsText', [ NS_MAIN ], '', 'namespaceName', ''
	];
	// An empty namespace text for a namespace other than NS_MAIN is expected to become null.
	yield 'namespaceName, NS_USER, null' => [
		Language::class, 'getFormattedNsText', [ NS_USER ], '', 'namespaceName', null
	];
	yield 'namespaceName, NS_USER' => [
		Language::class, 'getFormattedNsText', [ NS_USER ], 'User', 'namespaceName', 'User'
	];
	yield 'namespaceHasSubpages' => [
		NamespaceInfo::class, 'hasSubpages', [ 42 ], true, 'namespaceHasSubpages', true
	];
	yield 'namespaceCase, first letter' => [
		NamespaceInfo::class, 'isCapitalized', [ 42 ], true, 'namespaceCase', 'first-letter'
	];
	yield 'namespaceCase, case sensitive' => [
		NamespaceInfo::class, 'isCapitalized', [ 42 ], false, 'namespaceCase', 'case-sensitive'
	];
	yield 'namespaceIsTalk' => [
		NamespaceInfo::class, 'isTalk', [ 42 ], true, 'namespaceIsTalk', true
	];
	yield 'ucfirst' => [
		Language::class, 'ucfirst', [ 'bla' ], 'Bla', 'ucfirst', 'Bla'
	];
	yield 'linkTrail' => [
		Language::class, 'linkTrail', [], 'blabla', 'linkTrail', 'blabla'
	];
	yield 'rtl' => [
		Language::class, 'isRTL', [], true, 'rtl', true
	];
	yield 'getVariableIDs' => [
		MagicWordFactory::class, 'getVariableIDs', [], [ 'blabla' ], 'getVariableIDs', [ 'blabla' ]
	];
	yield 'getFunctionSynonyms' => [
		Parser::class, 'getFunctionSynonyms', [], [ 0 => [ 'blabla' ], 1 => [ 'blabla' ] ],
		'getFunctionSynonyms', [ 0 => [ 'blabla' ], 1 => [ 'blabla' ] ]
	];
	yield 'getMagicWords' => [
		Language::class, 'getMagicWords', [], [ 'blabla' ], 'getMagicWords', [ 'blabla' ]
	];
	// The list of tag names is expected to come back as a map of tag name => true.
	yield 'getNonNativeExtensionTags' => [
		Parser::class, 'getTags', [], [ 'blabla' ], 'getNonNativeExtensionTags', [ 'blabla' => true ]
	];
}
/**
 * Checks that a SiteConfig method calls one method of a mocked service
 * exactly once and turns that service's return value into the expected result.
 *
 * @dataProvider provideServiceMethodProxied
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::canonicalNamespaceId
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::namespaceId
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::namespaceName
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::namespaceHasSubpages
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::namespaceCase
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::namespaceIsTalk
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::ucfirst
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::linkTrail
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::rtl
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::widthOption
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::getVariableIDs
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::getFunctionSynonyms
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::getMagicWords
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::getNonNativeExtensionTags
 * @param string $serviceClass Class of the service to mock
 * @param string $serviceMethod Service method that must be called exactly once
 * @param array $arguments Arguments given to the SiteConfig method and expected by the service
 * @param mixed $returnValue Value the mocked service method returns
 * @param string $method Name of the SiteConfig method under test
 * @param mixed $expectedValue Value the SiteConfig method must return
 */
public function testServiceMethodProxied(
	string $serviceClass,
	string $serviceMethod,
	array $arguments,
	$returnValue,
	string $method,
	$expectedValue
) {
	$service = $this->createMock( $serviceClass );
	$service->expects( $this->once() )
		->method( $serviceMethod )
		->with( ...$arguments )
		->willReturn( $returnValue );

	// Only the service under scrutiny is overridden.
	$serviceOverrides = [ $serviceClass => $service ];
	$siteConfig = TestingAccessWrapper::newFromObject(
		$this->createSiteConfig( [], [], $serviceOverrides )
	);

	$actualValue = $siteConfig->$method( ...$arguments );

	$this->assertSame( $expectedValue, $actualValue );
}
/**
 * Data provider for testArticlePath_exception().
 *
 * Declared static (it never touches $this) so that it also works with
 * PHPUnit versions that require static data providers.
 *
 * @return iterable Data sets of [ article path expected to be rejected ]
 */
public static function provideArticlePath_exception(): iterable {
	yield 'No $1' => [ '/test/test' ];
	yield 'Wrong path' => [ 'test\\test/$1' ];
}
/**
 * Expects an UnexpectedValueException when baseURI() is requested for a
 * SiteConfig created with the given 'ArticlePath' setting.
 *
 * @dataProvider provideArticlePath_exception
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::determineArticlePath
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::baseURI
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::relativeLinkPrefix
 * @param string $articlePath Value used for the 'ArticlePath' setting
 */
public function testArticlePath_exception( string $articlePath ) {
	// The expectation is registered before the configuration object is built.
	$this->expectException( UnexpectedValueException::class );

	$settings = [ 'ArticlePath' => $articlePath ];
	$siteConfig = $this->createSiteConfig( $settings );

	$siteConfig->baseURI();
}
/**
 * Checks baseURI() and relativeLinkPrefix() when 'ArticlePath' is just '$1'.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::determineArticlePath
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::baseURI
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::relativeLinkPrefix
 */
public function testArticlePath_nopath() {
	$settings = [
		'ArticlePath' => '$1',
		'Server' => 'https://localhost'
	];
	$siteConfig = $this->createSiteConfig( $settings );

	// baseURI() is queried first, then relativeLinkPrefix().
	$baseUri = $siteConfig->baseURI();
	$this->assertSame( 'https://localhost/', $baseUri );

	$linkPrefix = $siteConfig->relativeLinkPrefix();
	$this->assertSame( './', $linkPrefix );
}
/**
 * Checks relativeLinkPrefix() and baseURI() for an 'ArticlePath' of '/wiki/$1'.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::determineArticlePath
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::baseURI
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::relativeLinkPrefix
 */
public function testArticlePath() {
	$settings = [
		'ArticlePath' => '/wiki/$1',
		'Server' => 'https://localhost'
	];
	$siteConfig = $this->createSiteConfig( $settings );

	// relativeLinkPrefix() is queried first, then baseURI().
	$linkPrefix = $siteConfig->relativeLinkPrefix();
	$this->assertSame( './', $linkPrefix );

	$baseUri = $siteConfig->baseURI();
	$this->assertSame( 'https://localhost/wiki/', $baseUri );
}
/**
 * Checks the regular expression returned by redirectRegexp() when the mocked
 * MagicWordFactory hands out two magic words: the first constructed with
 * `true` as its third argument, the second with `false`.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::mwaToRegex
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::redirectRegexp
 */
public function testRedirectRegexp() {
	$language = $this->createMock( Language::class );
	$factory = $this->createMock( MagicWordFactory::class );

	$wordArray = new MagicWordArray( [ 'blabla_case_sen', 'blabla_case_insen' ], $factory );
	$factory->method( 'newArray' )->willReturn( $wordArray );

	// Successive get() calls return these two words in this order.
	$firstWord = new MagicWord( 'blabla_id', [ 'blabla_synonym1' ], true, $language );
	$secondWord = new MagicWord( 'blabla_id', [ 'blabla_synonym2' ], false, $language );
	$factory->method( 'get' )->willReturnOnConsecutiveCalls( $firstWord, $secondWord );

	$siteConfig = $this->createSiteConfig( [], [], [ MagicWordFactory::class => $factory ] );

	$expectedRegexp = '@(?i:blabla_synonym2)|blabla_synonym1@Su';
	$this->assertSame( $expectedRegexp, $siteConfig->redirectRegexp() );
}
/**
 * Checks the regular expression returned by categoryRegexp() for a mocked
 * canonical namespace name plus one NS_CATEGORY alias; the alias mapped to
 * NS_MAIN must not show up in the result.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::categoryRegexp
 */
public function testCategoryRegexp() {
	$namespaceInfo = $this->createMock( NamespaceInfo::class );
	$namespaceInfo->method( 'getCanonicalName' )->willReturn( 'Bla bla' );

	$aliases = [ 'Bla_alias' => NS_CATEGORY, 'Ignored' => NS_MAIN ];
	$language = $this->createMock( Language::class );
	$language->method( 'getNamespaceAliases' )->willReturn( $aliases );

	$siteConfig = $this->createSiteConfig( [], [], [
		NamespaceInfo::class => $namespaceInfo,
		Language::class => $language
	] );

	$expectedRegexp = '@(?i:Bla[ _]bla|Bla[ _]alias)@';
	$this->assertSame( $expectedRegexp, $siteConfig->categoryRegexp() );
}
/**
 * Checks the regular expression returned by bswRegexp() when the mocked
 * MagicWordFactory provides a double-underscore array with a single magic word.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::bswRegexp
 */
public function testBswRegexp() {
	$language = $this->createMock( Language::class );
	$factory = $this->createMock( MagicWordFactory::class );

	$doubleUnderscores = new MagicWordArray( [ 'blabla' ], $factory );
	$factory->method( 'getDoubleUnderscoreArray' )->willReturn( $doubleUnderscores );

	$word = new MagicWord( 'blabla_id', [ 'blabla_synonym' ], true, $language );
	$factory->method( 'get' )->willReturn( $word );

	$siteConfig = $this->createSiteConfig( [], [], [ MagicWordFactory::class => $factory ] );

	$expectedRegexp = '@(?i:(?!))|blabla_synonym@Su';
	$this->assertSame( $expectedRegexp, $siteConfig->bswRegexp() );
}
/**
 * Checks that specialPageLocalName() returns what the mocked
 * SpecialPageFactory yields from getLocalNameFor() for the page and subpage
 * that resolveAlias() produced.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::specialPageLocalName
 */
public function testSpecialPageLocalName() {
	$factory = $this->createMock( SpecialPageFactory::class );

	$factory->method( 'resolveAlias' )
		->with( 'blabla_alias' )
		->willReturn( [ 'resolved_page', 'resolved_subpage' ] );

	$factory->method( 'getLocalNameFor' )
		->with( 'resolved_page', 'resolved_subpage' )
		->willReturn( 'blabla' );

	$siteConfig = $this->createSiteConfig( [], [], [ SpecialPageFactory::class => $factory ] );

	$localName = $siteConfig->specialPageLocalName( 'blabla_alias' );
	$this->assertSame( 'blabla', $localName );
}
/**
 * Checks the map returned by interwikiMap() for a single 'ru' prefix that is
 * also listed in 'ExtraInterlanguageLinkPrefixes' and 'LocalInterwikis' and
 * is known to the mocked LanguageNameUtils.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::interwikiMap
 */
public function testInterwikiMap() {
	$interwikiLookup = $this->createMock( InterwikiLookup::class );
	$interwikiLookup->method( 'getAllPrefixes' )->willReturn( [
		[ 'iw_prefix' => 'ru', 'iw_url' => '//test/', 'iw_local' => 1 ]
	] );

	$languageNameUtils = $this->createMock( LanguageNameUtils::class );
	$languageNameUtils->method( 'getLanguageNames' )->willReturn( [ 'ru' => 'Russian' ] );

	$messageCache = $this->createMock( MessageCache::class );
	$messageCache->method( 'get' )->willReturn( false );

	$settings = [
		'ExtraInterlanguageLinkPrefixes' => [ 'ru' ],
		'LocalInterwikis' => [ 'ru' ],
	];
	$serviceOverrides = [
		InterwikiLookup::class => $interwikiLookup,
		LanguageNameUtils::class => $languageNameUtils,
		MessageCache::class => $messageCache,
	];
	$siteConfig = $this->createSiteConfig( $settings, [], $serviceOverrides );

	// assertSame() on arrays also compares key order, so the order below matters.
	$expectedMap = [
		'ru' => [
			'prefix' => 'ru',
			'url' => 'http://test/$1',
			'protorel' => true,
			'local' => true,
			'language' => true,
			'localinterwiki' => true,
			'extralanglink' => true,
		]
	];
	$this->assertSame( $expectedMap, $siteConfig->interwikiMap() );
}
/**
 * Checks that iwp() does not return null for a default SiteConfig.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::iwp
 */
public function testIwp() {
	$siteConfig = $this->createSiteConfig();
	$prefix = $siteConfig->iwp();
	$this->assertNotNull( $prefix );
}
/**
 * Checks that linkPrefixRegex() returns null when the mocked Language
 * reports linkPrefixExtension() as false.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::linkPrefixRegex
 */
public function testLinkPrefixRegex_disabled() {
	$language = $this->createMock( Language::class );
	$language->method( 'linkPrefixExtension' )->willReturn( false );

	$siteConfig = $this->createSiteConfig( [], [], [ Language::class => $language ] );

	$regex = $siteConfig->linkPrefixRegex();
	$this->assertNull( $regex );
}
/**
 * Checks that linkPrefixRegex() contains the charset reported by the mocked
 * Language when linkPrefixExtension() is true.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::linkPrefixRegex
 */
public function testLinkPrefixRegex() {
	$language = $this->createMock( Language::class );
	$language->method( 'linkPrefixExtension' )->willReturn( true );
	$language->method( 'linkPrefixCharset' )->willReturn( 'blabla' );

	$siteConfig = $this->createSiteConfig( [], [], [ Language::class => $language ] );

	$regex = $siteConfig->linkPrefixRegex();
	$this->assertStringContainsString( 'blabla', $regex );
}
/**
 * Currently skipped; the intended check is that mainpage() returns 'Main Page'.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::mainpage
 */
public function testMainpage() {
	$this->markTestSkipped( 'Requires MessageCache; not a unit test' );
	// Not executed: markTestSkipped() ends the test. Kept to record the intended check.
	$siteConfig = $this->createSiteConfig();
	$this->assertSame( 'Main Page', $siteConfig->mainpage() );
}
/**
 * Checks that langConverterEnabled( 'zh' ) is false when the mocked
 * LanguageConverterFactory reports conversion as disabled.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::langConverterEnabled
 */
public function testLangConverterEnabled_disabled() {
	$converterFactory = $this->createMock( LanguageConverterFactory::class );
	$converterFactory->method( 'isConversionDisabled' )->willReturn( true );

	$serviceOverrides = [
		LanguageConverterFactory::class => $converterFactory,
	];
	$siteConfig = $this->createSiteConfig( [], [], $serviceOverrides );

	$isEnabled = $siteConfig->langConverterEnabled( 'zh' );
	$this->assertFalse( $isEnabled );
}
/**
 * Checks that langConverterEnabled() is false for the code 'zhasdcasdc'
 * even though the mocked LanguageConverterFactory leaves conversion enabled.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::langConverterEnabled
 */
public function testLangConverterEnabled_invalidCode() {
	$converterFactory = $this->createMock( LanguageConverterFactory::class );
	$converterFactory->method( 'isConversionDisabled' )->willReturn( false );

	$serviceOverrides = [
		LanguageConverterFactory::class => $converterFactory,
	];
	$siteConfig = $this->createSiteConfig( [], [], $serviceOverrides );

	$isEnabled = $siteConfig->langConverterEnabled( 'zhasdcasdc' );
	$this->assertFalse( $isEnabled );
}
/**
 * Checks that langConverterEnabled( 'zh' ) is true when conversion is not
 * disabled and the converter obtained for the 'zh' language reports variants.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::langConverterEnabled
 */
public function testLangConverterEnabled_valid() {
	$language = $this->createMock( Language::class );

	$languageFactory = $this->createMock( LanguageFactory::class );
	$languageFactory->method( 'getLanguage' )
		->with( 'zh' )
		->willReturn( $language );

	// Built through the mock builder with the original constructor disabled.
	$converterBuilder = $this->getMockBuilder( ZhConverter::class );
	$converter = $converterBuilder->disableOriginalConstructor()->getMock();
	$converter->method( 'hasVariants' )->willReturn( true );

	$converterFactory = $this->createMock( LanguageConverterFactory::class );
	$converterFactory->method( 'getLanguageConverter' )
		->with( $language )
		->willReturn( $converter );
	$converterFactory->method( 'isConversionDisabled' )->willReturn( false );

	$serviceOverrides = [
		LanguageFactory::class => $languageFactory,
		LanguageConverterFactory::class => $converterFactory
	];
	$siteConfig = $this->createSiteConfig( [], [], $serviceOverrides );

	$isEnabled = $siteConfig->langConverterEnabled( 'zh' );
	$this->assertTrue( $isEnabled );
}
/**
 * Checks that langConverterEnabled( 'zh' ) is false when the mocked
 * LanguageFactory throws an MWException from getLanguage().
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::langConverterEnabled
 */
public function testLangConverterEnabled_exception() {
	$languageFactory = $this->createMock( LanguageFactory::class );
	$failure = new MWException( 'TEST' );
	$languageFactory->method( 'getLanguage' )
		->with( 'zh' )
		->willThrowException( $failure );

	$converterFactory = $this->createMock( LanguageConverterFactory::class );
	$converterFactory->method( 'isConversionDisabled' )->willReturn( false );

	$serviceOverrides = [
		LanguageFactory::class => $languageFactory,
		LanguageConverterFactory::class => $converterFactory,
	];
	$siteConfig = $this->createSiteConfig( [], [], $serviceOverrides );

	$isEnabled = $siteConfig->langConverterEnabled( 'zh' );
	$this->assertFalse( $isEnabled );
}
/**
 * Checks that variants() is an empty array when the mocked
 * LanguageConverterFactory reports conversion as disabled.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::variants
 */
public function testVariants_disabled() {
	$converterFactory = $this->createMock( LanguageConverterFactory::class );
	$converterFactory->method( 'isConversionDisabled' )->willReturn( true );

	$serviceOverrides = [
		LanguageConverterFactory::class => $converterFactory,
	];
	$siteConfig = $this->createSiteConfig( [], [], $serviceOverrides );

	$variants = $siteConfig->variants();
	$this->assertSame( [], $variants );
}
/**
 * Checks the map returned by variants() when only the converter handed out
 * for the language with code 'zh' reports variants.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::variants
 */
public function testVariants() {
	// Every requested language is a fresh mock that echoes back its code.
	$languageFactory = $this->createMock( LanguageFactory::class );
	$languageFactory->method( 'getLanguage' )
		->willReturnCallback( function ( $languageCode ) {
			$language = $this->createMock( Language::class );
			$language->method( 'getCode' )->willReturn( $languageCode );
			return $language;
		} );

	$zhConverter = $this->createMock( ILanguageConverter::class );
	$zhConverter->method( 'hasVariants' )->willReturn( true );
	$zhConverter->method( 'getVariants' )->willReturn( [ 'zh-hans' ] );
	$zhConverter->method( 'getVariantFallbacks' )->willReturn( 'zh-fallback' );

	$converterFactory = $this->createMock( LanguageConverterFactory::class );
	$converterFactory->method( 'isConversionDisabled' )->willReturn( false );
	$converterFactory->method( 'getLanguageConverter' )
		->willReturnCallback( function ( $language ) use ( $zhConverter ) {
			// Any language other than 'zh' gets an unconfigured converter mock.
			if ( $language->getCode() !== 'zh' ) {
				return $this->createMock( ILanguageConverter::class );
			}
			return $zhConverter;
		} );

	$serviceOverrides = [
		LanguageFactory::class => $languageFactory,
		LanguageConverterFactory::class => $converterFactory
	];
	$siteConfig = $this->createSiteConfig( [], [], $serviceOverrides );

	$expectedVariants = [
		'zh-hans' => [ 'base' => 'zh', 'fallbacks' => [ 'zh-fallback' ] ]
	];
	$this->assertSame( $expectedVariants, $siteConfig->variants() );
}
/**
 * Checks that widthOption() returns the 'ThumbLimits' entry selected by the
 * default 'thumbsize' user option.
 *
 * NOTE(review): the method name looks like a typo for testWidthOption; it is
 * kept unchanged here.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::widthOption
 */
public function testWithOption() {
	$userOptionsLookup = $this->createMock( UserOptionsLookup::class );
	$userOptionsLookup->method( 'getDefaultOption' )
		->with( 'thumbsize' )
		->willReturn( 'small' );

	$settings = [
		'ThumbLimits' => [ 'small' => 42 ]
	];
	$serviceOverrides = [
		UserOptionsLookup::class => $userOptionsLookup
	];
	$siteConfig = $this->createSiteConfig( $settings, [], $serviceOverrides );

	$width = $siteConfig->widthOption();
	$this->assertSame( 42, $width );
}
/**
 * Checks that getMagicWordMatcher() fetches the magic word by id from the
 * factory exactly once and returns that word's getRegexStartToEnd() result.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::getMagicWordMatcher
 */
public function testGetMagicWordMatcher() {
	$magicWord = $this->createMock( MagicWord::class );
	$magicWord->expects( $this->once() )
		->method( 'getRegexStartToEnd' )
		->willReturn( 'blabla' );

	$factory = $this->createMock( MagicWordFactory::class );
	$factory->expects( $this->once() )
		->method( 'get' )
		->with( 'blabla_id' )
		->willReturn( $magicWord );

	$siteConfig = $this->createSiteConfig( [], [], [ MagicWordFactory::class => $factory ] );

	$matcher = $siteConfig->getMagicWordMatcher( 'blabla_id' );
	$this->assertSame( 'blabla', $matcher );
}
/**
 * Checks the callable returned by getParameterizedAliasMatcher(): it must
 * yield the key/value pair for 'blabla_alias:blabla' and null for an input
 * that differs only in the case of its first letter.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::getParameterizedAliasMatcher
 */
public function testGetParameterizedAliasMatcher() {
	$language = $this->createMock( Language::class );
	$factory = $this->createMock( MagicWordFactory::class );

	$wordArray = new MagicWordArray( [ 'test' ], $factory );
	$factory->method( 'newArray' )->willReturn( $wordArray );

	$word = new MagicWord( 'blabla_id', [ 'blabla_alias:$1' ], true, $language );
	$factory->method( 'get' )->willReturn( $word );

	$siteConfig = $this->createSiteConfig( [], [], [ MagicWordFactory::class => $factory ] );
	$matcher = $siteConfig->getParameterizedAliasMatcher( [ 'blabla' ] );

	$matched = $matcher( 'blabla_alias:blabla' );
	$this->assertSame( [ 'k' => 'test', 'v' => 'blabla' ], $matched );

	$unmatched = $matcher( 'Blabla_alias:blabla' );
	$this->assertNull( $unmatched );
}
/**
 * Checks that getSpecialNSAliases() combines 'Special', the namespace text
 * from the mocked Language, and the NS_SPECIAL aliases from both the Language
 * and the 'NamespaceAliases' setting, leaving out the NS_MAIN aliases.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::getSpecialNSAliases
 */
public function testGetSpecialNSAliases() {
	$language = $this->createMock( Language::class );
	$language->method( 'getNsText' )->willReturn( 'Special_Special' );
	$language->method( 'getNamespaceAliases' )->willReturn( [
		'From Language' => NS_SPECIAL,
		'Whatever' => NS_MAIN
	] );

	$settings = [
		'NamespaceAliases' => [
			'From Config' => NS_SPECIAL,
			'Whatever' => NS_MAIN
		]
	];
	$siteConfig = TestingAccessWrapper::newFromObject(
		$this->createSiteConfig( $settings, [], [ Language::class => $language ] )
	);

	$expectedAliases = [ 'Special', 'Special[ _]Special', 'From[ _]Language', 'From[ _]Config' ];
	$this->assertSame( $expectedAliases, $siteConfig->getSpecialNSAliases() );
}
/**
 * Checks that getSpecialPageAliases() returns the page name followed by the
 * aliases the mocked Language knows for it, and only the page name when the
 * Language has no entry for the page.
 *
 * @covers \MediaWiki\Parser\Parsoid\Config\SiteConfig::getSpecialPageAliases
 */
public function testGetSpecialPageAliases() {
	$aliasMap = [
		'Page1' => [ 'Alias1', 'Alias2' ]
	];
	$language = $this->createMock( Language::class );
	$language->method( 'getSpecialPageAliases' )->willReturn( $aliasMap );

	$siteConfig = TestingAccessWrapper::newFromObject(
		$this->createSiteConfig( [], [], [ Language::class => $language ] )
	);

	$knownPage = $siteConfig->getSpecialPageAliases( 'Page1' );
	$this->assertSame( [ 'Page1', 'Alias1', 'Alias2' ], $knownPage );

	$unknownPage = $siteConfig->getSpecialPageAliases( 'Page2' );
	$this->assertSame( [ 'Page2' ], $unknownPage );
}
}