[\s\S]*?-->)';

	/**
	 * Names of the main-configuration settings read through the
	 * ServiceOptions object; the constructor asserts that all of them
	 * are present.
	 */
	public const CONSTRUCTOR_OPTIONS = [
		MainConfigNames::GalleryOptions,
		MainConfigNames::AllowExternalImages,
		MainConfigNames::AllowExternalImagesFrom,
		MainConfigNames::Server,
		MainConfigNames::ArticlePath,
		MainConfigNames::InterwikiMagic,
		MainConfigNames::ExtraInterlanguageLinkPrefixes,
		MainConfigNames::InterlanguageLinkCodeMap,
		MainConfigNames::LocalInterwikis,
		MainConfigNames::LanguageCode,
		MainConfigNames::NamespaceAliases,
		MainConfigNames::UrlProtocols,
		MainConfigNames::Script,
		MainConfigNames::ScriptPath,
		MainConfigNames::LoadScript,
		MainConfigNames::LocalTZoffset,
		MainConfigNames::ThumbLimits,
		MainConfigNames::MaxTemplateDepth,
		MainConfigNames::NoFollowLinks,
		MainConfigNames::NoFollowNsExceptions,
		MainConfigNames::NoFollowDomainExceptions,
		MainConfigNames::ExternalLinkTarget,
	];

	/** @var ServiceOptions */
	private $config;

	/** @var Config */
	private $mwConfig;

	/** @var array Parsoid-specific options array from $config */
	private $parsoidSettings;

	/** @var Language */
	private $contLang;

	/** @var StatsdDataFactoryInterface */
	private $stats;

	/** @var MagicWordFactory */
	private $magicWordFactory;

	/** @var NamespaceInfo */
	private $namespaceInfo;

	/** @var SpecialPageFactory */
	private $specialPageFactory;

	/** @var InterwikiLookup */
	private $interwikiLookup;

	/** @var Parser */
	private $parser;

	/** @var UserOptionsLookup */
	private $userOptionsLookup;

	/** @var ObjectFactory */
	private $objectFactory;

	/** @var LanguageFactory */
	private $languageFactory;

	/** @var LanguageConverterFactory */
	private $languageConverterFactory;

	/** @var LanguageNameUtils */
	private $languageNameUtils;

	/** @var UrlUtils */
	private $urlUtils;

	/** @var string|null Lazily computed by determineArticlePath() */
	private $baseUri;

	/** @var string|null Lazily computed by determineArticlePath() */
	private $relativeLinkPrefix;

	/** @var array|null Lazily computed by interwikiMap() */
	private $interwikiMap;

	/** @var array|null Lazily computed by variants() */
	private $variants;

	/** @var array|null Lazily computed by populateExtensionTags() */
	private $extensionTags;

	/** @var PrefixingStatsdDataFactoryProxy|null Lazily created by metrics() */
	private $prefixedMetrics;

	/**
	 * @param ServiceOptions $config MediaWiki main configuration object
	 * @param array $parsoidSettings Parsoid-specific options array from main configuration.
	 * @param ObjectFactory $objectFactory
	 * @param Language $contentLanguage Content language.
	 * @param StatsdDataFactoryInterface $stats
	 * @param MagicWordFactory $magicWordFactory
	 * @param NamespaceInfo $namespaceInfo
	 * @param SpecialPageFactory $specialPageFactory
	 * @param InterwikiLookup $interwikiLookup
	 * @param UserOptionsLookup $userOptionsLookup
	 * @param LanguageFactory $languageFactory
	 * @param LanguageConverterFactory $languageConverterFactory
	 * @param LanguageNameUtils $languageNameUtils
	 * @param UrlUtils $urlUtils
	 * @param Parser $parser
	 * @param Config $mwConfig
	 */
	public function __construct(
		ServiceOptions $config,
		array $parsoidSettings,
		ObjectFactory $objectFactory,
		Language $contentLanguage,
		StatsdDataFactoryInterface $stats,
		MagicWordFactory $magicWordFactory,
		NamespaceInfo $namespaceInfo,
		SpecialPageFactory $specialPageFactory,
		InterwikiLookup $interwikiLookup,
		UserOptionsLookup $userOptionsLookup,
		LanguageFactory $languageFactory,
		LanguageConverterFactory $languageConverterFactory,
		LanguageNameUtils $languageNameUtils,
		UrlUtils $urlUtils,
		// $parser is temporary and may be removed once a better solution is found.
		Parser $parser, // T268776
		Config $mwConfig
	) {
		parent::__construct();

		$config->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		$this->config = $config;
		$this->mwConfig = $mwConfig;
		$this->parsoidSettings = $parsoidSettings;

		$this->objectFactory = $objectFactory;
		$this->contLang = $contentLanguage;
		$this->stats = $stats;
		$this->magicWordFactory = $magicWordFactory;
		$this->namespaceInfo = $namespaceInfo;
		$this->specialPageFactory = $specialPageFactory;
		$this->interwikiLookup = $interwikiLookup;
		$this->parser = $parser;
		$this->userOptionsLookup = $userOptionsLookup;
		$this->languageFactory = $languageFactory;
		$this->languageConverterFactory = $languageConverterFactory;
		$this->languageNameUtils = $languageNameUtils;
		$this->urlUtils = $urlUtils;

		// Override parent default
		if ( isset( $this->parsoidSettings['linting'] ) ) {
			// @todo: Add this setting to MW's MainConfigSchema
			$this->linterEnabled = $this->parsoidSettings['linting'];
		}

		// Array union: keys given in the Parsoid settings win, the
		// values already present on the object fill in the rest.
		if ( isset( $this->parsoidSettings['wt2htmlLimits'] ) ) {
			$this->wt2htmlLimits = $this->parsoidSettings['wt2htmlLimits']
				+ $this->wt2htmlLimits;
		}
		if ( isset( $this->parsoidSettings['html2wtLimits'] ) ) {
			$this->html2wtLimits = $this->parsoidSettings['html2wtLimits']
				+ $this->html2wtLimits;
		}

		// Register extension modules
		// TODO: inject this (T257586)
		$parsoidModules = ExtensionRegistry::getInstance()->getAttribute( 'ParsoidModules' );
		foreach ( $parsoidModules as $configOrSpec ) {
			$this->registerExtensionModule( $configOrSpec );
		}
	}

	/** @inheritDoc */
	public function getObjectFactory(): ObjectFactory {
		return $this->objectFactory;
	}

	/** @inheritDoc */
	public function getLogger(): LoggerInterface {
		// TODO: inject
		if ( $this->logger === null ) {
			$this->logger = LoggerFactory::getInstance( 'Parsoid' );
		}
		return $this->logger;
	}

	/**
	 * Stats factory whose keys are prefixed with the 'metricsPrefix'
	 * Parsoid setting (default 'Parsoid.').
	 *
	 * The proxy is created on first use and cached on this instance, so
	 * each object wraps the stats factory and prefix it was constructed
	 * with.
	 *
	 * @return StatsdDataFactoryInterface|null
	 */
	public function metrics(): ?StatsdDataFactoryInterface {
		// TODO: inject
		if ( $this->prefixedMetrics === null ) {
			$this->prefixedMetrics = new PrefixingStatsdDataFactoryProxy(
				// Our stats will also get prefixed with 'MediaWiki.'
				$this->stats,
				$this->parsoidSettings['metricsPrefix'] ?? 'Parsoid.'
			);
		}
		return $this->prefixedMetrics;
	}

	/**
	 * @return array Value of the GalleryOptions setting
	 */
	public function galleryOptions(): array {
		return $this->config->get( MainConfigNames::GalleryOptions );
	}

	/**
	 * URL prefixes from which external images are allowed.
	 *
	 * @return array [ '' ] when AllowExternalImages is set; otherwise the
	 *  AllowExternalImagesFrom setting cast to an array, or an empty
	 *  array when that setting is falsy.
	 */
	public function allowedExternalImagePrefixes(): array {
		if ( $this->config->get( MainConfigNames::AllowExternalImages ) ) {
			return [ '' ];
		} else {
			$allowFrom = $this->config->get( MainConfigNames::AllowExternalImagesFrom );
			return $allowFrom ? (array)$allowFrom : [];
		}
	}

	/**
	 * Determine the article base URI and relative prefix
	 *
	 * Populates `$this->baseUri` and `$this->relativeLinkPrefix` based on
	 * `$wgServer` and `$wgArticlePath`, by splitting it at the last '/' in the
	 * path portion.
	 *
	 * @throws UnexpectedValueException If the article path does not end
	 *  in '$1' or cannot be parsed as a URL.
	 */
	private function determineArticlePath(): void {
		$url = $this->config->get( MainConfigNames::Server ) .
			$this->config->get( MainConfigNames::ArticlePath );

		if ( substr( $url, -2 ) !== '$1' ) {
			throw new UnexpectedValueException( "Article path '$url' does not have '$1' at the end" );
		}
		// Drop the trailing '$1' placeholder.
		$url = substr( $url, 0, -2 );

		$bits = wfParseUrl( $url );
		if ( !$bits ) {
			throw new UnexpectedValueException( "Failed to parse article path '$url'" );
		}

		if ( empty( $bits['path'] ) ) {
			$path = '/';
		} else {
			$path = wfRemoveDotSegments( $bits['path'] );
		}

		// Query and fragment go to the relative prefix; every other URL
		// component belongs to the base URI.
		$relParts = [ 'query' => true, 'fragment' => true ];
		$base = array_diff_key( $bits, $relParts );
		$rel = array_intersect_key( $bits, $relParts );

		$i = strrpos( $path, '/' );
		$base['path'] = substr( $path, 0, $i + 1 );
		$rel['path'] = '.' . substr( $path, $i );

		$this->baseUri = wfAssembleUrl( $base );
		$this->relativeLinkPrefix = wfAssembleUrl( $rel );
	}

	/**
	 * @return string Base URI, computed on first use by determineArticlePath()
	 */
	public function baseURI(): string {
		if ( $this->baseUri === null ) {
			$this->determineArticlePath();
		}
		return $this->baseUri;
	}

	/**
	 * @return string Relative link prefix, computed on first use by
	 *  determineArticlePath()
	 */
	public function relativeLinkPrefix(): string {
		if ( $this->relativeLinkPrefix === null ) {
			$this->determineArticlePath();
		}
		return $this->relativeLinkPrefix;
	}

	/**
	 * This is very similar to MagicWordArray::getBaseRegex() except we
	 * don't emit the named grouping constructs, which can cause havoc
	 * when embedded in other regexps with grouping constructs.
	 *
	 * @param MagicWordArray $magicWordArray
	 * @param string $delimiter
	 * @return string
	 */
	private static function mwaToRegex(
		MagicWordArray $magicWordArray,
		string $delimiter = '/'
	): string {
		return implode( '|', $magicWordArray->getBaseRegex( false, $delimiter ) );
	}

	/**
	 * @return string '@'-delimited regular expression (modifiers S and u)
	 *  built from the 'redirect' magic word
	 */
	public function redirectRegexp(): string {
		$redirect = self::mwaToRegex( $this->magicWordFactory->newArray( [ 'redirect' ] ), '@' );
		return "@$redirect@Su";
	}

	/**
	 * @return string '@'-delimited, case-insensitive alternation of the
	 *  canonical category namespace name and the content language's
	 *  aliases for it
	 */
	public function categoryRegexp(): string {
		$canon = $this->namespaceInfo->getCanonicalName( NS_CATEGORY );
		$result = [ $canon ];
		foreach ( $this->contLang->getNamespaceAliases() as $alias => $ns ) {
			if ( $ns === NS_CATEGORY && $alias !== $canon ) {
				$result[] = $alias;
			}
		}
		$category = implode( '|', array_map( function ( $v ) {
			return $this->quoteTitleRe( $v, '@' );
		}, $result ) );
		return "@(?i:$category)@";
	}

	/**
	 * @return string '@'-delimited regular expression (modifiers S and u)
	 *  built from the double-underscore magic words, with the
	 *  underscores themselves removed
	 */
	public function bswRegexp(): string {
		$bsw = self::mwaToRegex( $this->magicWordFactory->getDoubleUnderscoreArray(), '@' );
		// Aliases for double underscore mws include the underscores
		// So, strip them since the base regexp will have included them
		// and they aren't expected at the use sites of bswRegexp
		$bsw = str_replace( '__', '', $bsw );
		return "@$bsw@Su";
	}

	/** @inheritDoc */
	public function canonicalNamespaceId( string $name ): ?int {
		$ret = $this->namespaceInfo->getCanonicalIndex( $name );
		return $ret === false ? null : $ret;
	}

	/** @inheritDoc */
	public function namespaceId( string $name ): ?int {
		$ret = $this->contLang->getNsIndex( $name );
		return $ret === false ? null : $ret;
	}

	/** @inheritDoc */
	public function namespaceName( int $ns ): ?string {
		$ret = $this->contLang->getFormattedNsText( $ns );
		// An empty name is only a valid answer for the main namespace.
		return $ret === '' && $ns !== NS_MAIN ? null : $ret;
	}

	/** @inheritDoc */
	public function namespaceHasSubpages( int $ns ): bool {
		return $this->namespaceInfo->hasSubpages( $ns );
	}

	/** @inheritDoc */
	public function namespaceCase( int $ns ): string {
		return $this->namespaceInfo->isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive';
	}

	/** @inheritDoc */
	public function namespaceIsTalk( int $ns ): bool {
		return $this->namespaceInfo->isTalk( $ns );
	}

	/** @inheritDoc */
	public function ucfirst( string $str ): string {
		return $this->contLang->ucfirst( $str );
	}

	/** @inheritDoc */
	public function specialPageLocalName( string $alias ): ?string {
		$aliases = $this->specialPageFactory->resolveAlias( $alias );
		// If the alias did not resolve, hand the input back unchanged.
		return $aliases[0] !== null
			? $this->specialPageFactory->getLocalNameFor( ...$aliases )
			: $alias;
	}

	/**
	 * @return bool Value of the InterwikiMagic setting
	 */
	public function interwikiMagic(): bool {
		return $this->config->get( MainConfigNames::InterwikiMagic );
	}

	/**
	 * Interwiki map, keyed by interwiki prefix; built on first use and
	 * cached on this instance.
	 *
	 * Each entry always has 'prefix' and 'url' (which always contains
	 * the '$1' placeholder). Depending on the row and configuration it
	 * may also have 'protorel', 'local', 'language', 'deprecated',
	 * 'bcp47', 'localinterwiki', 'extralanglink' and 'code'.
	 *
	 * @return array
	 */
	public function interwikiMap(): array {
		// Unfortunate that this mostly duplicates \ApiQuerySiteinfo::appendInterwikiMap()
		if ( $this->interwikiMap !== null ) {
			return $this->interwikiMap;
		}
		$this->interwikiMap = [];

		$getPrefixes = $this->interwikiLookup->getAllPrefixes();
		$langNames = $this->languageNameUtils->getLanguageNames();
		$extraLangPrefixes = $this->config->get( MainConfigNames::ExtraInterlanguageLinkPrefixes );
		$extraLangCodeMap = $this->config->get( MainConfigNames::InterlanguageLinkCodeMap );
		$localInterwikis = $this->config->get( MainConfigNames::LocalInterwikis );

		foreach ( $getPrefixes as $row ) {
			$prefix = $row['iw_prefix'];
			$val = [];
			$val['prefix'] = $prefix;

			// ApiQuerySiteInfo::appendInterwikiMap uses PROTO_CURRENT here,
			// but that's the 'current' protocol *of the API request*; use
			// PROTO_CANONICAL instead.
			// If the URL cannot be expanded, fall back to an empty string
			// so that only strings reach strpos() and the append below;
			// such an entry ends up with the URL '$1'.
			$url = $this->urlUtils->expand( $row['iw_url'], PROTO_CANONICAL ) ?? '';

			// Fix up broken interwiki hrefs that are missing a $1 placeholder
			// Just append the placeholder at the end.
			// This makes sure that the interwikiMatcher adds one match
			// group per URI, and that interwiki links work as expected.
			if ( strpos( $url, '$1' ) === false ) {
				$url .= '$1';
			}
			$val['url'] = $url;

			if ( substr( $row['iw_url'], 0, 2 ) == '//' ) {
				$val['protorel'] = true;
			}
			if ( isset( $row['iw_local'] ) && $row['iw_local'] == '1' ) {
				$val['local'] = true;
			}
			if ( isset( $langNames[$prefix] ) ) {
				$val['language'] = true;
				$standard = LanguageCode::replaceDeprecatedCodes( $prefix );
				if ( $standard !== $prefix ) {
					# Note that even if this code is deprecated, it should
					# only be remapped if extralanglink (set below) is false.
					$val['deprecated'] = $standard;
				}
				$val['bcp47'] = LanguageCode::bcp47( $standard );
			}
			if ( in_array( $prefix, $localInterwikis, true ) ) {
				$val['localinterwiki'] = true;
			}
			if ( in_array( $prefix, $extraLangPrefixes, true ) ) {
				$val['extralanglink'] = true;
				$val['code'] = $extraLangCodeMap[$prefix] ?? $prefix;
				// Overrides any 'bcp47' value set in the language branch above.
				$val['bcp47'] = LanguageCode::bcp47( $val['code'] );
			}

			$this->interwikiMap[$prefix] = $val;
		}

		return $this->interwikiMap;
	}

	/**
	 * @return string The current wiki ID, as reported by WikiMap
	 */
	public function iwp(): string {
		return WikiMap::getCurrentWikiId();
	}

	/**
	 * @return string Result of Title::legalChars()
	 */
	public function legalTitleChars(): string {
		return Title::legalChars();
	}

	/**
	 * @return string|null Regular expression built from the content
	 *  language's link prefix charset, or null when the content language
	 *  does not use the link prefix extension
	 */
	public function linkPrefixRegex(): ?string {
		if ( !$this->contLang->linkPrefixExtension() ) {
			return null;
		}
		return '/[' . $this->contLang->linkPrefixCharset() . ']+$/Du';
	}

	/** @inheritDoc */
	protected function linkTrail(): string {
		return $this->contLang->linkTrail();
	}

	/**
	 * @return Bcp47Code The content language object
	 */
	public function langBcp47(): Bcp47Code {
		return $this->contLang;
	}

	/**
	 * @return string Prefixed text of the main page title
	 */
	public function mainpage(): string {
		// @todo Perhaps should inject TitleFactory here?
		return Title::newMainPage()->getPrefixedText();
	}

	/**
	 * Lookup config
	 * @param string $key
	 * @return mixed config value for $key, if present or null, if not.
	 */
	public function getMWConfigValue( string $key ) {
		return $this->mwConfig->has( $key ) ? $this->mwConfig->get( $key ) : null;
	}

	/**
	 * @return bool Whether the content language is right-to-left
	 */
	public function rtl(): bool {
		return $this->contLang->isRTL();
	}

	/**
	 * Whether language conversion is available for the given language.
	 *
	 * @param Bcp47Code $lang
	 * @return bool False when conversion is disabled, when the language
	 *  is not listed in LanguageConverter::$languagesWithVariants, when
	 *  its converter has no variants, or when an MWException is thrown
	 *  while looking the language up.
	 */
	public function langConverterEnabledBcp47( Bcp47Code $lang ): bool {
		if ( $this->languageConverterFactory->isConversionDisabled() ) {
			return false;
		}

		try {
			$langObject = $this->languageFactory->getLanguage( $lang );
			if ( !in_array( $langObject->getCode(), LanguageConverter::$languagesWithVariants, true ) ) {
				return false;
			}
			$converter = $this->languageConverterFactory->getLanguageConverter( $langObject );
			return $converter->hasVariants();
		} catch ( MWException $ex ) {
			// Probably a syntactically invalid language code
			return false;
		}
	}

	/**
	 * @return string Value of the Script setting
	 */
	public function script(): string {
		return $this->config->get( MainConfigNames::Script );
	}

	/**
	 * @return string Value of the ScriptPath setting
	 */
	public function scriptpath(): string {
		return $this->config->get( MainConfigNames::ScriptPath );
	}

	/**
	 * @return string Value of the Server setting
	 */
	public function server(): string {
		return $this->config->get( MainConfigNames::Server );
	}

	/**
	 * @inheritDoc
	 * @param Document $document
	 * @param ContentMetadataCollector $metadata
	 * @param string $defaultTitle
	 * @param Bcp47Code $lang
	 */
	public function exportMetadataToHeadBcp47(
		Document $document,
		ContentMetadataCollector $metadata,
		string $defaultTitle,
		Bcp47Code $lang
	): void {
		'@phan-var ParserOutput $metadata'; // @var ParserOutput $metadata
		// Look for a displaytitle.
		$displayTitle = $metadata->getPageProperty( 'displaytitle' ) ?:
			// Use the default title, properly escaped
			Utils::escapeHtml( $defaultTitle );
		$this->exportMetadataHelper(
			$document,
			$this->config->get( MainConfigNames::LoadScript ),
			$metadata->getModules(),
			$metadata->getModuleStyles(),
			$metadata->getJsConfigVars(),
			$displayTitle,
			$lang
		);
	}

	/**
	 * @return int Value of the LocalTZoffset setting
	 */
	public function timezoneOffset(): int {
		return $this->config->get( MainConfigNames::LocalTZoffset );
	}

	/**
	 * Language variant information
	 * @return array Keys are MediaWiki-internal variant codes (e.g. "zh-cn"),
	 *   values are arrays with two fields:
	 *   - base: (string) Base language code (e.g. "zh") (MediaWiki-internal)
	 *   - fallbacks: (string[]) Fallback variants (MediaWiki-internal codes)
	 * @deprecated Use ::variantsFor() (T320662)
	 */
	public function variants(): array {
		if ( $this->variants !== null ) {
			return $this->variants;
		}
		$this->variants = [];

		$langNames = LanguageConverter::$languagesWithVariants;
		if ( $this->languageConverterFactory->isConversionDisabled() ) {
			// Ensure result is empty if language conversion is disabled.
			$langNames = [];
		}

		foreach ( $langNames as $langCode ) {
			$lang = $this->languageFactory->getLanguage( $langCode );
			$converter = $this->languageConverterFactory->getLanguageConverter( $lang );
			if ( !$converter->hasVariants() ) {
				continue;
			}

			$variants = $converter->getVariants();
			foreach ( $variants as $v ) {
				$fallbacks = $converter->getVariantFallbacks( $v );
				// Normalise a single fallback to a one-element list.
				if ( !is_array( $fallbacks ) ) {
					$fallbacks = [ $fallbacks ];
				}
				$this->variants[$v] = [
					'base' => $langCode,
					'fallbacks' => $fallbacks,
				];
			}
		}

		return $this->variants;
	}

	/**
	 * Language variant information for the given language (or null if
	 * unknown).
	 * @param Bcp47Code $code The language for which you want variant information
	 * @return ?array{base:Bcp47Code,fallbacks:Bcp47Code[]} an array with
	 * two fields:
	 *   - base: (Bcp47Code) Base BCP-47 language code (e.g. "zh")
	 *   - fallbacks: (Bcp47Code[]) Fallback variants, as BCP-47 codes
	 */
	public function variantsFor( Bcp47Code $code ): ?array {
		$variants = $this->variants();
		$lang = $this->languageFactory->getLanguage( $code );
		$tuple = $variants[$lang->getCode()] ?? null;
		if ( $tuple === null ) {
			return null;
		}
		// Convert the internal codes stored by variants() into language objects.
		return [
			'base' => $this->languageFactory->getLanguage( $tuple['base'] ),
			'fallbacks' => array_map(
				[ $this->languageFactory, 'getLanguage' ],
				$tuple['fallbacks']
			),
		];
	}

	/**
	 * @return int The ThumbLimits entry selected by the default value of
	 *  the 'thumbsize' user option
	 */
	public function widthOption(): int {
		// Even though this looks like Parsoid is supporting per-user thumbsize
		// options, that is not the case, Parsoid doesn't receive user session state
		$thumbsize = $this->userOptionsLookup->getDefaultOption( 'thumbsize' );
		return $this->config->get( MainConfigNames::ThumbLimits )[$thumbsize];
	}

	/** @inheritDoc */
	protected function getVariableIDs(): array {
		return $this->magicWordFactory->getVariableIDs();
	}

	/** @inheritDoc */
	protected function getFunctionSynonyms(): array {
		return $this->parser->getFunctionSynonyms();
	}

	/** @inheritDoc */
	protected function getMagicWords(): array {
		return $this->contLang->getMagicWords();
	}

	/** @inheritDoc */
	public function getMagicWordMatcher( string $id ): string {
		return $this->magicWordFactory->get( $id )->getRegexStartToEnd();
	}

	/** @inheritDoc */
	public function getParameterizedAliasMatcher( array $words ): callable {
		// PORT-FIXME: this should be combined with
		// getMediaPrefixParameterizedAliasMatcher; see PORT-FIXME comment
		// in that method.
		// Filter out timedmedia-* unless that extension is loaded, so Parsoid
		// doesn't have a hard dependency on an extension.
		if ( !ExtensionRegistry::getInstance()->isLoaded( 'TimedMediaHandler' ) ) {
			$words = preg_grep( '/^timedmedia_/', $words, PREG_GREP_INVERT );
		}
		$words = $this->magicWordFactory->newArray( $words );
		return static function ( $text ) use ( $words ) {
			$ret = $words->matchVariableStartToEnd( $text );
			if ( $ret[0] === false || $ret[1] === false ) {
				return null;
			} else {
				return [ 'k' => $ret[0], 'v' => $ret[1] ];
			}
		};
	}

	/**
	 * Fill `$this->extensionTags` with the parser's tag names as keys,
	 * each mapped to true.
	 */
	private function populateExtensionTags(): void {
		$this->extensionTags = array_fill_keys( $this->parser->getTags(), true );
	}

	/** @inheritDoc */
	protected function getNonNativeExtensionTags(): array {
		if ( $this->extensionTags === null ) {
			$this->populateExtensionTags();
		}
		return $this->extensionTags;
	}

	/** @inheritDoc */
	public function getMaxTemplateDepth(): int {
		return (int)$this->config->get( MainConfigNames::MaxTemplateDepth );
	}

	/**
	 * Overrides the max template depth in the MediaWiki configuration.
	 * @param int $depth
	 */
	public function setMaxTemplateDepth( int $depth ): void {
		// Parsoid's command-line tools let you set the max template depth
		// as a CLI argument. Since we currently invoke the legacy
		// preprocessor in some situations, we can't just override
		// ::getMaxTemplateDepth() above, we need to reset the Config
		// service.
		if ( $this->config instanceof MutableConfig ) {
			$this->config->set( MainConfigNames::MaxTemplateDepth, $depth );
		} else {
			// Fall back on global variable (hopefully we're using
			// a GlobalVarConfig and this will work)
			$GLOBALS['wgMaxTemplateDepth'] = $depth;
		}
	}

	/** @inheritDoc */
	protected function getSpecialNSAliases(): array {
		$nsAliases = [
			'Special',
			$this->quoteTitleRe( $this->contLang->getNsText( NS_SPECIAL ) )
		];
		// Array union: on a key clash the content language's alias wins
		// over the NamespaceAliases setting.
		foreach (
			$this->contLang->getNamespaceAliases() +
			$this->config->get( MainConfigNames::NamespaceAliases )
			as $name => $ns
		) {
			if ( $ns === NS_SPECIAL ) {
				$nsAliases[] = $this->quoteTitleRe( $name );
			}
		}

		return $nsAliases;
	}

	/** @inheritDoc */
	protected function getSpecialPageAliases( string $specialPage ): array {
		return array_merge( [ $specialPage ],
			$this->contLang->getSpecialPageAliases()[$specialPage] ?? []
		);
	}

	/** @inheritDoc */
	protected function getProtocols(): array {
		return $this->config->get( MainConfigNames::UrlProtocols );
	}

	/**
	 * @return array The NoFollowLinks, NoFollowNsExceptions and
	 *  NoFollowDomainExceptions settings under the keys 'nofollow',
	 *  'nsexceptions' and 'domainexceptions'
	 */
	public function getNoFollowConfig(): array {
		return [
			'nofollow' => $this->config->get( MainConfigNames::NoFollowLinks ),
			'nsexceptions' => $this->config->get( MainConfigNames::NoFollowNsExceptions ),
			'domainexceptions' => $this->config->get( MainConfigNames::NoFollowDomainExceptions )
		];
	}

	/**
	 * @return string|false Value of the ExternalLinkTarget setting
	 */
	public function getExternalLinkTarget() {
		return $this->config->get( MainConfigNames::ExternalLinkTarget );
	}
}