wiki.techinc.nl/includes/parser/ParserOutput.php

2009 lines
58 KiB
PHP
Raw Normal View History

<?php
use HtmlFormatter\HtmlFormatter;
use MediaWiki\Json\JsonUnserializable;
use MediaWiki\Json\JsonUnserializableTrait;
use MediaWiki\Json\JsonUnserializer;
use MediaWiki\Linker\LinkTarget;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageReference;
use Wikimedia\Reflection\GhostFieldAccessTrait;
/**
* Output of the PHP parser.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Parser
*/
class ParserOutput extends CacheTime {
use GhostFieldAccessTrait;
use JsonUnserializableTrait;
/**
* Feature flags to indicate to extensions that MediaWiki core supports and
* uses getText() stateless transforms.
*
* @since 1.31
*/
public const SUPPORTS_STATELESS_TRANSFORMS = 1;
/**
* @since 1.31
*/
public const SUPPORTS_UNWRAP_TRANSFORM = 1;
/**
* @var string|null The output text
*/
private $mText = null;
/**
* @var array List of the full text of language links,
* in the order they appear.
*/
private $mLanguageLinks;
/**
* @var array<string,string> Map of category names to sort keys
*/
private $mCategories;
/**
* @var array Page status indicators, usually displayed in top-right corner.
*/
private $mIndicators = [];
/**
* @var string Title text of the chosen language variant, as HTML.
*/
private $mTitleText;
/**
* @var int[][] 2-D map of NS/DBK to ID for the links in the document.
* ID=zero for broken.
* @phan-var array<int,array<string,int>>
*/
private $mLinks = [];
preferences: Signature validation (lint errors, user links, nested subst) Three new checks are now applied to user signatures in preferences: * Disallow invalid HTML and lint errors (T140606) Since 15e0e9bb4b we can rely on Parsoid to check the signature for lint errors. (The old PHP Parser doesn't have this capability.) Most importantly, this will disallow unclosed HTML tags. Unclosed formatting tags like `<i>` (and also wikitext markup like `''`) could affect the entire page with the bad markup. New configuration variable $wgSignatureAllowedLintErrors is added to allow ignoring some errors. The default value ignores the 'obsolete-tag' error (caused by HTML tags like `<font>` and `<tt>`.) * Require a link to user page, talk page or contributions (T237700) Various tools don't work correctly when such a link is missing. For example, Echo notifications are not sent, DiscussionTools will not allow replying to these comments, English Wikipedia's SineBot treats these comments as unsigned. Such requirement has been present for a long time in many Wikimedia wikis' policies, but it was not enforced by software. * Disallow "nested" substitution in signature (T230652) Clever abuse of "subst" markup and tildes allows users to save edits containing wikitext in which substitution occurs again when the page is next saved. Disallow this in signatures, at least. New configuration variable $wgSignatureValidation is added to control what we do about the result of the validation described above. The options are: * 'warning': Only displays a warning near the field on Special:Preferences if the current signature is invalid. Signatures can still be changed regardless of validity and will be used when signing comments. * 'new': In addition to the above, if a user tries to change their signature, the new one must be valid. Existing invalid signatures are still used when signing comments. 
* 'disallow': In addition to the above, existing invalid signatures are no longer used when signing comments. Bug: T140606 Bug: T237700 Bug: T230652 Change-Id: I07c575c2d9d2afe7a89c4847d16ac044417297bf
2019-11-09 00:15:51 +00:00
/**
* @var array Keys are DBKs for the links to special pages in the document.
* @since 1.35
*/
private $mLinksSpecial = [];
preferences: Signature validation (lint errors, user links, nested subst) Three new checks are now applied to user signatures in preferences: * Disallow invalid HTML and lint errors (T140606) Since 15e0e9bb4b we can rely on Parsoid to check the signature for lint errors. (The old PHP Parser doesn't have this capability.) Most importantly, this will disallow unclosed HTML tags. Unclosed formatting tags like `<i>` (and also wikitext markup like `''`) could affect the entire page with the bad markup. New configuration variable $wgSignatureAllowedLintErrors is added to allow ignoring some errors. The default value ignores the 'obsolete-tag' error (caused by HTML tags like `<font>` and `<tt>`.) * Require a link to user page, talk page or contributions (T237700) Various tools don't work correctly when such a link is missing. For example, Echo notifications are not sent, DiscussionTools will not allow replying to these comments, English Wikipedia's SineBot treats these comments as unsigned. Such requirement has been present for a long time in many Wikimedia wikis' policies, but it was not enforced by software. * Disallow "nested" substitution in signature (T230652) Clever abuse of "subst" markup and tildes allows users to save edits containing wikitext in which substitution occurs again when the page is next saved. Disallow this in signatures, at least. New configuration variable $wgSignatureValidation is added to control what we do about the result of the validation described above. The options are: * 'warning': Only displays a warning near the field on Special:Preferences if the current signature is invalid. Signatures can still be changed regardless of validity and will be used when signing comments. * 'new': In addition to the above, if a user tries to change their signature, the new one must be valid. Existing invalid signatures are still used when signing comments. 
* 'disallow': In addition to the above, existing invalid signatures are no longer used when signing comments. Bug: T140606 Bug: T237700 Bug: T230652 Change-Id: I07c575c2d9d2afe7a89c4847d16ac044417297bf
2019-11-09 00:15:51 +00:00
/**
* @var array 2-D map of NS/DBK to ID for the template references.
* ID=zero for broken.
*/
private $mTemplates = [];
/**
* @var array 2-D map of NS/DBK to rev ID for the template references.
* ID=zero for broken.
*/
private $mTemplateIds = [];
/**
* @var array DB keys of the images used, in the array key only
*/
private $mImages = [];
/**
* @var array DB keys of the images used mapped to sha1 and MW timestamp.
*/
private $mFileSearchOptions = [];
/**
* @var array External link URLs, in the key only.
*/
private $mExternalLinks = [];
/**
* @var array 2-D map of prefix/DBK (in keys only)
* for the inline interwiki links in the document.
*/
private $mInterwikiLinks = [];
/**
* @var bool Show a new section link?
*/
private $mNewSection = false;
/**
* @var bool Hide the new section link?
*/
private $mHideNewSection = false;
/**
* @var bool No gallery on category page? (__NOGALLERY__).
*/
private $mNoGallery = false;
/**
* @var array Items to put in the <head> section
*/
private $mHeadItems = [];
/**
* @var array Modules to be loaded by ResourceLoader
*/
private $mModules = [];
/**
* @var array Modules of which only the CSS will be loaded by ResourceLoader.
*/
private $mModuleStyles = [];
/**
* @var array JavaScript config variable for mw.config combined with this page.
*/
private $mJsConfigVars = [];
/**
* @var array Hook tags as per $wgParserOutputHooks.
*/
private $mOutputHooks = [];
/**
* @var array Warning text to be returned to the user.
* Wikitext formatted, in the key only.
*/
private $mWarnings = [];
/**
* @var array Table of contents
*/
private $mSections = [];
/**
* @var array Name/value pairs to be cached in the DB.
*/
private $mProperties = [];
/**
* @var string HTML of the TOC.
*/
private $mTOCHTML = '';
/**
* @var string Timestamp of the revision.
*/
private $mTimestamp;
/**
* @var bool Whether OOUI should be enabled.
*/
private $mEnableOOUI = false;
/**
* @var string 'index' or 'noindex'? Any other value will result in no change.
*/
private $mIndexPolicy = '';
/**
* @var array extra data used by extensions.
*/
private $mExtensionData = [];
/**
* @var array Parser limit report data.
*/
private $mLimitReportData = [];
/** @var array Parser limit report data for JSON */
private $mLimitReportJSData = [];
/**
* @var array Timestamps for getTimeSinceStart().
*/
private $mParseStartTime = [];
/**
* @var bool Whether to emit X-Frame-Options: DENY.
*/
private $mPreventClickjacking = false;
/**
* @var array Extra script-src for CSP
*/
private $mExtraScriptSrcs = [];
/**
* @var array Extra default-src for CSP [Everything but script and style]
*/
private $mExtraDefaultSrcs = [];
/**
* @var array Extra style-src for CSP
*/
private $mExtraStyleSrcs = [];
/**
* @var array Generic flags.
*/
private $mFlags = [];
/** @var string[] */
private const SPECULATIVE_FIELDS = [
'speculativePageIdUsed',
'mSpeculativeRevId',
'revisionTimestampUsed'
];
/** @var int|null Assumed rev ID for {{REVISIONID}} if no revision is set */
private $mSpeculativeRevId;
/** @var int|null Assumed page ID for {{PAGEID}} if no revision is set */
private $speculativePageIdUsed;
/** @var int|null Assumed rev timestamp for {{REVISIONTIMESTAMP}} if no revision is set */
private $revisionTimestampUsed;
/** @var string|null SHA-1 base 36 hash of any self-transclusion */
private $revisionUsedSha1Base36;
/**
* @var array<string,bool> CSS classes to use for the wrapping div, stored in the array keys.
* If no class is given, no wrapper is added.
*/
private $mWrapperDivClasses = [];
/** @var int|float Upper bound of expiry based on parse duration; defaults to INF, which is a float */
private $mMaxAdaptiveExpiry = INF;
private const EDITSECTION_REGEX =
'#<(?:mw:)?editsection page="(.*?)" section="(.*?)"(?:/>|>(.*?)(</(?:mw:)?editsection>))#s';
// finalizeAdaptiveCacheExpiry() uses TTL = MAX( m * PARSE_TIME + b, MIN_AR_TTL)
// Current values imply that m=3933.333333 and b=-333.333333
// See https://www.nngroup.com/articles/website-response-times/
private const PARSE_FAST_SEC = 0.100; // perceived "fast" page parse
private const PARSE_SLOW_SEC = 1.0; // perceived "slow" page parse
private const FAST_AR_TTL = 60; // adaptive TTL for "fast" pages
private const SLOW_AR_TTL = 3600; // adaptive TTL for "slow" pages
private const MIN_AR_TTL = 15; // min adaptive TTL (for sanity, pool counter, and edit stashing)
/**
 * Build a ParserOutput from rendered HTML plus basic link meta-data.
 *
 * @param string|null $text HTML. Null means that this ParserOutput carries meta-data
 *   only and that the HTML output is undetermined (as opposed to empty); hasText()
 *   then returns false.
 * @param array $languageLinks
 * @param array $categoryLinks
 * @param bool $unused Not read by the constructor.
 * @param string $titletext
 */
public function __construct(
	$text = '',
	$languageLinks = [],
	$categoryLinks = [],
	$unused = false,
	$titletext = ''
) {
	// The four assignments are independent of each other.
	$this->mTitleText = $titletext;
	$this->mCategories = $categoryLinks;
	$this->mLanguageLinks = $languageLinks;
	$this->mText = $text;
}
/**
 * Tell whether this ParserOutput holds rendered text.
 *
 * The answer is false only while the stored text is null, i.e. when null was given
 * as $text to the constructor to signal a meta-data-only object whose HTML output
 * is undetermined.
 *
 * @since 1.32
 *
 * @return bool True if rendered text is present, false if only meta-data is present.
 */
public function hasText() {
	if ( $this->mText === null ) {
		return false;
	}
	return true;
}
/**
 * Get the cacheable text, with <mw:editsection> markers still in place.
 *
 * The result may be written back with setText(), but it is not valid for display
 * to the user.
 *
 * @return string
 * @throws LogicException If this ParserOutput contains no text (the text is null).
 * @since 1.27
 */
public function getRawText() {
	if ( $this->mText !== null ) {
		return $this->mText;
	}
	throw new LogicException( 'This ParserOutput contains no text!' );
}
/**
 * Get the output HTML
 *
 * Starts from getRawText(), runs the ParserOutputPostCacheTransform hook and then
 * applies the transforms below, in this order: wrapper div, section edit links,
 * TOC markers, style deduplication, absolute URLs, slot header placeholders.
 *
 * @param array $options (since 1.31) Transformations to apply to the HTML
 *  - allowTOC: (bool) Show the TOC, assuming there were enough headings
 *     to generate one and `__NOTOC__` wasn't used. Default is true,
 *     but might be statefully overridden.
 *  - enableSectionEditLinks: (bool) Include section edit links, assuming
 *     section edit link tokens are present in the HTML. Default is true,
 *     but might be statefully overridden.
 *  - skin: (Skin) Skin object used for transforming section edit links.
 *  - unwrap: (bool) Return text without a wrapper div. Default is false,
 *     meaning a wrapper div will be added if getWrapperDivClass() returns
 *     a non-empty string.
 *  - wrapperDivClass: (string) Wrap the output in a div and apply the given
 *     CSS class to that div. This overrides the output of getWrapperDivClass().
 *     Setting this to an empty string has the same effect as 'unwrap' => true.
 *  - deduplicateStyles: (bool) When true, which is the default, `<style>`
 *     tags with the `data-mw-deduplicate` attribute set are deduplicated by
 *     value of the attribute: all but the first will be replaced by `<link
 *     rel="mw-deduplicated-inline-style" href="mw-data:..."/>` tags, where
 *     the scheme-specific-part of the href is the (percent-encoded) value
 *     of the `data-mw-deduplicate` attribute.
 *  - absoluteURLs: (bool) use absolute URLs in all links. Default: false
 * @return string HTML
 * @return-taint escaped
 * @throws LogicException Via getRawText(), if this ParserOutput contains no text.
 */
public function getText( $options = [] ) {
	// Fill in defaults without overriding caller-supplied keys. 'absoluteURLs' has no
	// entry here on purpose: it is read with "?? false" further down.
	$options += [
		'allowTOC' => true,
		'enableSectionEditLinks' => true,
		'skin' => null,
		'unwrap' => false,
		'deduplicateStyles' => true,
		'wrapperDivClass' => $this->getWrapperDivClass(),
	];
	$text = $this->getRawText();

	// The hook runs before any of the built-in transforms below.
	Hooks::runner()->onParserOutputPostCacheTransform( $this, $text, $options );

	if ( $options['wrapperDivClass'] !== '' && !$options['unwrap'] ) {
		$text = Html::rawElement( 'div', [ 'class' => $options['wrapperDivClass'] ], $text );
	}

	if ( $options['enableSectionEditLinks'] ) {
		// TODO: Passing the skin should be required
		$skin = $options['skin'] ?: RequestContext::getMain()->getSkin();
		$text = preg_replace_callback(
			self::EDITSECTION_REGEX,
			function ( $m ) use ( $skin ) {
				$editsectionPage = Title::newFromText( htmlspecialchars_decode( $m[1] ) );
				$editsectionSection = htmlspecialchars_decode( $m[2] );
				// Group 4 (the closing tag) only exists for the non-self-closing form
				// of the placeholder; only that form carries content in group 3.
				$editsectionContent = isset( $m[4] ) ? Sanitizer::decodeCharReferences( $m[3] ) : null;

				if ( !is_object( $editsectionPage ) ) {
					// No usable title: log it and drop the placeholder from the output.
					LoggerFactory::getInstance( 'Parser' )
						->error(
							'ParserOutput::getText(): bad title in editsection placeholder',
							[
								'placeholder' => $m[0],
								'editsectionPage' => $m[1],
								'titletext' => $this->getTitleText(),
								'phab' => 'T261347'
							]
						);
					return '';
				}

				return $skin->doEditSectionLink(
					$editsectionPage,
					$editsectionSection,
					$editsectionContent,
					$skin->getLanguage()
				);
			},
			$text
		);
	} else {
		// Section edit links are disabled: strip the placeholders entirely.
		$text = preg_replace( self::EDITSECTION_REGEX, '', $text );
	}

	if ( $options['allowTOC'] ) {
		// Keep the TOC and remove only the markers around it.
		$text = str_replace( [ Parser::TOC_START, Parser::TOC_END ], '', $text );
	} else {
		// Remove the markers together with everything between them.
		$text = preg_replace(
			'#' . preg_quote( Parser::TOC_START, '#' ) . '.*?' . preg_quote( Parser::TOC_END, '#' ) . '#s',
			'',
			$text
		);
	}

	if ( $options['deduplicateStyles'] ) {
		// Values of data-mw-deduplicate that have already been emitted once.
		$seen = [];
		$text = preg_replace_callback(
			'#<style\s+([^>]*data-mw-deduplicate\s*=[^>]*)>.*?</style>#s',
			static function ( $m ) use ( &$seen ) {
				$attr = Sanitizer::decodeTagAttributes( $m[1] );
				if ( !isset( $attr['data-mw-deduplicate'] ) ) {
					return $m[0];
				}

				$key = $attr['data-mw-deduplicate'];
				if ( !isset( $seen[$key] ) ) {
					// First occurrence: keep the <style> tag as it is.
					$seen[$key] = true;
					return $m[0];
				}

				// We were going to use an empty <style> here, but there
				// was concern that would be too much overhead for browsers.
				// So let's hope a <link> with a non-standard rel and href isn't
				// going to be misinterpreted or mangled by any subsequent processing.
				return Html::element( 'link', [
					'rel' => 'mw-deduplicated-inline-style',
					'href' => "mw-data:" . wfUrlencode( $key ),
				] );
			},
			$text
		);
	}

	// Expand all relative URLs
	if ( ( $options['absoluteURLs'] ?? false ) && $text ) {
		$formatter = new HtmlFormatter( $text );
		$doc = $formatter->getDoc();
		$xpath = new DOMXPath( $doc );
		$nodes = $xpath->query( '//a[@href]' );
		/** @var DOMElement $node */
		foreach ( $nodes as $node ) {
			$node->setAttribute(
				'href',
				wfExpandUrl( $node->getAttribute( 'href' ), PROTO_RELATIVE )
			);
		}
		$text = $formatter->getText( 'html' );
	}

	// Hydrate slot section header placeholders generated by RevisionRenderer.
	$text = preg_replace_callback(
		'#<mw:slotheader>(.*?)</mw:slotheader>#',
		static function ( $m ) {
			$role = htmlspecialchars_decode( $m[1] );
			// TODO: map to message, using the interface language. Set lang="xyz" accordingly.
			$headerText = $role;
			return $headerText;
		},
		$text
	);

	return $text;
}
/**
 * Append an HTML comment describing the cache state to the stored page text.
 *
 * @param string $msg Text placed inside the HTML comment
 * @internal used by ParserCache
 */
public function addCacheMessage( string $msg ) {
	$comment = "\n<!-- $msg\n -->\n";
	$this->mText = $this->mText . $comment;
}
/**
* Add a CSS class to use for the wrapping div. If no class is given, no wrapper is added.
*
* The class is recorded as an array key, so adding the same class twice has no
* further effect.
*
* @param string $class
*/
public function addWrapperDivClass( $class ) {
$this->mWrapperDivClasses[$class] = true;
}
/**
* Clears the CSS classes to use for the wrapping div, effectively disabling the wrapper div
* until addWrapperDivClass() is called.
*/
public function clearWrapperDivClass() {
// Drop every class recorded so far.
$this->mWrapperDivClasses = [];
}
/**
 * Get the class (or space-separated classes) to put on the wrapper div of this output.
 *
 * An empty string means that no wrapper class was given, in which case no wrapper
 * div should be added. getText() adds the wrapper div automatically.
 *
 * @return string
 */
public function getWrapperDivClass() {
	// The class names live in the array keys.
	$classNames = array_keys( $this->mWrapperDivClasses );
	return implode( ' ', $classNames );
}
/**
* Record the revision ID that was assumed for {{REVISIONID}} when no revision was set.
*
* @param int $id
* @since 1.28
*/
public function setSpeculativeRevIdUsed( $id ) {
$this->mSpeculativeRevId = $id;
}
/**
* Get the revision ID recorded by setSpeculativeRevIdUsed().
*
* @return int|null Null if none was recorded
* @since 1.28
*/
public function getSpeculativeRevIdUsed() {
return $this->mSpeculativeRevId;
}
/**
* Record the page ID that was assumed for {{PAGEID}} when no revision was set.
*
* @param int $id
* @since 1.34
*/
public function setSpeculativePageIdUsed( $id ) {
$this->speculativePageIdUsed = $id;
}
/**
* Get the page ID recorded by setSpeculativePageIdUsed().
*
* @return int|null Null if none was recorded
* @since 1.34
*/
public function getSpeculativePageIdUsed() {
return $this->speculativePageIdUsed;
}
/**
* Record the revision timestamp that was assumed for {{REVISIONTIMESTAMP}} when no
* revision was set.
*
* @param string $timestamp TS_MW timestamp
* @since 1.34
*/
public function setRevisionTimestampUsed( $timestamp ) {
$this->revisionTimestampUsed = $timestamp;
}
/**
* Get the timestamp recorded by setRevisionTimestampUsed().
*
* @return string|null TS_MW timestamp or null if not used
* @since 1.34
*/
public function getRevisionTimestampUsed() {
return $this->revisionTimestampUsed;
}
/**
 * Record the SHA-1 (base 36) hash of a self-transclusion.
 *
 * The first hash recorded is stored as given. Recording a different hash afterwards
 * replaces the stored value with an empty string to mark the inconsistency, and the
 * value then stays empty for any later hash.
 *
 * @param string|null $hash Lowercase SHA-1 base 36 hash; null is ignored
 * @since 1.34
 */
public function setRevisionUsedSha1Base36( $hash ) {
	if ( $hash === null ) {
		// e.g. RevisionRecord::getSha1() returned null
		return;
	}

	$current = $this->revisionUsedSha1Base36;
	// Consistent when nothing has been recorded yet or the same hash is seen again.
	$isConsistent = ( $current === null || $current === $hash );
	$this->revisionUsedSha1Base36 = $isConsistent ? $hash : '';
}
/**
* Get the hash state maintained by setRevisionUsedSha1Base36().
*
* @return string|null Lowercase SHA-1 base 36 hash, null if unused, or "" on inconsistency
* @since 1.34
*/
public function getRevisionUsedSha1Base36() {
return $this->revisionUsedSha1Base36;
}
/**
* Get the language links, returned by reference to the internal array.
*
* @return array List of the full text of language links, in the order they appear.
*/
public function &getLanguageLinks() {
return $this->mLanguageLinks;
}
/**
* Get the inline interwiki links in the document.
*
* @return array 2-D map of prefix/DBK (in keys only)
*/
public function getInterwikiLinks() {
return $this->mInterwikiLinks;
}
/**
* Return the names of the categories on this page.
*
* Emits a deprecation notice through wfDeprecated() and then delegates to
* getCategoryNames().
*
* @return array<string>
* @deprecated since 1.38, use ::getCategoryNames() instead.
*/
public function getCategoryLinks() {
wfDeprecated( __METHOD__, '1.38' );
return $this->getCategoryNames();
}
/**
 * Return the names of the categories on this page.
 * Unlike ::getCategories(), sort keys are *not* included in the
 * return value.
 *
 * @return array<string> The names of the categories
 * @since 1.38
 */
public function getCategoryNames(): array {
	// Category names are stored as array keys, and PHP converts keys that look like
	// decimal integers (e.g. "2020") to int. Convert them back so that every element
	// really is a string, as documented.
	return array_map( 'strval', array_keys( $this->mCategories ) );
}
/**
* Get the categories, returned by reference to the internal array.
*
* @return array<string,string> Map of category names to sort keys
*/
public function &getCategories() {
return $this->mCategories;
}
Implement page status indicators Page status indicators are icons (or short text snippets) usually displayed in the top-right corner of the page, outside of the main content. Basically, <indicator name="foo">[[File:Foo.svg|20px]]</indicator> may be used on a page to place the icon in the indicator area. They are also known as top icons, page icons, heading icons or title icons. I found the discussion on bug 23796 highly illuminating. I suggest that everyone read it before suggesting different design choices. I spent some time with a thesaurus pondering the name. "Emblems" and "badges" were also considered, but the former has a much more limited meaning and the latter is already taken by Wikidata, with a similar but subtly different feature set. I am not aware of any naming conflicts ;) besides new talk page message "indicator" (used by core and Echo in some documents) and OOjs UI indicators (tiny icons like the arrow on a dropdown form element), which shouldn't be confusing. Potential use cases include: * "Lock" indicators for page protection levels * Featured/good article indicators * Redirect shortcuts display ("WP:VPT") * Links to help/manual for special pages * Coordinates?… or globe icon for inline pop-up maps Design features: * Skin-customizable. Skins can fully control where and how indicators are shown, or may just do <?php echo $this->getIndicators(); ?> to output the default structure. By default they are not shown at all. * Extension-customizable. Extensions can call ParserOutput::addIndicator() to insert an indicator from one of the numerous parser hooks. * Wiki-customizable. In addition to just using the parser functions, on-wiki styles and scripts can use the provided classes and ids (.mw-indicator, #mw-indicator-<name>) to customize their display. Design limitations: * Every indicator must have a unique identifier (name). It's not possible to create arrays, or to have several indicators with the same name. 
In case of duplicates, the latest occurrence of the parser function wins. * Indicators are displayed ordered by their names (and not occurrence order). This ensures consistency across pages and provides a simple means of ordering or grouping them. * Indicators are not stored, tracked or accessible outside of ParserOutput (in particular they're not in the page_props table). They are intended to merely reflect the content or metadata that is already present on the page, and not be data themselves. If you ever think you need to list pages with a given status indicator, instead figure out what it means and use the appropriate tracking category, special page report, already existing page_prop, or other means. Corresponding patch in Vector: I90a8ae15ac8275d084ea5f47b6b2684d5e6c7412. I'll implement support in the other three skins included in the tarball and document it on mediawiki.org after this is merged. Bug: 23796 Change-Id: I2389ff9a5332a2b1d033eb75f0946e5241cfaaf4
2014-09-24 10:44:16 +00:00
/**
* Get the page status indicators held by this output.
*
* @return array Page status indicators, usually displayed in top-right corner.
* @since 1.25
*/
public function getIndicators() {
return $this->mIndicators;
}
/**
* Get the title text.
*
* @return string Title text of the chosen language variant, as HTML.
*/
public function getTitleText() {
return $this->mTitleText;
}
/**
* Get the section data.
*
* @return array Table of contents
*/
public function getSections() {
return $this->mSections;
}
/**
* Get the links in the document, returned by reference to the internal array.
*
* @return int[][] 2-D map of NS/DBK to ID for the links in the document.
* ID=zero for broken.
*/
public function &getLinks() {
return $this->mLinks;
}
preferences: Signature validation (lint errors, user links, nested subst) Three new checks are now applied to user signatures in preferences: * Disallow invalid HTML and lint errors (T140606) Since 15e0e9bb4b we can rely on Parsoid to check the signature for lint errors. (The old PHP Parser doesn't have this capability.) Most importantly, this will disallow unclosed HTML tags. Unclosed formatting tags like `<i>` (and also wikitext markup like `''`) could affect the entire page with the bad markup. New configuration variable $wgSignatureAllowedLintErrors is added to allow ignoring some errors. The default value ignores the 'obsolete-tag' error (caused by HTML tags like `<font>` and `<tt>`.) * Require a link to user page, talk page or contributions (T237700) Various tools don't work correctly when such a link is missing. For example, Echo notifications are not sent, DiscussionTools will not allow replying to these comments, English Wikipedia's SineBot treats these comments as unsigned. Such requirement has been present for a long time in many Wikimedia wikis' policies, but it was not enforced by software. * Disallow "nested" substitution in signature (T230652) Clever abuse of "subst" markup and tildes allows users to save edits containing wikitext in which substitution occurs again when the page is next saved. Disallow this in signatures, at least. New configuration variable $wgSignatureValidation is added to control what we do about the result of the validation described above. The options are: * 'warning': Only displays a warning near the field on Special:Preferences if the current signature is invalid. Signatures can still be changed regardless of validity and will be used when signing comments. * 'new': In addition to the above, if a user tries to change their signature, the new one must be valid. Existing invalid signatures are still used when signing comments. 
* 'disallow': In addition to the above, existing invalid signatures are no longer used when signing comments. Bug: T140606 Bug: T237700 Bug: T230652 Change-Id: I07c575c2d9d2afe7a89c4847d16ac044417297bf
2019-11-09 00:15:51 +00:00
/**
* Get the links to special pages, returned by reference to the internal array.
*
* @return array Keys are DBKs for the links to special pages in the document
* @since 1.35
*/
public function &getLinksSpecial() {
return $this->mLinksSpecial;
}
/**
* Get the template references, returned by reference to the internal array.
*
* @return array 2-D map of NS/DBK to ID for the template references.
* ID=zero for broken.
*/
public function &getTemplates() {
return $this->mTemplates;
}
/**
* Get the template revision IDs, returned by reference to the internal array.
*
* @return array 2-D map of NS/DBK to rev ID for the template references.
* ID=zero for broken.
*/
public function &getTemplateIds() {
return $this->mTemplateIds;
}
/**
* Get the images used, returned by reference to the internal array.
*
* @return array DB keys of the images used, in the array key only
*/
public function &getImages() {
return $this->mImages;
}
/**
* Get the file search options, returned by reference to the internal array.
*
* @return array DB keys of the images used mapped to sha1 and MW timestamp.
*/
public function &getFileSearchOptions() {
return $this->mFileSearchOptions;
}
/**
* Get the external links, returned by reference to the internal array.
*
* @return array External link URLs, in the key only.
*/
public function &getExternalLinks() {
return $this->mExternalLinks;
}
/**
* Set the "no gallery on category page" flag (__NOGALLERY__).
*
* @param mixed $value Cast to bool before it is stored
*/
public function setNoGallery( $value ) {
$this->mNoGallery = (bool)$value;
}
/**
* Get the "no gallery on category page" flag (__NOGALLERY__).
*
* @return bool
*/
public function getNoGallery() {
return $this->mNoGallery;
}
/**
 * Get the items to be added to the "<head>".
 *
 * @return array As filled by addHeadItem(): items registered with a tag are
 *  keyed by that tag, the others are appended with numeric keys
 */
public function getHeadItems() {
return $this->mHeadItems;
}
/**
 * Get the modules accumulated through addModules().
 *
 * @return array
 */
public function getModules() {
return $this->mModules;
}
/**
 * Get the style modules accumulated through addModuleStyles().
 *
 * @return array
 */
public function getModuleStyles() {
return $this->mModuleStyles;
}
/**
 * Get the variables to be set in mw.config in JavaScript.
 *
 * @return array As filled by addJsConfigVars(): key => value
 * @since 1.23
 */
public function getJsConfigVars() {
return $this->mJsConfigVars;
}
/**
 * Get the output hooks registered through addOutputHook().
 *
 * The stored value is cast to array before being returned.
 *
 * @return array List of [ hook, data ] pairs
 */
public function getOutputHooks() {
return (array)$this->mOutputHooks;
}
/**
 * Get the warnings added through addWarning().
 *
 * Warnings are stored as array keys, so each distinct warning is
 * reported once.
 *
 * @return array List of warnings
 */
public function getWarnings() {
return array_keys( $this->mWarnings );
}
/**
 * Get the index policy stored on this output (see setIndexPolicy()).
 *
 * @return mixed The value as stored; presumably a string — TODO confirm
 */
public function getIndexPolicy() {
return $this->mIndexPolicy;
}
/**
 * Get the table-of-contents HTML stored on this output (see setTOCHTML()).
 *
 * @return mixed The value as stored; presumably a string — TODO confirm
 */
public function getTOCHTML() {
return $this->mTOCHTML;
}
/**
 * Get the timestamp stored on this output (see setTimestamp()).
 *
 * @return string|null TS_MW timestamp of the revision content
 */
public function getTimestamp() {
return $this->mTimestamp;
}
/**
 * Get the limit report data in the form it was passed to setLimitReportData().
 *
 * @return array Message key => value
 */
public function getLimitReportData() {
return $this->mLimitReportData;
}
/**
 * Get the limit report data in the restructured form built by
 * setLimitReportData(): keys containing a '-' after their first character
 * are split into two nesting levels, and numeric [ value, limit ] pairs
 * become [ 'value' => ..., 'limit' => ... ].
 *
 * @return array
 */
public function getLimitReportJSData() {
return $this->mLimitReportJSData;
}
/**
 * Get the OOUI flag stored through setEnableOOUI().
 *
 * @return bool Presumably a bool; the value is returned exactly as stored
 */
public function getEnableOOUI() {
return $this->mEnableOOUI;
}
/**
 * Get extra Content-Security-Policy 'default-src' directives.
 *
 * Returns the stored list unchanged.
 *
 * @since 1.35
 * @return array
 */
public function getExtraCSPDefaultSrcs() {
return $this->mExtraDefaultSrcs;
}
/**
 * Get extra Content-Security-Policy 'script-src' directives.
 *
 * Returns the stored list unchanged.
 *
 * @since 1.35
 * @return array
 */
public function getExtraCSPScriptSrcs() {
return $this->mExtraScriptSrcs;
}
/**
 * Get extra Content-Security-Policy 'style-src' directives.
 *
 * Returns the stored list unchanged.
 *
 * @since 1.35
 * @return array
 */
public function getExtraCSPStyleSrcs() {
return $this->mExtraStyleSrcs;
}
/**
 * Set the output text.
 *
 * @param string|null $text New value, handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns — presumably the previous value
 *  (NOTE(review): confirm against wfSetVar(), including its null handling)
 */
public function setText( $text ) {
return wfSetVar( $this->mText, $text );
}
/**
 * Replace the list of language links.
 *
 * @param array $ll New list of language links, handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns — presumably the previous value
 *  (NOTE(review): confirm against wfSetVar(), including its null handling)
 */
public function setLanguageLinks( $ll ) {
return wfSetVar( $this->mLanguageLinks, $ll );
}
/**
 * Replace the category map.
 *
 * Emits a deprecation notice via wfDeprecated() on every call.
 *
 * @param array $cl New category map, handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns — presumably the previous value
 *  (NOTE(review): confirm against wfSetVar(), including its null handling)
 * @deprecated since 1.38, use ::setCategories() instead.
 */
public function setCategoryLinks( $cl ) {
wfDeprecated( __METHOD__, '1.38' );
return wfSetVar( $this->mCategories, $cl );
}
/**
 * Set the title text.
 *
 * @param string $t New title text, handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns — presumably the previous value
 *  (NOTE(review): confirm against wfSetVar(), including its null handling)
 */
public function setTitleText( $t ) {
return wfSetVar( $this->mTitleText, $t );
}
/**
 * Set the section data of this output.
 *
 * @param mixed $toc New section data (presumably an array — TODO confirm),
 *  handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns — presumably the previous value
 *  (NOTE(review): confirm against wfSetVar(), including its null handling)
 */
public function setSections( $toc ) {
return wfSetVar( $this->mSections, $toc );
}
/**
 * Set the index policy of this output.
 *
 * @param mixed $policy New policy (presumably a string — TODO confirm),
 *  handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns — presumably the previous value
 *  (NOTE(review): confirm against wfSetVar(), including its null handling)
 */
public function setIndexPolicy( $policy ) {
return wfSetVar( $this->mIndexPolicy, $policy );
}
/**
 * Set the table-of-contents HTML of this output.
 *
 * @param mixed $tochtml New value (presumably a string — TODO confirm),
 *  handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns — presumably the previous value
 *  (NOTE(review): confirm against wfSetVar(), including its null handling)
 */
public function setTOCHTML( $tochtml ) {
return wfSetVar( $this->mTOCHTML, $tochtml );
}
/**
 * Set the timestamp of this output (see getTimestamp()).
 *
 * @param string|null $timestamp New timestamp, handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns — presumably the previous value
 *  (NOTE(review): confirm against wfSetVar(), including its null handling)
 */
public function setTimestamp( $timestamp ) {
return wfSetVar( $this->mTimestamp, $timestamp );
}
/**
 * Add a category.
 *
 * If the category is already present, its sort key is overwritten.
 *
 * @param string $c The category name
 * @param string $sort The sort key
 */
public function addCategory( $c, $sort ): void {
$this->mCategories[$c] = $sort;
}
/**
 * Overwrite the category map.
 *
 * The whole map is replaced; categories added earlier are discarded.
 *
 * @param array<string,string> $c Map of category names to sort keys
 * @since 1.38
 */
public function setCategories( array $c ): void {
$this->mCategories = $c;
}
Implement page status indicators Page status indicators are icons (or short text snippets) usually displayed in the top-right corner of the page, outside of the main content. Basically, <indicator name="foo">[[File:Foo.svg|20px]]</indicator> may be used on a page to place the icon in the indicator area. They are also known as top icons, page icons, heading icons or title icons. I found the discussion on bug 23796 highly illuminating. I suggest that everyone read it before suggesting different design choices. I spent some time with a thesaurus pondering the name. "Emblems" and "badges" were also considered, but the former has a much more limited meaning and the latter is already taken by Wikidata, with a similar but subtly different feature set. I am not aware of any naming conflicts ;) besides new talk page message "indicator" (used by core and Echo in some documents) and OOjs UI indicators (tiny icons like the arrow on a dropdown form element), which shouldn't be confusing. Potential use cases include: * "Lock" indicators for page protection levels * Featured/good article indicators * Redirect shortcuts display ("WP:VPT") * Links to help/manual for special pages * Coordinates?… or globe icon for inline pop-up maps Design features: * Skin-customizable. Skins can fully control where and how indicators are shown, or may just do <?php echo $this->getIndicators(); ?> to output the default structure. By default they are not shown at all. * Extension-customizable. Extensions can call ParserOutput::addIndicator() to insert an indicator from one of the numerous parser hooks. * Wiki-customizable. In addition to just using the parser functions, on-wiki styles and scripts can use the provided classes and ids (.mw-indicator, #mw-indicator-<name>) to customize their display. Design limitations: * Every indicator must have a unique identifier (name). It's not possible to create arrays, or to have several indicators with the same name. 
In case of duplicates, the latest occurrence of the parser function wins. * Indicators are displayed ordered by their names (and not occurrence order). This ensures consistency across pages and provides a simple means of ordering or grouping them. * Indicators are not stored, tracked or accessible outside of ParserOutput (in particular they're not in the page_props table). They are intended to merely reflect the content or metadata that is already present on the page, and not be data themselves. If you ever think you need to list pages with a given status indicator, instead figure out what it means and use the appropriate tracking category, special page report, already existing page_prop, or other means. Corresponding patch in Vector: I90a8ae15ac8275d084ea5f47b6b2684d5e6c7412. I'll implement support in the other three skins included in the tarball and document it on mediawiki.org after this is merged. Bug: 23796 Change-Id: I2389ff9a5332a2b1d033eb75f0946e5241cfaaf4
2014-09-24 10:44:16 +00:00
/**
 * Set the content of a page status indicator.
 *
 * An indicator already stored under the same ID is overwritten.
 *
 * @param string $id
 * @param string $content
 * @since 1.25
 */
public function setIndicator( $id, $content ) {
$this->mIndicators[$id] = $content;
}
/**
 * Enables OOUI, if true, in any OutputPage instance this ParserOutput
 * object is added to.
 *
 * The value is stored as given; calling this without an argument stores false.
 *
 * @since 1.26
 * @param bool $enable If OOUI should be enabled or not
 */
public function setEnableOOUI( $enable = false ) {
$this->mEnableOOUI = $enable;
}
/**
 * Append a language link to the list of language links.
 *
 * @param string $t Full text of the language link
 */
public function addLanguageLink( $t ) {
$this->mLanguageLinks[] = $t;
}
/**
 * Add a warning to this output.
 *
 * The warning is stored as an array key, so adding the same warning
 * again has no further effect.
 *
 * @param string $s The warning
 */
public function addWarning( $s ) {
$this->mWarnings[$s] = 1;
}
/**
 * Register an output hook, stored as a [ hook, data ] pair.
 *
 * @param mixed $hook Hook identifier (presumably a string — TODO confirm)
 * @param mixed $data Data stored together with the hook; false if omitted
 */
public function addOutputHook( $hook, $data = false ) {
$this->mOutputHooks[] = [ $hook, $data ];
}
/**
 * Set the "new section" flag of this output.
 *
 * @param mixed $value Cast to bool before being stored
 */
public function setNewSection( $value ) {
$this->mNewSection = (bool)$value;
}
/**
 * Set whether the new section link should be hidden.
 *
 * @param bool $value Hide the new section link?
 */
public function setHideNewSection( bool $value ): void {
$this->mHideNewSection = $value;
}
/**
 * Set whether the new section link should be hidden.
 *
 * Emits a deprecation notice via wfDeprecated(), then forwards the value,
 * cast to bool, to setHideNewSection().
 *
 * @param bool $value Hide the new section link?
 * @deprecated since 1.38 use ::setHideNewSection()
 */
public function hideNewSection( $value ) {
wfDeprecated( __METHOD__, '1.38' );
$this->setHideNewSection( (bool)$value );
}
/**
 * Get whether the new section link should be hidden.
 *
 * @return bool The stored flag, cast to bool
 */
public function getHideNewSection() {
return (bool)$this->mHideNewSection;
}
/**
 * Get the "new section" flag of this output.
 *
 * @return bool The stored flag, cast to bool
 */
public function getNewSection() {
return (bool)$this->mNewSection;
}
/**
 * Checks whether a URL points to the given server.
 *
 * The URL must start with the server string (compared case-insensitively)
 * and be followed either by '?', '/', '#' or by the end of the URL.
 *
 * @param string $internal The server to check against
 * @param string $url The url to check
 * @return bool
 * @internal
 */
public static function isLinkInternal( $internal, $url ) {
	// If server is proto relative, check also for http/https links
	$schemePart = substr( $internal, 0, 2 ) === '//' ? '(?:https?:)?' : '';
	$serverPart = preg_quote( $internal, '/' );
	// check for query/path/anchor or end of link in each case
	$pattern = '/^' . $schemePart . $serverPart . '(?:[\?\/\#]|$)/i';

	return preg_match( $pattern, $url ) === 1;
}
/**
 * Record an external link.
 *
 * The URL is normalized first. Links pointing to our own server
 * ($wgServer) are only recorded when $wgRegisterInternalExternals is set.
 *
 * @param string $url
 */
public function addExternalLink( $url ) {
	global $wgServer, $wgRegisterInternalExternals;

	# Replace unnecessary URL escape codes with the referenced character
	# This prevents spammers from hiding links from the filters
	$url = Parser::normalizeLinkUrl( $url );

	# We don't register links pointing to our own server, unless... :-)
	if ( !$wgRegisterInternalExternals && self::isLinkInternal( $wgServer, $url ) ) {
		return;
	}

	$this->mExternalLinks[$url] = 1;
}
/**
 * Record a local or interwiki inline link for saving in future link tables.
 *
 * Interwiki targets are handed to addInterwikiLink(), Special: targets are
 * kept in a separate map, and targets with an empty DB key are ignored
 * (except in the Media: namespace, which is filed under NS_FILE).
 *
 * @param LinkTarget $link (used to require Title until 1.38)
 * @param int|null $id Optional known page_id so we can skip the lookup
 */
public function addLink( LinkTarget $link, $id = null ) {
	if ( $link->isExternal() ) {
		// Don't record interwikis in pagelinks
		$this->addInterwikiLink( $link );
		return;
	}

	$namespace = $link->getNamespace();
	$dbKey = $link->getDBkey();

	if ( $namespace === NS_SPECIAL ) {
		// We don't want to record Special: links in the database, so put them in a separate place.
		// It might actually be wise to, but we'd need to do some normalization.
		$this->mLinksSpecial[$dbKey] = 1;
		return;
	}

	if ( $namespace === NS_MEDIA ) {
		// Normalize this pseudo-alias if it makes it down here...
		$namespace = NS_FILE;
	} elseif ( $dbKey === '' ) {
		// Don't record self links - [[#Foo]]
		return;
	}

	if ( !isset( $this->mLinks[$namespace] ) ) {
		$this->mLinks[$namespace] = [];
	}

	if ( $id === null ) {
		// No page ID supplied: look the page up through the page store
		$id = MediaWikiServices::getInstance()
			->getPageStore()
			->getPageForLink( $link )
			->getId();
	}

	$this->mLinks[$namespace][$dbKey] = $id;
}
/**
 * Register a file dependency for this output.
 *
 * The timestamp and SHA-1 are only stored when both are given (non-null).
 *
 * @param string $name Title dbKey
 * @param string|false|null $timestamp MW timestamp of file creation (or false if non-existing)
 * @param string|false|null $sha1 Base 36 SHA-1 of file (or false if non-existing)
 */
public function addImage( $name, $timestamp = null, $sha1 = null ) {
	$this->mImages[$name] = 1;

	if ( $timestamp === null || $sha1 === null ) {
		return;
	}

	$this->mFileSearchOptions[$name] = [
		'time' => $timestamp,
		'sha1' => $sha1,
	];
}
/**
 * Register a template dependency for this output.
 *
 * The page ID and the revision ID are stored in two parallel maps, both
 * indexed by namespace and DB key.
 *
 * @param LinkTarget $link (used to require Title until 1.38)
 * @param int $page_id
 * @param int $rev_id
 */
public function addTemplate( $link, $page_id, $rev_id ) {
	$namespace = $link->getNamespace();
	$dbKey = $link->getDBkey();

	$this->mTemplates[$namespace] = $this->mTemplates[$namespace] ?? [];
	$this->mTemplates[$namespace][$dbKey] = $page_id;

	// For versioning
	$this->mTemplateIds[$namespace] = $this->mTemplateIds[$namespace] ?? [];
	$this->mTemplateIds[$namespace][$dbKey] = $rev_id;
}
/**
 * Record an interwiki link, indexed by interwiki prefix and DB key.
 *
 * @param LinkTarget $link LinkTarget object, must be an interwiki link
 *  (used to require Title until 1.38).
 *
 * @throws MWException If given invalid input
 */
public function addInterwikiLink( $link ) {
	$isInterwiki = $link->isExternal();
	if ( !$isInterwiki ) {
		throw new MWException( 'Non-interwiki link passed, internal parser error.' );
	}

	$prefix = $link->getInterwiki();
	$this->mInterwikiLinks[$prefix] = $this->mInterwikiLinks[$prefix] ?? [];

	$dbKey = $link->getDBkey();
	$this->mInterwikiLinks[$prefix][$dbKey] = 1;
}
/**
 * Add some text to the "<head>".
 *
 * If $tag is set, the section is stored under that tag, so a later call
 * with the same tag replaces it and it is only included once in a given
 * page. Without a tag the section is simply appended.
 *
 * @param string $section
 * @param string|bool $tag
 */
public function addHeadItem( $section, $tag = false ) {
	if ( $tag === false ) {
		$this->mHeadItems[] = $section;
		return;
	}

	$this->mHeadItems[$tag] = $section;
}
/**
 * Append one or more modules to the modules of this output.
 *
 * A single module name is cast to a one-element array before being
 * merged; no de-duplication is done here.
 *
 * @see OutputPage::addModules
 * @param string|array $modules
 */
public function addModules( $modules ) {
$this->mModules = array_merge( $this->mModules, (array)$modules );
}
/**
 * Append one or more style modules to the style modules of this output.
 *
 * A single module name is cast to a one-element array before being
 * merged; no de-duplication is done here.
 *
 * @see OutputPage::addModuleStyles
 * @param string|array $modules
 */
public function addModuleStyles( $modules ) {
$this->mModuleStyles = array_merge( $this->mModuleStyles, (array)$modules );
}
/**
 * Add one or more variables to be set in mw.config in JavaScript.
 *
 * When an array is passed, every key/value pair in it is stored and
 * $value is ignored. A variable that already exists is overwritten.
 *
 * @param string|array $keys Key or array of key/value pairs.
 * @param mixed|null $value [optional] Value of the configuration variable.
 * @since 1.23
 */
public function addJsConfigVars( $keys, $value = null ) {
	if ( !is_array( $keys ) ) {
		$this->mJsConfigVars[$keys] = $value;
		return;
	}

	foreach ( $keys as $name => $setting ) {
		$this->mJsConfigVars[$name] = $setting;
	}
}
/**
 * Copy items from the OutputPage object into this one.
 *
 * Copied are: modules, style modules, JS config variables, head items and
 * the "prevent clickjacking" state.
 *
 * @param OutputPage $out
 */
public function addOutputPageMetadata( OutputPage $out ) {
$this->addModules( $out->getModules() );
$this->addModuleStyles( $out->getModuleStyles() );
$this->addJsConfigVars( $out->getJsConfigVars() );
$this->mHeadItems = array_merge( $this->mHeadItems, $out->getHeadItemsArray() );
// Clickjacking prevention can only be switched on here, never off; the
// OutputPage is not even consulted once it is already enabled.
$this->mPreventClickjacking = $this->mPreventClickjacking || $out->getPreventClickjacking();
}
/**
 * Add a tracking category, getting the title from a system message,
 * or print a debug message if the title is invalid.
 *
 * This is a thin wrapper that forwards to the TrackingCategories service.
 *
 * Any message used with this function should be registered so it will
 * show up on Special:TrackingCategories. Core messages should be added
 * to TrackingCategories::CORE_TRACKING_CATEGORIES, and extensions
 * should add to "TrackingCategories" in their extension.json.
 *
 * @todo Migrate some code to TrackingCategories
 *
 * @param string $msg Message key
 * @param PageReference $page the page which is being tracked
 *  (used to require a Title until 1.38)
 * @return bool Whether the addition was successful
 * @since 1.25
 * @deprecated since 1.38, use Parser::addTrackingCategory or
 *  TrackingCategories::addTrackingCategory() instead
 */
public function addTrackingCategory( $msg, PageReference $page ) {
	return MediaWikiServices::getInstance()
		->getTrackingCategories()
		->addTrackingCategory( $this, $msg, $page );
}
/**
 * Override the title to be used for display.
 *
 * The text is stored both as the title text of this output and as the
 * 'displaytitle' page property.
 *
 * @note this is assumed to have been validated
 * (check equal normalisation, etc.)
 *
 * @note this is expected to be safe HTML,
 * ready to be served to the client.
 *
 * @param string $text Desired title text
 */
public function setDisplayTitle( $text ) {
$this->setTitleText( $text );
$this->setPageProperty( 'displaytitle', $text );
}
/**
 * Get the title to be used for display.
 *
 * As per the contract of setDisplayTitle(), this is safe HTML,
 * ready to be served to the client.
 *
 * @return string|false HTML, or false when the title text is the empty string
 */
public function getDisplayTitle() {
	$titleText = $this->getTitleText();

	return $titleText === '' ? false : $titleText;
}
/**
 * Attach a flag to the output so that it can be checked later to handle special cases.
 *
 * Setting a flag that is already set has no further effect.
 *
 * @param string $flag
 */
public function setFlag( $flag ) {
$this->mFlags[$flag] = true;
}
/**
 * Check whether a flag was attached to this output with setFlag().
 *
 * @param string $flag
 * @return bool Whether the given flag was set to signify a special case
 */
public function getFlag( $flag ) {
return isset( $this->mFlags[$flag] );
}
/**
 * Get the names of all flags attached to this output with setFlag().
 *
 * @return string[] List of flags signifying special cases
 * @since 1.34
 */
public function getAllFlags() {
return array_keys( $this->mFlags );
}
/**
 * Sets a page property to be stored in the page_props database table.
 *
 * Emits a deprecation notice via wfDeprecated(), then forwards to
 * setPageProperty().
 *
 * @param string $name
 * @param int|float|string|bool|null $value
 * @deprecated since 1.38, renamed to ::setPageProperty()
 */
public function setProperty( $name, $value ) {
wfDeprecated( __METHOD__, '1.38' );
$this->setPageProperty( $name, $value );
}
/**
 * Look up a page property.
 *
 * Emits a deprecation notice via wfDeprecated(), then forwards to
 * getPageProperty().
 *
 * @param string $name The property name to look up.
 *
 * @return mixed|bool The value previously set using setPageProperty(). False if null or no value
 * was set for the given property name.
 *
 * @note You need to use getPageProperties() to check for boolean and null properties.
 * @deprecated since 1.38, renamed to ::getPageProperty()
 */
public function getProperty( $name ) {
wfDeprecated( __METHOD__, '1.38' );
return $this->getPageProperty( $name );
}
/**
 * Remove a page property.
 *
 * Emits a deprecation notice via wfDeprecated(), then forwards to
 * unsetPageProperty().
 *
 * @param string $name
 * @deprecated since 1.38, renamed to ::unsetPageProperty()
 */
public function unsetProperty( $name ) {
wfDeprecated( __METHOD__, '1.38' );
$this->unsetPageProperty( $name );
}
/**
 * Return all the page properties set on this ParserOutput.
 *
 * Emits a deprecation notice via wfDeprecated(), then forwards to
 * getPageProperties().
 *
 * @return array
 * @deprecated since 1.38, renamed to ::getPageProperties()
 */
public function getProperties() {
wfDeprecated( __METHOD__, '1.38' );
return $this->getPageProperties();
}
/**
 * Set a page property to be stored in the page_props database table.
 *
 * page_props is a key value store indexed by the page ID. This allows
 * the parser to set a property on a page which can then be quickly
 * retrieved given the page ID or via a DB join when given the page
 * title.
 *
 * Since 1.23, page_props are also indexed by numeric value, to allow
 * for efficient "top k" queries of pages with respect to a given property.
 *
 * setPageProperty() is thus used to propagate properties from the parsed
 * page to request contexts other than a page view of the currently parsed
 * article.
 *
 * Some example applications:
 *
 *   * To implement hidden categories, hiding pages from category listings
 *     by storing a page property.
 *
 *   * Overriding the displayed article title (ParserOutput::setDisplayTitle()).
 *
 *   * To implement image tagging, for example displaying an icon on an
 *     image thumbnail to indicate that it is listed for deletion on
 *     Wikimedia Commons.
 *     This is not actually implemented yet, but would be pretty cool.
 *
 * @note Do not use setPageProperty() to set a property which is only used
 * in a context where the ParserOutput object itself is already available,
 * for example a normal page view. There is no need to save such a property
 * in the database since the text is already parsed. You can just hook
 * OutputPageParserOutput and get your data out of the ParserOutput object.
 *
 * If you are writing an extension where you want to set a property in the
 * parser which is used by an OutputPageParserOutput hook, you have to
 * associate the extension data directly with the ParserOutput object.
 * Since MediaWiki 1.21, you can use setExtensionData() to do this:
 *
 * @par Example:
 * @code
 *    $parser->getOutput()->setExtensionData( 'my_ext_foo', '...' );
 * @endcode
 *
 * And then later, in OutputPageParserOutput or similar:
 *
 * @par Example:
 * @code
 *    $output->getExtensionData( 'my_ext_foo' );
 * @endcode
 *
 * In MediaWiki 1.20 and older, you have to use a custom member variable
 * within the ParserOutput object:
 *
 * @par Example:
 * @code
 *    $parser->getOutput()->my_ext_foo = '...';
 * @endcode
 *
 * @note Only scalar values like numbers and strings are supported
 * as a value. Attempting to use an object or array will
 * not work properly with LinksUpdate.
 *
 * @param string $name
 * @param int|float|string|bool|null $value
 * @since 1.38
 */
public function setPageProperty( string $name, $value ): void {
$this->mProperties[$name] = $value;
}
/**
 * Look up a page property.
 *
 * A property that is missing and a property whose stored value is null
 * both yield false, so they cannot be told apart from a stored false here.
 *
 * @param string $name The page property name to look up.
 * @return int|float|string|bool The value previously set using setPageProperty(). False if null or no value
 * was set for the given property name.
 *
 * @note You need to use getPageProperties() to check for boolean and null properties.
 * @since 1.38
 */
public function getPageProperty( string $name ) {
return $this->mProperties[$name] ?? false;
}
/**
 * Remove a page property.
 *
 * Removing a property that is not set has no effect.
 *
 * @param string $name The page property name.
 * @since 1.38
 */
public function unsetPageProperty( string $name ): void {
unset( $this->mProperties[$name] );
}
/**
 * Return all the page properties set on this ParserOutput.
 *
 * If the property store is not set (or null), it is initialised to an
 * empty array first.
 *
 * @return array<string,int|float|string|bool|null>
 * @since 1.38
 */
public function getPageProperties(): array {
	$this->mProperties = $this->mProperties ?? [];

	return $this->mProperties;
}
/**
 * Attaches arbitrary data to this ParserOutput. This can be used to store some information in
 * the ParserOutput object for later use during page output. The data will be cached along with
 * the ParserOutput object, but unlike data set using setPageProperty(), it is not recorded in the
 * database.
 *
 * This method is provided to overcome the unsafe practice of attaching extra information to a
 * ParserOutput by directly assigning member variables.
 *
 * To use setExtensionData() to pass extension information from a hook inside the parser to a
 * hook in the page output, use this in the parser hook:
 *
 * @par Example:
 * @code
 *    $parser->getOutput()->setExtensionData( 'my_ext_foo', '...' );
 * @endcode
 *
 * And then later, in OutputPageParserOutput or similar:
 *
 * @par Example:
 * @code
 *    $output->getExtensionData( 'my_ext_foo' );
 * @endcode
 *
 * In MediaWiki 1.20 and older, you have to use a custom member variable
 * within the ParserOutput object:
 *
 * @par Example:
 * @code
 *    $parser->getOutput()->my_ext_foo = '...';
 * @endcode
 *
 * @note Only scalar values (e.g. numbers, strings), arrays, or MediaWiki\Json\JsonUnserializable
 * instances are supported as a value. Attempting to set an instance of another class as
 * extension data will break ParserCache for the page.
 *
 * @param string $key The key for accessing the data. Extensions should take care to avoid
 *   conflicts in naming keys. It is suggested to use the extension's name as a prefix.
 *
 * @param mixed|JsonUnserializable $value The value to set.
 *   Setting a value to null is equivalent to removing the value.
 * @since 1.21
 */
public function setExtensionData( $key, $value ) {
	if ( $value !== null ) {
		$this->mExtensionData[$key] = $value;
		return;
	}

	// null means "remove the entry"
	unset( $this->mExtensionData[$key] );
}
/**
 * Gets extension data previously attached to this ParserOutput using setExtensionData().
 * Typically, such data would be set while parsing the page, e.g. by a parser function.
 *
 * @since 1.21
 *
 * @param string $key The key to look up.
 *
 * @return mixed|null The value previously set for the given key using setExtensionData()
 *         or null if no value was set for this key.
 */
public function getExtensionData( $key ) {
return $this->mExtensionData[$key] ?? null;
}
/**
 * Read the current value of the wall clock and/or the CPU clock.
 *
 * CPU time is the sum of the user and system time reported by getrusage().
 *
 * @param string|null $clock 'wall' or 'cpu' to read only that clock; any
 *  falsy value reads both
 * @return float[] Clock name ('wall', 'cpu') => time in seconds; empty when
 *  $clock names neither clock
 */
private static function getTimes( $clock = null ) {
	$times = [];
	$wantAll = !$clock;

	if ( $wantAll || $clock === 'wall' ) {
		$times['wall'] = microtime( true );
	}

	if ( $wantAll || $clock === 'cpu' ) {
		$usage = getrusage( 0 /* RUSAGE_SELF */ );
		$userTime = $usage['ru_utime.tv_sec'] + $usage['ru_utime.tv_usec'] / 1e6;
		$systemTime = $usage['ru_stime.tv_sec'] + $usage['ru_stime.tv_usec'] / 1e6;
		$times['cpu'] = $userTime + $systemTime;
	}

	return $times;
}
/**
 * Resets the parse start timestamps for future calls to getTimeSinceStart().
 *
 * Both the wall clock and the CPU clock are sampled.
 *
 * @since 1.22
 */
public function resetParseStartTime() {
$this->mParseStartTime = self::getTimes();
}
/**
 * Returns the time since resetParseStartTime() was last called.
 *
 * Clocks available are:
 *  - wall: Wall clock time
 *  - cpu: CPU time (requires getrusage)
 *
 * @since 1.22
 * @param string $clock
 * @return float|null Elapsed time in seconds, or null when no start time is
 *  recorded for the given clock
 */
public function getTimeSinceStart( $clock ) {
	$startedAt = $this->mParseStartTime[$clock] ?? null;
	if ( $startedAt === null ) {
		return null;
	}

	$now = self::getTimes( $clock );

	return $now[$clock] - $startedAt;
}
/**
 * Sets parser limit report data for a key.
 *
 * The key is used as the prefix for various messages used for formatting:
 *  - $key: The label for the field in the limit report
 *  - $key-value-text: Message used to format the value in the "NewPP limit
 *      report" HTML comment. If missing, uses $key-format.
 *  - $key-value-html: Message used to format the value in the preview
 *      limit report table. If missing, uses $key-format.
 *  - $key-value: Message used to format the value. If missing, uses "$1".
 *
 * Note that all values are interpreted as wikitext, and so should be
 * encoded with htmlspecialchars() as necessary, but should avoid complex
 * HTML for sanity of display in the "NewPP limit report" comment.
 *
 * The value is stored twice: unchanged for getLimitReportData(), and in a
 * restructured form for getLimitReportJSData(). In the latter, a two-element
 * list of numeric values becomes [ 'value' => ..., 'limit' => ... ], and a
 * key with a '-' after its first character is split at the first '-' into
 * two nesting levels.
 *
 * @since 1.22
 * @param string $key Message key
 * @param mixed $value Appropriate for Message::params()
 */
public function setLimitReportData( $key, $value ) {
	$this->mLimitReportData[$key] = $value;

	$isValueLimitPair = is_array( $value )
		&& array_keys( $value ) === [ 0, 1 ]
		&& is_numeric( $value[0] )
		&& is_numeric( $value[1] );
	$jsData = $isValueLimitPair
		? [ 'value' => $value[0], 'limit' => $value[1] ]
		: $value;

	// A '-' at position 0 deliberately does not count as a separator
	$dashPos = strpos( $key, '-' );
	if ( $dashPos === false || $dashPos === 0 ) {
		$this->mLimitReportJSData[$key] = $jsData;
		return;
	}

	[ $group, $name ] = explode( '-', $key, 2 );
	$this->mLimitReportJSData[$group][$name] = $jsData;
}
/**
 * Report whether the cache TTL of this output is below the site default.
 *
 * Returns true when getCacheExpiry() is strictly smaller than
 * $wgParserCacheExpireTime. This is the case when content is determined by more
 * than hard state (e.g. page edits), such as template/file transclusions based on
 * the current timestamp or extension tags that generate lists based on queries.
 *
 * This method mainly exists to facilitate the logic in
 * WikiPage::triggerOpportunisticLinksUpdate. As such, beware that reducing the TTL for
 * reasons that do not relate to "dynamic content" may have the side-effect of incurring
 * more RefreshLinksJob executions.
 *
 * @internal For use by Parser and WikiPage
 * @since 1.37
 * @return bool
 */
public function hasReducedExpiry(): bool {
	global $wgParserCacheExpireTime;

	$currentExpiry = $this->getCacheExpiry();

	return $currentExpiry < $wgParserCacheExpireTime;
}
// [Revision annotation captured together with the source; kept verbatim as a comment so the file still parses.]
// WikiPage: Document triggerOpportunisticLinksUpdate and related code == History of WikiPage::triggerOpportunisticLinksUpdate == * 2007 (r19095; T10575; b3a8d488a8) Introduces the "cascading protection" feature. This commit added code to Article.php, in a conditional branch where we encountered a ParserCache "miss" and thus have done a fresh parse. The code in question would query which templates we ended up using, and if that differed from what the database said (e.g. stored during the last actual edit or links update), then a new LinksUpdate is ad-hoc constructed and executed. I could not find it anywhere explicitly spelled out, but my best guess is that the reason for this is to make sure that if the page in question contains wikitext that trancludes a different page based on the current date and time (such as how most Wikipedia main pages transclude news information and "Did you know" information based on dated subpages that are prepared in advance), then we don't just want to re-render the page after a day has passed, we also want to re-do the links update to ensure the search index, category links, and "WhatLinksHere" is correct, and thus by extent, to make sure that cascading protection from the main page does in fact apply to the "current" set of subpages and templates actually in-use. * 2007 (r19227; 0c0c0eff81) This adds an optimisation to the added logic that limits it to pages that satisfy `mTitle->areRestrictionsCascading()`. Thus for most articles, which aren't protected at all, we don't run LinksUpdate mid-request after a cache miss page view. Because of this commit, the pre-2007 status quo remained unaltered and has remains unaltered to this very day: We don't re-index categories and WhatLinksHere etc, unless an article edit or propagating template edit takes place. * 2009 (r52888; 1353a8ba29) Introduces the PoolCounter feature. The logic in question moves to Article::doCascadeProtectionUpdates(). * 2015 (Iea952d4d2e66; df5ef8b5d7).
// The logic in question is changed, motivated by wanting to avoid DB writes during page views. * Instead of executing LinksUpdate mid-request, we now queue a RefreshLinksJob on the JobQueue, and utilize a newly added `prioritize => true` parameter. This commit also introduces a new feature, which is to queue RefreshLinksJob also for pages that do not have cascading protection, but that do satisfy a new boolean method called `$parserOutput->hasDynamicContent()`, which is set when the Parser encounters TTL-reducing magic words and functions such as {{CURRENTDAY}} and {{#time}}. For this new case, however, the `prioritize` parameter is not set, and this feature is disabled in WMF production (and other farms that enable wgMiserMode). This commit also renamed doCascadeProtectionUpdates() to triggerOpportunisticLinksUpdate(). This commit also removed various documentation comments, which I've partly restored in this patch, the patch you're looking at now. == Actual changes == * Rename hasDynamicContent() to hasReducedExpiry() and keep the previous method as a non-deprecated wrapper. This change is motivated by T280605, in which I intent to make use of a Parser hook that reduces the cache expiry. There are numerous extensions in WMF production that already do this, and thus the assumption that these have "dynamic content" is already false in some cases. I'm not yet sure how or if to refactor this so to allow reducing of the TTL *without* causing this side-effect, but as a first step we can make the method more obvious in its impact and behaviour. I've also updated two of the callers that I think will benefit from this more explicit name and (current) implementation detail. Bug: T280605 Change-Id: I85bdff7f86911f8ea5b866e3639f08ddd3f3bf6f
// 2021-05-05 01:03:16 +00:00
/**
 * Older name for hasReducedExpiry(); simply forwards to it.
 *
 * @see ParserOutput::hasReducedExpiry
 * @return bool The value returned by hasReducedExpiry()
 * @since 1.25
 */
public function hasDynamicContent() {
	$reduced = $this->hasReducedExpiry();

	return $reduced;
}
/**
 * Store a new value for the prevent-clickjacking flag.
 *
 * @param bool $flag New flag value
 * @since 1.38
 */
public function setPreventClickjacking( bool $flag ) {
	$this->mPreventClickjacking = $flag;
}
/**
 * Read the current value of the prevent-clickjacking flag.
 *
 * @return bool Flag value
 * @since 1.38
 */
public function getPreventClickjacking(): bool {
	$flag = $this->mPreventClickjacking;

	return $flag;
}
/**
 * Get or set the prevent-clickjacking flag.
 *
 * Emits a deprecation notice, then returns the value the flag had before the call.
 * When $flag is omitted or null the flag is left untouched, so the method can be
 * used as a plain getter; any other value is forwarded to setPreventClickjacking().
 *
 * @since 1.24
 * @param bool|null $flag New flag value, or null to leave it unchanged
 * @return bool Old flag value
 * @deprecated since 1.38:
 * use ::setPreventClickjacking() or ::getPreventClickjacking()
 */
public function preventClickjacking( $flag = null ) {
	wfDeprecated( __METHOD__, '1.38' );
	// Capture the previous value first: it is the documented return value.
	$old = $this->getPreventClickjacking();
	if ( $flag !== null ) {
		$this->setPreventClickjacking( $flag );
	}
	return $old;
}
/**
 * Lower the runtime adaptive TTL to at most this value.
 *
 * The stored adaptive maximum becomes the smaller of its current value and $ttl,
 * and $ttl is also passed on to updateCacheExpiry().
 *
 * @param int $ttl
 * @since 1.28
 */
public function updateRuntimeAdaptiveExpiry( $ttl ) {
	$loweredMax = min( $ttl, $this->mMaxAdaptiveExpiry );
	$this->mMaxAdaptiveExpiry = $loweredMax;

	$this->updateCacheExpiry( $ttl );
}
/**
 * Register an additional source for the Content-Security-Policy default-src directive.
 *
 * Call this if you are including a resource (e.g. image) from a third party domain.
 * This is used for all source types except style and script.
 *
 * @since 1.35
 * @param string $src CSP source e.g. example.com
 */
public function addExtraCSPDefaultSrc( $src ) {
	// Appended as-is to the list of extra default sources
	$this->mExtraDefaultSrcs[] = $src;
}
/**
 * Register an additional source for the Content-Security-Policy style-src directive.
 *
 * @since 1.35
 * @param string $src CSP source e.g. example.com
 */
public function addExtraCSPStyleSrc( $src ) {
	// Appended as-is to the list of extra style sources
	$this->mExtraStyleSrcs[] = $src;
}
/**
 * Register an additional source for the Content-Security-Policy script-src directive.
 *
 * Call this if you are loading third-party Javascript.
 *
 * @since 1.35
 * @param string $src CSP source e.g. example.com
 */
public function addExtraCSPScriptSrc( $src ) {
	// Appended as-is to the list of extra script sources
	$this->mExtraScriptSrcs[] = $src;
}
/**
 * Call this when parsing is done to lower the TTL based on low parse times.
 *
 * Nothing happens when no adaptive maximum was set (it is still infinite) or when
 * getTimeSinceStart( 'wall' ) does not yield a float. Otherwise the TTL is taken from
 * the straight line through (PARSE_FAST_SEC, FAST_AR_TTL) and
 * (PARSE_SLOW_SEC, SLOW_AR_TTL), evaluated at the wall-clock parse time, clamped to
 * at least MIN_AR_TTL and at most the adaptive maximum, and handed to
 * updateCacheExpiry().
 *
 * @since 1.28
 */
public function finalizeAdaptiveCacheExpiry() {
	if ( is_infinite( $this->mMaxAdaptiveExpiry ) ) {
		// not set
		return;
	}

	$elapsed = $this->getTimeSinceStart( 'wall' );
	if ( !is_float( $elapsed ) ) {
		return;
	}

	$ttlSpan = self::SLOW_AR_TTL - self::FAST_AR_TTL;
	$secSpan = self::PARSE_SLOW_SEC - self::PARSE_FAST_SEC;
	$slope = $ttlSpan / $secSpan;
	// SLOW_AR_TTL = PARSE_SLOW_SEC * $slope + $point
	$point = self::SLOW_AR_TTL - self::PARSE_SLOW_SEC * $slope;

	$linearTtl = $slope * $elapsed + $point;
	$flooredTtl = max( $linearTtl, self::MIN_AR_TTL );

	$this->updateCacheExpiry( min( $flooredTtl, $this->mMaxAdaptiveExpiry ) );
}
/**
 * Choose the fields that take part in PHP serialization.
 *
 * Every field reported by get_object_vars() is kept, except 'mParseStartTime' and
 * any field whose name contains a NUL byte.
 *
 * @return string[] Field names (keys of the filtered list are preserved)
 */
public function __sleep() {
	$isSerializable = static function ( $field ) {
		// Unserializing unknown private fields in HHVM causes
		// member variables with nulls in their names (T229366)
		return $field !== 'mParseStartTime' && strpos( $field, "\0" ) === false;
	};

	$fieldNames = array_keys( get_object_vars( $this ) );

	return array_filter( $fieldNames, $isSerializable );
}
/**
 * Merges internal metadata such as flags, accessed options, and profiling info
 * from $source into this ParserOutput. This should be used whenever the state of $source
 * has any impact on the state of this ParserOutput.
 *
 * Output hooks are merged as a list; warnings, flags and used options as maps where
 * $source wins on equal keys. The timestamp and the speculative fields take the larger
 * value, parse start times take the smaller value per entry, and limit report data is
 * only taken from $source when this object has none.
 *
 * @param ParserOutput $source
 */
public function mergeInternalMetaDataFrom( ParserOutput $source ) {
	$this->mOutputHooks = self::mergeList( $this->mOutputHooks, $source->getOutputHooks() );
	// don't use getter
	$this->mWarnings = self::mergeMap( $this->mWarnings, $source->mWarnings );
	$this->mTimestamp = self::useMaxValue( $this->mTimestamp, $source->getTimestamp() );

	foreach ( self::SPECULATIVE_FIELDS as $field ) {
		$mine = $this->$field;
		$theirs = $source->$field;

		if ( $mine && $theirs && $mine !== $theirs ) {
			wfLogWarning( __METHOD__ . ": inconsistent '$field' properties!" );
		}

		$this->$field = self::useMaxValue( $mine, $theirs );
	}

	$this->mParseStartTime = self::useEachMinValue(
		$this->mParseStartTime,
		$source->mParseStartTime
	);

	$this->mFlags = self::mergeMap( $this->mFlags, $source->mFlags );
	$this->mParseUsedOptions = self::mergeMap(
		$this->mParseUsedOptions,
		$source->mParseUsedOptions
	);

	// TODO: maintain per-slot limit reports!
	if ( !$this->mLimitReportData ) {
		$this->mLimitReportData = $source->mLimitReportData;
	}
	if ( !$this->mLimitReportJSData ) {
		$this->mLimitReportJSData = $source->mLimitReportJSData;
	}
}
/**
 * Merges HTML metadata such as head items, JS config vars, and HTTP cache control info
 * from $source into this ParserOutput. This should be used whenever the HTML in $source
 * has been somehow merged into the HTML of this ParserOutput.
 *
 * @param ParserOutput $source
 */
public function mergeHtmlMetaDataFrom( ParserOutput $source ) {
	// HTML and HTTP
	$this->mHeadItems = self::mergeMixedList( $this->mHeadItems, $source->getHeadItems() );
	$this->mModules = self::mergeList( $this->mModules, $source->getModules() );
	$this->mModuleStyles = self::mergeList( $this->mModuleStyles, $source->getModuleStyles() );
	$this->mJsConfigVars = self::mergeMap( $this->mJsConfigVars, $source->getJsConfigVars() );
	$this->mMaxAdaptiveExpiry = min( $this->mMaxAdaptiveExpiry, $source->mMaxAdaptiveExpiry );

	$this->mExtraStyleSrcs = self::mergeList(
		$this->mExtraStyleSrcs, $source->getExtraCSPStyleSrcs()
	);
	$this->mExtraScriptSrcs = self::mergeList(
		$this->mExtraScriptSrcs, $source->getExtraCSPScriptSrcs()
	);
	$this->mExtraDefaultSrcs = self::mergeList(
		$this->mExtraDefaultSrcs, $source->getExtraCSPDefaultSrcs()
	);

	// "noindex" always wins!
	$anyNoindex = $this->mIndexPolicy === 'noindex' || $source->mIndexPolicy === 'noindex';
	if ( $anyNoindex ) {
		$this->mIndexPolicy = 'noindex';
	} elseif ( $this->mIndexPolicy !== 'index' ) {
		// No explicit 'index' on this side, so adopt whatever $source has
		$this->mIndexPolicy = $source->mIndexPolicy;
	}

	// Skin control: each boolean switch stays on once either side turned it on
	$this->mNewSection = $this->mNewSection || $source->getNewSection();
	$this->mHideNewSection = $this->mHideNewSection || $source->getHideNewSection();
	$this->mNoGallery = $this->mNoGallery || $source->getNoGallery();
	$this->mEnableOOUI = $this->mEnableOOUI || $source->getEnableOOUI();
	$this->mPreventClickjacking = $this->mPreventClickjacking || $source->getPreventClickjacking();

	// TODO: we'll have to be smarter about this!
	$this->mSections = array_merge( $this->mSections, $source->getSections() );
	$this->mTOCHTML .= $source->mTOCHTML;

	// XXX: we don't want to concatenate title text, so first write wins.
	// We should use the first *modified* title text, but we don't have the original to check.
	if ( in_array( $this->mTitleText, [ null, '' ], true ) ) {
		$this->mTitleText = $source->mTitleText;
	}

	// class names are stored in array keys
	$this->mWrapperDivClasses = self::mergeMap(
		$this->mWrapperDivClasses, $source->mWrapperDivClasses
	);

	// NOTE: last write wins, same as within one ParserOutput
	$this->mIndicators = self::mergeMap( $this->mIndicators, $source->getIndicators() );

	// NOTE: include extension data in "tracking meta data" as well as "html meta data"!
	// TODO: add a $mergeStrategy parameter to setExtensionData to allow different
	// kinds of extension data to be merged in different ways.
	$this->mExtensionData = self::mergeMap(
		$this->mExtensionData, $source->mExtensionData
	);
}
/**
 * Merges dependency tracking metadata such as backlinks, images used, and extension data
 * from $source into this ParserOutput. This allows dependency tracking to be done for the
 * combined output of multiple content slots.
 *
 * @param ParserOutput $source
 */
public function mergeTrackingMetaDataFrom( ParserOutput $source ) {
	// Plain list
	$this->mLanguageLinks = self::mergeList( $this->mLanguageLinks, $source->getLanguageLinks() );

	// Two-level maps
	$this->mLinks = self::merge2D( $this->mLinks, $source->getLinks() );
	$this->mTemplates = self::merge2D( $this->mTemplates, $source->getTemplates() );
	$this->mTemplateIds = self::merge2D( $this->mTemplateIds, $source->getTemplateIds() );
	$this->mInterwikiLinks = self::merge2D(
		$this->mInterwikiLinks, $source->getInterwikiLinks()
	);

	// Flat maps, $source wins on equal keys
	$this->mCategories = self::mergeMap( $this->mCategories, $source->getCategories() );
	$this->mImages = self::mergeMap( $this->mImages, $source->getImages() );
	$this->mFileSearchOptions = self::mergeMap(
		$this->mFileSearchOptions, $source->getFileSearchOptions()
	);
	$this->mExternalLinks = self::mergeMap( $this->mExternalLinks, $source->getExternalLinks() );

	// TODO: add a $mergeStrategy parameter to setPageProperty to allow different
	// kinds of properties to be merged in different ways.
	$this->mProperties = self::mergeMap( $this->mProperties, $source->getPageProperties() );

	// NOTE: include extension data in "tracking meta data" as well as "html meta data"!
	// TODO: add a $mergeStrategy parameter to setExtensionData to allow different
	// kinds of extension data to be merged in different ways.
	$this->mExtensionData = self::mergeMap(
		$this->mExtensionData, $source->mExtensionData
	);
}
/**
 * Concatenate two arrays with array_merge() and drop duplicate values
 * (SORT_REGULAR comparison). Keys of the surviving entries are kept, not renumbered.
 *
 * @param array $a
 * @param array $b
 * @return array
 */
private static function mergeMixedList( array $a, array $b ) {
	$combined = array_merge( $a, $b );

	return array_unique( $combined, SORT_REGULAR );
}
/**
 * Concatenate two lists, drop duplicate values (SORT_REGULAR comparison) and
 * renumber the result from zero.
 *
 * @param array $a
 * @param array $b
 * @return array
 */
private static function mergeList( array $a, array $b ) {
	$combined = array_merge( $a, $b );
	$distinct = array_unique( $combined, SORT_REGULAR );

	return array_values( $distinct );
}
/**
 * Merge two maps with array_replace(): entries of $b overwrite entries of $a
 * that use the same key, and keys only found in $b are appended.
 *
 * @param array $a
 * @param array $b
 * @return array
 */
private static function mergeMap( array $a, array $b ) {
	$merged = array_replace( $a, $b );

	return $merged;
}
/**
 * Merge two two-level maps key by key.
 *
 * For every top-level key found in either input:
 *  - a key only present in $a keeps $a's value, even when that value is "empty";
 *  - otherwise, if $a's value is missing or empty, $b's value is used;
 *  - otherwise, if $b's value is empty, $a's value is used;
 *  - if both values are arrays they are combined with array_replace(), so $b wins
 *    on equal second-level keys;
 *  - in any other case $b's value wins.
 *
 * Keys appear in the result in the order of $a, followed by keys only found in $b.
 *
 * @param array $a
 * @param array $b
 * @return array
 */
private static function merge2D( array $a, array $b ) {
	$values = [];

	// The array union lists $a's keys first, then the keys only $b has, each exactly once.
	foreach ( array_keys( $a + $b ) as $k ) {
		if ( !array_key_exists( $k, $b ) ) {
			// Only $a knows this key. Reading $b[$k] here would raise an
			// "undefined array key" warning and replace the value with null.
			$values[$k] = $a[$k];
		} elseif ( empty( $a[$k] ) ) {
			$values[$k] = $b[$k];
		} elseif ( empty( $b[$k] ) ) {
			$values[$k] = $a[$k];
		} elseif ( is_array( $a[$k] ) && is_array( $b[$k] ) ) {
			$values[$k] = array_replace( $a[$k], $b[$k] );
		} else {
			$values[$k] = $b[$k];
		}
	}

	return $values;
}
/**
 * Combine two (possibly nested) maps by taking the smaller value for every key.
 *
 * When both sides hold an array under the same key the method recurses into them;
 * otherwise the pair is resolved by useMinValue(), with a missing entry treated as null.
 *
 * @param array $a
 * @param array $b
 * @return array
 */
private static function useEachMinValue( array $a, array $b ) {
	$result = [];

	foreach ( array_merge( array_keys( $a ), array_keys( $b ) ) as $key ) {
		$left = $a[$key] ?? null;
		$right = $b[$key] ?? null;

		$result[$key] = ( is_array( $left ) && is_array( $right ) )
			? self::useEachMinValue( $left, $right )
			: self::useMinValue( $left, $right );
	}

	return $result;
}
/**
 * Return the smaller of two values, where null means "no value".
 *
 * If one side is null the other side is returned (null when both are null).
 *
 * @param mixed $a
 * @param mixed $b
 * @return mixed
 */
private static function useMinValue( $a, $b ) {
	if ( $a === null || $b === null ) {
		// At most one side carries a value; hand back that side
		return $a ?? $b;
	}

	return min( $a, $b );
}
/**
 * Return the larger of two values, where null means "no value".
 *
 * If one side is null the other side is returned (null when both are null).
 *
 * @param mixed $a
 * @param mixed $b
 * @return mixed
 */
private static function useMaxValue( $a, $b ) {
	if ( $a === null || $b === null ) {
		// At most one side carries a value; hand back that side
		return $a ?? $b;
	}

	return max( $a, $b );
}
/**
 * Returns a JSON serializable structure representing this ParserOutput instance.
 *
 * The fields of this class come first; fields supplied by the parent are added only
 * for keys that are not already present. 'MaxAdaptiveExpiry' is left out while the
 * value is still INF.
 *
 * @see newFromJson()
 *
 * @return array
 */
protected function toJsonArray(): array {
	$ownFields = [
		'Text' => $this->mText,
		'LanguageLinks' => $this->mLanguageLinks,
		'Categories' => $this->mCategories,
		'Indicators' => $this->mIndicators,
		'TitleText' => $this->mTitleText,
		'Links' => $this->mLinks,
		'LinksSpecial' => $this->mLinksSpecial,
		'Templates' => $this->mTemplates,
		'TemplateIds' => $this->mTemplateIds,
		'Images' => $this->mImages,
		'FileSearchOptions' => $this->mFileSearchOptions,
		'ExternalLinks' => $this->mExternalLinks,
		'InterwikiLinks' => $this->mInterwikiLinks,
		'NewSection' => $this->mNewSection,
		'HideNewSection' => $this->mHideNewSection,
		'NoGallery' => $this->mNoGallery,
		'HeadItems' => $this->mHeadItems,
		'Modules' => $this->mModules,
		'ModuleStyles' => $this->mModuleStyles,
		'JsConfigVars' => $this->mJsConfigVars,
		'OutputHooks' => $this->mOutputHooks,
		'Warnings' => $this->mWarnings,
		'Sections' => $this->mSections,
		// Non-UTF-8 strings are wrapped as base64 so they survive JSON encoding
		'Properties' => self::detectAndEncodeBinary( $this->mProperties ),
		'TOCHTML' => $this->mTOCHTML,
		'Timestamp' => $this->mTimestamp,
		'EnableOOUI' => $this->mEnableOOUI,
		'IndexPolicy' => $this->mIndexPolicy,
		// may contain arbitrary structures!
		'ExtensionData' => $this->mExtensionData,
		'LimitReportData' => $this->mLimitReportData,
		'LimitReportJSData' => $this->mLimitReportJSData,
		'ParseStartTime' => $this->mParseStartTime,
		'PreventClickjacking' => $this->mPreventClickjacking,
		'ExtraScriptSrcs' => $this->mExtraScriptSrcs,
		'ExtraDefaultSrcs' => $this->mExtraDefaultSrcs,
		'ExtraStyleSrcs' => $this->mExtraStyleSrcs,
		'Flags' => $this->mFlags,
		'SpeculativeRevId' => $this->mSpeculativeRevId,
		'SpeculativePageIdUsed' => $this->speculativePageIdUsed,
		'RevisionTimestampUsed' => $this->revisionTimestampUsed,
		'RevisionUsedSha1Base36' => $this->revisionUsedSha1Base36,
		'WrapperDivClasses' => $this->mWrapperDivClasses,
	];

	// Fill in missing fields from parents. Array addition does not override existing fields.
	$data = $ownFields + parent::toJsonArray();

	// TODO: make more fields optional!

	// NOTE: JSON can't encode infinity!
	if ( INF !== $this->mMaxAdaptiveExpiry ) {
		$data['MaxAdaptiveExpiry'] = $this->mMaxAdaptiveExpiry;
	}

	return $data;
}
/**
 * Build a ParserOutput from its JSON array form.
 *
 * Creates an empty instance and lets initFromJson() populate it.
 *
 * @param JsonUnserializer $unserializer
 * @param array $json
 * @return ParserOutput
 */
public static function newFromJsonArray( JsonUnserializer $unserializer, array $json ) {
	$instance = new self();
	$instance->initFromJson( $unserializer, $json );

	return $instance;
}
/**
 * Initialize member fields from an array returned by jsonSerialize().
 *
 * The parent class is initialized first. Most fields are then copied straight from
 * $jsonData; 'Properties' is passed through detectAndDecodeBinary(), 'ExtensionData'
 * through the unserializer (defaulting to an empty array), and 'MaxAdaptiveExpiry'
 * falls back to INF when absent.
 *
 * @param JsonUnserializer $unserializer
 * @param array $jsonData
 */
protected function initFromJson( JsonUnserializer $unserializer, array $jsonData ) {
	parent::initFromJson( $unserializer, $jsonData );

	// JSON key => member field, for everything that is copied without conversion
	$directFields = [
		'Text' => 'mText',
		'LanguageLinks' => 'mLanguageLinks',
		'Categories' => 'mCategories',
		'Indicators' => 'mIndicators',
		'TitleText' => 'mTitleText',
		'Links' => 'mLinks',
		'LinksSpecial' => 'mLinksSpecial',
		'Templates' => 'mTemplates',
		'TemplateIds' => 'mTemplateIds',
		'Images' => 'mImages',
		'FileSearchOptions' => 'mFileSearchOptions',
		'ExternalLinks' => 'mExternalLinks',
		'InterwikiLinks' => 'mInterwikiLinks',
		'NewSection' => 'mNewSection',
		'HideNewSection' => 'mHideNewSection',
		'NoGallery' => 'mNoGallery',
		'HeadItems' => 'mHeadItems',
		'Modules' => 'mModules',
		'ModuleStyles' => 'mModuleStyles',
		'JsConfigVars' => 'mJsConfigVars',
		'OutputHooks' => 'mOutputHooks',
		'Warnings' => 'mWarnings',
		'Sections' => 'mSections',
		'TOCHTML' => 'mTOCHTML',
		'Timestamp' => 'mTimestamp',
		'EnableOOUI' => 'mEnableOOUI',
		'IndexPolicy' => 'mIndexPolicy',
		'LimitReportData' => 'mLimitReportData',
		'LimitReportJSData' => 'mLimitReportJSData',
		'ParseStartTime' => 'mParseStartTime',
		'PreventClickjacking' => 'mPreventClickjacking',
		'ExtraScriptSrcs' => 'mExtraScriptSrcs',
		'ExtraDefaultSrcs' => 'mExtraDefaultSrcs',
		'ExtraStyleSrcs' => 'mExtraStyleSrcs',
		'Flags' => 'mFlags',
		'SpeculativeRevId' => 'mSpeculativeRevId',
		'SpeculativePageIdUsed' => 'speculativePageIdUsed',
		'RevisionTimestampUsed' => 'revisionTimestampUsed',
		'RevisionUsedSha1Base36' => 'revisionUsedSha1Base36',
		'WrapperDivClasses' => 'mWrapperDivClasses',
	];
	foreach ( $directFields as $jsonKey => $member ) {
		$this->$member = $jsonData[$jsonKey];
	}

	// Fields that need conversion or a fallback
	$this->mProperties = self::detectAndDecodeBinary( $jsonData['Properties'] );
	$this->mExtensionData = $unserializer->unserializeArray( $jsonData['ExtensionData'] ?? [] );
	// Infinity is never written to JSON, so a missing key stands for INF
	$this->mMaxAdaptiveExpiry = $jsonData['MaxAdaptiveExpiry'] ?? INF;
}
/**
 * Finds any non-utf8 strings in the given array and replaces them with
 * an associative array that wraps a base64 encoded version of the data.
 * Inverse of detectAndDecodeBinary().
 *
 * Only the top level of $properties is inspected; non-string values and valid
 * UTF-8 strings are passed through unchanged.
 *
 * @param array $properties
 *
 * @return array
 */
private static function detectAndEncodeBinary( array $properties ) {
	foreach ( $properties as $key => $value ) {
		if ( !is_string( $value ) || mb_detect_encoding( $value, 'UTF-8', true ) ) {
			// Not a string, or a string that is valid UTF-8: nothing to wrap
			continue;
		}

		$properties[$key] = [
			'_type_' => 'string',
			'_encoding_' => 'base64',
			'_data_' => base64_encode( $value ),
		];
	}

	return $properties;
}
/**
 * Finds any associative arrays that represent encoded binary strings, and
 * replaces them with the decoded binary data.
 *
 * A value counts as a wrapper when it is an array with an '_encoding_' entry; only
 * the 'base64' encoding is decoded, anything else is left as it is.
 *
 * @param array $properties
 *
 * @return array
 */
private static function detectAndDecodeBinary( array $properties ) {
	foreach ( $properties as $key => $value ) {
		$isWrapper = is_array( $value ) && isset( $value['_encoding_'] );

		if ( $isWrapper && $value['_encoding_'] === 'base64' ) {
			$properties[$key] = base64_decode( $value['_data_'] );
		}
	}

	return $properties;
}
/**
 * Restore state after PHP unserialization.
 *
 * If the serialized data still carries a non-empty 'mAccessedOptions' ghost field,
 * its value is moved into mParseUsedOptions.
 */
public function __wakeup() {
	// Backwards compatibility, pre 1.36
	$legacyOptions = $this->getGhostFieldValue( 'mAccessedOptions' );
	if ( !$legacyOptions ) {
		return;
	}

	$this->mParseUsedOptions = $legacyOptions;
}
/**
 * Magic getter for properties that are not accessible from the calling scope.
 *
 * Reading a property declared on the class, or one that exists dynamically on this
 * object, still works but emits a deprecation message. Any other name triggers a
 * PHP notice via trigger_error() and yields null.
 *
 * @param string $name Property name
 * @return mixed Property value, or null for an unknown property
 */
public function __get( $name ) {
	if ( property_exists( get_called_class(), $name ) ) {
		// Direct access to a public property, deprecated.
		wfDeprecatedMsg( "ParserOutput::{$name} public read access deprecated", '1.38' );
		return $this->$name;
	} elseif ( property_exists( $this, $name ) ) {
		// Dynamic property access, deprecated.
		wfDeprecatedMsg( "ParserOutput::{$name} dynamic property read access deprecated", '1.38' );
		return $this->$name;
	} else {
		// This is the read path, so the message has to name __get(), not __set().
		trigger_error( "Inaccessible property via __get(): $name" );
		return null;
	}
}
/**
 * Magic setter for properties that are not accessible from the calling scope.
 *
 * The write is always carried out. A deprecation message is emitted first, worded
 * according to whether the property is declared on the class or is dynamic.
 *
 * @param string $name Property name
 * @param mixed $value Value to store
 */
public function __set( $name, $value ) {
	$message = property_exists( get_called_class(), $name )
		// Direct access to a public property, deprecated.
		? "ParserOutput::$name public write access deprecated"
		// Dynamic property access, deprecated.
		: "ParserOutput::$name dynamic property write access deprecated";

	wfDeprecatedMsg( $message, '1.38' );
	$this->$name = $value;
}
}