wiki.techinc.nl/includes/parser/CoreParserFunctions.php

1492 lines
47 KiB
PHP
Raw Normal View History

<?php
/**
* Parser functions provided by MediaWiki core
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Parser
*/
use MediaWiki\Config\ServiceOptions;
use MediaWiki\MediaWikiServices;
use MediaWiki\Parser\ParserOutputFlags;
use MediaWiki\Revision\RevisionAccessException;
use MediaWiki\Revision\RevisionRecord;
Add Sanitizer::removeSomeTags() which uses Remex to tokenize The existing Sanitizer::removeHTMLtags() method, in addition to having dodgy capitalization, uses regular expressions to parse the HTML. That produces corner cases like T298401 and T67747 and is not guaranteed to yield balanced or well-formed HTML. Instead, introduce and use a new Sanitizer::removeSomeTags() method which is guaranteed to always return balanced and well-formed HTML. Note that Sanitizer::removeHTMLtags()/::removeSomeTags() take a callback argument which (as far as I can tell) is never used outside core. Mark that argument as @internal, and clean up the version used by ::removeSomeTags(). Use the new ::removeSomeTags() method in the two places where DISPLAYTITLE is handled (following up on T67747). The use by the legacy parser is more difficult to replace (and would have a performace cost), so leave the old ::removeHTMLtags() method in place for that call site for now: when the legacy parser is replaced by Parsoid the need for the old ::removeHTMLtags() will go away. In a follow-up patch we'll rename ::removeHTMLtags() and mark it @internal so that we can deprecate ::removeHTMLtags() for external use. Some benchmarking code added. On my machine, with PHP 7.4, the new method tidies short 30-character title strings at a rate of about 6764/s while the tidy-based method being replaced here managed 6384/s. Sanitizer::removeHTMLtags blazes through short strings 20x faster (120,915/s); some of this difference is due to the set up cost of creating the tag whitelist and the Remex pipeline, so further optimizations could doubtless be done if Sanitizer::removeSomeTags() is more widely used. Bug: T299722 Bug: T67747 Change-Id: Ic864c01471c292f11799c4fbdac4d7d30b8bc50f
2022-01-21 22:03:26 +00:00
use Wikimedia\RemexHtml\Tokenizer\Attributes;
use Wikimedia\RemexHtml\Tokenizer\PlainAttributes;
/**
* Various core parser functions, registered in every Parser
* @ingroup Parser
*/
class CoreParserFunctions {
/**
 * Names of the config options that register() asserts are present in the
 * ServiceOptions object it receives.
 *
 * @internal
 */
public const REGISTER_OPTIONS = [
// See documentation for the corresponding config options
'AllowDisplayTitle',
'AllowSlowParserFunctions',
];
2011-02-08 23:18:13 +00:00
/**
 * Register the core parser function hooks on the given parser.
 *
 * @param Parser $parser
 * @param ServiceOptions $options Must carry the options named in REGISTER_OPTIONS
 *
 * @return void
 * @throws MWException
 * @internal
 */
public static function register( Parser $parser, ServiceOptions $options ) {
	$options->assertRequiredOptions( self::REGISTER_OPTIONS );
	$displayTitleEnabled = $options->get( 'AllowDisplayTitle' );
	$slowFunctionsEnabled = $options->get( 'AllowSlowParserFunctions' );

	# Arguments handed to Parser::setFunctionHook:
	#   - name for lookup in the localized magic words array,
	#   - function callback,
	#   - optionally Parser::SFH_NO_HASH to omit the hash from calls
	#     (e.g. {{int:...}} instead of {{#int:...}})

	# Functions whose hook name equals the method name, all called without a hash.
	$hashlessFunctions = [
		'ns', 'nse', 'urlencode', 'lcfirst', 'ucfirst', 'lc', 'uc',
		'localurl', 'localurle', 'fullurl', 'fullurle', 'canonicalurl',
		'canonicalurle', 'formatnum', 'grammar', 'gender', 'plural', 'bidi',
		'numberofpages', 'numberofusers', 'numberofactiveusers',
		'numberofarticles', 'numberoffiles', 'numberofadmins',
		'numberingroup', 'numberofedits', 'language',
		'padleft', 'padright', 'anchorencode', 'defaultsort', 'filepath',
		'pagesincategory', 'pagesize', 'protectionlevel', 'protectionexpiry',
		'namespacee', 'namespacenumber', 'talkspace', 'talkspacee',
		'subjectspace', 'subjectspacee', 'pagename', 'pagenamee',
		'fullpagename', 'fullpagenamee', 'rootpagename', 'rootpagenamee',
		'basepagename', 'basepagenamee', 'subpagename', 'subpagenamee',
		'talkpagename', 'talkpagenamee', 'subjectpagename',
		'subjectpagenamee', 'pageid', 'revisionid', 'revisionday',
		'revisionday2', 'revisionmonth', 'revisionmonth1', 'revisionyear',
		'revisiontimestamp', 'revisionuser', 'cascadingsources',
	];
	foreach ( $hashlessFunctions as $hookName ) {
		$parser->setFunctionHook( $hookName, [ __CLASS__, $hookName ], Parser::SFH_NO_HASH );
	}

	# Hooks whose name differs from the method, or which need other flags.
	# Each entry is the exact argument list for setFunctionHook.
	$explicitHooks = [
		[ 'namespace', [ __CLASS__, 'mwnamespace' ], Parser::SFH_NO_HASH ],
		[ 'int', [ __CLASS__, 'intFunction' ], Parser::SFH_NO_HASH ],
		[ 'special', [ __CLASS__, 'special' ] ],
		[ 'speciale', [ __CLASS__, 'speciale' ] ],
		[ 'tag', [ __CLASS__, 'tagObj' ], Parser::SFH_OBJECT_ARGS ],
		[ 'formatdate', [ __CLASS__, 'formatDate' ] ],
	];
	if ( $displayTitleEnabled ) {
		$explicitHooks[] = [ 'displaytitle', [ __CLASS__, 'displaytitle' ], Parser::SFH_NO_HASH ];
	}
	if ( $slowFunctionsEnabled ) {
		$explicitHooks[] = [ 'pagesinnamespace', [ __CLASS__, 'pagesinnamespace' ], Parser::SFH_NO_HASH ];
	}
	foreach ( $explicitHooks as $hookArgs ) {
		$parser->setFunctionHook( ...$hookArgs );
	}
}
/**
 * Expand an interface message in the user language taken from the parser options.
 *
 * @param Parser $parser
 * @param string $part1 Message key
 * @param mixed ...$params To pass to wfMessage()
 * @return array The plain message text with 'noparse' => false, or
 *   [ 'found' => false ] when the key is empty
 */
public static function intFunction( $parser, $part1 = '', ...$params ) {
	if ( strval( $part1 ) === '' ) {
		return [ 'found' => false ];
	}

	$rawMessage = wfMessage( $part1, $params );
	$localized = $rawMessage->inLanguage( $parser->getOptions()->getUserLangObj() );
	return [ $localized->plain(), 'noparse' => false ];
}
/**
 * Reformat a date string according to the date format in the parser options.
 *
 * @param Parser $parser
 * @param string $date
 * @param string|null $defaultPref Format used instead when the parser
 *   options' date format is 'default'
 *
 * @return string
 */
public static function formatDate( $parser, $date, $defaultPref = null ) {
	$functionLang = $parser->getFunctionLang();
	$formatter = MediaWikiServices::getInstance()->getDateFormatterFactory()->get( $functionLang );

	$userPref = $parser->getOptions()->getDateFormat();
	// A caller-supplied default only applies when the user's setting is
	// 'default'; any explicit setting wins.
	$format = ( $userPref == 'default' && $defaultPref ) ? $defaultPref : $userPref;

	return $formatter->reformat( $format, trim( $date ), [ 'match-whole' ] );
}
/**
 * Resolve a namespace, given by number or by name, to the content
 * language's formatted namespace text.
 *
 * @param Parser $parser
 * @param string $part1 Namespace number or name
 * @return string|array Namespace text, or [ 'found' => false ] when the
 *   name lookup returns false
 */
public static function ns( $parser, $part1 = '' ) {
	$contLang = $parser->getContentLanguage();

	$looksNumeric = intval( $part1 ) || $part1 == "0";
	$index = $looksNumeric
		? intval( $part1 )
		: $contLang->getNsIndex( str_replace( ' ', '_', $part1 ) );

	if ( $index === false ) {
		return [ 'found' => false ];
	}
	return $contLang->getFormattedNsText( $index );
}
2010-02-11 14:57:43 +00:00
/**
 * Same as ns(), but a string result has spaces turned into underscores
 * and is passed through wfUrlencode(). Non-string results are returned as is.
 *
 * @param Parser $parser
 * @param string $part1 Namespace number or name
 * @return string|array
 */
public static function nse( $parser, $part1 = '' ) {
	$nsText = self::ns( $parser, $part1 );
	return is_string( $nsText )
		? wfUrlencode( str_replace( ' ', '_', $nsText ) )
		: $nsText;
}
/**
 * urlencodes a string according to one of three patterns: (T24474)
 *
 * By default (for HTTP "query" strings), spaces are encoded as '+'.
 * Or to encode a value for the HTTP "path", spaces are encoded as '%20'.
 * For links to "wiki"s, or similar software, spaces are encoded as '_',
 *
 * Strip markers are removed from the text before encoding; see T105242,
 * where that choice and various other options were discussed.
 *
 * @param Parser $parser
 * @param string $s The text to encode.
 * @param string|null $arg (optional): The type of encoding.
 * @return string
 */
public static function urlencode( $parser, $s = '', $arg = null ) {
	// Built once and reused across calls.
	static $magicWords = null;
	if ( $magicWords === null ) {
		$magicWords =
			$parser->getMagicWordFactory()->newArray( [ 'url_path', 'url_query', 'url_wiki' ] );
	}

	switch ( $magicWords->matchStartToEnd( $arg ) ) {
		case 'url_wiki':
			// Encode as though it's a wiki page, '_' for ' '.
			return wfUrlencode( $parser->killMarkers( str_replace( ' ', '_', $s ) ) );

		case 'url_path':
			// Encode for an HTTP Path, '%20' for ' '.
			return rawurlencode( $parser->killMarkers( $s ) );

		case 'url_query':
		default:
			// Encode for HTTP query, '+' for ' '.
			return urlencode( $parser->killMarkers( $s ) );
	}
}
/**
 * Apply the content language's lcfirst() to the text.
 *
 * @param Parser $parser
 * @param string $s
 * @return string
 */
public static function lcfirst( $parser, $s = '' ) {
	$contLang = $parser->getContentLanguage();
	return $contLang->lcfirst( $s );
}
/**
 * Apply the content language's ucfirst() to the text.
 *
 * @param Parser $parser
 * @param string $s
 * @return string
 */
public static function ucfirst( $parser, $s = '' ) {
	$contLang = $parser->getContentLanguage();
	return $contLang->ucfirst( $s );
}
/**
 * Run the content language's lc() over the text through
 * Parser::markerSkipCallback().
 *
 * @param Parser $parser
 * @param string $s
 * @return string
 */
public static function lc( $parser, $s = '' ) {
	$toLower = [ $parser->getContentLanguage(), 'lc' ];
	return $parser->markerSkipCallback( $s, $toLower );
}
/**
 * Run the content language's uc() over the text through
 * Parser::markerSkipCallback().
 *
 * @param Parser $parser
 * @param string $s
 * @return string
 */
public static function uc( $parser, $s = '' ) {
	$toUpper = [ $parser->getContentLanguage(), 'uc' ];
	return $parser->markerSkipCallback( $s, $toUpper );
}
/**
 * Local URL of a title, obtained via Title::getLocalURL().
 *
 * @param Parser $parser
 * @param string $s Title text
 * @param string|null $arg Passed on to getLocalURL() when not null
 * @return string|array URL, or [ 'found' => false ] for an unusable title
 */
public static function localurl( $parser, $s = '', $arg = null ) {
	$url = self::urlFunction( 'getLocalURL', $s, $arg );
	return $url;
}
/**
 * Like localurl(), but a string result is HTML-escaped with
 * htmlspecialchars(). Non-string results are returned unchanged.
 *
 * @param Parser $parser
 * @param string $s Title text
 * @param string|null $arg Passed on to getLocalURL() when not null
 * @return string|array
 */
public static function localurle( $parser, $s = '', $arg = null ) {
	$url = self::urlFunction( 'getLocalURL', $s, $arg );
	return is_string( $url ) ? htmlspecialchars( $url, ENT_COMPAT ) : $url;
}
/**
 * Full URL of a title, obtained via Title::getFullURL().
 *
 * @param Parser $parser
 * @param string $s Title text
 * @param string|null $arg Passed on to getFullURL() when not null
 * @return string|array URL, or [ 'found' => false ] for an unusable title
 */
public static function fullurl( $parser, $s = '', $arg = null ) {
	$url = self::urlFunction( 'getFullURL', $s, $arg );
	return $url;
}
/**
 * Like fullurl(), but a string result is HTML-escaped with
 * htmlspecialchars(). Non-string results are returned unchanged.
 *
 * @param Parser $parser
 * @param string $s Title text
 * @param string|null $arg Passed on to getFullURL() when not null
 * @return string|array
 */
public static function fullurle( $parser, $s = '', $arg = null ) {
	$url = self::urlFunction( 'getFullURL', $s, $arg );
	return is_string( $url ) ? htmlspecialchars( $url, ENT_COMPAT ) : $url;
}
/**
 * Canonical URL of a title, obtained via Title::getCanonicalURL().
 *
 * @param Parser $parser
 * @param string $s Title text
 * @param string|null $arg Passed on to getCanonicalURL() when not null
 * @return string|array URL, or [ 'found' => false ] for an unusable title
 */
public static function canonicalurl( $parser, $s = '', $arg = null ) {
	$url = self::urlFunction( 'getCanonicalURL', $s, $arg );
	return $url;
}
/**
 * Like canonicalurl(), but a string result is HTML-escaped with
 * htmlspecialchars(). Non-string results are returned unchanged.
 *
 * @param Parser $parser
 * @param string $s Title text
 * @param string|null $arg Passed on to getCanonicalURL() when not null
 * @return string|array
 */
public static function canonicalurle( $parser, $s = '', $arg = null ) {
	$url = self::urlFunction( 'getCanonicalURL', $s, $arg );
	return is_string( $url ) ? htmlspecialchars( $url, ENT_COMPAT ) : $url;
}
/**
 * Shared implementation of the *url parser functions: build a Title from
 * the text and call the named URL getter on it.
 *
 * @param string $func Name of the Title method to call (e.g. 'getLocalURL')
 * @param string $s Title text
 * @param string|null $arg Passed to the Title method when not null
 * @return string|array Result of the Title method, or [ 'found' => false ]
 *   when no title could be built
 */
public static function urlFunction( $func, $s = '', $arg = null ) {
	# Due to order of execution of a lot of bits, the values might be encoded
	# before arriving here; if that's true, then the title can't be created
	# and the variable will fail. If we can't get a decent title from the first
	# attempt, url-decode and try for a second.
	$title = Title::newFromText( $s ) ?? Title::newFromURL( urldecode( $s ) );
	if ( $title === null ) {
		return [ 'found' => false ];
	}

	# Convert NS_MEDIA -> NS_FILE
	if ( $title->inNamespace( NS_MEDIA ) ) {
		$title = Title::makeTitle( NS_FILE, $title->getDBkey() );
	}

	return $arg !== null ? $title->$func( $arg ) : $title->$func();
}
/**
 * Format (or, with the 'rawsuffix' magic word, un-format) a number using
 * the parser's function language.
 *
 * @param Parser $parser
 * @param string $num
 * @param string|null $arg Optional 'rawsuffix' or 'nocommafysuffix' magic word
 * @return string
 */
public static function formatnum( $parser, $num = '', $arg = null ) {
	if ( self::matchAgainstMagicword( $parser->getMagicWordFactory(), 'rawsuffix', $arg ) ) {
		// Raw mode: parse a formatted number back; no legacy wrapper needed.
		$formatter = [ $parser->getFunctionLang(), 'parseFormattedNumber' ];
		return $parser->markerSkipCallback( $num, $formatter );
	}

	$noSeparators = self::matchAgainstMagicword(
		$parser->getMagicWordFactory(), 'nocommafysuffix', $arg
	);
	$method = $noSeparators ? 'formatNumNoSeparators' : 'formatNum';
	$formatter = self::getLegacyFormatNum( $parser, [ $parser->getFunctionLang(), $method ] );

	return $parser->markerSkipCallback( $num, $formatter );
}
2007-01-13 12:58:33 +00:00
/**
 * Wrap a number-formatting callback so that it tolerates input which is
 * not a plain number.
 *
 * @param Parser $parser
 * @param callable $callback
 *
 * @return callable
 */
private static function getLegacyFormatNum( $parser, $callback ) {
	// For historic reasons, the formatNum parser function will
	// take arguments which are not actually formatted numbers,
	// which then trigger deprecation warnings in Language::formatNum*.
	// Emit a tracking category instead to allow linting.
	return static function ( $number ) use ( $parser, $callback ) {
		$isPlainNumber = is_numeric( $number )
			|| $number === (string)NAN
			|| $number === (string)INF
			|| $number === (string)-INF;
		if ( $isPlainNumber ) {
			return call_user_func( $callback, $number );
		}

		$parser->addTrackingCategory( 'nonnumeric-formatnum' );

		// Format each number-like run separately and leave the rest alone.
		// Don't split on NAN/INF in the legacy case since they are
		// likely to be found embedded inside non-numeric text.
		$validNumberRe = '(-(?=[\d\.]))?(\d+|(?=\.\d))(\.\d*)?([Ee][-+]?\d+)?';
		$formatMatch = static function ( $m ) use ( $callback ) {
			return call_user_func( $callback, $m[0] );
		};
		return preg_replace_callback( "/{$validNumberRe}/", $formatMatch, $number );
	};
}
/**
 * Apply the function language's grammar conversion to a word, after
 * removing strip markers from it.
 *
 * @param Parser $parser
 * @param string $case
 * @param string $word
 * @return string
 */
public static function grammar( $parser, $case = '', $word = '' ) {
	$plainWord = $parser->killMarkers( $word );
	$functionLang = $parser->getFunctionLang();
	return $functionLang->convertGrammar( $plainWord, $case );
}
/**
 * Choose one of the given forms according to a user's gender.
 *
 * @param Parser $parser
 * @param string $username
 * @param string ...$forms What to output for each gender
 * @return string
 */
public static function gender( $parser, $username, ...$forms ) {
	// Some shortcuts to avoid loading user data unnecessarily
	$formCount = count( $forms );
	if ( $formCount === 0 ) {
		return '';
	}
	if ( $formCount === 1 ) {
		return $forms[0];
	}

	$services = MediaWikiServices::getInstance();
	$username = trim( $username );

	// Start from the default; it is kept when neither lookup below applies.
	$gender = $services->getUserOptionsLookup()->getDefaultOption( 'gender' );

	// allow prefix and normalize (e.g. "&#42;foo" -> "*foo" ).
	$userPage = Title::newFromText( $username, NS_USER );
	if ( $userPage && $userPage->inNamespace( NS_USER ) ) {
		$username = $userPage->getText();
	}

	// check parameter, or use the ParserOptions if in interface message
	$user = User::newFromName( $username );
	$genderCache = $services->getGenderCache();
	if ( $user ) {
		$gender = $genderCache->getGenderOf( $user, __METHOD__ );
	} elseif ( $username === '' && $parser->getOptions()->getInterfaceMessage() ) {
		$gender = $genderCache->getGenderOf( $parser->getOptions()->getUserIdentity(), __METHOD__ );
	}

	return $parser->getFunctionLang()->gender( $gender, $forms );
}
/**
 * Choose one of the given forms according to a number.
 *
 * @param Parser $parser
 * @param string $text Number, possibly formatted for the function language
 * @param string ...$forms What to output for each number (singular, dual, plural, etc.)
 * @return string
 */
public static function plural( $parser, $text = '', ...$forms ) {
	$parsed = $parser->getFunctionLang()->parseFormattedNumber( $text );
	// All-digit input becomes an int, anything else a float.
	$count = ctype_digit( $parsed ) ? (int)$parsed : (float)$parsed;
	return $parser->getFunctionLang()->convertPlural( $count, $forms );
}
/**
 * Pass the text through the function language's embedBidi().
 *
 * @param Parser $parser
 * @param string $text
 * @return string
 */
public static function bidi( $parser, $text = '' ) {
	$functionLang = $parser->getFunctionLang();
	return $functionLang->embedBidi( $text );
}
/**
 * Shorthand for getting a Language Converter for Target language
 *
 * @param Parser $parser Parent parser
 * @return ILanguageConverter
 */
private static function getTargetLanguageConverter( Parser $parser ): ILanguageConverter {
	$converterFactory = MediaWikiServices::getInstance()->getLanguageConverterFactory();
	return $converterFactory->getLanguageConverter( $parser->getTargetLanguage() );
}
/**
 * Override the title of the page when viewed, provided we've been given a
 * title which will normalise to the canonical title
 *
 * @param Parser $parser Parent parser
 * @param string $text Desired title text
 * @param string $uarg Optional flag, matched against the
 *   'displaytitle_noerror' and 'displaytitle_noreplace' magic words
 * @return string Empty string, or an error message span when a different
 *   display title was already set and no flag was given
 */
public static function displaytitle( $parser, $text = '', $uarg = '' ) {
	$restrictDisplayTitle = MediaWikiServices::getInstance()->getMainConfig()->get( 'RestrictDisplayTitle' );

	static $magicWords = null;
	if ( $magicWords === null ) {
		$magicWords = $parser->getMagicWordFactory()->newArray(
			[ 'displaytitle_noerror', 'displaytitle_noreplace' ] );
	}
	$arg = $magicWords->matchStartToEnd( $uarg );

	// parse a limited subset of wiki markup (just the single quote items)
	$text = $parser->doQuotes( $text );

	// remove stripped text (e.g. the UNIQ-QINU stuff) that was generated by tag extensions/whatever
	$text = $parser->killMarkers( $text );

	// See T28547 for rationale for this processing.
	// list of disallowed tags for DISPLAYTITLE
	// these will be escaped even though they are allowed in normal wiki text
	$bad = [ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'blockquote', 'ol', 'ul', 'li', 'hr',
		'table', 'tr', 'th', 'td', 'dl', 'dd', 'caption', 'p', 'ruby', 'rb', 'rt', 'rtc', 'rp', 'br' ];

	// disallow some styles that could be used to bypass $wgRestrictDisplayTitle
	if ( $restrictDisplayTitle ) {
		// This code is tested with the cases marked T28547 in
		// parserTests.txt
		$htmlTagsCallback = static function ( Attributes $attr ): Attributes {
			$decoded = $attr->getValues();

			if ( isset( $decoded['style'] ) ) {
				// this is called later anyway, but we need it right now for the regexes below to be safe
				// calling it twice doesn't hurt
				$decoded['style'] = Sanitizer::checkCss( $decoded['style'] );

				if ( preg_match( '/(display|user-select|visibility)\s*:/i', $decoded['style'] ) ) {
					$decoded['style'] = '/* attempt to bypass $wgRestrictDisplayTitle */';
				}
			}

			return new PlainAttributes( $decoded );
		};
	} else {
		$htmlTagsCallback = null;
	}

	// only requested titles that normalize to the actual title are allowed through
	// if $wgRestrictDisplayTitle is true (it is by default)
	// mimic the escaping process that occurs in OutputPage::setPageTitle
	$text = Sanitizer::removeSomeTags( $text, [
		'attrCallback' => $htmlTagsCallback,
		'removeTags' => $bad,
	] );
	$title = Title::newFromText( Sanitizer::stripAllTags( $text ) );
	// Decode entities in $text the same way that Title::newFromText does
	$filteredText = Sanitizer::decodeCharReferencesAndNormalize( $text );

	if ( !$restrictDisplayTitle ||
		( $title instanceof Title
		&& !$title->hasFragment()
		&& $title->equals( $parser->getTitle() ) )
	) {
		$old = $parser->getOutput()->getPageProperty( 'displaytitle' );
		if ( $old === null || $arg !== 'displaytitle_noreplace' ) {
			$parser->getOutput()->setDisplayTitle( $text );
		}
		if ( $old !== null && $old !== $text && !$arg ) {
			$converter = self::getTargetLanguageConverter( $parser );
			return '<span class="error">' .
				wfMessage( 'duplicate-displaytitle',
					// Message should be parsed, but these params should only be escaped.
					$converter->markNoConversion( wfEscapeWikiText( $old ) ),
					// @phan-suppress-next-line SecurityCheck-DoubleEscaped we removed escaping above
					$converter->markNoConversion( wfEscapeWikiText( $filteredText ) )
				)->inContentLanguage()->text() .
				'</span>';
		} else {
			return '';
		}
	} else {
		$parser->getOutput()->addWarningMsg(
			'restricted-displaytitle',
			// Message should be parsed, but this param should only be escaped.
			Message::plaintextParam( $filteredText )
		);
		$parser->addTrackingCategory( 'restricted-displaytitle-ignored' );
		// The rejected title produces no wikitext output. Return the empty
		// string explicitly instead of falling off the end with null, so
		// every path honours the documented string return type.
		return '';
	}
}
/**
 * Matches the given value against the value of given magic word
 *
 * @param MagicWordFactory $magicWordFactory A factory to get the word from, e.g., from
 *   $parser->getMagicWordFactory()
 * @param string $magicword Magic word key
 * @param string $value Value to match; trimmed first, and an empty value never matches
 * @return bool True on successful match
 */
private static function matchAgainstMagicword(
	MagicWordFactory $magicWordFactory, $magicword, $value
) {
	$needle = trim( strval( $value ) );
	if ( $needle !== '' ) {
		return $magicWordFactory->get( $magicword )->matchStartToEnd( $needle );
	}
	return false;
}
/**
 * Formats a number according to a language.
 *
 * When $raw matches the 'rawsuffix' magic word the number is returned as a
 * plain string cast; otherwise it is formatted with $language->formatNum().
 *
 * @param int|float $num
 * @param string|null $raw Flag text to test against 'rawsuffix'; null means
 *   "always format"
 * @param Language|StubUserLang $language
 * @param MagicWordFactory|null $magicWordFactory To evaluate $raw; fetched
 *   from MediaWikiServices when omitted and $raw is not null
 * @return string
 */
public static function formatRaw(
	$num, $raw, $language, ?MagicWordFactory $magicWordFactory = null
) {
	if ( $raw !== null ) {
		// The factory is only needed when there is a flag to evaluate.
		$magicWordFactory = $magicWordFactory
			?? MediaWikiServices::getInstance()->getMagicWordFactory();
		if ( self::matchAgainstMagicword( $magicWordFactory, 'rawsuffix', $raw ) ) {
			return (string)$num;
		}
	}
	return $language->formatNum( $num );
}
/**
 * SiteStats::pages(), formatted via formatRaw().
 *
 * @param Parser $parser
 * @param string|null $raw Passed to formatRaw()
 * @return string
 */
public static function numberofpages( $parser, $raw = null ) {
	$count = SiteStats::pages();
	return self::formatRaw( $count, $raw, $parser->getFunctionLang() );
}
/**
 * SiteStats::users(), formatted via formatRaw().
 *
 * @param Parser $parser
 * @param string|null $raw Passed to formatRaw()
 * @return string
 */
public static function numberofusers( $parser, $raw = null ) {
	$count = SiteStats::users();
	return self::formatRaw( $count, $raw, $parser->getFunctionLang() );
}
/**
 * SiteStats::activeUsers(), formatted via formatRaw().
 *
 * @param Parser $parser
 * @param string|null $raw Passed to formatRaw()
 * @return string
 */
public static function numberofactiveusers( $parser, $raw = null ) {
	$count = SiteStats::activeUsers();
	return self::formatRaw( $count, $raw, $parser->getFunctionLang() );
}
/**
 * SiteStats::articles(), formatted via formatRaw().
 *
 * @param Parser $parser
 * @param string|null $raw Passed to formatRaw()
 * @return string
 */
public static function numberofarticles( $parser, $raw = null ) {
	$count = SiteStats::articles();
	return self::formatRaw( $count, $raw, $parser->getFunctionLang() );
}
/**
 * SiteStats::images(), formatted via formatRaw().
 *
 * @param Parser $parser
 * @param string|null $raw Passed to formatRaw()
 * @return string
 */
public static function numberoffiles( $parser, $raw = null ) {
	$count = SiteStats::images();
	return self::formatRaw( $count, $raw, $parser->getFunctionLang() );
}
/**
 * SiteStats::numberingroup() for the 'sysop' group, formatted via formatRaw().
 *
 * @param Parser $parser
 * @param string|null $raw Passed to formatRaw()
 * @return string
 */
public static function numberofadmins( $parser, $raw = null ) {
	$count = SiteStats::numberingroup( 'sysop' );
	return self::formatRaw( $count, $raw, $parser->getFunctionLang() );
}
/**
 * SiteStats::edits(), formatted via formatRaw().
 *
 * @param Parser $parser
 * @param string|null $raw Passed to formatRaw()
 * @return string
 */
public static function numberofedits( $parser, $raw = null ) {
	$count = SiteStats::edits();
	return self::formatRaw( $count, $raw, $parser->getFunctionLang() );
}
/**
 * Parser function returning SiteStats::pagesInNs() for a namespace,
 * formatted by formatRaw().
 *
 * @param Parser $parser Supplies the language used for formatting
 * @param int|string $namespace Namespace index; converted to an integer
 * @param string|null $raw Forwarded to formatRaw() as its $raw argument
 * @return string
 */
public static function pagesinnamespace( $parser, $namespace = 0, $raw = null ) {
	$nsIndex = intval( $namespace );
	$pageCount = SiteStats::pagesInNs( $nsIndex );
	return self::formatRaw( $pageCount, $raw, $parser->getFunctionLang() );
}
/**
 * Parser function returning SiteStats::numberingroup() for a user group,
 * formatted by formatRaw().
 *
 * @param Parser $parser Supplies the language used for formatting
 * @param string $name Group name; lower-cased before the lookup
 * @param string|null $raw Forwarded to formatRaw() as its $raw argument
 * @return string
 */
public static function numberingroup( $parser, $name = '', $raw = null ) {
	$group = strtolower( $name );
	$memberCount = SiteStats::numberingroup( $group );
	return self::formatRaw( $memberCount, $raw, $parser->getFunctionLang() );
}
/**
 * Given a title, return the namespace name that would be given by the
 * corresponding magic word, with underscores replaced by spaces.
 *
 * Note: function name changed to "mwnamespace" rather than "namespace"
 * to not break PHP 5.3
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when Title::newFromText() yields no title
 */
public static function mwnamespace( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	return $titleObj === null
		? ''
		: str_replace( '_', ' ', $titleObj->getNsText() );
}
/**
 * Given a title, return its namespace name passed through wfUrlencode().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when Title::newFromText() yields no title
 */
public static function namespacee( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	return $titleObj === null
		? ''
		: wfUrlencode( $titleObj->getNsText() );
}
/**
 * Given a title, return the value of Title::getNamespace() for it.
 *
 * @param Parser $parser
 * @param string|null $title
 * @return int|string Empty string when Title::newFromText() yields no title
 */
public static function namespacenumber( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	return $titleObj === null
		? ''
		: $titleObj->getNamespace();
}
/**
 * Given a title, return its talk namespace name with underscores
 * replaced by spaces.
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when there is no title or it cannot have a talk page
 */
public static function talkspace( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	if ( $titleObj !== null && $titleObj->canHaveTalkPage() ) {
		return str_replace( '_', ' ', $titleObj->getTalkNsText() );
	}
	return '';
}
/**
 * Given a title, return its talk namespace name passed through wfUrlencode().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when there is no title or it cannot have a talk page
 */
public static function talkspacee( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	if ( $titleObj !== null && $titleObj->canHaveTalkPage() ) {
		return wfUrlencode( $titleObj->getTalkNsText() );
	}
	return '';
}
/**
 * Given a title, return its subject namespace name with underscores
 * replaced by spaces.
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when Title::newFromText() yields no title
 */
public static function subjectspace( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	return $titleObj === null
		? ''
		: str_replace( '_', ' ', $titleObj->getSubjectNsText() );
}
/**
 * Given a title, return its subject namespace name passed through wfUrlencode().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when Title::newFromText() yields no title
 */
public static function subjectspacee( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	return $titleObj === null
		? ''
		: wfUrlencode( $titleObj->getSubjectNsText() );
}
/**
 * Functions to get and normalize pagenames, corresponding to the magic words
 * of the same names.
 *
 * This one returns Title::getText(), escaped with wfEscapeWikiText().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when Title::newFromText() yields no title
 */
public static function pagename( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	return $titleObj === null
		? ''
		: wfEscapeWikiText( $titleObj->getText() );
}
/**
 * Returns Title::getPartialURL() for the title, escaped with wfEscapeWikiText().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when Title::newFromText() yields no title
 */
public static function pagenamee( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	return $titleObj === null
		? ''
		: wfEscapeWikiText( $titleObj->getPartialURL() );
}
/**
 * Returns Title::getPrefixedText() for the title, escaped with wfEscapeWikiText().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when there is no title or it cannot have a talk page
 */
public static function fullpagename( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	if ( $titleObj !== null && $titleObj->canHaveTalkPage() ) {
		return wfEscapeWikiText( $titleObj->getPrefixedText() );
	}
	return '';
}
/**
 * Returns Title::getPrefixedURL() for the title, escaped with wfEscapeWikiText().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when there is no title or it cannot have a talk page
 */
public static function fullpagenamee( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	if ( $titleObj !== null && $titleObj->canHaveTalkPage() ) {
		return wfEscapeWikiText( $titleObj->getPrefixedURL() );
	}
	return '';
}
/**
 * Returns Title::getSubpageText() for the title, escaped with wfEscapeWikiText().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when Title::newFromText() yields no title
 */
public static function subpagename( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	return $titleObj === null
		? ''
		: wfEscapeWikiText( $titleObj->getSubpageText() );
}
/**
 * Returns Title::getSubpageUrlForm() for the title, escaped with wfEscapeWikiText().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when Title::newFromText() yields no title
 */
public static function subpagenamee( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	return $titleObj === null
		? ''
		: wfEscapeWikiText( $titleObj->getSubpageUrlForm() );
}
/**
 * Returns Title::getRootText() for the title, escaped with wfEscapeWikiText().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when Title::newFromText() yields no title
 */
public static function rootpagename( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	return $titleObj === null
		? ''
		: wfEscapeWikiText( $titleObj->getRootText() );
}
/**
 * Returns Title::getRootText() for the title with spaces turned into
 * underscores, then URL-encoded with wfUrlencode() and escaped with
 * wfEscapeWikiText().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when Title::newFromText() yields no title
 */
public static function rootpagenamee( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	if ( $titleObj === null ) {
		return '';
	}
	$underscored = str_replace( ' ', '_', $titleObj->getRootText() );
	return wfEscapeWikiText( wfUrlencode( $underscored ) );
}
/**
 * Returns Title::getBaseText() for the title, escaped with wfEscapeWikiText().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when Title::newFromText() yields no title
 */
public static function basepagename( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	return $titleObj === null
		? ''
		: wfEscapeWikiText( $titleObj->getBaseText() );
}
/**
 * Returns Title::getBaseText() for the title with spaces turned into
 * underscores, then URL-encoded with wfUrlencode() and escaped with
 * wfEscapeWikiText().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when Title::newFromText() yields no title
 */
public static function basepagenamee( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	if ( $titleObj === null ) {
		return '';
	}
	$underscored = str_replace( ' ', '_', $titleObj->getBaseText() );
	return wfEscapeWikiText( wfUrlencode( $underscored ) );
}
/**
 * Returns the prefixed text of the title's talk page, escaped with
 * wfEscapeWikiText().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when there is no title or it cannot have a talk page
 */
public static function talkpagename( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	if ( $titleObj !== null && $titleObj->canHaveTalkPage() ) {
		return wfEscapeWikiText( $titleObj->getTalkPage()->getPrefixedText() );
	}
	return '';
}
/**
 * Returns the prefixed URL form of the title's talk page, escaped with
 * wfEscapeWikiText().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when there is no title or it cannot have a talk page
 */
public static function talkpagenamee( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	if ( $titleObj !== null && $titleObj->canHaveTalkPage() ) {
		return wfEscapeWikiText( $titleObj->getTalkPage()->getPrefixedURL() );
	}
	return '';
}
/**
 * Returns the prefixed text of the title's subject page, escaped with
 * wfEscapeWikiText().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when Title::newFromText() yields no title
 */
public static function subjectpagename( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	return $titleObj === null
		? ''
		: wfEscapeWikiText( $titleObj->getSubjectPage()->getPrefixedText() );
}
/**
 * Returns the prefixed URL form of the title's subject page, escaped with
 * wfEscapeWikiText().
 *
 * @param Parser $parser
 * @param string|null $title
 * @return string Empty string when Title::newFromText() yields no title
 */
public static function subjectpagenamee( $parser, $title = null ) {
	$titleObj = Title::newFromText( $title );
	return $titleObj === null
		? ''
		: wfEscapeWikiText( $titleObj->getSubjectPage()->getPrefixedURL() );
}
/**
 * Return the number of pages, files or subcats in the given category,
 * or 0 for an invalid category title. This is an expensive parser function
 * and can't be called too many times per page: when
 * Parser::incrementExpensiveFunctionCount() refuses, all counts are 0.
 *
 * The two optional arguments may be given in either order:
 * {{pagesincategory:|raw[|type]}} or {{pagesincategory:[|type[|raw]]}}.
 *
 * @param Parser $parser
 * @param string $name Category name, without namespace prefix
 * @param string|null $arg1 Either the raw flag or the count type
 * @param string|null $arg2 Whichever of the two $arg1 is not
 * @return string
 */
public static function pagesincategory( $parser, $name = '', $arg1 = null, $arg2 = null ) {
	// Static locals: the magic word matcher and the counts keyed by category DB key
	static $magicWords = null;
	static $cache = [];

	if ( $magicWords === null ) {
		$magicWords = $parser->getMagicWordFactory()->newArray( [
			'pagesincategory_all',
			'pagesincategory_pages',
			'pagesincategory_subcats',
			'pagesincategory_files'
		] );
	}

	// Work out which optional argument is the raw flag and which is the type
	$rawComesFirst = self::matchAgainstMagicword(
		$parser->getMagicWordFactory(), 'rawsuffix', $arg1
	);
	if ( $rawComesFirst ) {
		$raw = $arg1;
		$typeArg = $arg2;
	} else {
		$typeArg = $arg1;
		$raw = $arg2;
	}
	// Unrecognised or missing type falls back to "all" (backward compatibility)
	$type = $magicWords->matchStartToEnd( $typeArg ) ?: 'pagesincategory_all';

	$title = Title::makeTitleSafe( NS_CATEGORY, $name );
	if ( !$title ) {
		// Invalid title
		return self::formatRaw( 0, $raw, $parser->getFunctionLang() );
	}

	// findVariantLink() receives $name and $title as variables, as in the
	// original call; $title is read again below.
	$languageConverter = MediaWikiServices::getInstance()
		->getLanguageConverterFactory()
		->getLanguageConverter( $parser->getContentLanguage() );
	$languageConverter->findVariantLink( $name, $title, true );

	// Normalize name for cache
	$name = $title->getDBkey();

	if ( !isset( $cache[$name] ) ) {
		$category = Category::newFromTitle( $title );
		$allCount = 0;
		$subcatCount = 0;
		$fileCount = 0;
		$pageCount = 0;
		if ( $parser->incrementExpensiveFunctionCount() ) {
			$allCount = $category->getMemberCount();
			$subcatCount = $category->getSubcatCount();
			$fileCount = $category->getFileCount();
			$pageCount = $category->getPageCount( Category::COUNT_CONTENT_PAGES );
		}
		$cache[$name] = [
			'pagesincategory_all' => $allCount,
			'pagesincategory_pages' => $pageCount,
			'pagesincategory_subcats' => $subcatCount,
			'pagesincategory_files' => $fileCount,
		];
	}

	return self::formatRaw( $cache[$name][$type], $raw, $parser->getFunctionLang() );
}
/**
 * Return the size of the given page, or 0 if no revision could be obtained
 * for it. This is an expensive parser function and can't be called too many
 * times per page.
 *
 * @param Parser $parser
 * @param string $page Name of page to check (Default: empty string)
 * @param string|null $raw Should number be human readable with commas or just number
 * @return string
 */
public static function pagesize( $parser, $page = '', $raw = null ) {
	$length = 0;
	$title = Title::newFromText( $page );
	if ( is_object( $title ) ) {
		// Fetch revision from cache/database
		$rev = self::getCachedRevisionObject(
			$parser, $title, ParserOutputFlags::VARY_REVISION_SHA1
		);
		if ( $rev ) {
			// We've had bugs where rev_len was not being recorded for empty
			// pages, see T135414; treat a null size as 0.
			$length = $rev->getSize() ?? 0;
		}
	}
	return self::formatRaw( $length, $raw, $parser->getFunctionLang() );
}
/**
 * Returns the requested protection level for the given title, or for the
 * page being parsed when no valid title is given. This is an expensive
 * parser function and can't be called too many times per page, unless the
 * protection levels/expiries for the given title have already been retrieved.
 *
 * @param Parser $parser
 * @param string $type Restriction type; lower-cased before the lookup
 * @param string $title
 *
 * @return string Comma-separated restriction levels, or '' when the lookup
 *   is not allowed
 */
public static function protectionlevel( $parser, $type = '', $title = '' ) {
	$titleObject = Title::newFromText( $title );
	if ( $titleObject === null ) {
		$titleObject = $parser->getTitle();
	}
	if (
		!$titleObject->areRestrictionsLoaded() &&
		!$parser->incrementExpensiveFunctionCount()
	) {
		return '';
	}
	# Title::getRestrictions returns an array, its possible it may have
	# multiple values in the future
	$levels = $titleObject->getRestrictions( strtolower( $type ) );
	return implode( ',', $levels );
}
/**
 * Returns the requested protection expiry for the given title, or for the
 * page being parsed when no valid title is given. This is an expensive
 * parser function and can't be called too many times per page, unless the
 * protection levels/expiries for the given title have already been retrieved.
 *
 * @param Parser $parser
 * @param string $type Restriction type; lower-cased before the lookup
 * @param string $title
 *
 * @return string
 */
public static function protectionexpiry( $parser, $type = '', $title = '' ) {
	$titleObject = Title::newFromText( $title );
	if ( $titleObject === null ) {
		$titleObject = $parser->getTitle();
	}
	if (
		!$titleObject->areRestrictionsLoaded() &&
		!$parser->incrementExpensiveFunctionCount()
	) {
		return '';
	}
	$expiry = $titleObject->getRestrictionExpiry( strtolower( $type ) );
	// getRestrictionExpiry() returns false on invalid type; trying to
	// match protectionlevel() function that returns empty string instead
	return $expiry === false ? '' : $expiry;
}
/**
 * Gives language names.
 *
 * Both codes are lower-cased before the lookup. When the lookup yields an
 * empty string, the result of LanguageCode::bcp47() for the code is
 * returned instead.
 *
 * @param Parser $parser
 * @param string $code Language code (of which to get name)
 * @param string $inLanguage Language code (in which to get name)
 * @return string
 */
public static function language( $parser, $code = '', $inLanguage = '' ) {
	$code = strtolower( $code );
	$inLanguage = strtolower( $inLanguage );
	$nameUtils = MediaWikiServices::getInstance()->getLanguageNameUtils();
	$languageName = $nameUtils->getLanguageName( $code, $inLanguage );
	if ( $languageName !== '' ) {
		return $languageName;
	}
	return LanguageCode::bcp47( $code );
}
/**
 * Unicode-safe str_pad with the restriction that $length is forced to be <= 500
 *
 * @param Parser $parser Used to strip markers from $padding
 * @param string $string Text to pad
 * @param string $length Target length in characters; cast to int and capped at 500
 * @param string $padding Padding text, repeated and truncated as needed
 * @param int $direction STR_PAD_LEFT to prepend; anything else appends
 * @return string
 */
public static function pad(
	$parser, $string, $length, $padding = '0', $direction = STR_PAD_RIGHT
) {
	$padding = $parser->killMarkers( $padding );
	$lengthOfPadding = mb_strlen( $padding );
	if ( $lengthOfPadding == 0 ) {
		// Nothing to pad with
		return $string;
	}

	# Number of characters that still have to be added
	$missing = min( (int)$length, 500 ) - mb_strlen( $string );
	if ( $missing <= 0 ) {
		// Nothing to add
		return $string;
	}

	# Whole copies of $padding followed by a truncated copy, so that exactly
	# $missing characters are added.
	$finalPadding = str_repeat( $padding, intdiv( $missing, $lengthOfPadding ) )
		. mb_substr( $padding, 0, $missing % $lengthOfPadding );

	return $direction == STR_PAD_LEFT
		? $finalPadding . $string
		: $string . $finalPadding;
}
/**
 * Pads $string on the left by delegating to pad() with STR_PAD_LEFT.
 *
 * @param Parser $parser
 * @param string $string
 * @param string|int $length
 * @param string $padding
 * @return string
 */
public static function padleft( $parser, $string = '', $length = 0, $padding = '0' ) {
	$direction = STR_PAD_LEFT;
	return self::pad( $parser, $string, $length, $padding, $direction );
}
/**
 * Pads $string on the right by delegating to pad() with STR_PAD_RIGHT.
 *
 * @param Parser $parser
 * @param string $string
 * @param string|int $length
 * @param string $padding
 * @return string
 */
public static function padright( $parser, $string = '', $length = 0, $padding = '0' ) {
	$direction = STR_PAD_RIGHT;
	return self::pad( $parser, $string, $length, $padding, $direction );
}
/**
 * Encodes text as a section anchor.
 *
 * Strip markers are removed first; the text is then run through
 * Parser::guessSectionNameFromWikiText(), the first character of that
 * result is dropped, and the remainder is encoded with
 * Sanitizer::safeEncodeAttribute().
 *
 * @param Parser $parser
 * @param string $text
 * @return string
 */
public static function anchorencode( $parser, $text ) {
	$cleanText = $parser->killMarkers( $text );
	$guessed = $parser->guessSectionNameFromWikiText( $cleanText );
	// Drop the first character (presumably the leading "#" - TODO confirm)
	$section = (string)substr( $guessed, 1 );
	return Sanitizer::safeEncodeAttribute( $section );
}
/**
 * Resolves a special page alias to the prefixed text of its title.
 *
 * @param Parser $parser
 * @param string $text Special page name or alias, optionally with a subpage
 * @return string Prefixed title text. For an unknown special page the given
 *   text is used as the title if that is possible; otherwise the result for
 *   'Badtitle' is returned.
 */
public static function special( $parser, $text ) {
	$specialPageFactory = MediaWikiServices::getInstance()->getSpecialPageFactory();
	[ $page, $subpage ] = $specialPageFactory->resolveAlias( $text );
	if ( $page ) {
		return SpecialPage::getTitleFor( $page, $subpage )->getPrefixedText();
	}
	// unknown special page, just use the given text as its title, if at all possible
	$fallback = Title::makeTitleSafe( NS_SPECIAL, $text );
	if ( $fallback ) {
		return $fallback->getPrefixedText();
	}
	return self::special( $parser, 'Badtitle' );
}
/**
 * URL-encoded variant of special(): spaces in the result of special() are
 * turned into underscores and the string is passed through wfUrlencode().
 *
 * @param Parser $parser
 * @param string $text Special page name or alias
 * @return string
 */
public static function speciale( $parser, $text ) {
	$prefixedText = self::special( $parser, $text );
	$underscored = str_replace( ' ', '_', $prefixedText );
	return wfUrlencode( $underscored );
}
/**
 * Sets the default sort key of the page being parsed.
 *
 * An empty (after trimming) $text does nothing. Otherwise the key is set
 * unless a custom default sort key already exists and "noreplace" was given.
 * If a different key already existed and neither option was given, an error
 * message is returned.
 *
 * @param Parser $parser
 * @param string $text The sortkey to use
 * @param string $uarg Either "noreplace" or "noerror" (in en)
 *   both suppress errors, and noreplace does nothing if
 *   a default sortkey already exists.
 * @return string Empty string, or the 'duplicate-defaultsort' error wrapped
 *   in a span with class "error"
 */
public static function defaultsort( $parser, $text, $uarg = '' ) {
	static $magicWords = null;
	if ( $magicWords === null ) {
		$magicWords = $parser->getMagicWordFactory()->newArray(
			[ 'defaultsort_noerror', 'defaultsort_noreplace' ] );
	}
	$arg = $magicWords->matchStartToEnd( $uarg );

	$text = trim( $text );
	if ( $text === '' ) {
		return '';
	}

	$old = $parser->getCustomDefaultSort();
	if ( $old === false || $arg !== 'defaultsort_noreplace' ) {
		$parser->setDefaultSort( $text );
	}

	// Compare the keys as exact strings. A loose == treats numeric-looking
	// keys such as "1e3" and "1000" as equal, which would hide the
	// duplicate warning even though the sort key was replaced.
	if ( $old === false || (string)$old === $text || $arg ) {
		return '';
	}

	$converter = $parser->getTargetLanguageConverter();
	return '<span class="error">' .
		wfMessage( 'duplicate-defaultsort',
			// Message should be parsed, but these params should only be escaped.
			$converter->markNoConversion( wfEscapeWikiText( $old ) ),
			$converter->markNoConversion( wfEscapeWikiText( $text ) )
		)->inContentLanguage()->text() .
		'</span>';
}
/**
 * Usage {{filepath|300}}, {{filepath|nowiki}}, {{filepath|nowiki|300}}
 * or {{filepath|300|nowiki}} or {{filepath|300px}}, {{filepath|200x300px}},
 * {{filepath|nowiki|200x300px}}, {{filepath|200x300px|nowiki}}.
 *
 * @param Parser $parser
 * @param string $name File name to look up in the repo group
 * @param string $argA Either 'nowiki' or a size
 * @param string $argB Whichever of the two $argA is not
 * @return array|string The URL (full file URL, or thumbnail URL when a size
 *   was requested and the transform succeeded); wrapped in an array with
 *   'nowiki' => true when the nowiki option was given; '' if no file is found
 */
public static function filepath( $parser, $name = '', $argA = '', $argB = '' ) {
	$file = MediaWikiServices::getInstance()->getRepoGroup()->findFile( $name );

	// The option and the size may come in either order
	if ( $argA == 'nowiki' ) {
		// {{filepath: | option [| size] }}
		$isNowiki = true;
		$sizeArg = $argB;
	} else {
		// {{filepath: [| size [|option]] }}
		$isNowiki = ( $argB == 'nowiki' );
		$sizeArg = $argA;
	}
	$parsedWidthParam = Parser::parseWidthParam( $sizeArg );

	if ( !$file ) {
		return '';
	}

	$url = $file->getFullUrl();
	// If a size is requested...
	if ( count( $parsedWidthParam ) ) {
		$mto = $file->transform( $parsedWidthParam );
		// ... and we can
		if ( $mto && !$mto->isError() ) {
			// ... change the URL to point to a thumbnail.
			$url = wfExpandUrl( $mto->getUrl(), PROTO_RELATIVE );
		}
	}

	return $isNowiki ? [ $url, 'nowiki' => true ] : $url;
}
/**
 * Parser function to extension tag adaptor
 *
 * The first argument is the tag name, the second (if present) the inner
 * content, and the remaining named arguments become attributes. Tags that
 * are not in the parser's strip list are re-emitted as ordinary tag text.
 *
 * @param Parser $parser
 * @param PPFrame $frame
 * @param PPNode[] $args
 * @return string
 */
public static function tagObj( $parser, $frame, $args ) {
	if ( !count( $args ) ) {
		return '';
	}
	$tagName = strtolower( trim( $frame->expand( array_shift( $args ) ) ) );
	$inner = count( $args ) ? $frame->expand( array_shift( $args ) ) : null;

	$attributes = [];
	foreach ( $args as $arg ) {
		$bits = $arg->splitArg();
		if ( strval( $bits['index'] ) !== '' ) {
			// Only arguments whose 'index' is empty are used as attributes
			continue;
		}
		$attrName = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
		$attrValue = trim( $frame->expand( $bits['value'] ) );
		// Strip one pair of surrounding quotes, if any
		if ( preg_match( '/^(?:["\'](.+)["\']|""|\'\')$/s', $attrValue, $m ) ) {
			$attrValue = $m[1] ?? '';
		}
		$attributes[$attrName] = $attrValue;
	}

	$stripList = $parser->getStripList();
	if ( in_array( $tagName, $stripList ) ) {
		return $parser->extensionSubstitution(
			[
				'name' => $tagName,
				'inner' => $inner,
				'attributes' => $attributes,
				'close' => "</$tagName>",
			],
			$frame
		);
	}

	// we can't handle this tag (at least not now), so just re-emit it as an ordinary tag
	$attrText = '';
	foreach ( $attributes as $key => $val ) {
		$attrText .= ' ' . htmlspecialchars( $key ) .
			'="' . htmlspecialchars( $val, ENT_COMPAT ) . '"';
	}
	return $inner === null
		? "<$tagName$attrText/>"
		: "<$tagName$attrText>$inner</$tagName>";
}
/**
 * Fetches the current revision of the given title and returns it.
 * Will increment the expensive function count (unless the current revision
 * of the title is already cached) and add a template link to get the value
 * refreshed on changes.
 * For a given title, which is equal to the current parser title,
 * the RevisionRecord object from the parser is used, when that is the current one
 *
 * @param Parser $parser
 * @param Title $title
 * @param string $vary ParserOutput vary-* flag
 * @return RevisionRecord|null Null when no title was given, the lookup is
 *   not allowed, or no current revision could be fetched
 * @since 1.23
 */
private static function getCachedRevisionObject( $parser, $title, $vary ) {
	if ( !$title ) {
		return null;
	}

	$revisionRecord = null;
	$isSelfReferential = $title->equals( $parser->getTitle() );
	if ( $isSelfReferential ) {
		// Revision is for the same title that is currently being parsed. Only use the last
		// saved revision, regardless of Parser::getRevisionId() or fake revision injection
		// callbacks against the current title.
		$fromParser = $parser->getRevisionRecordObject();
		if ( $fromParser && $fromParser->isCurrent() ) {
			$revisionRecord = $fromParser;
		}
	}

	$parserOutput = $parser->getOutput();
	if ( !$revisionRecord ) {
		$lookupAllowed = $parser->isCurrentRevisionOfTitleCached( $title )
			|| $parser->incrementExpensiveFunctionCount();
		if ( !$lookupAllowed ) {
			return null; // not allowed
		}
		// Get the current revision, ignoring Parser::getRevisionId() being null/old.
		// Convert `false` error return to `null`.
		$revisionRecord = $parser->fetchCurrentRevisionRecordOfTitle( $title ) ?: null;
		// Register dependency in templatelinks
		$pageId = $revisionRecord ? $revisionRecord->getPageId() : 0;
		$revId = $revisionRecord ? $revisionRecord->getId() : 0;
		$parserOutput->addTemplate( $title, $pageId, $revId );
	}

	if ( $isSelfReferential ) {
		wfDebug( __METHOD__ . ": used current revision, setting $vary" );
		// Upon page save, the result of the parser function using this might change
		$parserOutput->setOutputFlag( $vary );
		if ( $vary === ParserOutputFlags::VARY_REVISION_SHA1 && $revisionRecord ) {
			try {
				$sha1 = $revisionRecord->getSha1();
			} catch ( RevisionAccessException $e ) {
				// The SHA-1 could not be read; record null instead
				$sha1 = null;
			}
			$parserOutput->setRevisionUsedSha1Base36( $sha1 );
		}
	}

	return $revisionRecord;
}
/**
 * Get the pageid of a specified page
 *
 * @param Parser $parser
 * @param string|null $title Title to get the pageid from
 * @return int|null|string '' for an invalid title, 0 for titles that cannot
 *   exist or are external, null when a database lookup would be needed but
 *   the expensive function limit does not allow it, otherwise the page ID
 * @since 1.23
 */
public static function pageid( $parser, $title = null ) {
	$t = Title::newFromText( $title );
	if ( !$t ) {
		return '';
	}
	if ( !$t->canExist() || $t->isExternal() ) {
		return 0; // e.g. special page or interwiki link
	}

	$parserOutput = $parser->getOutput();

	if ( $t->equals( $parser->getTitle() ) ) {
		// Revision is for the same title that is currently being parsed.
		// Use the title from Parser in case a new page ID was injected into it.
		$parserOutput->setOutputFlag( ParserOutputFlags::VARY_PAGE_ID );
		$ownId = $parser->getTitle()->getArticleID();
		if ( $ownId ) {
			$parserOutput->setSpeculativePageIdUsed( $ownId );
		}
		return $ownId;
	}

	// Check the link cache for the title
	$linkCache = MediaWikiServices::getInstance()->getLinkCache();
	$pdbk = $t->getPrefixedDBkey();
	$cachedId = $linkCache->getGoodLinkID( $pdbk );
	if ( $cachedId != 0 || $linkCache->isBadLink( $pdbk ) ) {
		$parserOutput->addLink( $t, $cachedId );
		return $cachedId;
	}

	// We need to load it from the DB, so mark expensive
	if ( !$parser->incrementExpensiveFunctionCount() ) {
		return null;
	}
	$loadedId = $t->getArticleID();
	$parserOutput->addLink( $t, $loadedId );
	return $loadedId;
}
/**
 * Get the id from the last revision of a specified page.
 *
 * For the page being parsed, with MiserMode enabled, outside interface
 * messages and in a subject namespace, a stub is returned instead of the
 * real ID: '-' when a revision ID or speculative revision ID is known,
 * '' otherwise.
 *
 * @param Parser $parser
 * @param string|null $title Title to get the id from
 * @return int|null|string
 * @since 1.23
 */
public static function revisionid( $parser, $title = null ) {
	$t = Title::newFromText( $title );
	if ( $t === null ) {
		return '';
	}

	$services = MediaWikiServices::getInstance();
	$useStub = $t->equals( $parser->getTitle() ) &&
		$services->getMainConfig()->get( 'MiserMode' ) &&
		!$parser->getOptions()->getInterfaceMessage() &&
		// @TODO: disallow this word on all namespaces (T235957)
		$services->getNamespaceInfo()->isSubject( $t->getNamespace() );

	if ( !$useStub ) {
		// fetch revision from cache/database and return the value
		$rev = self::getCachedRevisionObject(
			$parser, $t, ParserOutputFlags::VARY_REVISION_ID
		);
		return $rev ? $rev->getId() : '';
	}

	// Use a stub result instead of the actual revision ID in order to avoid
	// double parses on page save but still allow preview detection (T137900)
	if ( $parser->getRevisionId() || $parser->getOptions()->getSpeculativeRevId() ) {
		return '-';
	}
	$parser->getOutput()->setOutputFlag( ParserOutputFlags::VARY_REVISION_EXISTS );
	return '';
}
/**
 * Get the day from the last revision of a specified page.
 *
 * The revision timestamp is converted with MWTimestamp::getLocalInstance()
 * and formatted with the 'j' format character.
 *
 * @param Parser $parser
 * @param string|null $title Title to get the day from
 * @return string Empty string when there is no title or no revision
 * @since 1.23
 */
public static function revisionday( $parser, $title = null ) {
	$t = Title::newFromText( $title );
	if ( $t === null ) {
		return '';
	}
	// fetch revision from cache/database
	$rev = self::getCachedRevisionObject(
		$parser, $t, ParserOutputFlags::VARY_REVISION_TIMESTAMP
	);
	if ( !$rev ) {
		return '';
	}
	$localTime = MWTimestamp::getLocalInstance( $rev->getTimestamp() );
	return $localTime->format( 'j' );
}
/**
 * Get the day with leading zeros from the last revision of a specified page.
 *
 * The revision timestamp is converted with MWTimestamp::getLocalInstance()
 * and formatted with the 'd' format character.
 *
 * @param Parser $parser
 * @param string|null $title Title to get the day from
 * @return string Empty string when there is no title or no revision
 * @since 1.23
 */
public static function revisionday2( $parser, $title = null ) {
	$t = Title::newFromText( $title );
	if ( $t === null ) {
		return '';
	}
	// fetch revision from cache/database
	$rev = self::getCachedRevisionObject(
		$parser, $t, ParserOutputFlags::VARY_REVISION_TIMESTAMP
	);
	if ( !$rev ) {
		return '';
	}
	$localTime = MWTimestamp::getLocalInstance( $rev->getTimestamp() );
	return $localTime->format( 'd' );
}
/**
 * Get the month with leading zeros from the last revision of a specified page.
 *
 * The revision timestamp is converted with MWTimestamp::getLocalInstance()
 * and formatted with the 'm' format character.
 *
 * @param Parser $parser
 * @param string|null $title Title to get the month from
 * @return string Empty string when there is no title or no revision
 * @since 1.23
 */
public static function revisionmonth( $parser, $title = null ) {
	$t = Title::newFromText( $title );
	if ( $t === null ) {
		return '';
	}
	// fetch revision from cache/database
	$rev = self::getCachedRevisionObject(
		$parser, $t, ParserOutputFlags::VARY_REVISION_TIMESTAMP
	);
	if ( !$rev ) {
		return '';
	}
	$localTime = MWTimestamp::getLocalInstance( $rev->getTimestamp() );
	return $localTime->format( 'm' );
}
/**
 * Get the month from the last revision of a specified page.
 *
 * The revision timestamp is converted with MWTimestamp::getLocalInstance()
 * and formatted with the 'n' format character.
 *
 * @param Parser $parser
 * @param string|null $title Title to get the month from
 * @return string Empty string when there is no title or no revision
 * @since 1.23
 */
public static function revisionmonth1( $parser, $title = null ) {
	$t = Title::newFromText( $title );
	if ( $t === null ) {
		return '';
	}
	// fetch revision from cache/database
	$rev = self::getCachedRevisionObject(
		$parser, $t, ParserOutputFlags::VARY_REVISION_TIMESTAMP
	);
	if ( !$rev ) {
		return '';
	}
	$localTime = MWTimestamp::getLocalInstance( $rev->getTimestamp() );
	return $localTime->format( 'n' );
}
/**
 * Get the year from the last revision of a specified page.
 *
 * The revision timestamp is converted with MWTimestamp::getLocalInstance()
 * and formatted with the 'Y' format character.
 *
 * @param Parser $parser
 * @param string|null $title Title to get the year from
 * @return string Empty string when there is no title or no revision
 * @since 1.23
 */
public static function revisionyear( $parser, $title = null ) {
	$t = Title::newFromText( $title );
	if ( $t === null ) {
		return '';
	}
	// fetch revision from cache/database
	$rev = self::getCachedRevisionObject(
		$parser, $t, ParserOutputFlags::VARY_REVISION_TIMESTAMP
	);
	if ( !$rev ) {
		return '';
	}
	$localTime = MWTimestamp::getLocalInstance( $rev->getTimestamp() );
	return $localTime->format( 'Y' );
}
/**
 * Get the timestamp from the last revision of a specified page.
 *
 * The revision timestamp is converted with MWTimestamp::getLocalInstance()
 * and formatted as 'YmdHis'.
 *
 * @param Parser $parser
 * @param string|null $title Title to get the timestamp from
 * @return string Empty string when there is no title or no revision
 * @since 1.23
 */
public static function revisiontimestamp( $parser, $title = null ) {
	$t = Title::newFromText( $title );
	if ( $t === null ) {
		return '';
	}
	// fetch revision from cache/database
	$rev = self::getCachedRevisionObject(
		$parser, $t, ParserOutputFlags::VARY_REVISION_TIMESTAMP
	);
	if ( !$rev ) {
		return '';
	}
	$localTime = MWTimestamp::getLocalInstance( $rev->getTimestamp() );
	return $localTime->format( 'YmdHis' );
}
/**
 * Get the user from the last revision of a specified page.
 *
 * @param Parser $parser
 * @param string|null $title Title to get the user from
 * @return string User name, or '' when there is no title, no revision,
 *   or the revision yields no user
 * @since 1.23
 */
public static function revisionuser( $parser, $title = null ) {
	$t = Title::newFromText( $title );
	if ( $t === null ) {
		return '';
	}
	// fetch revision from cache/database
	$rev = self::getCachedRevisionObject( $parser, $t, ParserOutputFlags::VARY_USER );
	if ( $rev === null ) {
		return '';
	}
	$author = $rev->getUser();
	if ( !$author ) {
		return '';
	}
	return $author->getName();
}
/**
 * Returns the sources of any cascading protection acting on a specified page.
 * Pages will not return their own title unless they transclude themselves.
 * This is an expensive parser function and can't be called too many times per page,
 * unless cascading protection sources for the page have already been loaded.
 *
 * When no valid title is given, the page being parsed is used.
 *
 * @param Parser $parser
 * @param string $title
 *
 * @return string Prefixed titles of the sources joined with '|', or '' when
 *   the lookup is not allowed
 * @since 1.23
 */
public static function cascadingsources( $parser, $title = '' ) {
	$titleObject = Title::newFromText( $title );
	if ( $titleObject === null ) {
		$titleObject = $parser->getTitle();
	}
	if (
		!$titleObject->areCascadeProtectionSourcesLoaded() &&
		!$parser->incrementExpensiveFunctionCount()
	) {
		return '';
	}
	$sources = $titleObject->getCascadeProtectionSources();
	$names = [];
	// Element 0 of the result holds the source titles
	foreach ( $sources[0] as $sourceTitle ) {
		$names[] = $sourceTitle->getPrefixedText();
	}
	return implode( '|', $names );
}
}