wiki.techinc.nl/includes/MagicWord.php

703 lines
15 KiB
PHP
Raw Normal View History

<?php
/**
* See docs/magicword.txt.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Parser
*/
/**
* This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
*
* @par Usage:
* @code
* if (MagicWord::get( 'redirect' )->match( $text ) ) {
* // some code
* }
* @endcode
*
* Possible future improvements:
* * Simultaneous searching for a number of magic words
* * MagicWord::$mObjects in shared memory
*
* Please avoid reading the data out of one of these objects and then writing
* special case code. If possible, add another match()-like function here.
*
* To add magic words in an extension, use $magicWords in a file listed in
* $wgExtensionMessagesFiles[].
*
* @par Example:
* @code
* $magicWords = array();
*
* $magicWords['en'] = array(
* 'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
* 'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
* );
* @endcode
*
* For magic words which are also Parser variables, add a MagicWordwgVariableIDs
* hook. Use string keys.
*
* @ingroup Parser
*/
class MagicWord {
	/**
	 * @var string|int Identifier of this magic word. IDs are string keys such
	 *  as 'redirect'; the constructor default is 0.
	 */
	public $mId;

	/** @var string[] All synonyms (localised spellings) of this magic word */
	public $mSynonyms;

	/** @var bool Whether the synonyms are matched case-sensitively */
	public $mCaseSensitive;

	/** @var string Full regex matching any synonym; built lazily by initRegex() */
	private $mRegex = '';

	/** @var string Like $mRegex, but anchored to the start of the subject */
	private $mRegexStart = '';

	/** @var string Like $mRegex, but anchored to both start and end */
	private $mRegexStartToEnd = '';

	/** @var string Alternation of the quoted synonyms, without delimiters or modifiers */
	private $mBaseRegex = '';

	/** @var string $mRegex with every quoted "$1" replaced by the group "(.*?)" */
	private $mVariableRegex = '';

	/** @var string $mRegexStartToEnd with every quoted "$1" replaced by "(.*?)" */
	private $mVariableStartToEndRegex = '';

	/** @var bool Whether the last replace()/substituteCallback() changed its input */
	private $mModified = false;

	/** @var bool Scratch flag set by pregRemoveAndRecord() while removing matches */
	private $mFound = false;

	/** @var bool True once the MagicWordwgVariableIDs hook has been run */
	public static $mVariableIDsInitialised = false;

	/** @var string[] IDs of the magic words that are parser variables */
	public static $mVariableIDs = [
		'!',
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'revisionsize',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'rootpagename',
		'rootpagenamee',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'cascadingsources',
	];

	/** @var int[] Caching hints for ParserCache, keyed by magic word ID */
	public static $mCacheTTLs = [
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberingroup' => 3600,
	];

	/** @var string[] IDs of the double-underscore magic words */
	public static $mDoubleUnderscoreIDs = [
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	];

	/** @var string[] IDs of the substitution modifiers */
	public static $mSubstIDs = [
		'subst',
		'safesubst',
	];

	/** @var MagicWord[] Cache of loaded objects, keyed by magic word ID */
	public static $mObjects = [];

	/** @var MagicWordArray|null Lazily built by getDoubleUnderscoreArray() */
	public static $mDoubleUnderscoreArray = null;

	/**
	 * @param string|int $id Magic word ID
	 * @param string[]|string $syn Synonym or list of synonyms; cast to array
	 * @param bool $cs Whether matching is case-sensitive
	 */
	public function __construct( $id = 0, $syn = [], $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: returns the cached object for an ID, creating and loading it
	 * on first use.
	 *
	 * @param string $id Magic word ID, e.g. 'redirect'
	 *
	 * @return MagicWord
	 * @throws MWException Propagated from load() when the ID has no synonyms
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			// Only cache after a successful load, so invalid IDs are not stored
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs. On the first call the
	 * MagicWordwgVariableIDs hook is run so it can extend the list.
	 *
	 * @return string[]
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			Hooks::run( 'MagicWordwgVariableIDs', [ &self::$mVariableIDs ] );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 *
	 * @return string[]
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param string $id Magic word ID
	 * @return int The entry from $mCacheTTLs, or -1 if the ID has none
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities. Built once; the
	 * GetDoubleUnderscoreIDs hook is run before construction so it can extend
	 * the ID list.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( self::$mDoubleUnderscoreArray === null ) {
			Hooks::run( 'GetDoubleUnderscoreIDs', [ &self::$mDoubleUnderscoreIDs ] );
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = [];
	}

	/**
	 * Initialises this object with an ID, asking the content language to fill
	 * in the synonyms and case sensitivity.
	 *
	 * @param string $id Magic word ID
	 * @throws MWException If no synonyms were set for the ID
	 */
	public function load( $id ) {
		global $wgContLang;
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Leave a placeholder synonym on the object before reporting the error
			$this->mSynonyms = [ 'brionmademeputthishere' ];
			throw new MWException( "Error: invalid magic word '$id'" );
		}
	}

	/**
	 * Preliminary initialisation: builds every cached regex from the
	 * synonym list and the case-sensitivity flag.
	 *
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, [ $this, 'compareStringLength' ] );

		$escSyn = [];
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		$this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";

		// preg_quote() turned "$1" into "\$1"; swap that for a lazy capture group
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			$this->mRegexStartToEnd );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string. Lengths are byte lengths (strlen).
	 *
	 * @param string $s1
	 * @param string $s2
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. "iu" or nothing, to be used
	 * if one is using MagicWord::getBaseRegex(), otherwise it'll be included
	 * in the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * Gets a regex matching the word from start to end of a string
	 *
	 * @return string
	 * @since 1.23
	 */
	public function getRegexStartToEnd() {
		if ( $this->mRegexStartToEnd === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStartToEnd;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns true if the text matched the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 * @since 1.23
	 */
	public function matchStartToEnd( $text ) {
		return (bool)preg_match( $this->getRegexStartToEnd(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param string $text
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = [];
		if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable is the first non-empty captured group.
		# See also bug 6526
		$whole = array_shift( $matches );
		foreach ( $matches as $part ) {
			// Compare against '' explicitly: a plain truthiness test would also
			// discard the legitimate value "0".
			if ( $part !== '' ) {
				return $part;
			}
		}

		// No variable part captured anything: fall back to the matched string
		return $whole;
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param string &$text Modified in place
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		return $this->removeMatches( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing the word from its start
	 *
	 * @param string &$text Modified in place
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		return $this->removeMatches( $this->getRegexStart(), $text );
	}

	/**
	 * Shared implementation of matchAndRemove() and matchStartAndRemove():
	 * strips every match of $regex from $text and reports whether any was found.
	 *
	 * @param string $regex
	 * @param string &$text Modified in place
	 * @return bool
	 */
	private function removeMatches( $regex, &$text ) {
		$this->mFound = false;
		// A plain $this is enough for a callable; no reference is needed
		$text = preg_replace_callback(
			$regex,
			[ $this, 'pregRemoveAndRecord' ],
			$text
		);
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove(): records that a match occurred and replaces
	 * it with the empty string.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param string $replacement Literal replacement text; it is escaped so
	 *  that it is not interpreted as a regex replacement pattern
	 * @param string $subject
	 * @param int $limit Maximum number of replacements, -1 for no limit
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace(
			$this->getRegex(),
			StringUtils::escapeRegexReplacement( $replacement ),
			$subject,
			$limit
		);
		$this->mModified = $res !== $subject;
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param string $text
	 * @param callable $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = $res !== $text;
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param int $i Index into the synonym list
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return string[]
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Unlike replace(), the replacement strings are handed to preg_replace()
	 * unescaped.
	 *
	 * @deprecated since 1.25, unused
	 *
	 * @param array $magicarr
	 * @param string $subject
	 * @param string &$result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		wfDeprecated( __METHOD__, '1.25' );
		$search = [];
		$replace = [];
		foreach ( $magicarr as $id => $replacement ) {
			$mw = self::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return $result !== $subject;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words. Keys are the synonyms lower-cased by
	 * the content language.
	 *
	 * @param array &$array
	 * @param string $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return string|int The magic word ID
	 */
	public function getId() {
		return $this->mId;
	}
}