Implicitly marking parameter $... as nullable is deprecated in php8.4, the explicit nullable type must be used instead Created with autofix from Ide15839e98a6229c22584d1c1c88c690982e1d7a Break one long line in SpecialPage.php Bug: T376276 Change-Id: I807257b2ba1ab2744ab74d9572c9c3d3ac2a968e
327 lines
9 KiB
PHP
327 lines
9 KiB
PHP
<?php
|
|
/**
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*
|
|
* @file
|
|
*/
|
|
|
|
namespace MediaWiki\Parser;
|
|
|
|
use LogicException;
|
|
use MediaWiki\Logger\LoggerFactory;
|
|
use MediaWiki\MediaWikiServices;
|
|
|
|
/**
 * Class for handling an array of magic words
 *
 * Holds a list of magic word names and lazily derives lookup structures from
 * them: a synonym hash table (see getHash()) and regular expressions built
 * from the synonyms (see getBaseRegex()). The derived structures are cached
 * and are discarded again by add().
 *
 * See docs/magicword.md.
 *
 * @since 1.11
 * @ingroup Parser
 */
class MagicWordArray {

	/** @var string[] Names of the magic words in this array */
	public $names = [];

	/** Provides the magic word objects and the content language used for lowercasing */
	private MagicWordFactory $factory;

	/** @var array<int,array<string,string>>|null Cached result of getHash() */
	private ?array $hash = null;

	/** @var string[]|null Cached result of getBaseRegex() for its default arguments */
	private ?array $baseRegex = null;

	/** @var string[]|null Cached result of getRegex() */
	private ?array $regex = null;

	/**
	 * @param string[] $names
	 * @param MagicWordFactory|null $factory Factory to use. When null, the
	 *  factory is taken from MediaWikiServices.
	 */
	public function __construct( $names = [], ?MagicWordFactory $factory = null ) {
		$this->names = $names;
		$this->factory = $factory ?? MediaWikiServices::getInstance()->getMagicWordFactory();
	}

	/**
	 * Add a magic word by name
	 *
	 * Drops every cached structure so that it is rebuilt on next use.
	 *
	 * @param string $name
	 */
	public function add( $name ): void {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array
	 *
	 * The first dimension is the case sensitivity (0 = case-insensitive,
	 * 1 = case-sensitive); the second maps a synonym to the magic word name.
	 * Case-insensitive synonyms are stored lowercased with the content language.
	 *
	 * @return array<int,array<string,string>>
	 */
	public function getHash(): array {
		if ( $this->hash === null ) {
			// Build into a local first: if the factory throws part-way through,
			// the cache must stay empty instead of holding an incomplete table.
			$hash = [ 0 => [], 1 => [] ];
			foreach ( $this->names as $name ) {
				$magic = $this->factory->get( $name );
				$case = $magic->isCaseSensitive() ? 1 : 0;
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $this->factory->getContentLanguage()->lc( $syn );
					}
					$hash[$case][$syn] = $name;
				}
			}
			$this->hash = $hash;
		}
		return $this->hash;
	}

	/**
	 * Get the base regex
	 *
	 * Returns one alternation of all quoted synonyms per case sensitivity
	 * (0 = case-insensitive, wrapped in "(?i:...)"; 1 = case-sensitive),
	 * without delimiters or modifiers. Only the result for the default
	 * arguments is cached.
	 *
	 * @internal For use in {@see Parser} only
	 * @param bool $capture Set to false to suppress the capture groups,
	 *  which can cause unexpected conflicts when this regexp is embedded in
	 *  other regexps with similar constructs.
	 * @param string $delimiter The delimiter which will be used for the
	 *  eventual regexp.
	 * @return array<int,string>
	 */
	public function getBaseRegex( bool $capture = true, string $delimiter = '/' ): array {
		$cacheable = $capture && $delimiter === '/';
		if ( $cacheable && $this->baseRegex !== null ) {
			return $this->baseRegex;
		}

		$alternatives = [ 0 => [], 1 => [] ];
		foreach ( $this->names as $name ) {
			$magic = $this->factory->get( $name );
			$case = $magic->isCaseSensitive() ? 1 : 0;
			foreach ( $magic->getSynonyms() as $i => $syn ) {
				$pattern = preg_quote( $syn, $delimiter );
				if ( $capture ) {
					// Group name must start with a non-digit in PCRE 8.34+,
					// so the synonym index is spelled with letters a-j.
					$it = strtr( (string)$i, '0123456789', 'abcdefghij' );
					$pattern = '(?P<' . $it . '_' . $name . '>' . $pattern . ')';
				}
				$alternatives[$case][] = $pattern;
			}
		}

		// Collapse each list into a single pattern. A fresh array is used
		// instead of a by-reference loop so no reference outlives the loop.
		$regex = [];
		foreach ( $alternatives as $case => $parts ) {
			// '(?!)' is an empty negative lookahead, i.e. it never matches
			$re = $parts ? implode( '|', $parts ) : '(?!)';
			$regex[$case] = $case ? $re : "(?i:{$re})";
		}

		if ( $cacheable ) {
			$this->baseRegex = $regex;
		}
		return $regex;
	}

	/**
	 * Turn the capturing base regexes into complete patterns
	 *
	 * Each base regex is placed between $prefix and $suffix, delimited with
	 * "/" and given the "JS" modifiers.
	 *
	 * @param string $prefix Pattern text inserted before the base regex
	 * @param string $suffix Pattern text inserted after the base regex
	 * @return array<int,string>
	 */
	private function wrapBaseRegex( string $prefix, string $suffix ): array {
		$wrapped = [];
		foreach ( $this->getBaseRegex( true, '/' ) as $case => $re ) {
			$wrapped[$case] = "/{$prefix}{$re}{$suffix}/JS";
		}
		// As a performance optimization, turn on unicode mode only for
		// case-insensitive matching.
		$wrapped[0] .= 'u';
		return $wrapped;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array<int,string>
	 */
	private function getRegex(): array {
		// The cache is only assigned once the list is complete, so an
		// exception while building cannot leave an empty list behind.
		$this->regex ??= $this->wrapBaseRegex( '', '' );
		return $this->regex;
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array<int,string>
	 */
	private function getRegexStart(): array {
		return $this->wrapBaseRegex( '^(?:', ')' );
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * Every quoted "$1" placeholder in a synonym becomes a lazy capture group.
	 *
	 * @return array<int,string>
	 */
	private function getVariableStartToEndRegex(): array {
		// str_replace() applied to an array returns the array with the
		// replacement done in every element, keys preserved.
		return str_replace( '\$1', '(.*?)', $this->wrapBaseRegex( '^(?:', ')$' ) );
	}

	/**
	 * @since 1.20
	 * @return string[]
	 */
	public function getNames() {
		return $this->names;
	}

	/**
	 * Parse a match array from preg_match
	 *
	 * @param array<string|int,string> $matches
	 * @return array{0:string,1:string|false} Pair of (magic word ID, parameter value),
	 *  where the latter is instead false if there is no parameter value.
	 * @throws LogicException If a matching group has no "_" in its name, or
	 *  if no group matched at all
	 */
	private function parseMatch( array $matches ): array {
		$magicName = null;
		foreach ( $matches as $key => $match ) {
			if ( $magicName !== null ) {
				// The structure we found at this point is [ …,
				//   'a_magicWordName' => 'matchedSynonym',
				//   n                 => 'matchedSynonym (again)',
				//   n + 1             => 'parameterValue',
				// … ]
				return [ $magicName, $matches[$key + 1] ?? false ];
			}
			// Skip the initial full match and any non-matching group
			if ( $match !== '' && $key !== 0 ) {
				// Group names look like "<letters>_<magic word name>"
				$parts = explode( '_', (string)$key, 2 );
				if ( !isset( $parts[1] ) ) {
					throw new LogicException( 'Unexpected group name' );
				}
				$magicName = $parts[1];
			}
		}
		throw new LogicException( 'Unexpected $m array with no match' );
	}

	/**
	 * Match some text, with parameter capture
	 *
	 * @param string $text
	 * @return (string|false)[] Magic word name in the first element and the parameter in the second
	 *  element. Both elements are false if there was no match.
	 */
	public function matchVariableStartToEnd( $text ): array {
		foreach ( $this->getVariableStartToEndRegex() as $regex ) {
			$m = [];
			if ( preg_match( $regex, $text, $m ) ) {
				return $this->parseMatch( $m );
			}
		}
		return [ false, false ];
	}

	/**
	 * Match some text, without parameter capture
	 *
	 * Looks the text up in the case-sensitive table first, then its
	 * lowercased form in the case-insensitive table.
	 *
	 * @see MagicWord::matchStartToEnd
	 * @param string $text
	 * @return string|false The magic word name, or false if there was no capture
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		$lc = $this->factory->getContentLanguage()->lc( $text );
		return $hash[0][$lc] ?? false;
	}

	/**
	 * Return an associative array for all items that match.
	 *
	 * Cannot be used for magic words with parameters.
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @see MagicWord::matchAndRemove
	 * @param string &$text Left unmodified if the replacement fails
	 * @return array<string,false> Keyed by magic word ID
	 * @throws LogicException On any PCRE error other than PREG_BAD_UTF8_ERROR
	 */
	public function matchAndRemove( &$text ): array {
		$found = [];
		$regexes = $this->getRegex();
		$res = preg_replace_callback( $regexes, function ( array $m ) use ( &$found ) {
			[ $name, $param ] = $this->parseMatch( $m );
			$found[$name] = $param;
			return '';
		}, $text );

		if ( $res !== null ) {
			$text = $res;
			return $found;
		}

		// T321234: Don't try to fix old revisions with broken UTF-8, just return $text as is
		$error = preg_last_error();
		$errorText = preg_last_error_msg();
		// NOTE: the messages below say "preg_match_all" although the failing
		// call is preg_replace_callback(); the wording is kept unchanged.
		LoggerFactory::getInstance( 'parser' )->warning( 'preg_match_all error: {code} {errorText}', [
			'code' => $error,
			'regex' => $regexes,
			'text' => $text,
			'errorText' => $errorText
		] );
		if ( $error !== PREG_BAD_UTF8_ERROR ) {
			throw new LogicException( "preg_match_all error $error: $errorText" );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 *
	 * Does not match parameters.
	 *
	 * @see MagicWord::matchStartAndRemove
	 * @param string &$text Unmodified if no match is found.
	 * @return string|false False if no match is found.
	 */
	public function matchStartAndRemove( &$text ) {
		foreach ( $this->getRegexStart() as $regex ) {
			$m = [];
			if ( preg_match( $regex, $text, $m ) ) {
				[ $id, ] = $this->parseMatch( $m );
				// On PHP 8, substr() yields '' when the offset equals the
				// string length, so no separate "whole text matched" branch
				// is needed.
				$text = substr( $text, strlen( $m[0] ) );
				return $id;
			}
		}
		return false;
	}
}
|
|
|
|
/** @deprecated class alias since 1.40 */
|
|
// Register the un-namespaced name 'MagicWordArray' as an alias of
// MediaWiki\Parser\MagicWordArray, so code using the old global class name
// resolves to this class.
class_alias( MagicWordArray::class, 'MagicWordArray' );
|