Garbage in, garbage out. When the wikitext is broken, it's still helpful if the user can see the broken wikitext. Even if it's not fully parsed. It's not the job of this class to fix broken UTF-8. The worst thing that can happen is that the wikitext contains some unparsed magic words. However, this is really only relevant for very old revisions (20 years old, see T321234). It's very normal that old revisions can't be 100% parsed any more, most notably because of deleted templates. This here is not much different. Bug: T321234 Change-Id: I0ce40f6575668847ef309599ee32de52190ab212
217 lines
6.9 KiB
PHP
217 lines
6.9 KiB
PHP
<?php
|
|
|
|
use Wikimedia\TestingAccessWrapper;
|
|
|
|
/**
|
|
* @covers \MediaWiki\Parser\MagicWordArray
|
|
* @covers \MediaWiki\Parser\MagicWord
|
|
*/
|
|
class MagicWordArrayTest extends MediaWikiUnitTestCase {
|
|
|
|
private const MAGIC_WORDS = [
|
|
'notitleconvert' => [ 0, '__NOTITLECONVERT__', '__NOTC__' ],
|
|
'notoc' => [ 0, '__NOTOC__' ],
|
|
'img_thumbnail' => [ 1, 'thumb', 'thumbnail' ],
|
|
'img_upright' => [ 1, 'upright', 'upright=$1', 'upright $1' ],
|
|
'img_width' => [ 1, '$1px' ],
|
|
];
|
|
|
|
public function testConstructor() {
|
|
$array = new MagicWordArray( [ 'ID' ], $this->getFactory() );
|
|
$this->assertSame( [ 'ID' ], $array->getNames() );
|
|
$this->assertSame( [ '(?i:(?P<a_ID>SYNONYM)|(?P<b_ID>alt\=\$1))', '(?!)' ],
|
|
$array->getBaseRegex() );
|
|
$this->assertSame( 'ID', $array->matchStartToEnd( 'SyNoNyM' ) );
|
|
}
|
|
|
|
public function testAdd() {
|
|
$array = new MagicWordArray( [], $this->getFactory() );
|
|
$array->add( 'ADD' );
|
|
$this->assertSame( [ 'ADD' ], $array->getNames() );
|
|
$this->assertSame( [ '(?i:(?P<a_ADD>SYNONYM)|(?P<b_ADD>alt\=\$1))', '(?!)' ],
|
|
$array->getBaseRegex() );
|
|
}
|
|
|
|
/**
|
|
* @dataProvider provideMatchStartToEndCaseSensitive
|
|
*/
|
|
public function testMatchStartToEndCaseSensitive( string $input, $expected ) {
|
|
$array = new MagicWordArray( [ 'ID' ], $this->getFactory( true ) );
|
|
$this->assertSame( $expected, $array->matchStartToEnd( $input ) );
|
|
}
|
|
|
|
public function provideMatchStartToEndCaseSensitive() {
|
|
return [
|
|
'identifier is not automatically valid syntax' => [ 'ID', false ],
|
|
'mismatch' => [ 'unknown', false ],
|
|
'incorrect capitalization' => [ 'synonym', false ],
|
|
'exact match' => [ 'SYNONYM', 'ID' ],
|
|
];
|
|
}
|
|
|
|
/**
|
|
* @dataProvider provideMatchVariableStartToEnd
|
|
*/
|
|
public function testMatchVariableStartToEnd(
|
|
string $input,
|
|
array $expected = [ false, false ]
|
|
) {
|
|
$array = new MagicWordArray( [ 'ID' ], $this->getFactory() );
|
|
$this->assertSame( $expected, $array->matchVariableStartToEnd( $input ) );
|
|
}
|
|
|
|
public function provideMatchVariableStartToEnd() {
|
|
return [
|
|
'identifier is not automatically valid syntax' => [ 'ID' ],
|
|
'match' => [ 'SyNoNyM', [ 'ID', false ] ],
|
|
'end does not match' => [ 'SyNoNyMx' ],
|
|
'with empty parameter' => [ 'alt=', [ 'ID', '' ] ],
|
|
'with parameter' => [ 'alt=Description', [ 'ID', 'Description' ] ],
|
|
'whitespace is not ignored' => [ 'alt =' ],
|
|
];
|
|
}
|
|
|
|
/**
|
|
* @dataProvider provideMatchVariableStartToEndMultiple
|
|
*/
|
|
public function testMatchVariableStartToEndMultiple(
|
|
string $input,
|
|
array $expected = [ false, false ]
|
|
) {
|
|
$array = new MagicWordArray( array_keys( self::MAGIC_WORDS ), $this->getFactory() );
|
|
$this->assertSame( $expected, $array->matchVariableStartToEnd( $input ) );
|
|
|
|
/** @var MagicWordArray $spy */
|
|
$spy = TestingAccessWrapper::newFromObject( $array );
|
|
$this->assertSame( [
|
|
'/^(?:(?i:(?P<a_notitleconvert>__NOTITLECONVERT__)|(?P<b_notitleconvert>__NOTC__)|' .
|
|
'(?P<a_notoc>__NOTOC__)))$/Su',
|
|
'/^(?:(?P<a_img_thumbnail>thumb)|(?P<b_img_thumbnail>thumbnail)|' .
|
|
'(?P<a_img_upright>upright)|(?P<b_img_upright>upright\=(.*?))|(?P<c_img_upright>upright (.*?))|' .
|
|
'(?P<a_img_width>(.*?)px))$/S',
|
|
], $spy->getVariableStartToEndRegex() );
|
|
}
|
|
|
|
public function provideMatchVariableStartToEndMultiple() {
|
|
return [
|
|
[ 'thumb', [ 'img_thumbnail', false ] ],
|
|
[ 'upright=1.2', [ 'img_upright', '1.2' ] ],
|
|
[ 'upright=', [ 'img_upright', '' ] ],
|
|
[ 'upright', [ 'img_upright', false ] ],
|
|
[ '100px', [ 'img_width', '100' ] ],
|
|
[ 'px', [ 'img_width', '' ] ],
|
|
[ 'PX' ],
|
|
];
|
|
}
|
|
|
|
/**
|
|
* @dataProvider provideMatchStartAndRemove
|
|
*/
|
|
public function testMatchStartAndRemove(
|
|
string $input,
|
|
$expectedMatches,
|
|
string $expectedText = null
|
|
) {
|
|
$array = new MagicWordArray( [ 'ID' ], $this->getFactory() );
|
|
$text = $input;
|
|
$this->assertSame( $expectedMatches, $array->matchStartAndRemove( $text ) );
|
|
$this->assertSame( $expectedText ?? $input, $text );
|
|
}
|
|
|
|
public function provideMatchStartAndRemove() {
|
|
return [
|
|
'identifier is not automatically valid syntax' => [ 'ID', false ],
|
|
'match' => [ 'SyNoNyMx', 'ID', 'x' ],
|
|
'not at the start' => [ 'xSyNoNyMx', false ],
|
|
'this method does not support parameters' => [ 'alt=x', false ],
|
|
'unexpected behavior when used with parameters' => [ 'alt=$1x', 'ID', 'x' ],
|
|
];
|
|
}
|
|
|
|
/**
|
|
* @dataProvider provideMatchAndRemove
|
|
*/
|
|
public function testMatchAndRemove(
|
|
string $input,
|
|
array $expectedMatches = [],
|
|
string $expectedText = null
|
|
) {
|
|
$array = new MagicWordArray( [ 'ID' ], $this->getFactory() );
|
|
$text = $input;
|
|
$this->assertSame( $expectedMatches, $array->matchAndRemove( $text ) );
|
|
$this->assertSame( $expectedText ?? $input, $text );
|
|
}
|
|
|
|
public function provideMatchAndRemove() {
|
|
return [
|
|
'identifier is not automatically valid syntax' => [ 'ID' ],
|
|
'two matches' => [ 'xSyNoNyMxSyNoNyMx', [ 'ID' => false ], 'xxx' ],
|
|
'this method does not support parameters' => [ 'xalt=x' ],
|
|
'unexpected behavior when used with parameters' => [ 'xalt=$1x', [ 'ID' => false ], 'xx' ],
|
|
'T321234' => [ "\x83", [] ],
|
|
];
|
|
}
|
|
|
|
/**
|
|
* @dataProvider provideMatchAndRemoveMultiple
|
|
*/
|
|
public function testMatchAndRemoveMultiple(
|
|
string $input,
|
|
array $expectedMatches = [],
|
|
string $expectedText = null
|
|
) {
|
|
$array = new MagicWordArray( array_keys( self::MAGIC_WORDS ), $this->getFactory() );
|
|
$text = $input;
|
|
$this->assertSame( $expectedMatches, $array->matchAndRemove( $text ) );
|
|
$this->assertSame( $expectedText ?? $input, $text );
|
|
}
|
|
|
|
public function provideMatchAndRemoveMultiple() {
|
|
return [
|
|
[
|
|
'x__NOTC__x__NOTOC__x__NOTITLECONVERT__x',
|
|
[ 'notitleconvert' => false, 'notoc' => false ],
|
|
'xxxx',
|
|
],
|
|
[
|
|
'__NOTOC__NOTITLECONVERT__NOTOC____NOTOC__',
|
|
[ 'notoc' => false ],
|
|
'NOTITLECONVERT',
|
|
],
|
|
[
|
|
'[[File:X.png|thumb|Alt]]',
|
|
[ 'img_thumbnail' => false ],
|
|
'[[File:X.png||Alt]]',
|
|
],
|
|
[
|
|
'[[File:X.png|thumb|100px]]',
|
|
[ 'img_thumbnail' => false ],
|
|
'[[File:X.png||100px]]',
|
|
],
|
|
[
|
|
'[[File:X.png|upright=1.2|thumbnail]]',
|
|
[ 'img_upright' => false, 'img_thumbnail' => false ],
|
|
// Note: The matchAndRemove method is obviously not designed for this, still we want
|
|
// to document the status quo
|
|
'[[File:X.png|=1.2|nail]]',
|
|
],
|
|
];
|
|
}
|
|
|
|
private function getFactory( ?bool $caseSensitive = null ): MagicWordFactory {
|
|
$language = $this->createNoOpMock( Language::class, [ 'lc', '__debugInfo' ] );
|
|
$language->method( 'lc' )->willReturnCallback( static fn ( $s ) => strtolower( $s ) );
|
|
|
|
$factory = $this->createNoOpMock( MagicWordFactory::class, [ 'getContentLanguage', 'get' ] );
|
|
$factory->method( 'getContentLanguage' )->willReturn( $language );
|
|
$factory->method( 'get' )->willReturnCallback(
|
|
static function ( $id ) use ( $caseSensitive, $language ) {
|
|
$data = self::MAGIC_WORDS[$id] ?? [ 0, 'SYNONYM', 'alt=$1' ];
|
|
$caseSensitive ??= (bool)$data[0];
|
|
return new MagicWord( $id, array_slice( $data, 1 ), $caseSensitive, $language );
|
|
}
|
|
);
|
|
return $factory;
|
|
}
|
|
|
|
}
|