The result of wfEscapeWikiText() did not consider inclusion of a string
within table context, and so `!!` and `!` at start of line were not
previously escaped.
Additionally, if you are inserting the result of wfEscapeWikiText()
into a wikitext string, the result can inadvertently form new tokens
at the left and right edges. For example:
{|
|<escaped string>
When the escaped string starts with a `-` or `+` it forms the `|-` or `|+`
token and changes the parse.
On the right side:
<escaped string>_FORCETOC__
is not a magic word unless the escaped string ends with a `_`.
Token-gluing can also occur with the double newline token denoting
a paragraph separator.
This patch adds additional cases to wfEscapeWikiText to armor
its output against all contexts in which the output may be
inserted.
Depends-On: I56147520620d56a4b98c2f22d1f469b49aed761f
Change-Id: I34f2fa8c329e6f6771453b2f94dc4afbec31dac8
123 lines
2.8 KiB
PHP
123 lines
2.8 KiB
PHP
<?php
|
|
|
|
/**
 * Data-driven tests for the global wfEscapeWikiText() function.
 *
 * Each case pairs a raw input string with the exact escaped output that
 * is expected, written with numeric character references (e.g. `&#91;`).
 *
 * @group GlobalFunctions
 * @covers ::wfEscapeWikiText
 */
class WfEscapeWikiTextTest extends MediaWikiUnitTestCase {
	/**
	 * Escape $input and verify two things: the result decodes back to
	 * $input, and the result is exactly $expected.
	 *
	 * @dataProvider provideEscape
	 * @param string $input Raw text handed to wfEscapeWikiText()
	 * @param string $expected Exact escaped string that must be returned
	 */
	public function testEscape( $input, $expected ) {
		// Save the global and run the escaper with an empty magic-links setting.
		global $wgEnableMagicLinks;
		$old = $wgEnableMagicLinks;
		$wgEnableMagicLinks = [];

		try {
			$actual = wfEscapeWikiText( $input );

			// Round trip: decoding the entities in the output must give back
			// the original input.
			$decoded = html_entity_decode( $actual, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5 );
			$this->assertSame( $input, $decoded, 'Escaped output must decode back to the input' );

			// And the output must be exactly what we expected.
			$this->assertSame( $expected, $actual );
		} finally {
			// Restore the global even when an assertion above fails, so the
			// modified value cannot leak into later tests.
			$wgEnableMagicLinks = $old;
		}
	}

	/**
	 * Named data sets for testEscape().
	 *
	 * @return array[] Map of case name => [ input, expected escaped output ]
	 */
	public static function provideEscape() {
		return [
			'empty string' => [
				'',
				'',
			],
			'no escapes' => [
				'a',
				'a',
			],
			'braces and brackets' => [
				'[[WikiLink]] {{Template}} <html>',
				'&#91;&#91;WikiLink&#93;&#93; &#123;&#123;Template&#125;&#125; &#60;html&#62;',
			],
			'quotes' => [
				'"\'',
				'&#34;&#39;',
			],
			'tokens' => [
				'{| |- |+ !! ~~~~~ __FOO__',
				'&#123;&#124; &#124;- &#124;+ &#33;! ~~&#126;~~ _&#95;FOO_&#95;',
			],
			'start of line' => [
				"* foo\n! bar\n# bat\n:baz\n pre\n----",
				"&#42; foo\n&#33; bar\n&#35; bat\n&#58;baz\n&#32;pre\n&#45;---",
			],
			'paragraph separators' => [
				"a\n\n\n\nb",
				"a\n&#10;\n&#10;b",
			],
			'language converter' => [
				'-{ foo ; bar }-',
				'&#45;&#123; foo &#59; bar &#125;-',
			],
			'left-side context: |+' => [
				'+ foo + bar',
				'&#43; foo + bar',
			],
			'left-side context: |-' => [
				'- foo - bar',
				'&#45; foo - bar',
			],
			'left-side context: __FOO__' => [
				'_FOO__',
				'&#95;FOO_&#95;',
			],
			'left-side context: ~~~' => [
				'~~ long string here',
				'&#126;~ long string here',
			],
			'left-side context: newlines' => [
				"\n\n\nFoo",
				"&#10;\n&#10;Foo",
			],
			'right-side context: ~~~' => [
				'long string here ~~',
				'long string here ~&#126;',
			],
			'right-side context: __FOO__' => [
				'__FOO_',
				'&#95;&#95;FOO&#95;',
			],
			'right-side context: newlines' => [
				"foo\n\n\n",
				"foo\n&#10;&#10;",
			],
			// A single character input needs to be protected against both
			// left-side context and right-side context.
			'both-side context: +' => [ // | + + (left side)
				'+',
				'&#43;',
			],
			'both-side context: -' => [ // | + - (left side)
				'-',
				'&#45;',
			],
			'both-side context: _' => [ // _ + _FOO as well as __FOO_ + _
				'_',
				'&#95;',
			],
			'both-side context: ~' => [ // ~ + ~~ as well as ~~ + ~
				'~',
				'&#126;',
			],
			'both-side context: \\n' => [ // \n + \n
				"\n",
				'&#10;',
			],
			'both-side context: \\t' => [ // \n + \t + \n becomes paragraph break
				"\t",
				'&#9;',
			],
		];
	}
}
|