wiki.techinc.nl/tests/phpunit/unit/includes/GlobalFunctions/WfEscapeWikiTextTest.php
C. Scott Ananian 3fc635dcb2 wfEscapeWikiText: protect string against wikitext tokens formed at edges
The result of wfEscapeWikiText() did not consider inclusion of a string
within table context, and so `!!` and `!` at start of line were not
previously escaped.

Additionally, if you are inserting the result of wfEscapeWikiText()
into a wikitext string, the result can inadvertently form new tokens
at the left and right edges.  For example:

   {|
   |<escaped string>

When the escaped string starts with a `-` or `+` it forms the `|-` or `|+`
token and changes the parse.

On the right side:

  <escaped string>_FORCETOC__

is not a magic word unless the escaped string ends with a `_`.

Token-gluing can also occur with the double newline token denoting
a paragraph separator.

This patch adds additional cases to wfEscapeWikiText to armor
its output against all contexts in which the output may be
inserted.

Depends-On: I56147520620d56a4b98c2f22d1f469b49aed761f
Change-Id: I34f2fa8c329e6f6771453b2f94dc4afbec31dac8
2024-01-26 17:48:46 -05:00

123 lines
2.8 KiB
PHP

<?php
/**
 * Unit tests for wfEscapeWikiText().
 *
 * Each case checks the exact escaped output and also that decoding the
 * HTML entities in that output yields the original input again.
 *
 * @group GlobalFunctions
 * @covers ::wfEscapeWikiText
 */
class WfEscapeWikiTextTest extends MediaWikiUnitTestCase {
	/**
	 * Escape $input and verify both the exact output and its round trip.
	 *
	 * @dataProvider provideEscape
	 * @param string $input Raw text passed to wfEscapeWikiText()
	 * @param string $expected Exact escaped string that must be returned
	 */
	public function testEscape( $input, $expected ) {
		global $wgEnableMagicLinks;
		// Save the global so it can be put back afterwards.
		$old = $wgEnableMagicLinks;
		// NOTE(review): presumably an empty list turns off magic-link
		// handling so that it cannot affect the expected strings below --
		// confirm against wfEscapeWikiText().
		$wgEnableMagicLinks = [];
		try {
			$actual = wfEscapeWikiText( $input );
			// Sanity check: decoding the entities in the output must give
			// back the original input.
			$decoded = html_entity_decode( $actual, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5 );
			$this->assertSame( $input, $decoded, 'Escaped output must decode back to the input' );
			// And the output must be exactly what we expected.
			$this->assertSame( $expected, $actual, 'Escaped output must match the expected string' );
		} finally {
			// Restore the global even when an assertion above fails, so a
			// failing case cannot leak the modified setting into other tests.
			$wgEnableMagicLinks = $old;
		}
	}

	/**
	 * Data provider for testEscape().
	 *
	 * @return string[][] Map of case name to [ input, expected escaped output ]
	 */
	public static function provideEscape() {
		return [
			'empty string' => [
				'',
				'',
			],
			'no escapes' => [
				'a',
				'a',
			],
			'braces and brackets' => [
				'[[WikiLink]] {{Template}} <html>',
				'&#91;&#91;WikiLink&#93;&#93; &#123;&#123;Template&#125;&#125; &#60;html&#62;',
			],
			'quotes' => [
				'"\'',
				'&#34;&#39;',
			],
			'tokens' => [
				'{| |- |+ !! ~~~~~ __FOO__',
				'&#123;&#124; &#124;- &#124;+ &#33;! ~~&#126;~~ _&#95;FOO_&#95;',
			],
			'start of line' => [
				"* foo\n! bar\n# bat\n:baz\n pre\n----",
				"&#42; foo\n&#33; bar\n&#35; bat\n&#58;baz\n&#32;pre\n&#45;---",
			],
			'paragraph separators' => [
				"a\n\n\n\nb",
				"a\n&#10;\n&#10;b",
			],
			'language converter' => [
				'-{ foo ; bar }-',
				'&#45;&#123; foo &#59; bar &#125;-',
			],
			'left-side context: |+' => [
				'+ foo + bar',
				'&#43; foo + bar',
			],
			'left-side context: |-' => [
				'- foo - bar',
				'&#45; foo - bar',
			],
			'left-side context: __FOO__' => [
				'_FOO__',
				'&#95;FOO_&#95;',
			],
			'left-side context: ~~~' => [
				'~~ long string here',
				'&#126;~ long string here',
			],
			'left-side context: newlines' => [
				"\n\n\nFoo",
				"&#10;\n&#10;Foo",
			],
			'right-side context: ~~~' => [
				'long string here ~~',
				'long string here ~&#126;',
			],
			'right-side context: __FOO__' => [
				'__FOO_',
				'&#95;&#95;FOO&#95;',
			],
			'right-side context: newlines' => [
				"foo\n\n\n",
				"foo\n&#10;&#10;",
			],
			// A single character input needs to be protected against both
			// left-side context and right-side context.
			'both-side context: +' => [ // | + + (left side)
				'+',
				'&#43;',
			],
			'both-side context: -' => [ // | + - (left side)
				'-',
				'&#45;',
			],
			'both-side context: _' => [ // _ + _FOO as well as __FOO_ + _
				'_',
				'&#95;',
			],
			'both-side context: ~' => [ // ~ + ~~ as well as ~~ + ~
				'~',
				'&#126;',
			],
			'both-side context: \\n' => [ // \n + \n
				"\n",
				'&#10;',
			],
			'both-side context: \\t' => [ // \n + \t + \n becomes paragraph break
				"\t",
				'&#9;',
			],
		];
	}
}