Protect language converter markup in the preprocessor.

This ensures that `{{echo|-{R|foo}-}}` is parsed correctly as
a template invocation with a single argument, not as two separate
arguments split by the `|`.

Bug: T146304
Change-Id: I709d007c70a3fd19264790055042c615999b2f67
This commit is contained in:
C. Scott Ananian 2016-09-20 18:26:32 -04:00 committed by Tim Starling
parent 1676448145
commit 2877402276
4 changed files with 76 additions and 13 deletions

View file

@ -48,7 +48,13 @@ abstract class Preprocessor {
'names' => [ 2 => null ],
'min' => 2,
'max' => 2,
]
],
'-{' => [
'end' => '}-',
'names' => [ 1 => null ],
'min' => 1,
'max' => 1,
],
];
/**

View file

@ -193,6 +193,8 @@ class Preprocessor_DOM extends Preprocessor {
* @return string
*/
public function preprocessToXml( $text, $flags = 0 ) {
global $wgDisableLangConversion;
$forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
$xmlishElements = $this->parser->getStripList();
@ -220,6 +222,10 @@ class Preprocessor_DOM extends Preprocessor {
$stack = new PPDStack;
$searchBase = "[{<\n"; # }
if ( !$wgDisableLangConversion ) {
$searchBase .= '-';
}
// For fast reverse searches
$revText = strrev( $text );
$lengthText = strlen( $text );
@ -298,7 +304,10 @@ class Preprocessor_DOM extends Preprocessor {
break;
}
} else {
$curChar = $text[$i];
$curChar = $curTwoChar = $text[$i];
if ( ( $i + 1 ) < $lengthText ) {
$curTwoChar .= $text[$i + 1];
}
if ( $curChar == '|' ) {
$found = 'pipe';
} elseif ( $curChar == '=' ) {
@ -311,11 +320,20 @@ class Preprocessor_DOM extends Preprocessor {
} else {
$found = 'line-start';
}
} elseif ( $curTwoChar == $currentClosing ) {
$found = 'close';
$curChar = $curTwoChar;
} elseif ( $curChar == $currentClosing ) {
$found = 'close';
} elseif ( isset( $this->rules[$curTwoChar] ) ) {
$curChar = $curTwoChar;
$found = 'open';
$rule = $this->rules[$curChar];
} elseif ( isset( $this->rules[$curChar] ) ) {
$found = 'open';
$rule = $this->rules[$curChar];
} elseif ( $curChar == '-' ) {
$found = 'dash';
} else {
# Some versions of PHP have a strcspn which stops on null characters
# Ignore and continue
@ -595,7 +613,8 @@ class Preprocessor_DOM extends Preprocessor {
// input pointer.
} elseif ( $found == 'open' ) {
# count opening brace characters
$count = strspn( $text, $curChar, $i );
$curLen = strlen( $curChar );
$count = ( $curLen > 1 ) ? 1 : strspn( $text, $curChar, $i );
# we need to add to stack only if opening brace count is enough for one of the rules
if ( $count >= $rule['min'] ) {
@ -615,12 +634,13 @@ class Preprocessor_DOM extends Preprocessor {
# Add literal brace(s)
$accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
}
$i += $count;
$i += $curLen * $count;
} elseif ( $found == 'close' ) {
$piece = $stack->top;
# let's check if there are enough characters for the closing brace
$maxCount = $piece->count;
$count = strspn( $text, $curChar, $i, $maxCount );
$curLen = strlen( $curChar );
$count = ( $curLen > 1 ) ? 1 : strspn( $text, $curChar, $i, $maxCount );
# check for maximum matching characters (if there are 5 closing
# characters, we will probably need only 3 - depending on the rules)
@ -643,7 +663,7 @@ class Preprocessor_DOM extends Preprocessor {
# No matching element found in callback array
# Output a literal closing brace and continue
$accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
$i += $count;
$i += $curLen * $count;
continue;
}
$name = $rule['names'][$matchingCount];
@ -682,7 +702,7 @@ class Preprocessor_DOM extends Preprocessor {
}
# Advance input pointer
$i += $matchingCount;
$i += $curLen * $matchingCount;
# Unwind the stack
$stack->pop();
@ -716,6 +736,9 @@ class Preprocessor_DOM extends Preprocessor {
$stack->getCurrentPart()->eqpos = strlen( $accum );
$accum .= '=';
++$i;
} elseif ( $found == 'dash' ) {
$accum .= '-';
++$i;
}
}

View file

@ -117,6 +117,8 @@ class Preprocessor_Hash extends Preprocessor {
* @return PPNode_Hash_Tree
*/
public function preprocessToObj( $text, $flags = 0 ) {
global $wgDisableLangConversion;
$tree = $this->cacheGetTree( $text, $flags );
if ( $tree !== false ) {
$store = json_decode( $tree );
@ -152,6 +154,10 @@ class Preprocessor_Hash extends Preprocessor {
$stack = new PPDStack_Hash;
$searchBase = "[{<\n";
if ( !$wgDisableLangConversion ) {
$searchBase .= '-';
}
// For fast reverse searches
$revText = strrev( $text );
$lengthText = strlen( $text );
@ -229,7 +235,10 @@ class Preprocessor_Hash extends Preprocessor {
break;
}
} else {
$curChar = $text[$i];
$curChar = $curTwoChar = $text[$i];
if ( ( $i + 1 ) < $lengthText ) {
$curTwoChar .= $text[$i + 1];
}
if ( $curChar == '|' ) {
$found = 'pipe';
} elseif ( $curChar == '=' ) {
@ -242,11 +251,20 @@ class Preprocessor_Hash extends Preprocessor {
} else {
$found = 'line-start';
}
} elseif ( $curTwoChar == $currentClosing ) {
$found = 'close';
$curChar = $curTwoChar;
} elseif ( $curChar == $currentClosing ) {
$found = 'close';
} elseif ( isset( $this->rules[$curTwoChar] ) ) {
$curChar = $curTwoChar;
$found = 'open';
$rule = $this->rules[$curChar];
} elseif ( isset( $this->rules[$curChar] ) ) {
$found = 'open';
$rule = $this->rules[$curChar];
} elseif ( $curChar == '-' ) {
$found = 'dash';
} else {
# Some versions of PHP have a strcspn which stops on null characters
# Ignore and continue
@ -538,7 +556,8 @@ class Preprocessor_Hash extends Preprocessor {
// input pointer.
} elseif ( $found == 'open' ) {
# count opening brace characters
$count = strspn( $text, $curChar, $i );
$curLen = strlen( $curChar );
$count = ( $curLen > 1 ) ? 1 : strspn( $text, $curChar, $i );
# we need to add to stack only if opening brace count is enough for one of the rules
if ( $count >= $rule['min'] ) {
@ -557,12 +576,13 @@ class Preprocessor_Hash extends Preprocessor {
# Add literal brace(s)
self::addLiteral( $accum, str_repeat( $curChar, $count ) );
}
$i += $count;
$i += $curLen * $count;
} elseif ( $found == 'close' ) {
$piece = $stack->top;
# let's check if there are enough characters for the closing brace
$maxCount = $piece->count;
$count = strspn( $text, $curChar, $i, $maxCount );
$curLen = strlen( $curChar );
$count = ( $curLen > 1 ) ? 1 : strspn( $text, $curChar, $i, $maxCount );
# check for maximum matching characters (if there are 5 closing
# characters, we will probably need only 3 - depending on the rules)
@ -585,7 +605,7 @@ class Preprocessor_Hash extends Preprocessor {
# No matching element found in callback array
# Output a literal closing brace and continue
self::addLiteral( $accum, str_repeat( $curChar, $count ) );
$i += $count;
$i += $curLen * $count;
continue;
}
$name = $rule['names'][$matchingCount];
@ -627,7 +647,7 @@ class Preprocessor_Hash extends Preprocessor {
}
# Advance input pointer
$i += $matchingCount;
$i += $curLen * $matchingCount;
# Unwind the stack
$stack->pop();
@ -661,6 +681,9 @@ class Preprocessor_Hash extends Preprocessor {
$accum[] = [ 'equals', [ '=' ] ];
$stack->getCurrentPart()->eqpos = count( $accum ) - 1;
++$i;
} elseif ( $found == 'dash' ) {
self::addLiteral( $accum, '-' );
++$i;
}
}

View file

@ -20595,6 +20595,17 @@ language=sr variant=sr-ec
</p>
!! end
!! test
T146304: Don't break template parsing if language converter markup is in the parameter.
!! options
language=sr variant=sr-ec
!! wikitext
{{echo|-{R|foo}-}}
!! html/php
<p>foo
</p>
!! end
# FIXME: This test is currently broken in the PHP parser (bug 52661)
!! test
Don't break image parsing if language converter markup is in the caption.