* Lua modules have been written to inspect nowiki strip state markers and extract nowiki content to further process them. Callers might have used nowikis in arguments for any number of reasons including needing to have the argument be treated as raw text instead of wikitext. While we might add first-class typing features to wikitext, templates, extensions, and the like in the future which would let Parsoid process template arguments based on type info (rather than as wikitext always), we need a solution now to enable modules to work properly with Parsoid. * The core issue is the decoupled model used by Parsoid where transclusions are preprocessed before further processing. Since nowikis cannot be processed and stripped during preprocessing, Lua modules don't have access to nowiki strip markers in this model. * In this patch, we change extension tag processing for nowikis. When generating HTML, nowikis are replaced with a 'nowiki' strip marker with the nowiki's "innerXML" (only tag contents). In this patch, during preprocessing, instead of adding a 'general' strip marker with the "outerXML" (tag contents and the tag wrapper), we add a 'nowiki' strip marker with its "outerXML". * Since Parsoid (and any clients using the preprocessed output) will unstrip all strip markers, the shift from a general to nowiki strip marker won't make a difference. * To support Scribunto and Lua modules unstrip usage, this patch adds new functionality to StripState to replace the (preprocessing-)nowiki strip markers with whatever its users want. So, Scribunto could pass in a callback that replaces these with the "innerXML" by stripping out the tag wrapper. * Hat tip to Tim Starling for recommending this strategy. * Updated strip state tests. Bug: T272507 Bug: T299103 Depends-On: Id6ea611549e98893f53094116a3851e9c42b8dc8 Change-Id: Ied0295feab06027a8df885b3215435e596f0353b
267 lines
6.1 KiB
PHP
267 lines
6.1 KiB
PHP
<?php
/**
 * Holder for stripped items when parsing wiki markup.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */

/**
 * Holder for stripped items when parsing wiki markup.
 *
 * Items are kept in two buckets, 'nowiki' and 'general', keyed by the
 * variable part of their strip marker. The unstrip methods substitute the
 * stored values back into text while tracking nesting depth and the total
 * number of bytes expanded against configurable limits.
 *
 * @newable
 * @ingroup Parser
 */
class StripState {
	/** @var array[] Stored items, indexed by type ('nowiki' or 'general'), then by marker ID */
	protected $data;
	/** @var string Regex matching a whole strip marker; capture group 1 is the marker ID */
	protected $regex;

	/** @var Parser|null Parser to notify when a limit is hit, if any */
	protected $parser;

	/** @var bool[] Marker IDs currently being expanded, used to detect reference loops */
	protected $circularRefGuard;
	/** @var int Current nesting depth of unstripType() */
	protected $depth = 0;
	/** @var int Deepest nesting reached so far, for the limit report */
	protected $highestDepth = 0;
	/** @var int Running total of bytes expanded from stored items */
	protected $expandSize = 0;

	/** @var int Maximum nesting depth; overridable through the 'depthLimit' option */
	protected $depthLimit = 20;
	/** @var int Maximum total expanded size in bytes; overridable through the 'sizeLimit' option */
	protected $sizeLimit = 5000000;

	/**
	 * @stable to call
	 *
	 * @param Parser|null $parser Parser that receives limitation warnings, or null for none
	 * @param array $options May contain 'depthLimit' and/or 'sizeLimit' to override the defaults
	 */
	public function __construct( ?Parser $parser = null, $options = [] ) {
		$this->data = [
			'nowiki' => [],
			'general' => []
		];
		$this->regex = '/' . Parser::MARKER_PREFIX . "([^\x7f<>&'\"]+)" . Parser::MARKER_SUFFIX . '/';
		$this->circularRefGuard = [];
		$this->parser = $parser;

		if ( isset( $options['depthLimit'] ) ) {
			$this->depthLimit = $options['depthLimit'];
		}
		if ( isset( $options['sizeLimit'] ) ) {
			$this->sizeLimit = $options['sizeLimit'];
		}
	}

	/**
	 * Add a nowiki strip item
	 *
	 * @param string $marker Complete strip marker
	 * @param string|Closure $value Replacement text, or a closure producing it on demand
	 */
	public function addNoWiki( $marker, $value ) {
		$this->addItem( 'nowiki', $marker, $value );
	}

	/**
	 * Add a general strip item
	 *
	 * @param string $marker Complete strip marker
	 * @param string|Closure $value Replacement text, or a closure producing it on demand
	 */
	public function addGeneral( $marker, $value ) {
		$this->addItem( 'general', $marker, $value );
	}

	/**
	 * Store a value under the ID extracted from the given marker.
	 *
	 * @throws MWException If $marker does not match the strip marker pattern
	 * @param string $type Bucket to store into: 'nowiki' or 'general'
	 * @param string $marker Complete strip marker
	 * @param string|Closure $value Replacement text, or a closure producing it on demand
	 */
	protected function addItem( $type, $marker, $value ) {
		if ( !preg_match( $this->regex, $marker, $m ) ) {
			throw new MWException( "Invalid marker: $marker" );
		}

		// Only the captured ID is used as the key, not the full marker string.
		$this->data[$type][$m[1]] = $value;
	}

	/**
	 * Replace general strip markers in the text with their stored values.
	 *
	 * @param string $text
	 * @return string
	 */
	public function unstripGeneral( $text ) {
		return $this->unstripType( 'general', $text );
	}

	/**
	 * Replace nowiki strip markers in the text with their stored values.
	 *
	 * @param string $text
	 * @return string
	 */
	public function unstripNoWiki( $text ) {
		return $this->unstripType( 'nowiki', $text );
	}

	/**
	 * Replace each nowiki strip marker with the result of passing its stored
	 * value through a caller-supplied callback.
	 *
	 * Unlike unstripNoWiki(), the stored value is not recursively unstripped
	 * and no depth or loop checks are made; only the size limit applies.
	 * Markers that are not known nowiki markers are left untouched.
	 *
	 * @param string $text
	 * @param callable $callback Receives the stored value, returns the replacement text
	 * @return string
	 */
	public function replaceNoWikis( string $text, callable $callback ): string {
		// Shortcut
		if ( !count( $this->data['nowiki'] ) ) {
			return $text;
		}

		$replacer = function ( $m ) use ( $callback ) {
			$marker = $m[1];
			if ( !isset( $this->data['nowiki'][$marker] ) ) {
				// Not a nowiki marker: keep the matched text as it is.
				return $m[0];
			}

			$value = $this->data['nowiki'][$marker];
			if ( $value instanceof Closure ) {
				// Lazily produced value
				$value = $value();
			}

			$this->expandSize += strlen( $value );
			if ( $this->expandSize > $this->sizeLimit ) {
				return $this->getLimitationWarning( 'unstrip-size', $this->sizeLimit );
			}

			return $callback( $value );
		};

		return preg_replace_callback( $this->regex, $replacer, $text );
	}

	/**
	 * Replace general strip markers, then nowiki strip markers.
	 *
	 * @param string $text
	 * @return string
	 */
	public function unstripBoth( $text ) {
		$text = $this->unstripType( 'general', $text );
		$text = $this->unstripType( 'nowiki', $text );
		return $text;
	}

	/**
	 * Recursively replace strip markers of one type with their stored values.
	 *
	 * A marker that refers back to one already being expanded, that nests
	 * deeper than the depth limit, or that pushes the running total past the
	 * size limit is replaced with warning HTML instead of its value.
	 *
	 * @param string $type Bucket to read from: 'nowiki' or 'general'
	 * @param string $text
	 * @return string
	 */
	protected function unstripType( $type, $text ) {
		// Shortcut
		if ( !count( $this->data[$type] ) ) {
			return $text;
		}

		$callback = function ( $m ) use ( $type ) {
			$marker = $m[1];
			if ( !isset( $this->data[$type][$marker] ) ) {
				// Marker of another type, or unknown: keep the matched text.
				return $m[0];
			}

			if ( isset( $this->circularRefGuard[$marker] ) ) {
				return $this->getWarning( 'parser-unstrip-loop-warning' );
			}

			if ( $this->depth > $this->highestDepth ) {
				$this->highestDepth = $this->depth;
			}
			if ( $this->depth >= $this->depthLimit ) {
				return $this->getLimitationWarning( 'unstrip-depth', $this->depthLimit );
			}

			$value = $this->data[$type][$marker];
			if ( $value instanceof Closure ) {
				// Lazily produced value
				$value = $value();
			}

			$this->expandSize += strlen( $value );
			if ( $this->expandSize > $this->sizeLimit ) {
				return $this->getLimitationWarning( 'unstrip-size', $this->sizeLimit );
			}

			$this->circularRefGuard[$marker] = true;
			$this->depth++;
			try {
				return $this->unstripType( $type, $value );
			} finally {
				// Restore the recursion state even if the nested expansion
				// throws, so that a reused StripState does not report a
				// bogus loop or an inflated depth afterwards.
				$this->depth--;
				unset( $this->circularRefGuard[$marker] );
			}
		};

		$text = preg_replace_callback( $this->regex, $callback, $text );
		return $text;
	}

	/**
	 * Get warning HTML and register a limitation warning with the parser
	 *
	 * @param string $type Limitation type; "$type-warning" is used as the message key
	 * @param int|string $max Limit value passed to the parser and to the message
	 * @return string
	 */
	private function getLimitationWarning( $type, $max = '' ) {
		if ( $this->parser ) {
			$this->parser->limitationWarn( $type, $max );
		}
		return $this->getWarning( "$type-warning", $max );
	}

	/**
	 * Get warning HTML
	 *
	 * @param string $message Message key
	 * @param int|string $max Numeric parameter for the message
	 * @return string The message text, in the content language, wrapped in an error span
	 */
	private function getWarning( $message, $max = '' ) {
		return '<span class="error">' .
			wfMessage( $message )
				->numParams( $max )->inContentLanguage()->text() .
			'</span>';
	}

	/**
	 * Get an array of parameters to pass to ParserOutput::setLimitReportData()
	 *
	 * @internal Should only be called by Parser
	 * @return array List of [ key, [ value reached, limit ] ] pairs
	 */
	public function getLimitReport() {
		return [
			[ 'limitreport-unstrip-depth',
				[
					$this->highestDepth,
					$this->depthLimit
				],
			],
			[ 'limitreport-unstrip-size',
				[
					$this->expandSize,
					$this->sizeLimit
				],
			]
		];
	}

	/**
	 * Remove any strip markers found in the given text.
	 *
	 * @param string $text
	 * @return string
	 */
	public function killMarkers( $text ) {
		return preg_replace( $this->regex, '', $text );
	}
}
|