wiki.techinc.nl/includes/parser/StripState.php
Subramanya Sastry c8a944a94b Add support to enable Scribunto & Parsoid to handle nowikis properly
* Lua modules have been written to inspect nowiki strip state markers
  and extract nowiki content to further process them. Callers might have
  used nowikis in arguments for any number of reasons including needing
  to have the argument be treated as raw text instead of wikitext.

  While we might add first-class typing features to wikitext, templates,
  extensions, and the like in the future which would let Parsoid process
  template arguments based on type info (rather than as wikitext always),
  we need a solution now to enable modules to work properly with Parsoid.

* The core issue is the decoupled model used by Parsoid where
  transclusions are preprocessed before further processing. Since
  nowikis cannot be processed and stripped during preprocessing,
  Lua modules don't have access to nowiki strip markers in this model.

* In this patch, we change extension tag processing for nowikis.

  When generating HTML, nowikis are replaced with a 'nowiki' strip
  marker with the nowiki's "innerXML" (only tag contents).

  In this patch, during preprocessing, instead of adding a 'general'
  strip marker with the "outerXML" (tag contents and the tag wrapper),
  we add a 'nowiki' strip marker with its "outerXML".

* Since Parsoid (and any clients using the preprocessed output) will
  unstrip all strip markers, the shift from a general to nowiki
  strip marker won't make a difference.

* To support Scribunto and Lua modules unstrip usage, this patch adds
  new functionality to StripState to replace the (preprocessing-)nowiki
  strip markers with whatever its users want. So, Scribunto could
  pass in a callback that replaces these with the "innerXML" by
  stripping out the tag wrapper.

* Hat tip to Tim Starling for recommending this strategy.

* Updated strip state tests.

Bug: T272507
Bug: T299103
Depends-On: Id6ea611549e98893f53094116a3851e9c42b8dc8
Change-Id: Ied0295feab06027a8df885b3215435e596f0353b
2022-09-01 21:04:42 +00:00

267 lines
6.1 KiB
PHP

<?php
/**
* Holder for stripped items when parsing wiki markup.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Parser
*/
/**
 * Holder for content that was stripped out of wikitext and replaced by
 * strip markers.
 *
 * Stripped values are stored per marker ID in one of two buckets ('nowiki'
 * or 'general') and can later be substituted back into any text containing
 * the corresponding markers. Substitution is bounded by a recursion depth
 * limit and by a cumulative expansion size limit; both are reported through
 * getLimitReport().
 *
 * @newable
 * @ingroup Parser
 */
class StripState {
	/** @var array[] Stripped values (string or Closure), keyed by type and then by marker ID */
	protected $data;
	/** @var string PCRE pattern matching one strip marker; group 1 captures the marker ID */
	protected $regex;
	/** @var Parser|null Parser that is notified about limitation warnings, if one was given */
	protected $parser;
	/** @var bool[] Marker IDs whose expansion is currently in progress (loop detection) */
	protected $circularRefGuard;
	/** @var int Current recursion depth of unstripType() */
	protected $depth = 0;
	/** @var int Highest recursion depth observed so far */
	protected $highestDepth = 0;
	/** @var int Running total of strlen() over all values expanded so far */
	protected $expandSize = 0;
	/** @var int Depth at which further recursive expansion is refused */
	protected $depthLimit = 20;
	/** @var int Value of $expandSize above which further expansion is refused */
	protected $sizeLimit = 5000000;

	/**
	 * @stable to call
	 *
	 * @param Parser|null $parser Parser to notify about limitation warnings
	 * @param array $options Recognised keys: 'depthLimit' and 'sizeLimit',
	 *   each overriding the corresponding default when set
	 */
	public function __construct( ?Parser $parser = null, $options = [] ) {
		$this->data = [
			'nowiki' => [],
			'general' => []
		];
		$this->regex = '/' . Parser::MARKER_PREFIX . "([^\x7f<>&'\"]+)" . Parser::MARKER_SUFFIX . '/';
		$this->circularRefGuard = [];
		$this->parser = $parser;

		if ( isset( $options['depthLimit'] ) ) {
			$this->depthLimit = $options['depthLimit'];
		}
		if ( isset( $options['sizeLimit'] ) ) {
			$this->sizeLimit = $options['sizeLimit'];
		}
	}

	/**
	 * Add a nowiki strip item
	 *
	 * @param string $marker Full strip marker text
	 * @param string|Closure $value Stripped content, or a Closure producing it lazily
	 */
	public function addNoWiki( $marker, $value ) {
		$this->addItem( 'nowiki', $marker, $value );
	}

	/**
	 * Add a general strip item
	 *
	 * @param string $marker Full strip marker text
	 * @param string|Closure $value Stripped content, or a Closure producing it lazily
	 */
	public function addGeneral( $marker, $value ) {
		$this->addItem( 'general', $marker, $value );
	}

	/**
	 * Store a value under the ID extracted from the given marker.
	 *
	 * @throws MWException If $marker does not match the strip marker pattern
	 * @param string $type Bucket to store into ('nowiki' or 'general')
	 * @param string $marker Full strip marker text
	 * @param string|Closure $value
	 */
	protected function addItem( $type, $marker, $value ) {
		if ( !preg_match( $this->regex, $marker, $m ) ) {
			throw new MWException( "Invalid marker: $marker" );
		}
		// Only the captured ID is used as the key, not the full marker text.
		$this->data[$type][$m[1]] = $value;
	}

	/**
	 * Replace 'general' strip markers in the text by their stored content.
	 *
	 * @param string $text
	 * @return string|null Null only if preg_replace_callback() reports an error
	 */
	public function unstripGeneral( $text ) {
		return $this->unstripType( 'general', $text );
	}

	/**
	 * Replace 'nowiki' strip markers in the text by their stored content.
	 *
	 * @param string $text
	 * @return string|null Null only if preg_replace_callback() reports an error
	 */
	public function unstripNoWiki( $text ) {
		return $this->unstripType( 'nowiki', $text );
	}

	/**
	 * Replace every 'nowiki' strip marker in the text by the result of passing
	 * its stored content to $callback.
	 *
	 * Unlike unstripType(), the replacement is not expanded recursively and is
	 * not subject to the depth limit; it does count towards the size limit.
	 *
	 * @param string $text
	 * @param callable $callback Receives the stored content, returns the replacement
	 * @return string
	 */
	public function replaceNoWikis( string $text, callable $callback ): string {
		// Shortcut: nothing was stripped as nowiki
		if ( !count( $this->data['nowiki'] ) ) {
			return $text;
		}

		$replaceMarker = function ( $m ) use ( $callback ) {
			$marker = $m[1];
			if ( !isset( $this->data['nowiki'][$marker] ) ) {
				// Not a known nowiki marker: leave the matched text untouched
				return $m[0];
			}

			$value = $this->data['nowiki'][$marker];
			if ( $value instanceof Closure ) {
				$value = $value();
			}

			$this->expandSize += strlen( $value );
			if ( $this->expandSize > $this->sizeLimit ) {
				return $this->getLimitationWarning( 'unstrip-size', $this->sizeLimit );
			}

			return $callback( $value );
		};

		return preg_replace_callback( $this->regex, $replaceMarker, $text );
	}

	/**
	 * Replace 'general' markers first, then 'nowiki' markers.
	 *
	 * @param string $text
	 * @return string|null Null only if preg_replace_callback() reports an error
	 */
	public function unstripBoth( $text ) {
		$text = $this->unstripType( 'general', $text );
		$text = $this->unstripType( 'nowiki', $text );
		return $text;
	}

	/**
	 * Recursively replace the markers of one type by their stored content.
	 *
	 * Markers that are unknown for this type are left in place. A marker that
	 * is reached again while it is still being expanded, or whose expansion
	 * would exceed the depth or size limit, is replaced by a warning instead.
	 *
	 * @param string $type 'nowiki' or 'general'
	 * @param string $text
	 * @return string|null Null only if preg_replace_callback() reports an error
	 */
	protected function unstripType( $type, $text ) {
		// Shortcut: nothing of this type was stripped
		if ( !count( $this->data[$type] ) ) {
			return $text;
		}

		$expandMarker = function ( $m ) use ( $type ) {
			$marker = $m[1];
			if ( !isset( $this->data[$type][$marker] ) ) {
				return $m[0];
			}

			if ( isset( $this->circularRefGuard[$marker] ) ) {
				return $this->getWarning( 'parser-unstrip-loop-warning' );
			}

			// Record the depth before the limit check so that a refused
			// expansion still shows up in the limit report.
			if ( $this->depth > $this->highestDepth ) {
				$this->highestDepth = $this->depth;
			}
			if ( $this->depth >= $this->depthLimit ) {
				return $this->getLimitationWarning( 'unstrip-depth', $this->depthLimit );
			}

			$value = $this->data[$type][$marker];
			if ( $value instanceof Closure ) {
				$value = $value();
			}

			$this->expandSize += strlen( $value );
			if ( $this->expandSize > $this->sizeLimit ) {
				return $this->getLimitationWarning( 'unstrip-size', $this->sizeLimit );
			}

			$this->circularRefGuard[$marker] = true;
			$this->depth++;
			try {
				return $this->unstripType( $type, $value );
			} finally {
				// Always undo the bookkeeping: if a nested lazy value throws,
				// the marker must not stay flagged as "in progress" and the
				// depth must not stay raised for later calls on this object.
				$this->depth--;
				unset( $this->circularRefGuard[$marker] );
			}
		};

		return preg_replace_callback( $this->regex, $expandMarker, $text );
	}

	/**
	 * Get warning HTML and register a limitation warning with the parser
	 *
	 * The parser is only notified when one was passed to the constructor.
	 *
	 * @param string $type Limitation type; "$type-warning" is used as the message key
	 * @param int|string $max
	 * @return string
	 */
	private function getLimitationWarning( $type, $max = '' ) {
		if ( $this->parser ) {
			$this->parser->limitationWarn( $type, $max );
		}
		return $this->getWarning( "$type-warning", $max );
	}

	/**
	 * Get warning HTML
	 *
	 * @param string $message Message key passed to wfMessage()
	 * @param int|string $max Passed to the message as a numeric parameter
	 * @return string The message text wrapped in a span with class "error"
	 */
	private function getWarning( $message, $max = '' ) {
		$text = wfMessage( $message )
			->numParams( $max )->inContentLanguage()->text();
		return '<span class="error">' . $text . '</span>';
	}

	/**
	 * Get an array of parameters to pass to ParserOutput::setLimitReportData()
	 *
	 * @internal Should only be called by Parser
	 * @return array List of [ key, [ observed value, limit ] ] pairs
	 */
	public function getLimitReport() {
		return [
			[ 'limitreport-unstrip-depth',
				[
					$this->highestDepth,
					$this->depthLimit
				],
			],
			[ 'limitreport-unstrip-size',
				[
					$this->expandSize,
					$this->sizeLimit
				],
			]
		];
	}

	/**
	 * Remove any strip markers found in the given text.
	 *
	 * Every match of the marker pattern is removed, whether or not a value
	 * is stored for it.
	 *
	 * @param string $text
	 * @return string
	 */
	public function killMarkers( $text ) {
		return preg_replace( $this->regex, '', $text );
	}
}