To prevent large template DOM caches from sending servers into swap, throw an exception when more than a configured number of DOM elements are parsed. Unfortunately, it wasn't possible to return a normal error message, because doing so broke the pre-save transform (PST) and extractSections, and corrupted the article text. It's safer to refuse to save the edit, and we don't have a decent way to do that short of throwing an exception. Ideally we would like to have an upstream patch that hooks libxml to allocate memory from PHP's request pool, so that a fatal error would be raised instead of the server swapping. Change-Id: I4cb4f6fd313e1e0940b56cc5e586afd1bea9267a
1734 lines
48 KiB
PHP
1734 lines
48 KiB
PHP
<?php
|
|
/**
|
|
* Preprocessor using PHP's dom extension
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*
|
|
* @file
|
|
* @ingroup Parser
|
|
*/
|
|
|
|
/**
|
|
* @ingroup Parser
|
|
*/
|
|
class Preprocessor_DOM implements Preprocessor {
|
|
|
|
/**
|
|
* @var Parser
|
|
*/
|
|
var $parser;
|
|
|
|
var $memoryLimit;
|
|
|
|
const CACHE_VERSION = 1;
|
|
|
|
/**
 * Store the parser and work out PHP's memory limit in bytes.
 *
 * $this->memoryLimit stays false when memory_limit is empty, -1, or in a
 * form that is not recognised; memCheck() then always returns true.
 *
 * @param $parser Parser
 */
function __construct( $parser ) {
	$this->parser = $parser;
	$mem = ini_get( 'memory_limit' );
	$this->memoryLimit = false;
	if ( strval( $mem ) !== '' && $mem != -1 ) {
		if ( preg_match( '/^\d+$/', $mem ) ) {
			// Plain number of bytes
			$this->memoryLimit = $mem;
		} elseif ( preg_match( '/^(\d+)([KMG])$/i', $mem, $m ) ) {
			// PHP shorthand byte notation: K, M and G are powers of 1024.
			// Previously only "M" was understood, so e.g. "1G" was ignored.
			$multipliers = array(
				'K' => 1024,
				'M' => 1048576,
				'G' => 1073741824,
			);
			$this->memoryLimit = $m[1] * $multipliers[strtoupper( $m[2] )];
		}
	}
}
|
|
|
|
/**
 * Create a new expansion frame bound to this preprocessor.
 *
 * @return PPFrame_DOM
 */
function newFrame() {
	$frame = new PPFrame_DOM( $this );
	return $frame;
}
|
|
|
|
/**
 * Create a new custom frame bound to this preprocessor, passing $args on
 * to its constructor.
 *
 * @param $args array
 * @return PPCustomFrame_DOM
 */
function newCustomFrame( $args ) {
	$frame = new PPCustomFrame_DOM( $this, $args );
	return $frame;
}
|
|
|
|
/**
 * Build a node list of <part> elements from an array of values.
 *
 * Integer keys become <name index="N"/>; any other key becomes an escaped
 * <name>key</name> followed by "=". Every value is escaped into <value>.
 *
 * @param $values
 * @return PPNode_DOM wrapping the child nodes of the generated <list>
 */
function newPartNodeArray( $values ) {
	// The XML is assembled as a string and parsed in one go; the original
	// author notes this is reportedly faster than manipulating the DOM.
	$parts = array();
	foreach ( $values as $key => $value ) {
		if ( is_int( $key ) ) {
			$nameXml = '<name index="' . $key . '"/>';
		} else {
			$nameXml = '<name>' . htmlspecialchars( $key ) . '</name>=';
		}
		$parts[] = '<part>' . $nameXml . '<value>' . htmlspecialchars( $value ) . '</value></part>';
	}

	$doc = new DOMDocument();
	$doc->loadXML( '<list>' . implode( '', $parts ) . '</list>' );

	return new PPNode_DOM( $doc->documentElement->childNodes );
}
|
|
|
|
/**
 * Compare current memory usage with the limit found by the constructor.
 *
 * @throws MWException if usage is above 90% of the limit
 * @return bool true if no limit is known or usage is at most 80% of it,
 *   false if usage lies between 80% and 90%
 */
function memCheck() {
	if ( $this->memoryLimit === false ) {
		// No usable limit to check against
		return true;
	}

	$used = memory_get_usage();
	$hardCap = $this->memoryLimit * 0.9;
	if ( $used > $hardCap ) {
		// Report the cap rounded to the nearest megabyte
		$limit = intval( $hardCap / 1048576 + 0.5 );
		throw new MWException( "Preprocessor hit 90% memory limit ($limit MB)" );
	}

	$softCap = $this->memoryLimit * 0.8;
	return $used <= $softCap;
}
|
|
|
|
/**
 * Preprocess some wikitext and return the document tree.
 * This is the ghost of Parser::replace_variables().
 *
 * @param $text String: the text to parse
 * @param $flags Integer: bitwise combination of:
 *          Parser::PTD_FOR_INCLUSION    Handle "<noinclude>" and "<includeonly>" as if the text is being
 *                                     included. Default is to assume a direct page view.
 *
 * The generated DOM tree must depend only on the input text and the flags.
 * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899.
 *
 * Any flag added to the $flags parameter here, or any other parameter liable to cause a
 * change in the DOM tree for a given text, must be passed through the section identifier
 * in the section edit link and thus back to extractSections().
 *
 * When $wgPreprocessorCacheThreshold is not false and the text is longer than it, the
 * generated XML is stored in $wgMemc for 86400 seconds, keyed on the MD5 of the text and
 * the flags, and prefixed with an 8-digit CACHE_VERSION. This relies on the strict
 * dependency requirements above.
 *
 * The number of "<" characters in the XML is added to the parser's generated node count
 * before any DOM is built; if the configured maximum is exceeded, no DOM is created.
 *
 * @throws MWException if the generated node count limit is exceeded, or if the XML
 *   cannot be loaded even after UtfNormal::cleanUp()
 * @return PPNode_DOM
 */
function preprocessToObj( $text, $flags = 0 ) {
	wfProfileIn( __METHOD__ );
	global $wgMemc, $wgPreprocessorCacheThreshold;

	$xml = false;
	$cacheable = ( $wgPreprocessorCacheThreshold !== false
		&& strlen( $text ) > $wgPreprocessorCacheThreshold );
	if ( $cacheable ) {
		wfProfileIn( __METHOD__.'-cacheable' );

		$cacheKey = wfMemcKey( 'preprocess-xml', md5($text), $flags );
		$cacheValue = $wgMemc->get( $cacheKey );
		if ( $cacheValue ) {
			// The first 8 characters hold the cache format version
			$version = substr( $cacheValue, 0, 8 );
			if ( intval( $version ) == self::CACHE_VERSION ) {
				$xml = substr( $cacheValue, 8 );
				// From the cache
				wfDebugLog( "Preprocessor", "Loaded preprocessor XML from memcached (key $cacheKey)" );
			}
		}
	}
	if ( $xml === false ) {
		if ( $cacheable ) {
			wfProfileIn( __METHOD__.'-cache-miss' );
			$xml = $this->preprocessToXml( $text, $flags );
			$cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $xml;
			$wgMemc->set( $cacheKey, $cacheValue, 86400 );
			wfProfileOut( __METHOD__.'-cache-miss' );
			wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" );
		} else {
			$xml = $this->preprocessToXml( $text, $flags );
		}
	}

	// Fail if the number of elements exceeds acceptable limits
	// Do not attempt to generate the DOM
	$this->parser->mGeneratedPPNodeCount += substr_count( $xml, '<' );
	$max = $this->parser->mOptions->getMaxGeneratedPPNodeCount();
	if ( $this->parser->mGeneratedPPNodeCount > $max ) {
		// Close every profiling section opened above before bailing out
		if ( $cacheable ) {
			wfProfileOut( __METHOD__.'-cacheable' );
		}
		wfProfileOut( __METHOD__ );
		throw new MWException( __METHOD__.': generated node count limit exceeded' );
	}

	wfProfileIn( __METHOD__.'-loadXML' );
	$dom = new DOMDocument;
	wfSuppressWarnings();
	$result = $dom->loadXML( $xml );
	wfRestoreWarnings();
	if ( !$result ) {
		// Try running the XML through UtfNormal to get rid of invalid characters
		$xml = UtfNormal::cleanUp( $xml );
		// 1 << 19 == XML_PARSE_HUGE, needed so newer versions of libxml2 don't barf when the XML is >256 levels deep
		$result = $dom->loadXML( $xml, 1 << 19 );
		if ( !$result ) {
			// Close every profiling section opened above before bailing out
			wfProfileOut( __METHOD__.'-loadXML' );
			if ( $cacheable ) {
				wfProfileOut( __METHOD__.'-cacheable' );
			}
			wfProfileOut( __METHOD__ );
			throw new MWException( __METHOD__.' generated invalid XML' );
		}
	}
	$obj = new PPNode_DOM( $dom->documentElement );
	wfProfileOut( __METHOD__.'-loadXML' );
	if ( $cacheable ) {
		wfProfileOut( __METHOD__.'-cacheable' );
	}
	wfProfileOut( __METHOD__ );
	return $obj;
}
|
|
|
|
/**
 * Scan wikitext once and return an XML string describing its preprocessor
 * structure, wrapped in <root>...</root>.
 *
 * The scanner emits:
 *  - <ignore> for skipped include-control tags and sections,
 *  - <comment> for HTML comments,
 *  - <ext> (with <name>, <attr>, optional <inner> and <close>) for tags
 *    listed by the parser's strip list,
 *  - <h level="" i=""> for headings,
 *  - <template> / <tplarg> (with <title> and <part> children) for double
 *    and triple braces.
 * All other text is escaped with htmlspecialchars(), so the result must
 * never contain a raw "<" that is not part of one of these elements.
 *
 * @param $text string
 * @param $flags int bitfield; only Parser::PTD_FOR_INCLUSION is examined here
 * @return string
 */
function preprocessToXml( $text, $flags = 0 ) {
	wfProfileIn( __METHOD__ );
	// Bracket rules: closing character, element name per bracket count
	// (null means "no element, plain text"), and the counts allowed.
	$rules = array(
		'{' => array(
			'end' => '}',
			'names' => array(
				2 => 'template',
				3 => 'tplarg',
			),
			'min' => 2,
			'max' => 3,
		),
		'[' => array(
			'end' => ']',
			'names' => array( 2 => null ),
			'min' => 2,
			'max' => 2,
		)
	);

	$forInclusion = $flags & Parser::PTD_FOR_INCLUSION;

	$xmlishElements = $this->parser->getStripList();
	$enableOnlyinclude = false;
	if ( $forInclusion ) {
		$ignoredTags = array( 'includeonly', '/includeonly' );
		$ignoredElements = array( 'noinclude' );
		$xmlishElements[] = 'noinclude';
		if ( strpos( $text, '<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) {
			$enableOnlyinclude = true;
		}
	} else {
		$ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' );
		$ignoredElements = array( 'includeonly' );
		$xmlishElements[] = 'includeonly';
	}
	$xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );

	// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
	$elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";

	$stack = new PPDStack;

	$searchBase = "[{<\n"; #}
	$revText = strrev( $text ); // For fast reverse searches
	$lengthText = strlen( $text );

	$i = 0;                     # Input pointer, starts out pointing to a pseudo-newline before the start
	$accum =& $stack->getAccum();   # Current accumulator
	$accum = '<root>';
	$findEquals = false;            # True to find equals signs in arguments
	$findPipe = false;              # True to take notice of pipe characters
	$headingIndex = 1;
	$inHeading = false;        # True if $i is inside a possible heading
	$noMoreGT = false;         # True if there are no more greater-than (>) signs right of $i
	$findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude>
	$fakeLineStart = true;     # Do a line-start run without outputting an LF character

	while ( true ) {
		//$this->memCheck();

		if ( $findOnlyinclude ) {
			// Ignore all input up to the next <onlyinclude>
			$startPos = strpos( $text, '<onlyinclude>', $i );
			if ( $startPos === false ) {
				// Ignored section runs to the end
				$accum .= '<ignore>' . htmlspecialchars( substr( $text, $i ) ) . '</ignore>';
				break;
			}
			$tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end
			$accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i ) ) . '</ignore>';
			$i = $tagEndPos;
			$findOnlyinclude = false;
		}

		if ( $fakeLineStart ) {
			$found = 'line-start';
			$curChar = '';
		} else {
			# Find next opening brace, closing brace or pipe
			$search = $searchBase;
			if ( $stack->top === false ) {
				$currentClosing = '';
			} else {
				$currentClosing = $stack->top->close;
				$search .= $currentClosing;
			}
			if ( $findPipe ) {
				$search .= '|';
			}
			if ( $findEquals ) {
				// First equals will be for the template
				$search .= '=';
			}
			$rule = null;
			# Output literal section, advance input counter
			$literalLength = strcspn( $text, $search, $i );
			if ( $literalLength > 0 ) {
				$accum .= htmlspecialchars( substr( $text, $i, $literalLength ) );
				$i += $literalLength;
			}
			if ( $i >= $lengthText ) {
				if ( $currentClosing == "\n" ) {
					// Do a past-the-end run to finish off the heading
					$curChar = '';
					$found = 'line-end';
				} else {
					# All done
					break;
				}
			} else {
				$curChar = $text[$i];
				if ( $curChar == '|' ) {
					$found = 'pipe';
				} elseif ( $curChar == '=' ) {
					$found = 'equals';
				} elseif ( $curChar == '<' ) {
					$found = 'angle';
				} elseif ( $curChar == "\n" ) {
					if ( $inHeading ) {
						$found = 'line-end';
					} else {
						$found = 'line-start';
					}
				} elseif ( $curChar == $currentClosing ) {
					$found = 'close';
				} elseif ( isset( $rules[$curChar] ) ) {
					$found = 'open';
					$rule = $rules[$curChar];
				} else {
					# Some versions of PHP have a strcspn which stops on null characters
					# Ignore and continue
					++$i;
					continue;
				}
			}
		}

		if ( $found == 'angle' ) {
			$matches = false;
			// Handle </onlyinclude>
			if ( $enableOnlyinclude && substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>' ) {
				$findOnlyinclude = true;
				continue;
			}

			// Determine element name
			if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) {
				// Element name missing or not listed.
				// The "<" is literal text, so it has to be escaped to keep the XML well-formed.
				$accum .= '&lt;';
				++$i;
				continue;
			}
			// Handle comments
			if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
				// To avoid leaving blank lines, when a comment is both preceded
				// and followed by a newline (ignoring spaces), trim leading and
				// trailing spaces and one of the newlines.

				// Find the end
				$endPos = strpos( $text, '-->', $i + 4 );
				if ( $endPos === false ) {
					// Unclosed comment in input, runs to end
					$inner = substr( $text, $i );
					$accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
					$i = $lengthText;
				} else {
					// Search backwards for leading whitespace
					$wsStart = $i ? ( $i - strspn( $revText, ' ', $lengthText - $i ) ) : 0;
					// Search forwards for trailing whitespace
					// $wsEnd will be the position of the last space (or the '>' if there's none)
					$wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
					// Eat the line if possible
					// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
					// the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
					// it's a possible beneficial b/c break.
					if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
						&& substr( $text, $wsEnd + 1, 1 ) == "\n" )
					{
						$startPos = $wsStart;
						$endPos = $wsEnd + 1;
						// Remove leading whitespace from the end of the accumulator
						// Sanity check first though
						$wsLength = $i - $wsStart;
						if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) {
							$accum = substr( $accum, 0, -$wsLength );
						}
						// Do a line-start run next time to look for headings after the comment
						$fakeLineStart = true;
					} else {
						// No line to eat, just take the comment itself
						$startPos = $i;
						$endPos += 2;
					}

					if ( $stack->top ) {
						$part = $stack->top->getCurrentPart();
						if ( ! (isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) {
							$part->visualEnd = $wsStart;
						}
						// Else comments abutting, no change in visual end
						$part->commentEnd = $endPos;
					}
					$i = $endPos + 1;
					$inner = substr( $text, $startPos, $endPos - $startPos + 1 );
					$accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
				}
				continue;
			}
			$name = $matches[1];
			$lowerName = strtolower( $name );
			$attrStart = $i + strlen( $name ) + 1;

			// Find end of tag
			$tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart );
			if ( $tagEndPos === false ) {
				// Infinite backtrack
				// Disable tag search to prevent worst-case O(N^2) performance
				$noMoreGT = true;
				// Literal "<": escape it, as in the "not listed" case above
				$accum .= '&lt;';
				++$i;
				continue;
			}

			// Handle ignored tags
			if ( in_array( $lowerName, $ignoredTags ) ) {
				$accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i + 1 ) ) . '</ignore>';
				$i = $tagEndPos + 1;
				continue;
			}

			$tagStartPos = $i;
			if ( $text[$tagEndPos-1] == '/' ) {
				// Self-closing tag: no inner text and no close tag
				$attrEnd = $tagEndPos - 1;
				$inner = null;
				$i = $tagEndPos + 1;
				$close = '';
			} else {
				$attrEnd = $tagEndPos;
				// Find closing tag
				if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
					$text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
				{
					$inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
					$i = $matches[0][1] + strlen( $matches[0][0] );
					$close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>';
				} else {
					// No end tag -- let it run out to the end of the text.
					$inner = substr( $text, $tagEndPos + 1 );
					$i = $lengthText;
					$close = '';
				}
			}
			// <includeonly> and <noinclude> just become <ignore> tags
			if ( in_array( $lowerName, $ignoredElements ) ) {
				$accum .= '<ignore>' . htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) )
					. '</ignore>';
				continue;
			}

			$accum .= '<ext>';
			if ( $attrEnd <= $attrStart ) {
				$attr = '';
			} else {
				$attr = substr( $text, $attrStart, $attrEnd - $attrStart );
			}
			$accum .= '<name>' . htmlspecialchars( $name ) . '</name>' .
				// Note that the attr element contains the whitespace between name and attribute,
				// this is necessary for precise reconstruction during pre-save transform.
				'<attr>' . htmlspecialchars( $attr ) . '</attr>';
			if ( $inner !== null ) {
				$accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>';
			}
			$accum .= $close . '</ext>';
		} elseif ( $found == 'line-start' ) {
			// Is this the start of a heading?
			// Line break belongs before the heading element in any case
			if ( $fakeLineStart ) {
				$fakeLineStart = false;
			} else {
				$accum .= $curChar;
				$i++;
			}

			$count = strspn( $text, '=', $i, 6 );
			if ( $count == 1 && $findEquals ) {
				// DWIM: This looks kind of like a name/value separator
				// Let's let the equals handler have it and break the potential heading
				// This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex.
			} elseif ( $count > 0 ) {
				$piece = array(
					'open' => "\n",
					'close' => "\n",
					'parts' => array( new PPDPart( str_repeat( '=', $count ) ) ),
					'startPos' => $i,
					'count' => $count );
				$stack->push( $piece );
				$accum =& $stack->getAccum();
				// Import findPipe, findEquals and inHeading for the new stack top.
				// A separate variable is used so the $flags parameter is not overwritten.
				$stackFlags = $stack->getFlags();
				extract( $stackFlags );
				$i += $count;
			}
		} elseif ( $found == 'line-end' ) {
			$piece = $stack->top;
			// A heading must be open, otherwise \n wouldn't have been in the search list
			assert( $piece->open == "\n" );
			$part = $piece->getCurrentPart();
			// Search back through the input to see if it has a proper close
			// Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient
			$wsLength = strspn( $revText, " \t", $lengthText - $i );
			$searchStart = $i - $wsLength;
			if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) {
				// Comment found at line end
				// Search for equals signs before the comment
				$searchStart = $part->visualEnd;
				$searchStart -= strspn( $revText, " \t", $lengthText - $searchStart );
			}
			$count = $piece->count;
			$equalsLength = strspn( $revText, '=', $lengthText - $searchStart );
			if ( $equalsLength > 0 ) {
				if ( $searchStart - $equalsLength == $piece->startPos ) {
					// This is just a single string of equals signs on its own line
					// Replicate the doHeadings behaviour /={count}(.+)={count}/
					// First find out how many equals signs there really are (don't stop at 6)
					$count = $equalsLength;
					if ( $count < 3 ) {
						$count = 0;
					} else {
						$count = min( 6, intval( ( $count - 1 ) / 2 ) );
					}
				} else {
					$count = min( $equalsLength, $count );
				}
				if ( $count > 0 ) {
					// Normal match, output <h>
					$element = "<h level=\"$count\" i=\"$headingIndex\">$accum</h>";
					$headingIndex++;
				} else {
					// Single equals sign on its own line, count=0
					$element = $accum;
				}
			} else {
				// No match, no <h>, just pass down the inner text
				$element = $accum;
			}
			// Unwind the stack
			$stack->pop();
			$accum =& $stack->getAccum();
			$stackFlags = $stack->getFlags();
			extract( $stackFlags );

			// Append the result to the enclosing accumulator
			$accum .= $element;
			// Note that we do NOT increment the input pointer.
			// This is because the closing linebreak could be the opening linebreak of
			// another heading. Infinite loops are avoided because the next iteration MUST
			// hit the heading open case above, which unconditionally increments the
			// input pointer.
		} elseif ( $found == 'open' ) {
			# count opening brace characters
			$count = strspn( $text, $curChar, $i );

			# we need to add to stack only if opening brace count is enough for one of the rules
			if ( $count >= $rule['min'] ) {
				# Add it to the stack
				$piece = array(
					'open' => $curChar,
					'close' => $rule['end'],
					'count' => $count,
					'lineStart' => ($i > 0 && $text[$i-1] == "\n"),
				);

				$stack->push( $piece );
				$accum =& $stack->getAccum();
				$stackFlags = $stack->getFlags();
				extract( $stackFlags );
			} else {
				# Add literal brace(s)
				$accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
			}
			$i += $count;
		} elseif ( $found == 'close' ) {
			$piece = $stack->top;
			# lets check if there are enough characters for closing brace
			$maxCount = $piece->count;
			$count = strspn( $text, $curChar, $i, $maxCount );

			# check for maximum matching characters (if there are 5 closing
			# characters, we will probably need only 3 - depending on the rules)
			$rule = $rules[$piece->open];
			if ( $count > $rule['max'] ) {
				# The specified maximum exists in the callback array, unless the caller
				# has made an error
				$matchingCount = $rule['max'];
			} else {
				# Count is less than the maximum
				# Skip any gaps in the callback array to find the true largest match
				# Need to use array_key_exists not isset because the callback can be null
				$matchingCount = $count;
				while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) {
					--$matchingCount;
				}
			}

			if ( $matchingCount <= 0 ) {
				# No matching element found in callback array
				# Output a literal closing brace and continue
				$accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
				$i += $count;
				continue;
			}
			$name = $rule['names'][$matchingCount];
			if ( $name === null ) {
				// No element, just literal text
				$element = $piece->breakSyntax( $matchingCount ) . str_repeat( $rule['end'], $matchingCount );
			} else {
				# Create XML element
				# Note: $parts is already XML, does not need to be encoded further
				$parts = $piece->parts;
				$title = $parts[0]->out;
				unset( $parts[0] );

				# The invocation is at the start of the line if lineStart is set in
				# the stack, and all opening brackets are used up.
				if ( $maxCount == $matchingCount && !empty( $piece->lineStart ) ) {
					$attr = ' lineStart="1"';
				} else {
					$attr = '';
				}

				$element = "<$name$attr>";
				$element .= "<title>$title</title>";
				$argIndex = 1;
				foreach ( $parts as $part ) {
					if ( isset( $part->eqpos ) ) {
						$argName = substr( $part->out, 0, $part->eqpos );
						$argValue = substr( $part->out, $part->eqpos + 1 );
						$element .= "<part><name>$argName</name>=<value>$argValue</value></part>";
					} else {
						$element .= "<part><name index=\"$argIndex\" /><value>{$part->out}</value></part>";
						$argIndex++;
					}
				}
				$element .= "</$name>";
			}

			# Advance input pointer
			$i += $matchingCount;

			# Unwind the stack
			$stack->pop();
			$accum =& $stack->getAccum();

			# Re-add the old stack element if it still has unmatched opening characters remaining
			if ( $matchingCount < $piece->count ) {
				$piece->parts = array( new PPDPart );
				$piece->count -= $matchingCount;
				# do we still qualify for any callback with remaining count?
				$names = $rules[$piece->open]['names'];
				$skippedBraces = 0;
				$enclosingAccum =& $accum;
				while ( $piece->count ) {
					if ( array_key_exists( $piece->count, $names ) ) {
						$stack->push( $piece );
						$accum =& $stack->getAccum();
						break;
					}
					--$piece->count;
					$skippedBraces ++;
				}
				$enclosingAccum .= str_repeat( $piece->open, $skippedBraces );
			}
			$stackFlags = $stack->getFlags();
			extract( $stackFlags );

			# Add XML element to the enclosing accumulator
			$accum .= $element;
		} elseif ( $found == 'pipe' ) {
			$findEquals = true; // shortcut for getFlags()
			$stack->addPart();
			$accum =& $stack->getAccum();
			++$i;
		} elseif ( $found == 'equals' ) {
			$findEquals = false; // shortcut for getFlags()
			$stack->getCurrentPart()->eqpos = strlen( $accum );
			$accum .= '=';
			++$i;
		}
	}

	# Output any remaining unclosed brackets
	foreach ( $stack->stack as $piece ) {
		$stack->rootAccum .= $piece->breakSyntax();
	}
	$stack->rootAccum .= '</root>';
	$xml = $stack->rootAccum;

	wfProfileOut( __METHOD__ );

	return $xml;
}
|
|
}
|
|
|
|
/**
 * Stack class to help Preprocessor::preprocessToObj()
 *
 * Holds the open bracket/heading elements and exposes, by reference, the
 * output accumulator that is currently being written to: the last part of
 * the top element, or $rootAccum when the stack is empty.
 *
 * @ingroup Parser
 */
class PPDStack {
	var $stack, $rootAccum;

	/**
	 * @var PPDStackElement|bool Top element of the stack, false when it is empty
	 */
	var $top;

	// Not referenced anywhere inside this class; kept so the public interface is unchanged
	var $out;

	/**
	 * Reference to the current output accumulator. It was previously created
	 * on the fly in the constructor; it is now declared explicitly.
	 */
	var $accum;

	// Name of the class instantiated by push() for non-element data
	var $elementClass = 'PPDStackElement';

	static $false = false;

	function __construct() {
		$this->stack = array();
		$this->top = false;
		$this->rootAccum = '';
		$this->accum =& $this->rootAccum;
	}

	/**
	 * @return int Number of elements on the stack
	 */
	function count() {
		return count( $this->stack );
	}

	/**
	 * @return string Reference to the current accumulator
	 */
	function &getAccum() {
		return $this->accum;
	}

	/**
	 * @return PPDPart|bool Current part of the top element, false if the stack is empty
	 */
	function getCurrentPart() {
		if ( $this->top === false ) {
			return false;
		} else {
			return $this->top->getCurrentPart();
		}
	}

	/**
	 * Push an element, wrapping $data in $elementClass unless it already
	 * is an instance of it, and point the accumulator at the new top.
	 *
	 * @param $data array|PPDStackElement
	 */
	function push( $data ) {
		if ( $data instanceof $this->elementClass ) {
			$this->stack[] = $data;
		} else {
			$class = $this->elementClass;
			$this->stack[] = new $class( $data );
		}
		$this->top = $this->stack[ count( $this->stack ) - 1 ];
		$this->accum =& $this->top->getAccum();
	}

	/**
	 * Remove and return the top element, re-pointing the accumulator at
	 * the new top or at $rootAccum if nothing is left.
	 *
	 * @throws MWException if the stack is already empty
	 * @return PPDStackElement
	 */
	function pop() {
		if ( !count( $this->stack ) ) {
			throw new MWException( __METHOD__.': no elements remaining' );
		}
		$temp = array_pop( $this->stack );

		if ( count( $this->stack ) ) {
			$this->top = $this->stack[ count( $this->stack ) - 1 ];
			$this->accum =& $this->top->getAccum();
		} else {
			$this->top = self::$false;
			$this->accum =& $this->rootAccum;
		}
		return $temp;
	}

	/**
	 * Start a new part on the top element and switch the accumulator to it.
	 * The stack must not be empty.
	 *
	 * @param $s string Initial content of the new part
	 */
	function addPart( $s = '' ) {
		$this->top->addPart( $s );
		$this->accum =& $this->top->getAccum();
	}

	/**
	 * @return array Scanner flags (findEquals, findPipe, inHeading) of the
	 *   top element; all false when the stack is empty
	 */
	function getFlags() {
		if ( !count( $this->stack ) ) {
			return array(
				'findEquals' => false,
				'findPipe' => false,
				'inHeading' => false,
			);
		} else {
			return $this->top->getFlags();
		}
	}
}
|
|
|
|
/**
 * One open construct on the preprocessor stack: a run of opening brackets
 * or a heading, together with the pipe-separated parts collected so far.
 *
 * @ingroup Parser
 */
class PPDStackElement {
	var $open,		// Opening character (\n for heading)
		$close,		// Matching closing character
		$count,		// Number of opening characters found (number of "=" for heading)
		$parts,		// Array of PPDPart objects describing pipe-separated parts.
		$lineStart,	// True if the open char appeared at the start of the input line. Not set for headings.
		$startPos;	// Input offset passed in by the heading opener; null unless supplied in $data.

	// Name of the class instantiated for each part
	var $partClass = 'PPDPart';

	/**
	 * Start with a single empty part, then copy every entry of $data onto
	 * the property of the same name (which may replace $parts).
	 *
	 * @param $data array
	 */
	function __construct( $data = array() ) {
		$class = $this->partClass;
		$this->parts = array( new $class );

		foreach ( $data as $name => $value ) {
			$this->$name = $value;
		}
	}

	/**
	 * @return string Reference to the output accumulator of the last part
	 */
	function &getAccum() {
		return $this->parts[count($this->parts) - 1]->out;
	}

	/**
	 * Append a new part.
	 *
	 * @param $s string Initial content of the part
	 */
	function addPart( $s = '' ) {
		$class = $this->partClass;
		$this->parts[] = new $class( $s );
	}

	/**
	 * @return PPDPart The last part
	 */
	function getCurrentPart() {
		return $this->parts[count($this->parts) - 1];
	}

	/**
	 * Work out what the scanner should look for while this element is on top.
	 *
	 * Pipes matter for everything except headings and "[" elements; equals
	 * signs matter only after the first pipe and until the current part has
	 * recorded one.
	 *
	 * @return array
	 */
	function getFlags() {
		$partCount = count( $this->parts );
		$findPipe = $this->open != "\n" && $this->open != '[';
		return array(
			'findPipe' => $findPipe,
			'findEquals' => $findPipe && $partCount > 1 && !isset( $this->parts[$partCount - 1]->eqpos ),
			'inHeading' => $this->open == "\n",
		);
	}

	/**
	 * Get the output string that would result if the close is not found.
	 *
	 * For a heading this is the first part's output. Otherwise it is the
	 * opening character repeated $openingCount times followed by all parts
	 * joined with "|".
	 *
	 * @param $openingCount int|bool Number of opening characters to emit,
	 *   false to use $this->count
	 * @return string
	 */
	function breakSyntax( $openingCount = false ) {
		if ( $this->open == "\n" ) {
			$s = $this->parts[0]->out;
		} else {
			if ( $openingCount === false ) {
				$openingCount = $this->count;
			}
			$s = str_repeat( $this->open, $openingCount );
			$first = true;
			foreach ( $this->parts as $part ) {
				if ( $first ) {
					$first = false;
				} else {
					$s .= '|';
				}
				$s .= $part->out;
			}
		}
		return $s;
	}
}
|
|
|
|
/**
 * One pipe-separated part of a stack element, holding its output so far.
 *
 * @ingroup Parser
 */
class PPDPart {
	var $out; // Output accumulator string

	// Optional members. They stay null, so isset() on them is false,
	// until the preprocessor assigns them. They are declared here instead
	// of being created dynamically on first assignment.
	var $eqpos;      // Position of equals sign in output accumulator
	var $commentEnd; // Input offset recorded for the end of the last comment encountered
	var $visualEnd;  // Input offset where the text before that comment (and its leading spaces) ends

	/**
	 * @param $out string Initial content of the accumulator
	 */
	function __construct( $out = '' ) {
		$this->out = $out;
	}
}
|
|
|
|
/**
|
|
* An expansion frame, used as a context to expand the result of preprocessToObj()
|
|
* @ingroup Parser
|
|
*/
|
|
class PPFrame_DOM implements PPFrame {
|
|
|
|
/**
|
|
* @var Preprocessor
|
|
*/
|
|
var $preprocessor;
|
|
|
|
/**
|
|
* @var Parser
|
|
*/
|
|
var $parser;
|
|
|
|
/**
|
|
* @var Title
|
|
*/
|
|
var $title;
|
|
var $titleCache;
|
|
|
|
/**
|
|
* Hashtable listing templates which are disallowed for expansion in this frame,
|
|
* having been encountered previously in parent frames.
|
|
*/
|
|
var $loopCheckHash;
|
|
|
|
/**
|
|
* Recursion depth of this frame, top = 0
|
|
* Note that this is NOT the same as expansion depth in expand()
|
|
*/
|
|
var $depth;
|
|
|
|
|
|
/**
 * Construct a new preprocessor frame.
 *
 * The parser and its current title are taken from the preprocessor. The
 * title cache starts with the title's prefixed DB key, or false when the
 * parser has no title. The frame starts at depth 0 with an empty loop-check
 * hash.
 *
 * @param $preprocessor Preprocessor The parent preprocessor
 */
function __construct( $preprocessor ) {
	$parser = $preprocessor->parser;
	$title = $parser->mTitle;

	if ( $title ) {
		$dbKey = $title->getPrefixedDBkey();
	} else {
		$dbKey = false;
	}

	$this->preprocessor = $preprocessor;
	$this->parser = $parser;
	$this->title = $title;
	$this->titleCache = array( $dbKey );
	$this->depth = 0;
	$this->loopCheckHash = array();
}
|
|
|
|
/**
 * Create a new child frame
 * $args is optionally a multi-root PPNode or array containing the template arguments
 *
 * Each argument is examined through its <name> and <value> children. A
 * <name> carrying attributes is a numbered argument, keyed by its "index"
 * attribute minus $indexOffset; otherwise the name is expanded (comments
 * stripped) and trimmed to give a named argument. A later argument removes
 * an earlier one stored under the same key in the other list.
 *
 * @param $args PPNode|array|bool false for no arguments
 * @param $title Title|bool false to reuse this frame's title
 * @param $indexOffset int Amount subtracted from every numbered index
 * @return PPTemplateFrame_DOM
 */
function newChild( $args = false, $title = false, $indexOffset = 0 ) {
	$numbered = array();
	$named = array();

	if ( $title === false ) {
		$title = $this->title;
	}

	if ( $args !== false ) {
		if ( $args instanceof PPNode ) {
			$args = $args->node;
		}
		// A single DOMXPath is created lazily from the first argument's document
		$xpath = false;
		foreach ( $args as $partNode ) {
			if ( $partNode instanceof PPNode ) {
				$partNode = $partNode->node;
			}
			if ( !$xpath ) {
				$xpath = new DOMXPath( $partNode->ownerDocument );
			}

			$nameNode = $xpath->query( 'name', $partNode )->item( 0 );
			$valueNode = $xpath->query( 'value', $partNode )->item( 0 );

			if ( $nameNode->hasAttributes() ) {
				// Numbered parameter
				$key = $nameNode->attributes->getNamedItem( 'index' )->textContent;
				$key = $key - $indexOffset;
				$numbered[$key] = $valueNode;
				unset( $named[$key] );
			} else {
				// Named parameter
				$key = trim( $this->expand( $nameNode, PPFrame::STRIP_COMMENTS ) );
				$named[$key] = $valueNode;
				unset( $numbered[$key] );
			}
		}
	}

	return new PPTemplateFrame_DOM( $this->preprocessor, $this, $numbered, $named, $title );
}
|
|
|
|
/**
 * Expand a preprocessor node, node list or array of nodes into a string.
 *
 * Strings are returned unchanged. Every other call counts against the
 * parser's node-count limit, and nested calls are bounded by the maximum
 * expansion depth; when either limit is exceeded a limitation warning is
 * recorded on the parser and an error <span> is returned instead.
 *
 * The tree is walked iteratively using three parallel stacks (output
 * strings, iterators and iterator positions) rather than by recursing for
 * every child. Only headings recurse into expand() directly.
 *
 * The static depth counter and the profiling section are restored even if
 * an exception is thrown while expanding, so a caught exception does not
 * leave later expansions in the same process with an inflated depth.
 *
 * @throws MWException if a node of an unsupported type is encountered
 * @param $root string|PPNode_DOM|DOMDocument|DOMNode|DOMNodeList|array
 * @param $flags int Bitfield of PPFrame constants (NO_TEMPLATES, NO_ARGS,
 *   STRIP_COMMENTS, RECOVER_COMMENTS, NO_IGNORE)
 * @return string
 */
function expand( $root, $flags = 0 ) {
	static $expansionDepth = 0;
	if ( is_string( $root ) ) {
		return $root;
	}

	if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->getMaxPPNodeCount() ) {
		$this->parser->limitationWarn( 'node-count-exceeded',
			$this->parser->mPPNodeCount,
			$this->parser->mOptions->getMaxPPNodeCount()
		);
		return '<span class="error">Node-count limit exceeded</span>';
	}

	if ( $expansionDepth > $this->parser->mOptions->getMaxPPExpandDepth() ) {
		$this->parser->limitationWarn( 'expansion-depth-exceeded',
			$expansionDepth,
			$this->parser->mOptions->getMaxPPExpandDepth()
		);
		return '<span class="error">Expansion depth limit exceeded</span>';
	}
	wfProfileIn( __METHOD__ );
	++$expansionDepth;
	if ( $expansionDepth > $this->parser->mHighestExpansionDepth ) {
		$this->parser->mHighestExpansionDepth = $expansionDepth;
	}

	try {
		if ( $root instanceof PPNode_DOM ) {
			$root = $root->node;
		}
		if ( $root instanceof DOMDocument ) {
			$root = $root->documentElement;
		}

		// Level 0 is a sentinel that collects the final output; the real
		// work starts at level 1 with $root as the only "iterator".
		$outStack = array( '', '' );
		$iteratorStack = array( false, $root );
		$indexStack = array( 0, 0 );

		while ( count( $iteratorStack ) > 1 ) {
			$level = count( $outStack ) - 1;
			$iteratorNode =& $iteratorStack[ $level ];
			$out =& $outStack[$level];
			$index =& $indexStack[$level];

			if ( $iteratorNode instanceof PPNode_DOM ) $iteratorNode = $iteratorNode->node;

			if ( is_array( $iteratorNode ) ) {
				if ( $index >= count( $iteratorNode ) ) {
					// All done with this iterator
					$iteratorStack[$level] = false;
					$contextNode = false;
				} else {
					$contextNode = $iteratorNode[$index];
					$index++;
				}
			} elseif ( $iteratorNode instanceof DOMNodeList ) {
				if ( $index >= $iteratorNode->length ) {
					// All done with this iterator
					$iteratorStack[$level] = false;
					$contextNode = false;
				} else {
					$contextNode = $iteratorNode->item( $index );
					$index++;
				}
			} else {
				// Copy to $contextNode and then delete from iterator stack,
				// because this is not an iterator but we do have to execute it once
				$contextNode = $iteratorStack[$level];
				$iteratorStack[$level] = false;
			}

			if ( $contextNode instanceof PPNode_DOM ) {
				$contextNode = $contextNode->node;
			}

			$newIterator = false;

			if ( $contextNode === false ) {
				// nothing to do
			} elseif ( is_string( $contextNode ) ) {
				$out .= $contextNode;
			} elseif ( is_array( $contextNode ) || $contextNode instanceof DOMNodeList ) {
				$newIterator = $contextNode;
			} elseif ( $contextNode instanceof DOMNode ) {
				if ( $contextNode->nodeType == XML_TEXT_NODE ) {
					$out .= $contextNode->nodeValue;
				} elseif ( $contextNode->nodeName == 'template' ) {
					# Double-brace expansion
					$xpath = new DOMXPath( $contextNode->ownerDocument );
					$titles = $xpath->query( 'title', $contextNode );
					$title = $titles->item( 0 );
					$parts = $xpath->query( 'part', $contextNode );
					if ( $flags & PPFrame::NO_TEMPLATES ) {
						$newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $title, $parts );
					} else {
						$lineStart = $contextNode->getAttribute( 'lineStart' );
						$params = array(
							'title' => new PPNode_DOM( $title ),
							'parts' => new PPNode_DOM( $parts ),
							'lineStart' => $lineStart );
						$ret = $this->parser->braceSubstitution( $params, $this );
						if ( isset( $ret['object'] ) ) {
							$newIterator = $ret['object'];
						} else {
							$out .= $ret['text'];
						}
					}
				} elseif ( $contextNode->nodeName == 'tplarg' ) {
					# Triple-brace expansion
					$xpath = new DOMXPath( $contextNode->ownerDocument );
					$titles = $xpath->query( 'title', $contextNode );
					$title = $titles->item( 0 );
					$parts = $xpath->query( 'part', $contextNode );
					if ( $flags & PPFrame::NO_ARGS ) {
						$newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $title, $parts );
					} else {
						$params = array(
							'title' => new PPNode_DOM( $title ),
							'parts' => new PPNode_DOM( $parts ) );
						$ret = $this->parser->argSubstitution( $params, $this );
						if ( isset( $ret['object'] ) ) {
							$newIterator = $ret['object'];
						} else {
							$out .= $ret['text'];
						}
					}
				} elseif ( $contextNode->nodeName == 'comment' ) {
					# HTML-style comment
					# Remove it in HTML, pre+remove and STRIP_COMMENTS modes
					if ( $this->parser->ot['html']
						|| ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
						|| ( $flags & PPFrame::STRIP_COMMENTS ) )
					{
						// The comment is dropped: nothing is appended to the output
					}
					# Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
					# Not in RECOVER_COMMENTS mode (extractSections) though
					elseif ( $this->parser->ot['wiki'] && ! ( $flags & PPFrame::RECOVER_COMMENTS ) ) {
						$out .= $this->parser->insertStripItem( $contextNode->textContent );
					}
					# Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
					else {
						$out .= $contextNode->textContent;
					}
				} elseif ( $contextNode->nodeName == 'ignore' ) {
					# Output suppression used by <includeonly> etc.
					# OT_WIKI will only respect <ignore> in substed templates.
					# The other output types respect it unless NO_IGNORE is set.
					# extractSections() sets NO_IGNORE and so never respects it.
					if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & PPFrame::NO_IGNORE ) ) {
						$out .= $contextNode->textContent;
					}
					// Otherwise the ignored text is suppressed: nothing is appended
				} elseif ( $contextNode->nodeName == 'ext' ) {
					# Extension tag
					$xpath = new DOMXPath( $contextNode->ownerDocument );
					$names = $xpath->query( 'name', $contextNode );
					$attrs = $xpath->query( 'attr', $contextNode );
					$inners = $xpath->query( 'inner', $contextNode );
					$closes = $xpath->query( 'close', $contextNode );
					$params = array(
						'name' => new PPNode_DOM( $names->item( 0 ) ),
						'attr' => $attrs->length > 0 ? new PPNode_DOM( $attrs->item( 0 ) ) : null,
						'inner' => $inners->length > 0 ? new PPNode_DOM( $inners->item( 0 ) ) : null,
						'close' => $closes->length > 0 ? new PPNode_DOM( $closes->item( 0 ) ) : null,
					);
					$out .= $this->parser->extensionSubstitution( $params, $this );
				} elseif ( $contextNode->nodeName == 'h' ) {
					# Heading
					$s = $this->expand( $contextNode->childNodes, $flags );

					# Insert a heading marker only for <h> children of <root>
					# This is to stop extractSections from going over multiple tree levels
					if ( $contextNode->parentNode->nodeName == 'root'
						&& $this->parser->ot['html'] )
					{
						# Insert heading index marker
						$headingIndex = $contextNode->getAttribute( 'i' );
						$titleText = $this->title->getPrefixedDBkey();
						$this->parser->mHeadings[] = array( $titleText, $headingIndex );
						$serial = count( $this->parser->mHeadings ) - 1;
						$marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
						// The marker is spliced in after the first "level" characters of the heading text
						$count = $contextNode->getAttribute( 'level' );
						$s = substr( $s, 0, $count ) . $marker . substr( $s, $count );
						$this->parser->mStripState->addGeneral( $marker, '' );
					}
					$out .= $s;
				} else {
					# Generic recursive expansion
					$newIterator = $contextNode->childNodes;
				}
			} else {
				// Cleanup (depth counter, profiling) happens in the catch block below
				throw new MWException( __METHOD__.': Invalid parameter type' );
			}

			if ( $newIterator !== false ) {
				if ( $newIterator instanceof PPNode_DOM ) {
					$newIterator = $newIterator->node;
				}
				$outStack[] = '';
				$iteratorStack[] = $newIterator;
				$indexStack[] = 0;
			} elseif ( $iteratorStack[$level] === false ) {
				// Return accumulated value to parent
				// With tail recursion
				while ( $iteratorStack[$level] === false && $level > 0 ) {
					$outStack[$level - 1] .= $out;
					array_pop( $outStack );
					array_pop( $iteratorStack );
					array_pop( $indexStack );
					$level--;
				}
			}
		}
	} catch ( Exception $e ) {
		// Keep the static depth counter and the profiler balanced before
		// letting the exception continue to the caller unchanged.
		--$expansionDepth;
		wfProfileOut( __METHOD__ );
		throw $e;
	}
	--$expansionDepth;
	wfProfileOut( __METHOD__ );
	return $outStack[0];
}
|
|
|
|
/**
 * Expand every given node with the supplied flags and join the results.
 *
 * Each argument after $flags may be a single node, an array of nodes, a
 * DOMNodeList, or a PPNode_DOM wrapping any of those.
 *
 * @param $sep string Separator placed between consecutive expanded nodes
 * @param $flags int Flags passed through to expand()
 * @return string
 */
function implodeWithFlags( $sep, $flags /*, ... */ ) {
	$roots = array_slice( func_get_args(), 2 );

	$result = '';
	$emitted = 0;
	foreach ( $roots as $nodes ) {
		if ( $nodes instanceof PPNode_DOM ) {
			$nodes = $nodes->node;
		}
		$isList = is_array( $nodes ) || $nodes instanceof DOMNodeList;
		if ( !$isList ) {
			// Treat a lone node as a one-element list
			$nodes = array( $nodes );
		}
		foreach ( $nodes as $child ) {
			// The separator goes before every node except the very first
			if ( $emitted > 0 ) {
				$result .= $sep;
			}
			$emitted++;
			$result .= $this->expand( $child, $flags );
		}
	}
	return $result;
}
|
|
|
|
/**
 * Expand every given node with default flags and join the results.
 * The loop is written out here instead of delegating to implodeWithFlags()
 * in order to reduce stack depth.
 *
 * Each argument after $sep may be a single node, an array of nodes, a
 * DOMNodeList, or a PPNode_DOM wrapping any of those.
 *
 * @param $sep string Separator placed between consecutive expanded nodes
 * @return string
 */
function implode( $sep /*, ... */ ) {
	$roots = array_slice( func_get_args(), 1 );

	$result = '';
	$emitted = 0;
	foreach ( $roots as $nodes ) {
		if ( $nodes instanceof PPNode_DOM ) {
			$nodes = $nodes->node;
		}
		$isList = is_array( $nodes ) || $nodes instanceof DOMNodeList;
		if ( !$isList ) {
			// Treat a lone node as a one-element list
			$nodes = array( $nodes );
		}
		foreach ( $nodes as $child ) {
			// The separator goes before every node except the very first
			if ( $emitted > 0 ) {
				$result .= $sep;
			}
			$emitted++;
			$result .= $this->expand( $child );
		}
	}
	return $result;
}
|
|
|
|
/**
 * Build an array that, when passed to expand(), yields the same text as
 * implode() would for the same arguments. Nothing is expanded here: the
 * nodes and separators are only collected in order.
 *
 * Each argument after $sep may be a single node, an array of nodes, a
 * DOMNodeList, or a PPNode_DOM wrapping any of those.
 *
 * @param $sep string Separator inserted between consecutive nodes
 * @return array
 */
function virtualImplode( $sep /*, ... */ ) {
	$pieces = array();

	foreach ( array_slice( func_get_args(), 1 ) as $nodes ) {
		if ( $nodes instanceof PPNode_DOM ) {
			$nodes = $nodes->node;
		}
		$isList = is_array( $nodes ) || $nodes instanceof DOMNodeList;
		if ( !$isList ) {
			$nodes = array( $nodes );
		}
		foreach ( $nodes as $member ) {
			// Anything already collected means this is not the first node
			if ( count( $pieces ) > 0 ) {
				$pieces[] = $sep;
			}
			$pieces[] = $member;
		}
	}
	return $pieces;
}
|
|
|
|
/**
 * Like virtualImplode(), but the collected pieces are wrapped in an opening
 * and a closing string. Nothing is expanded here.
 *
 * Each argument after $end may be a single node, an array of nodes, a
 * DOMNodeList, or a PPNode_DOM wrapping any of those.
 *
 * @param $start string Placed first in the result
 * @param $sep string Separator inserted between consecutive nodes
 * @param $end string Placed last in the result
 * @return array
 */
function virtualBracketedImplode( $start, $sep, $end /*, ... */ ) {
	$pieces = array( $start );
	$needSeparator = false;

	foreach ( array_slice( func_get_args(), 3 ) as $nodes ) {
		if ( $nodes instanceof PPNode_DOM ) {
			$nodes = $nodes->node;
		}
		$isList = is_array( $nodes ) || $nodes instanceof DOMNodeList;
		if ( !$isList ) {
			$nodes = array( $nodes );
		}
		foreach ( $nodes as $member ) {
			if ( $needSeparator ) {
				$pieces[] = $sep;
			}
			$needSeparator = true;
			$pieces[] = $member;
		}
	}
	$pieces[] = $end;
	return $pieces;
}
|
|
|
|
/**
 * Fixed string representation of this frame type.
 *
 * @return string
 */
function __toString() {
	return 'frame{}';
}
|
|
|
|
/**
 * Get a prefixed DB key.
 *
 * @param $level int|bool Index into the title cache, or false (the default)
 *   to use this frame's own title
 * @return string|bool The prefixed DB key; false if $level was given but
 *   the title cache has no entry for it
 */
function getPDBK( $level = false ) {
	if ( $level !== false ) {
		if ( isset( $this->titleCache[$level] ) ) {
			return $this->titleCache[$level];
		}
		return false;
	}
	return $this->title->getPrefixedDBkey();
}
|
|
|
|
/**
 * This frame type carries no arguments, so the list is always empty.
 *
 * @return array
 */
function getArguments() {
	return array();
}
|
|
|
|
/**
 * This frame type carries no numbered arguments, so the list is always empty.
 *
 * @return array
 */
function getNumberedArguments() {
	return array();
}
|
|
|
|
/**
 * This frame type carries no named arguments, so the list is always empty.
 *
 * @return array
 */
function getNamedArguments() {
	return array();
}
|
|
|
|
/**
 * Whether this frame has no arguments. Always true for this frame type.
 *
 * @return bool
 */
function isEmpty() {
	return true;
}
|
|
|
|
/**
 * Look up an argument by name. This frame type has no arguments, so the
 * lookup always fails.
 *
 * @param $name string|int Argument name or index (unused)
 * @return bool Always false
 */
function getArgument( $name ) {
	return false;
}
|
|
|
|
/**
 * Infinite loop check: the title passes if its prefixed DB key is not
 * present in this frame's loop-check hash.
 *
 * @param $title Title
 * @return bool True if the check is OK, false if a loop is detected
 */
function loopCheck( $title ) {
	$dbKey = $title->getPrefixedDBkey();
	$seenBefore = isset( $this->loopCheckHash[$dbKey] );
	return !$seenBefore;
}
|
|
|
|
/**
 * Whether this frame is a template frame. Always false for this frame type.
 *
 * @return bool
 */
function isTemplate() {
	return false;
}
|
|
|
|
/**
 * Get the title associated with this frame.
 *
 * @return Title
 */
function getTitle() {
	return $this->title;
}
|
|
}
|
|
|
|
/**
 * Expansion frame with template arguments
 * @ingroup Parser
 */
class PPTemplateFrame_DOM extends PPFrame_DOM {
	/** Unexpanded numbered arguments, keyed by position */
	var $numberedArgs;

	/** Unexpanded named arguments, keyed by name */
	var $namedArgs;

	/**
	 * @var PPFrame_DOM
	 */
	var $parent;

	/** Expanded text of numbered arguments, filled in on first access */
	var $numberedExpansionCache;

	/** Expanded (and trimmed) text of named arguments, filled in on first access */
	var $namedExpansionCache;

	/**
	 * @param $preprocessor
	 * @param $parent PPFrame_DOM Frame in which the arguments are expanded
	 * @param $numberedArgs array
	 * @param $namedArgs array
	 * @param $title Title
	 */
	function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
		parent::__construct( $preprocessor );

		if ( $title ) {
			$pdbk = $title->getPrefixedDBkey();
		} else {
			$pdbk = false;
		}

		$this->parent = $parent;
		$this->title = $title;
		$this->depth = $parent->depth + 1;

		$this->numberedArgs = $numberedArgs;
		$this->namedArgs = $namedArgs;
		$this->numberedExpansionCache = array();
		$this->namedExpansionCache = array();

		// Extend the parent's title cache with this frame's own entry
		$this->titleCache = $parent->titleCache;
		$this->titleCache[] = $pdbk;

		// Start from the parent's loop-check hash and add this frame's title
		$this->loopCheckHash = /*clone*/ $parent->loopCheckHash;
		if ( $pdbk !== false ) {
			$this->loopCheckHash[$pdbk] = true;
		}
	}

	/**
	 * Debug representation listing each argument with the XML of its node.
	 *
	 * @return string
	 */
	function __toString() {
		$entries = array();
		foreach ( $this->numberedArgs + $this->namedArgs as $name => $value ) {
			$xml = $value->ownerDocument->saveXML( $value );
			$entries[] = "\"$name\":\"" . str_replace( '"', '\\"', $xml ) . '"';
		}
		return 'tplframe{' . implode( ', ', $entries ) . '}';
	}

	/**
	 * Returns true if there are no arguments in this frame
	 *
	 * @return bool
	 */
	function isEmpty() {
		return !( count( $this->numberedArgs ) || count( $this->namedArgs ) );
	}

	/**
	 * Get all arguments, numbered and named, in expanded form.
	 *
	 * @return array Expanded argument text keyed by argument name/index
	 */
	function getArguments() {
		$keys = array_merge(
			array_keys( $this->numberedArgs ),
			array_keys( $this->namedArgs )
		);
		$expanded = array();
		foreach ( $keys as $key ) {
			$expanded[$key] = $this->getArgument( $key );
		}
		return $expanded;
	}

	/**
	 * Get the numbered arguments in expanded form.
	 *
	 * @return array Expanded argument text keyed by index
	 */
	function getNumberedArguments() {
		$expanded = array();
		foreach ( $this->numberedArgs as $key => $unused ) {
			$expanded[$key] = $this->getArgument( $key );
		}
		return $expanded;
	}

	/**
	 * Get the named arguments in expanded form.
	 *
	 * @return array Expanded argument text keyed by name
	 */
	function getNamedArguments() {
		$expanded = array();
		foreach ( $this->namedArgs as $key => $unused ) {
			$expanded[$key] = $this->getArgument( $key );
		}
		return $expanded;
	}

	/**
	 * Get the expanded text of a numbered argument, expanding it in the
	 * parent frame on first access and caching the result.
	 *
	 * @param $index int
	 * @return string|bool False if there is no such numbered argument
	 */
	function getNumberedArgument( $index ) {
		if ( !isset( $this->numberedArgs[$index] ) ) {
			return false;
		}
		if ( isset( $this->numberedExpansionCache[$index] ) ) {
			return $this->numberedExpansionCache[$index];
		}
		# No trimming for unnamed arguments
		$text = $this->parent->expand( $this->numberedArgs[$index], PPFrame::STRIP_COMMENTS );
		$this->numberedExpansionCache[$index] = $text;
		return $text;
	}

	/**
	 * Get the expanded text of a named argument, expanding it in the
	 * parent frame on first access and caching the result.
	 *
	 * @param $name string
	 * @return string|bool False if there is no such named argument
	 */
	function getNamedArgument( $name ) {
		if ( !isset( $this->namedArgs[$name] ) ) {
			return false;
		}
		if ( isset( $this->namedExpansionCache[$name] ) ) {
			return $this->namedExpansionCache[$name];
		}
		# Trim named arguments post-expand, for backwards compatibility
		$text = trim( $this->parent->expand( $this->namedArgs[$name], PPFrame::STRIP_COMMENTS ) );
		$this->namedExpansionCache[$name] = $text;
		return $text;
	}

	/**
	 * Get the expanded text of an argument, trying the numbered arguments
	 * first and falling back to the named ones.
	 *
	 * @param $name string|int
	 * @return string|bool False if no argument with that name/index exists
	 */
	function getArgument( $name ) {
		$text = $this->getNumberedArgument( $name );
		return $text === false ? $this->getNamedArgument( $name ) : $text;
	}

	/**
	 * Return true if the frame is a template frame
	 *
	 * @return bool
	 */
	function isTemplate() {
		return true;
	}
}
|
|
|
|
/**
 * Expansion frame with custom arguments
 * @ingroup Parser
 */
class PPCustomFrame_DOM extends PPFrame_DOM {
	/** Argument values keyed by name; returned as-is by getArgument() */
	var $args;

	/**
	 * @param $preprocessor
	 * @param $args array Argument values keyed by name
	 */
	function __construct( $preprocessor, $args ) {
		parent::__construct( $preprocessor );
		$this->args = $args;
	}

	/**
	 * Debug representation listing each argument and its value.
	 *
	 * Values are converted with strval() rather than by calling
	 * __toString() on them, so plain string values work as well as
	 * objects that implement __toString().
	 *
	 * @return string
	 */
	function __toString() {
		$s = 'cstmframe{';
		$first = true;
		foreach ( $this->args as $name => $value ) {
			if ( $first ) {
				$first = false;
			} else {
				$s .= ', ';
			}
			$s .= "\"$name\":\"" .
				str_replace( '"', '\\"', strval( $value ) ) . '"';
		}
		$s .= '}';
		return $s;
	}

	/**
	 * Returns true if there are no arguments in this frame
	 *
	 * @return bool
	 */
	function isEmpty() {
		return !count( $this->args );
	}

	/**
	 * Get a single argument value.
	 *
	 * @param $index string|int
	 * @return mixed The stored value, or false if it is not set
	 */
	function getArgument( $index ) {
		if ( !isset( $this->args[$index] ) ) {
			return false;
		}
		return $this->args[$index];
	}

	/**
	 * Get all argument values exactly as they were supplied.
	 *
	 * @return array
	 */
	function getArguments() {
		return $this->args;
	}
}
|
|
|
|
/**
 * Preprocessor tree node wrapping either a single DOM node or a DOMNodeList.
 *
 * @ingroup Parser
 */
class PPNode_DOM implements PPNode {

	/**
	 * The wrapped DOM object. Several methods below distinguish between a
	 * single node and a DOMNodeList.
	 *
	 * @var DOMNode|DOMNodeList
	 */
	var $node;

	/**
	 * XPath evaluator, created lazily by getXPath() unless one was passed
	 * to the constructor.
	 *
	 * @var DOMXPath|null
	 */
	var $xpath;

	/**
	 * @param $node DOMNode|DOMNodeList
	 * @param $xpath DOMXPath|bool Optional evaluator to reuse. Anything that
	 *   is not a DOMXPath (including the default false) is ignored and an
	 *   evaluator is created on demand instead.
	 */
	function __construct( $node, $xpath = false ) {
		$this->node = $node;
		// Keep $this->xpath null otherwise, because getXPath() tests for null
		if ( $xpath instanceof DOMXPath ) {
			$this->xpath = $xpath;
		}
	}

	/**
	 * Get the XPath evaluator for the wrapped node's document, creating it
	 * on first use.
	 *
	 * @return DOMXPath
	 */
	function getXPath() {
		if ( $this->xpath === null ) {
			$this->xpath = new DOMXPath( $this->node->ownerDocument );
		}
		return $this->xpath;
	}

	/**
	 * Serialize the wrapped node, or every node of the wrapped list, to XML.
	 *
	 * @return string
	 */
	function __toString() {
		if ( $this->node instanceof DOMNodeList ) {
			$s = '';
			foreach ( $this->node as $node ) {
				$s .= $node->ownerDocument->saveXML( $node );
			}
		} else {
			$s = $this->node->ownerDocument->saveXML( $this->node );
		}
		return $s;
	}

	/**
	 * @return bool|PPNode_DOM
	 */
	function getChildren() {
		return $this->node->childNodes ? new self( $this->node->childNodes ) : false;
	}

	/**
	 * @return bool|PPNode_DOM
	 */
	function getFirstChild() {
		return $this->node->firstChild ? new self( $this->node->firstChild ) : false;
	}

	/**
	 * @return bool|PPNode_DOM
	 */
	function getNextSibling() {
		return $this->node->nextSibling ? new self( $this->node->nextSibling ) : false;
	}

	/**
	 * Get the nodes matched by evaluating $type as an XPath expression
	 * relative to the wrapped node.
	 *
	 * @param $type string
	 *
	 * @return PPNode_DOM Wrapper around the query result
	 */
	function getChildrenOfType( $type ) {
		return new self( $this->getXPath()->query( $type, $this->node ) );
	}

	/**
	 * @return int|bool Number of nodes if a list is wrapped, false otherwise
	 */
	function getLength() {
		if ( $this->node instanceof DOMNodeList ) {
			return $this->node->length;
		} else {
			return false;
		}
	}

	/**
	 * Get one item of the wrapped list.
	 *
	 * @param $i int
	 * @return bool|PPNode_DOM False if the list has no item at that index
	 */
	function item( $i ) {
		$item = $this->node->item( $i );
		return $item ? new self( $item ) : false;
	}

	/**
	 * @return string '#nodelist' for a wrapped list, otherwise the node name
	 */
	function getName() {
		if ( $this->node instanceof DOMNodeList ) {
			return '#nodelist';
		} else {
			return $this->node->nodeName;
		}
	}

	/**
	 * Split a "<part>" node into an associative array containing:
	 *    - name          PPNode name
	 *    - index         String index
	 *    - value         PPNode value
	 *
	 * @throws MWException if the node has no "name" or no "value" child
	 * @return array
	 */
	function splitArg() {
		$xpath = $this->getXPath();
		$names = $xpath->query( 'name', $this->node );
		$values = $xpath->query( 'value', $this->node );
		if ( !$names->length || !$values->length ) {
			throw new MWException( 'Invalid brace node passed to ' . __METHOD__ );
		}
		$name = $names->item( 0 );
		$index = $name->getAttribute( 'index' );
		return array(
			'name' => new self( $name ),
			'index' => $index,
			'value' => new self( $values->item( 0 ) ) );
	}

	/**
	 * Split an "<ext>" node into an associative array containing name, attr, inner and close
	 * All values in the resulting array are PPNodes. Inner and close are optional.
	 *
	 * @throws MWException if the node has no "name" or no "attr" child
	 * @return array
	 */
	function splitExt() {
		$xpath = $this->getXPath();
		$names = $xpath->query( 'name', $this->node );
		$attrs = $xpath->query( 'attr', $this->node );
		$inners = $xpath->query( 'inner', $this->node );
		$closes = $xpath->query( 'close', $this->node );
		if ( !$names->length || !$attrs->length ) {
			throw new MWException( 'Invalid ext node passed to ' . __METHOD__ );
		}
		$parts = array(
			'name' => new self( $names->item( 0 ) ),
			'attr' => new self( $attrs->item( 0 ) ) );
		if ( $inners->length ) {
			$parts['inner'] = new self( $inners->item( 0 ) );
		}
		if ( $closes->length ) {
			$parts['close'] = new self( $closes->item( 0 ) );
		}
		return $parts;
	}

	/**
	 * Split a "<h>" node into its "i" and "level" attributes and its
	 * children ("contents").
	 *
	 * @throws MWException if the wrapped node is not an "h" node
	 * @return array
	 */
	function splitHeading() {
		if ( $this->getName() !== 'h' ) {
			throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
		}
		return array(
			'i' => $this->node->getAttribute( 'i' ),
			'level' => $this->node->getAttribute( 'level' ),
			'contents' => $this->getChildren()
		);
	}
}
|