wiki.techinc.nl/includes/Parser.php

2820 lines
83 KiB
PHP

<?php
// require_once('Tokenizer.php');
/**
* File for Parser and related classes
*
* @package MediaWiki
* @version $Id$
*/
/**
* Variable substitution O(N^2) attack
*
* Without countermeasures, it would be possible to attack the parser by saving
* a page filled with a large number of inclusions of large pages. The size of
* the generated page would be proportional to the square of the input size.
* Hence, we limit the number of inclusions of any given page, thus bringing any
* attack back to O(N).
*/
# Inclusion limits (see the O(N^2) attack description above)
define( 'MAX_INCLUDE_REPEAT', 100 );
define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million
# Allowed values for $mOutputType
# (parse() selects OT_HTML; strip() only renders tag contents for OT_HTML)
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG' , 3 );
# Pseudo tag name for extractTags(): when passed as $tag, the function
# extracts <!-- HTML comments --> instead of a regular <XML>-style
# tag pair. This should not be anything we may want to use in wikisyntax
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );
# prefix of the placeholder markers generated by strip()/extractTags()
# and insertStripItem()
define( 'UNIQ_PREFIX', 'NaodW29');
# Constants needed for external link processing
# Both protocol lists are regex alternations ('a|b|c'), without the colon
define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
define( 'HTTP_PROTOCOLS', 'http|https' );
# Everything except bracket, space, or control characters
define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
# The complement of the class above
define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
# Including space
define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
# Characters accepted in the file name part of an external image URL
define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
# File extensions (matched case-insensitively) recognised as images
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
# Bracketed external link "[url text]".
# Capture groups: 1 = whole URL, 2 = protocol, 3 = link text (may be empty)
define( 'EXT_LINK_BRACKETED', '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
# A string that consists entirely of an http(s) image URL.
# Capture groups: 1 = protocol with colon, 2 = host and path, 3 = file name,
# 4 = extension
define( 'EXT_IMAGE_REGEX',
'/^('.HTTP_PROTOCOLS.':)'. # Protocol
'('.EXT_LINK_URL_CLASS.'+)\\/'. # Hostname and path
'('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
);
/**
* PHP Parser
*
* Processes wiki markup
*
* <pre>
* There are three main entry points into the Parser class:
* parse()
* produces HTML output
* preSaveTransform()
* produces altered wiki markup.
* transformMsg()
* performs brace substitution on MediaWiki messages
*
* Globals used:
* objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
*
* NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
*
* settings:
* $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
* $wgNamespacesWithSubpages, $wgAllowExternalImages*,
* $wgLocaltimezone
*
* * only within ParserOptions
* </pre>
*
* @package MediaWiki
*/
class Parser
{
/**#@+
* @access private
*/
# Persistent:
# $mTagHooks maps an extension tag name to the callback that strip()
# invokes on the tag's content; set to an empty array by the constructor.
var $mTagHooks;
# Cleared with clearState():
# $mOutput      - ParserOutput object returned by parse()
# $mAutonumber  - counter used to label unnamed bracketed external links
# $mStripState  - marker => content tables filled by strip()
# $mDTopen, $mInPre are reset to false, $mLastSection to "",
# $mVariables to false, $mIncludeCount and $mArgStack to empty arrays.
var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
# Temporary:
# $mOptions, $mTitle and $mOutputType are assigned by parse() on every call.
var $mOptions, $mTitle, $mOutputType,
$mTemplates, // cache of already loaded templates, avoids
// multiple SQL queries for the same string
$mTemplatePath; // stores an unsorted hash of all the templates already loaded
// in this path. Used for loop detection.
/**#@-*/
/**
 * Constructor
 *
 * Starts with no tag hooks and empty template caches, then resets all
 * per-parse state through clearState().
 *
 * @access public
 */
function Parser() {
	# Persistent members first ...
	$this->mTagHooks = array();
	$this->mTemplatePath = array();
	$this->mTemplates = array();
	# ... then everything that is reset between parses
	$this->clearState();
}
/**
 * Reset every member that holds per-parse state to its initial value.
 *
 * @access private
 */
function clearState() {
	# Flags
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mVariables = false;
	# Counters and scalars
	$this->mAutonumber = 0;
	$this->mLastSection = "";
	# Collections
	$this->mArgStack = array();
	$this->mIncludeCount = array();
	$this->mStripState = array();
	# Fresh output container
	$this->mOutput = new ParserOutput();
}
/**
 * Convert wiki markup to HTML (one of the main entry points of the class).
 *
 * Pipeline: strip() protected sections, run internalParse() on the rest,
 * unstrip() everything except nowiki/html, apply some regex fix-ups,
 * run doBlockLevels(), restore nowiki/html with unstripNoWiki() and,
 * if $wgUseTidy is set, pipe the result through tidy().
 *
 * @param string $text      wiki markup
 * @param object $title     title of the page being parsed (kept by reference
 *                          in $mTitle)
 * @param object $options   parser options, stored in $mOptions
 * @param bool $linestart   passed on to internalParse() and doBlockLevels()
 * @param bool $clearState  reset the parser state before parsing
 * @access public
 * @return ParserOutput the parser's $mOutput object with its text set
 */
function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
	global $wgUseTidy;
	$fname = 'Parser::parse';
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if( !$wgUseTidy ) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			# (the punctuation is only looked at, not consumed, so there is
			# no second capture group to re-insert)
			'/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;',
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			'/<hr *>/i' => '<hr />',
			'/<br *>/i' => '<br />',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# Hex references are limited to real hex digits: the previous
			# "A-f" range also admitted "[", "\", "]", "^", "_" and "`".
			'/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
	} else {
		# Tidy takes care of <hr>, <br> and stray ampersands itself
		$fixtags = array(
			# french spaces, last one Guillemet-left
			'/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
			# french spaces, Guillemet-right
			'/(\\302\\253) /i' => '\\1&nbsp;',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>'
		);
	}
	$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	$text = $this->unstripNoWiki( $text, $this->mStripState );

	if( $wgUseTidy ) {
		$text = $this->tidy( $text );
	}

	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
/**
 * Build a random lower-case hexadecimal string from two mt_rand() draws,
 * each in the range 0..0x7fffffff.
 *
 * @access private
 * @static
 * @return string
 */
function getRandomString() {
	$first = mt_rand( 0, 0x7fffffff );
	$second = mt_rand( 0, 0x7fffffff );
	return sprintf( '%x%x', $first, $second );
}
/**
 * Replaces all occurrences of <$tag>content</$tag> in the text
 * with a random marker and returns the new text. The output parameter
 * $content will be an associative array filled with data of the form
 * $unique_marker => content.
 *
 * If $content is already set, the additional entries will be appended.
 * If $tag is set to STRIP_COMMENTS, the function will extract
 * <!-- HTML comments --> instead of a tag pair.
 *
 * An opening tag without a matching closing tag swallows the rest of the
 * text as its content. An opening tag that is the very last thing in the
 * text is dropped without creating a marker.
 *
 * @param string $tag          tag name (taken literally), or STRIP_COMMENTS
 * @param string $text         text to search
 * @param array  &$content     receives marker => extracted content
 * @param string $uniq_prefix  prefix put in front of every marker
 * @return string the text with each extracted section replaced by its marker
 * @access private
 * @static
 */
function extractTags($tag, $text, &$content, $uniq_prefix = ''){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}

	# The delimiters do not change while scanning, so build them once
	if ( $tag == STRIP_COMMENTS ) {
		$openPattern = '/<!--/i';
		$closePattern = '/-->/i';
	} else {
		# Quote the name so that a tag containing regex metacharacters
		# is matched literally
		$quotedTag = preg_quote( $tag, '/' );
		$openPattern = "/<\\s*$quotedTag\\s*>/i";
		$closePattern = "/<\\/\\s*$quotedTag\\s*>/i";
	}

	$n = 1;
	$stripped = '';
	while ( '' != $text ) {
		$p = preg_split( $openPattern, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
			# No (further) opening tag, or nothing follows it
			$text = '';
		} else {
			$q = preg_split( $closePattern, $p[1], 2 );
			$marker = $rnd . sprintf('%08X', $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# Without a closing tag there is no second element: everything
			# was consumed as content and nothing is left to scan
			$text = isset( $q[1] ) ? $q[1] : '';
		}
	}
	return $stripped;
}
/**
 * Strips html (if enabled), nowiki, math, pre, optionally HTML comments,
 * and all registered extension tags from the text, replacing each section
 * with a unique marker.
 *
 * When the output type is OT_HTML the extracted contents are rendered
 * (escaped, passed to renderMath() or to the extension callback);
 * otherwise they are stored wrapped in their original tags.
 * Returns the text, and fills $state with the data needed in unstrip().
 * If $state is already a valid strip state, the new entries are added to it.
 *
 * @param string $text          text to process
 * @param array  &$state        strip state: tag name => ( marker => content )
 * @param bool   $stripcomments when set, HTML comments <!-- like this -->
 *  will be stripped in addition to other tags. This is important
 *  for section editing, where these comments cause confusion when
 *  counting the sections in the wikisource
 * @return string text with markers in place of the stripped sections
 *
 * @access private
 */
function strip( $text, &$state, $stripcomments = false ) {
	global $wgRawHtml, $wgWhitelistEdit;

	$render = ($this->mOutputType == OT_HTML);
	$html_content = array();
	$nowiki_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();
	$ext_content = array();

	# NOTE: occurrences of the marker prefix in the input text itself are
	# not escaped before the markers are inserted.
	$uniq_prefix = UNIQ_PREFIX;

	# html
	if( $wgRawHtml && $wgWhitelistEdit ) {
		$text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
		if ( !$render ) {
			# When rendering the content is kept raw and unchecked for
			# validity; otherwise put the tags back around it.
			foreach( $html_content as $marker => $content ) {
				$html_content[$marker] = '<html>'.$content.'</html>';
			}
		}
	}

	# nowiki
	$text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ) {
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
		}
	}

	# math
	$text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the source with escaped tags.
				# The closing tag needs its slash.
				$math_content[$marker] = '&lt;math&gt;'.$content.'&lt;/math&gt;';
			}
		} else {
			$math_content[$marker] = '<math>'.$content.'</math>';
		}
	}

	# pre
	$text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
		} else {
			$pre_content[$marker] = '<pre>'.$content.'</pre>';
		}
	}

	# Comments
	if( $stripcomments ) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = '<!--'.$content.'-->';
		}
	}

	# Extensions
	foreach ( $this->mTagHooks as $tag => $callback ) {
		$ext_content[$tag] = array();
		$text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
		foreach( $ext_content[$tag] as $marker => $content ) {
			if ( $render ) {
				$ext_content[$tag][$marker] = $callback( $content );
			} else {
				$ext_content[$tag][$marker] = "<$tag>$content</$tag>";
			}
		}
	}

	$stripped = array(
		'html' => $html_content,
		'nowiki' => $nowiki_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content,
	) + $ext_content;

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		foreach ( $stripped as $tag => $array ) {
			if ( isset( $state[$tag] ) && is_array( $state[$tag] ) ) {
				$state[$tag] = $state[$tag] + $array;
			} else {
				# Bucket not present yet (e.g. a state created by
				# insertStripItem(), or an extension tag seen for the
				# first time): add it rather than losing the markers
				$state[$tag] = $array;
			}
		}
	} else {
		$state = $stripped;
	}
	return $text;
}
/**
 * Restores everything removed by strip() except the nowiki and html
 * sections (math, pre, comments, extension tags, strip items).
 *
 * Always call unstripNoWiki() after this one.
 *
 * @param string $text   text containing markers
 * @param array  &$state strip state: tag name => ( marker => content )
 * @return string text with the markers replaced by their content
 * @access private
 */
function unstrip( $text, &$state ) {
	if ( !is_array( $state ) ) {
		# Nothing has been stripped
		return $text;
	}
	# Must expand in reverse order, otherwise nested tags will be corrupted.
	# Iterating over reversed copies (instead of end()/prev() with a
	# "!== false" test) keeps a stored value of false from ending the
	# loop early and leaving the remaining markers in the text.
	foreach ( array_reverse( $state, true ) as $tag => $contentDict ) {
		$tag = (string)$tag;
		if ( $tag === 'nowiki' || $tag === 'html' || !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, (string)$content, $text );
		}
	}
	return $text;
}
/**
 * Restores the nowiki sections and, if $wgRawHtml is set, the html
 * sections removed by strip().
 *
 * Always call this after unstrip() to preserve the order.
 *
 * @param string $text   text containing markers
 * @param array  &$state strip state: tag name => ( marker => content )
 * @return string text with the markers replaced by their content
 * @access private
 */
function unstripNoWiki( $text, &$state ) {
	global $wgRawHtml;

	$tags = array( 'nowiki' );
	if ( $wgRawHtml ) {
		$tags[] = 'html';
	}

	foreach ( $tags as $tag ) {
		if ( !isset( $state[$tag] ) || !is_array( $state[$tag] ) ) {
			# Nothing of this kind was stripped
			continue;
		}
		# Must expand in reverse order, otherwise nested tags will be corrupted
		foreach ( array_reverse( $state[$tag], true ) as $marker => $content ) {
			$text = str_replace( $marker, (string)$content, $text );
		}
	}
	return $text;
}
/**
 * Add an item to the strip state.
 * Returns the unique marker which must be inserted into the stripped text.
 * The marker will be replaced with the original text in unstrip().
 *
 * @param string $text   content to store
 * @param array  &$state strip state; created if empty
 * @return string the marker that stands for $text
 * @access private
 */
function insertStripItem( $text, &$state ) {
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Same buckets that strip() creates and later merges into, so a
		# state started here can be handed to strip() afterwards
		$state = array(
			'html' => array(),
			'nowiki' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
/**
 * Return the list of HTML attribute names that are allowed in wiki text
 * (no scripting attributes). Each name is listed once.
 *
 * @return array list of lower-case attribute names
 * @access private
 */
function getHTMLattrs () {
	$htmlattrs = array( # Allowed attributes--no scripting, etc.
		'title', 'align', 'lang', 'dir', 'width', 'height',
		'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
		'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
		/* FONT */ 'type', 'start', 'value', 'compact',
		/* For various lists, mostly deprecated but safe */
		'summary', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan', /* Tables */
		'id', 'class', 'name', 'style' /* For CSS */
	);
	return $htmlattrs ;
}
/**
 * Remove non approved attributes and javascript in css
 *
 * Every name[=value] pair whose name is not in getHTMLattrs() is removed.
 * If the remaining text contains a style attribute that looks like it
 * holds an expression, a URL scheme or url(), all attributes are dropped.
 *
 * @param string $t attribute text of an HTML tag
 * @return string the filtered, trimmed attribute text
 * @access private
 */
function fixTagAttributes ( $t ) {
	if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# This is done with a callback rather than an /e (eval) replacement:
	# the eval string interpolated the attribute value inside double
	# quotes, so a value such as {${...}} was executed as PHP code.
	$t = preg_replace_callback(
		'/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/',
		array( $this, 'fixTagAttributesCallback' ),
		$t );

	$t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		'/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
		wfMungeToUtf8( $t ) ) )
	{
		$t='';
	}
	return trim ( $t ) ;
}

/**
 * Helper for fixTagAttributes(): decide what one name[=value] match
 * is replaced with.
 *
 * @param array $m regex match: [1] = attribute name, [3] = value (if any)
 * @return string "name", "name=value", or '' when the name is not allowed
 * @access private
 */
function fixTagAttributesCallback( $m ) {
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
		return '';
	}
	# The value group is absent from the match when no "=value" was given
	$value = isset( $m[3] ) ? $m[3] : '';
	if ( $value !== '' ) {
		# The value is copied verbatim, including any quote characters
		return $m[1] . '=' . $value;
	}
	return $m[1];
}
/**
 * Interface with html tidy, used if $wgUseTidy = true
 *
 * Wraps the text in a minimal XHTML document, pipes it through the
 * external program $wgTidyBin and returns whatever the program prints.
 * If nothing comes back for a non-empty input, the original text is
 * returned with an HTML comment appended.
 *
 * @param string $text HTML fragment
 * @return string output of tidy, or the annotated input on failure
 * @access private
 */
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = 'Parser::tidy';
	wfProfileIn( $fname );

	$cleansource = '';

	# Work on a copy: appending to the global itself would add another
	# encoding switch to the command line on every call.
	$opts = $wgTidyOpts;
	$sameEncoding = ( $wgInputEncoding == $wgOutputEncoding );
	switch( strtoupper( $wgOutputEncoding ) ) {
		case 'ISO-8859-1':
			$opts .= $sameEncoding ? ' -latin1' : ' -raw';
			break;
		case 'UTF-8':
			$opts .= $sameEncoding ? ' -utf8' : ' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';

	# stdin and stdout are pipes, stderr is discarded
	$descriptorspec = array(
		0 => array('pipe', 'r'),
		1 => array('pipe', 'w'),
		2 => array('file', '/dev/null', 'a')
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		# The exit status is not used
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
/**
 * Parse the wiki syntax used to render tables
 *
 * Works line by line. Recognised line starts (after trimming):
 *   {|  open a table (optionally preceded by colons for indentation)
 *   |}  close the table
 *   |-  start a new row
 *   |+  caption
 *   |   data cell(s), separated by ||
 *   !   header cell(s), separated by !! or ||
 * Lines outside a table are left untouched. Tables still open at the end
 * of the text are closed automatically.
 *
 * @param string $t wiki text
 * @return string text with table markup converted to HTML
 * @access private
 */
function doTableStuff ( $t ) {
	$fname = 'Parser::doTableStuff';
	wfProfileIn( $fname );

	$t = explode ( "\n" , $t ) ;
	# One stack entry per currently open (possibly nested) table:
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	$indent_level = 0; # indent level of the table
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
			# Table start; leading colons become <dl><dd> wrappers
			$indent_level = strlen( $matches[1] );
			$t[$k] = "\n" .
				str_repeat( '<dl><dd>', $indent_level ) .
				'<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , '' ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( '|}' == substr ( $x , 0 , 2 ) ) {
			# Table end: close the open cell and row first
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
		}
		else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
			$x = substr ( $x , 1 ) ;
			while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = '' ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is written when the first cell of the row
			# arrives; only remember its attributes here
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
			if ( '|+' == substr ( $x , 0 , 2 ) ) {
				$fc = '+' ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
			$after = explode ( '||' , $after ) ;
			$t[$k] = '' ;
			foreach ( $after AS $theline )
			{
				$z = '' ;
				if ( $fc != '+' )
				{
					# Cells need a row; open one if none is open
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , '' ) ;
				}

				# Close the previous cell of this table, if any
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
				if ( $fc == '|' ) $l = 'td' ;
				else if ( $fc == '!' ) $l = 'th' ;
				else if ( $fc == '+' ) $l = 'caption' ;
				else $l = '' ;
				array_push ( $ltd , $l ) ;

				# "attributes|content" or just "content"
				$y = explode ( '|' , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) {
					$cell = "{$z}<{$l}>{$y[0]}" ;
				} else {
					$cell = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				}
				$t[$k] .= $cell ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the element that was actually opened
		# (td, th or caption)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = '</'.$l.'>' ;
		if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
		$t[] = '</table>' ;
	}

	$t = implode ( "\n" , $t ) ;
	wfProfileOut( $fname );
	return $t ;
}
/**
 * Helper function for parse() that transforms wiki markup into
 * HTML. Only called for $mOutputType == OT_HTML.
 *
 * The individual passes run in a fixed order: HTML clean-up, variable
 * replacement, language conversion, horizontal rules, headings, optional
 * date reformatting, quotes, external links, magic links, internal links
 * (twice), tables, heading formatting and finally the skin's own
 * content transformation.
 *
 * @param string $text      wiki markup with protected sections stripped
 * @param bool   $linestart accepted from parse(); not used in this function
 * @param array  $args      passed to replaceVariables()
 * @param bool   $isMain    passed to formatHeadings()
 * @return string the transformed text
 * @access private
 */
function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
	global $wgContLang;

	$profileName = 'Parser::internalParse';
	wfProfileIn( $profileName );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );
	$text = $wgContLang->convert( $text );

	# Four or more dashes at the start of a line make a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
	$text = $this->doHeadings( $text );

	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}

	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );
	$text = $this->doMagicLinks( $text );

	# Internal links are processed twice; the second pass replaces links
	# and images inside captions of images
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->replaceInternalLinks ( $text );

	$text = $this->doTableStuff( $text );
	$text = $this->formatHeadings( $text, $isMain );

	$skin =& $this->mOptions->getSkin();
	$text = $skin->transformContent( $text );

	wfProfileOut( $profileName );
	return $text;
}
/**
 * Replace special strings like "ISBN xxx" and "RFC xxx" with
 * magic external links. The GEO replacement only runs when
 * $wgUseGeoMode is set and true.
 *
 * @param string &$text text to process; modified in place
 * @return string the processed text (returned by reference)
 * @access private
 */
function &doMagicLinks( &$text ) {
	global $wgUseGeoMode;

	$text = $this->magicISBN( $text );
	if ( !empty( $wgUseGeoMode ) ) {
		$text = $this->magicGEO( $text );
	}
	$text = $this->magicRFC( $text );

	return $text;
}
/**
 * Parse ^^ tokens and return html
 *
 * Each ^^text^^ pair on a line becomes <small><sup>text</sup></small>.
 *
 * @param string $text
 * @return string
 * @access private
 */
function doExponent ( $text ) {
	$fname = 'Parser::doExponent';
	wfProfileIn( $fname);
	# Non-greedy, so that several exponents on one line are converted
	# separately instead of being merged into a single span
	$text = preg_replace('/\^\^(.*?)\^\^/','<small><sup>\\1</sup></small>', $text);
	wfProfileOut( $fname);
	return $text;
}
/**
 * Parse headers and return html
 *
 * A line that starts with one to six equals signs and contains the same
 * run of equals signs again, followed by whitespace or the end of the
 * line, becomes <h1>..<h6>. Deeper levels are handled first so that
 * "===" is not taken for "==".
 *
 * @param string $text
 * @return string
 * @access private
 */
function doHeadings( $text ) {
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );

	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( '=', $level );
		$pattern = '/^' . $marks . '(.+)' . $marks . '(\\s|$)/m';
		$replacement = '<h' . $level . '>\\1</h' . $level . '>\\2';
		$text = preg_replace( $pattern, $replacement, $text );
		$level--;
	}

	wfProfileOut( $fname );
	return $text;
}
/**
 * Replace single quotes with HTML markup
 *
 * The text is split into lines and every line is passed through
 * doQuotes() on its own.
 *
 * @access private
 * @return string the altered text
 */
function doAllQuotes( $text ) {
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );

	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes ( $line );
	}
	$outtext = implode( "\n", $converted );

	wfProfileOut( $fname );
	return $outtext;
}
/**
 * Helper function for doAllQuotes()
 *
 * Converts the apostrophe markup of a single line to HTML:
 * '' toggles <i>, ''' toggles <b>, ''''' toggles both.
 * Tags still open at the end of the line are closed.
 *
 * @param string $text one line of wiki text
 * @return string the line with <i>/<b> tags in place of the markup
 * @access private
 */
function doQuotes( $text ) {
# Split on runs of two or more apostrophes. With the delimiters captured,
# even indexes of $arr hold plain text and odd indexes hold the runs.
$arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
if (count ($arr) == 1)
return $text;
else
{
# First, do some preliminary work. This may shift some apostrophes from
# being mark-up to being text. It also counts the number of occurrences
# of bold and italics mark-ups.
$i = 0;
$numbold = 0;
$numitalics = 0;
foreach ($arr as $r)
{
if (($i % 2) == 1)
{
# If there are ever four apostrophes, assume the first is supposed to
# be text, and the remaining three constitute mark-up for bold text.
if (strlen ($arr[$i]) == 4)
{
$arr[$i-1] .= "'";
$arr[$i] = "'''";
}
# If there are more than 5 apostrophes in a row, assume they're all
# text except for the last 5.
else if (strlen ($arr[$i]) > 5)
{
$arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
$arr[$i] = "'''''";
}
# Count the number of occurrences of bold and italics mark-ups.
# A run of five apostrophes counts as one of each.
if (strlen ($arr[$i]) == 2) $numitalics++; else
if (strlen ($arr[$i]) == 3) $numbold++; else
if (strlen ($arr[$i]) == 5) { $numitalics++; $numbold++; }
}
$i++;
}
# If there is an odd number of both bold and italics, it is likely
# that one of the bold ones was meant to be an apostrophe followed
# by italics. Which one we cannot know for certain, but it is more
# likely to be one that has a single-letter word before it.
if (($numbold % 2 == 1) && ($numitalics % 2 == 1))
{
$i = 0;
# Index of the first ''' preceded by a single-letter word, by a longer
# word, or by a space; -1 means none found.
$firstsingleletterword = -1;
$firstmultiletterword = -1;
$firstspace = -1;
foreach ($arr as $r)
{
if (($i % 2 == 1) and (strlen ($r) == 3))
{
# Last and second-to-last character of the text before this run
$x1 = substr ($arr[$i-1], -1);
$x2 = substr ($arr[$i-1], -2, 1);
if ($x1 == ' ') {
if ($firstspace == -1) $firstspace = $i;
} else if ($x2 == ' ') {
if ($firstsingleletterword == -1) $firstsingleletterword = $i;
} else {
if ($firstmultiletterword == -1) $firstmultiletterword = $i;
}
}
$i++;
}
# If there is a single-letter word, use it!
if ($firstsingleletterword > -1)
{
$arr [ $firstsingleletterword ] = "''";
$arr [ $firstsingleletterword-1 ] .= "'";
}
# If not, but there's a multi-letter word, use that one.
else if ($firstmultiletterword > -1)
{
$arr [ $firstmultiletterword ] = "''";
$arr [ $firstmultiletterword-1 ] .= "'";
}
# ... otherwise use the first one that has neither.
# (notice that it is possible for all three to be -1 if, for example,
# there is only one pentuple-apostrophe in the line)
else if ($firstspace > -1)
{
$arr [ $firstspace ] = "''";
$arr [ $firstspace-1 ] .= "'";
}
}
# Now let's actually convert our apostrophic mush to HTML!
# $state records which tags are open, outermost first:
#   ''     nothing open
#   'b'    <b>            'i'    <i>
#   'bi'   <b><i>         'ib'   <i><b>
#   'both' a ''''' was seen with nothing open; the nesting order is not
#          decided yet, so the following text is collected in $buffer
$output = '';
$buffer = '';
$state = '';
$i = 0;
foreach ($arr as $r)
{
if (($i % 2) == 0)
{
# Plain text
if ($state == 'both')
$buffer .= $r;
else
$output .= $r;
}
else
{
if (strlen ($r) == 2)
{
# Italics toggle
if ($state == 'i')
{ $output .= '</i>'; $state = ''; }
else if ($state == 'bi')
{ $output .= '</i>'; $state = 'b'; }
else if ($state == 'ib')
{ $output .= '</b></i><b>'; $state = 'b'; }
else if ($state == 'both')
{ $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
else # $state can be 'b' or ''
{ $output .= '<i>'; $state .= 'i'; }
}
else if (strlen ($r) == 3)
{
# Bold toggle
if ($state == 'b')
{ $output .= '</b>'; $state = ''; }
else if ($state == 'bi')
{ $output .= '</i></b><i>'; $state = 'i'; }
else if ($state == 'ib')
{ $output .= '</b>'; $state = 'i'; }
else if ($state == 'both')
{ $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
else # $state can be 'i' or ''
{ $output .= '<b>'; $state .= 'b'; }
}
else if (strlen ($r) == 5)
{
# Bold and italics toggle
if ($state == 'b')
{ $output .= '</b><i>'; $state = 'i'; }
else if ($state == 'i')
{ $output .= '</i><b>'; $state = 'b'; }
else if ($state == 'bi')
{ $output .= '</i></b>'; $state = ''; }
else if ($state == 'ib')
{ $output .= '</b></i>'; $state = ''; }
else if ($state == 'both')
{ $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
else # ($state == '')
{ $buffer = ''; $state = 'both'; }
}
}
$i++;
}
# Now close all remaining tags. Notice that the order is important.
if ($state == 'b' || $state == 'ib')
$output .= '</b>';
if ($state == 'i' || $state == 'bi' || $state == 'ib')
$output .= '</i>';
if ($state == 'bi')
$output .= '</b>';
if ($state == 'both')
$output .= '<b><i>'.$buffer.'</i></b>';
return $output;
}
}
/**
 * Replace external links
 *
 * Bracketed links ([url] or [url text]) are turned into anchors; the text
 * between them is handed to replaceFreeExternalLinks().
 *
 * Note: we have to do external links before the internal ones,
 * and otherwise take great care in the order of things here, so
 * that we don't end up interpreting some URLs twice.
 *
 * @param string $text
 * @return string
 * @access private
 */
function replaceExternalLinks( $text ) {
	$fname = 'Parser::replaceExternalLinks';
	wfProfileIn( $fname );

	$sk =& $this->mOptions->getSkin();
	$linktrail = wfMsgForContent('linktrail');

	# After the leading text, $bits comes in groups of four:
	# url, protocol, link text, text following the link
	$bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$s = $this->replaceFreeExternalLinks( array_shift( $bits ) );

	for ( $i = 0; $i < count( $bits ); $i += 4 ) {
		$url = $bits[$i];
		$protocol = $bits[$i + 1];
		$label = $bits[$i + 2];
		$trail = $bits[$i + 3];

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeImageLink( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';
		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]
			if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
				# Autonumber if allowed
				$label = '[' . ++$this->mAutonumber . ']';
			} else {
				# Otherwise just use the URL
				$label = htmlspecialchars( $url );
			}
		} elseif ( preg_match( $linktrail, $trail, $m2 ) ) {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# and a trail directly behind the closing bracket
			$dtrail = $m2[1];
			$trail = $m2[2];
		}

		$encUrl = htmlspecialchars( $url );

		# Bit in parentheses showing the URL for the printable version;
		# left out when the link text already is the URL, or when the skin
		# suppresses the expansion
		$paren = '';
		$textIsUrl = ( $url == $label )
			|| preg_match( "!$protocol://" . preg_quote( $label, '/' ) . "/?$!", $url );
		if ( !$textIsUrl && !$sk->suppressUrlExpansion() ) {
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
		}

		# Process the trail (i.e. everything after this link up until start of the next link),
		# replacing any non-bracketed links
		$trail = $this->replaceFreeExternalLinks( $trail );

		$la = $sk->getExternalLinkAttributes( $url, $label );

		# The URL goes into the href as it was written in the text.
		# This means that users can paste URLs directly into the text
		# Funny characters like &ouml; aren't valid in URLs anyway
		# This was changed in August 2004
		$s .= "<a href=\"{$url}\"{$la}>{$label}</a>{$dtrail}{$paren}{$trail}";
	}

	wfProfileOut( $fname );
	return $s;
}
/**
 * Replace anything that looks like a URL with a link
 *
 * Trailing punctuation is kept out of the link, and a URL that points
 * to an image becomes an image if maybeMakeImageLink() allows it.
 *
 * @param string $text
 * @return string
 * @access private
 */
function replaceFreeExternalLinks( $text ) {
	# After the leading text, $bits alternates between "protocol:" and
	# whatever follows it
	$bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$s = array_shift( $bits );
	$sk =& $this->mOptions->getSkin();

	for ( $i = 0; $i < count( $bits ); $i += 2 ) {
		$protocol = $bits[$i];
		$remainder = $bits[$i + 1];

		if ( !preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
			# Nothing usable behind the protocol; leave the text alone
			$s .= $protocol . $remainder;
			continue;
		}

		# Found some characters after the protocol that look promising
		$url = $protocol . $m[1];
		$trail = $m[2];

		# Move trailing punctuation to $trail
		$sep = ',;\.:!?';
		# If there is no left bracket, then consider right brackets fair game too
		if ( strpos( $url, '(' ) === false ) {
			$sep .= ')';
		}
		$numSepChars = strspn( strrev( $url ), $sep );
		if ( $numSepChars ) {
			$trail = substr( $url, -$numSepChars ) . $trail;
			$url = substr( $url, 0, -$numSepChars );
		}

		# Replace &amp; from obsolete syntax with &
		$url = str_replace( '&amp;', '&', $url );

		# Is this an external image? If not, make a link
		$link = $this->maybeMakeImageLink( $url );
		if ( $link === false ) {
			$link = $sk->makeExternalLink( $url, $url );
		}
		$s .= $link . $trail;
	}
	return $s;
}
/**
 * Make an image if it's allowed
 *
 * @param string $url
 * @return mixed result of the skin's makeImage() if external images are
 *  allowed and $url matches EXT_IMAGE_REGEX, false otherwise
 * @access private
 */
function maybeMakeImageLink( $url ) {
	$sk =& $this->mOptions->getSkin();

	if ( !$this->mOptions->getAllowExternalImages() ) {
		return false;
	}
	if ( !preg_match( EXT_IMAGE_REGEX, $url ) ) {
		return false;
	}
	# Image found
	return $sk->makeImage( htmlspecialchars( $url ) );
}
/**
* Process [[ ]] wikilinks
*
* @access private
*/
function replaceInternalLinks( $s ) {
# Splits $s on '[[' and rebuilds it piece by piece, replacing every
# well-formed [[target|text]]trail with the HTML returned by the skin
# (or with nothing, for language and category links, which are recorded
# on $this->mOutput instead). Pieces that do not form a valid link are
# re-emitted unchanged with their '[[' restored.
#
# @param string $s wikitext to process
# @return string text with internal links replaced
global $wgLang, $wgContLang, $wgLinkCache;
global $wgNamespacesWithSubpages;
static $fname = 'Parser::replaceInternalLinks' ;
wfProfileIn( $fname );
wfProfileIn( $fname.'-setup' );
static $tc = FALSE;
# the % is needed to support urlencoded titles as well
if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
$sk =& $this->mOptions->getSkin();
# NOTE(review): $redirect (and the $wgLang global) are not used anywhere
# in this function body.
$redirect = MagicWord::get ( MAG_REDIRECT ) ;
# A space is prepended before splitting and stripped again from the first
# piece, so the text before the first '[[' is recovered via substr().
$a = explode( '[[', ' ' . $s );
$s = array_shift( $a );
$s = substr( $s, 1 );
# Match a link having the form [[namespace:link|alternate]]trail
static $e1 = FALSE;
if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
# Match the end of a line for a word that's not followed by whitespace,
# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
$useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
# $nottalk is one of the conditions for treating an interwiki link as a
# language link further down.
$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
if ( $useLinkPrefixExtension ) {
# Peel a trailing word off the leading text; it becomes the prefix of
# the first link.
if ( preg_match( $e2, $s, $m ) ) {
$first_prefix = $m[2];
$s = $m[1];
} else {
$first_prefix = false;
}
} else {
$prefix = '';
}
wfProfileOut( $fname.'-setup' );
# Process each piece that followed a '[['
foreach ( $a as $line ) {
wfProfileIn( $fname.'-prefixhandling' );
if ( $useLinkPrefixExtension ) {
# The prefix is the trailing word of the output accumulated so far.
if ( preg_match( $e2, $s, $m ) ) {
$prefix = $m[2];
$s = $m[1];
} else {
$prefix='';
}
# first link
if($first_prefix) {
$prefix = $first_prefix;
$first_prefix = false;
}
}
wfProfileOut( $fname.'-prefixhandling' );
if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
# $m[1] = link target, $m[2] = optional display text, $m[3] = trail
$text = $m[2];
# fix up urlencoded title texts
if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
$trail = $m[3];
} else { # Invalid form; output directly
$s .= $prefix . '[[' . $line ;
continue;
}
# Valid link forms:
# Foobar -- normal
# :Foobar -- override special treatment of prefix (images, language links)
# /Foobar -- convert to CurrentPage/Foobar
# /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
# Look at the first character
$c = substr($m[1],0,1);
$noforce = ($c != ':');
# subpage
if( $c == '/' ) {
# / at end means we don't want the slash to be shown
if(substr($m[1],-1,1)=='/') {
$m[1]=substr($m[1],1,strlen($m[1])-2);
$noslash=$m[1];
} else {
$noslash=substr($m[1],1);
}
# Some namespaces don't allow subpages
if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) {
# subpages allowed here
$link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
if( '' == $text ) {
$text= $m[1];
} # this might be changed for ugliness reasons
} else {
# no subpage allowed, use standard link
$link = $noslash;
}
} elseif( $noforce ) { # no subpage
$link = $m[1];
} else {
# We don't want to keep the first character
$link = substr( $m[1], 1 );
}
# Remember whether display text was omitted; category sort keys depend on it.
$wasblank = ( '' == $text );
if( $wasblank ) $text = $link;
$nt = Title::newFromText( $link );
if( !$nt ) {
# No title object could be built; emit the original markup unchanged.
$s .= $prefix . '[[' . $line;
continue;
}
$ns = $nt->getNamespace();
$iw = $nt->getInterWiki();
# Link not escaped by : , create the various objects
if( $noforce ) {
# Interwikis
if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
# Language link: recorded on the output object, not rendered inline.
array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
$tmp = $prefix . $trail ;
# Drop the surrounding text entirely when it is only whitespace.
$s .= (trim($tmp) == '')? '': $tmp;
continue;
}
if ( $ns == NS_IMAGE ) {
$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
$wgLinkCache->addImageLinkObj( $nt );
continue;
}
if ( $ns == NS_CATEGORY ) {
$t = $nt->getText() ;
$nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).':'.$t ) ;
$wgLinkCache->suspend(); # Don't save in links/brokenlinks
# Temporarily switch off post-parse link colouring, restoring the
# previous setting after the category link has been built.
$pPLC=$sk->postParseLinkColour();
$sk->postParseLinkColour( false );
$t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
$sk->postParseLinkColour( $pPLC );
$wgLinkCache->resume();
# Sort key: the explicit text after '|' if given, otherwise the
# current page's own name.
if ( $wasblank ) {
if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
$sortkey = $this->mTitle->getText();
} else {
$sortkey = $this->mTitle->getPrefixedText();
}
} else {
$sortkey = $text;
}
$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
$this->mOutput->mCategoryLinks[] = $t ;
$s .= $prefix . $trail ;
continue;
}
}
if( ( $nt->getPrefixedText() === $this->mTitle->getPrefixedText() ) &&
( strpos( $link, '#' ) === FALSE ) ) {
# Self-links are handled specially; generally de-link and change to bold.
$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
continue;
}
# Special and Media are pseudo-namespaces; no pages actually exist in them
if( $ns == NS_MEDIA ) {
$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
$wgLinkCache->addImageLinkObj( $nt );
continue;
} elseif( $ns == NS_SPECIAL ) {
$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
continue;
}
# Ordinary link: here the skin receives trail and prefix as arguments
# rather than having them concatenated around its result.
$s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
}
wfProfileOut( $fname );
return $s;
}
/**#@+
* Used by doBlockLevels()
* @access private
*/
/* private */ function closeParagraph() {
    # Build the closing tag for the section currently open, or an empty
    # string when no section is open.
    $closingTag = ( '' != $this->mLastSection )
        ? '</' . $this->mLastSection . ">\n"
        : '';

    # Closing a paragraph always resets the section and <pre> tracking.
    $this->mInPre = false;
    $this->mLastSection = '';

    return $closingTag;
}
# getCommon() returns the length of the longest common prefix
# of both arguments, i.e. the number of leading characters they share.
#
/* private */ function getCommon( $st1, $st2 ) {
    # Returns the number of leading characters shared by $st1 and $st2
    # (0 when either string is empty or the first characters differ).
    #
    # String offsets use [] rather than the {} form: the curly-brace
    # syntax is deprecated in PHP 7.4 and a parse error in PHP 8, while
    # [] works on every PHP version.
    $limit = min( strlen( $st1 ), strlen( $st2 ) );
    for ( $i = 0; $i < $limit; ++$i ) {
        if ( $st1[$i] !== $st2[$i] ) { break; }
    }
    # $i is either the index of the first mismatch or the length of the
    # shorter string.
    return $i;
}
# These next three functions open, continue, and close the list
# element appropriate to the prefix character passed into them.
#
/* private */ function openList( $char ) {
    # Any open paragraph is closed first; its closing markup precedes the
    # list opening tags.
    $result = $this->closeParagraph();
    switch ( $char ) {
        case '*':
            $result .= '<ul><li>';
            break;
        case '#':
            $result .= '<ol><li>';
            break;
        case ':':
            $result .= '<dl><dd>';
            break;
        case ';':
            $result .= '<dl><dt>';
            # Remember that a <dt> is open so it is closed with </dt> later.
            $this->mDTopen = true;
            break;
        default:
            # Unknown prefix character: the error marker replaces
            # (not appends to) whatever closeParagraph() returned.
            $result = '<!-- ERR 1 -->';
    }
    return $result;
}
/* private */ function nextItem( $char ) {
    # Bulleted and numbered lists: close the item and open the next one.
    if ( '*' == $char || '#' == $char ) {
        return '</li><li>';
    }
    # Anything that is not a definition-list character is an error.
    if ( ':' != $char && ';' != $char ) {
        return '<!-- ERR 2 -->';
    }
    # Definition lists: close whichever of <dt>/<dd> is currently open,
    # then open a term (';') or a definition (':') and record which.
    $close = $this->mDTopen ? '</dt>' : '</dd>';
    $isTerm = ( ';' == $char );
    $this->mDTopen = $isTerm;
    return $close . ( $isTerm ? '<dt>' : '<dd>' );
}
/* private */ function closeList( $char ) {
    # Returns the closing markup (followed by a newline) for the list type
    # identified by $char. Only '*', '#' and ':' are recognised; anything
    # else, including ';', yields the error marker without a newline.
    switch ( $char ) {
        case '*':
            $closing = '</li></ul>';
            break;
        case '#':
            $closing = '</li></ol>';
            break;
        case ':':
            if ( $this->mDTopen ) {
                # A term is still open: close it and clear the flag.
                $this->mDTopen = false;
                $closing = '</dt></dl>';
            } else {
                $closing = '</dd></dl>';
            }
            break;
        default:
            return '<!-- ERR 3 -->';
    }
    return $closing . "\n";
}
/**#@-*/
/**
* Make lists from lines starting with ':', '*', '#', etc.
*
* @access private
* @return string the lists rendered as HTML
*/
function doBlockLevels( $text, $linestart ) {
    # Walks $text line by line, emitting list markup for lines starting
    # with * # : ; and wrapping other text in <p> / <pre> sections.
    #
    # @param string $text      text to process
    # @param bool   $linestart when false, the first line is copied to the
    #                          output untouched
    # @return string the processed text
    #
    # String offsets use [] / substr() rather than the {} form: the
    # curly-brace syntax is deprecated in PHP 7.4 and a parse error in
    # PHP 8.
    $fname = 'Parser::doBlockLevels';
    wfProfileIn( $fname );

    # Parsing through the text line by line.  The main thing
    # happening here is handling of block-level elements p, pre,
    # and making lists from lines starting with * # : etc.
    #
    $textLines = explode( "\n", $text );

    $lastPrefix = $output = $lastLine = '';
    $this->mDTopen = $inBlockElem = false;
    $prefixLength = 0;
    # Pending paragraph markup that is only emitted once the following
    # line is known; false when nothing is pending.
    $paragraphStack = false;

    if ( !$linestart ) {
        $output .= array_shift( $textLines );
    }
    foreach ( $textLines as $oLine ) {
        $lastPrefixLength = strlen( $lastPrefix );
        $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
        $preOpenMatch = preg_match('/<pre/i', $oLine );
        if ( !$this->mInPre ) {
            # Multiple prefixes may abut each other for nested lists.
            $prefixLength = strspn( $oLine, '*#:;' );
            $pref = substr( $oLine, 0, $prefixLength );

            # $pref2 treats ';' as ':' so that a term and its definitions
            # compare as belonging to the same list.
            $pref2 = str_replace( ';', ':', $pref );
            $t = substr( $oLine, $prefixLength );
            $this->mInPre = !empty($preOpenMatch);
        } else {
            # Don't interpret any other prefixes in preformatted text
            $prefixLength = 0;
            $pref = $pref2 = '';
            $t = $oLine;
        }

        # List generation
        if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
            # Same as the last item, so no need to deal with nesting or opening stuff
            $output .= $this->nextItem( substr( $pref, -1 ) );
            $paragraphStack = false;

            if ( substr( $pref, -1 ) == ';') {
                # The one nasty exception: definition lists work like this:
                # ; title : definition text
                # So we check for : in the remainder text to split up the
                # title and definition, without b0rking links.
                # FIXME: This is not foolproof. Something better in Tokenizer might help.
                if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                    $term = $match[1];
                    $output .= $term . $this->nextItem( ':' );
                    $t = $match[2];
                }
            }
        } elseif( $prefixLength || $lastPrefixLength ) {
            # Either open or close a level...
            $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
            $paragraphStack = false;

            # Close every level of the previous line that is not shared.
            while( $commonPrefixLength < $lastPrefixLength ) {
                $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
                --$lastPrefixLength;
            }
            if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                $output .= $this->nextItem( $pref[$commonPrefixLength-1] );
            }
            # Open every new level introduced by this line.
            while ( $prefixLength > $commonPrefixLength ) {
                $char = substr( $pref, $commonPrefixLength, 1 );
                $output .= $this->openList( $char );

                if ( ';' == $char ) {
                    # FIXME: This is dupe of code above
                    if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                        $term = $match[1];
                        $output .= $term . $this->nextItem( ':' );
                        $t = $match[2];
                    }
                }
                ++$commonPrefixLength;
            }
            $lastPrefix = $pref2;
        }
        if( 0 == $prefixLength ) {
            # No prefix (not in list)--go to paragraph mode
            $uniq_prefix = UNIQ_PREFIX;
            // XXX: use a stack for nestable elements like span, table and div
            $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
            $closematch = preg_match(
                '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
                '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
            if ( $openmatch or $closematch ) {
                $paragraphStack = false;
                $output .= $this->closeParagraph();
                if($preOpenMatch and !$preCloseMatch) {
                    $this->mInPre = true;
                }
                if ( $closematch ) {
                    $inBlockElem = false;
                } else {
                    $inBlockElem = true;
                }
            } else if ( !$inBlockElem && !$this->mInPre ) {
                # substr() instead of an offset read: $t may be empty here,
                # and reading offset 0 of an empty string raises a
                # notice/warning. The comparison result is unchanged.
                if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
                    // pre
                    if ($this->mLastSection != 'pre') {
                        $paragraphStack = false;
                        $output .= $this->closeParagraph().'<pre>';
                        $this->mLastSection = 'pre';
                    }
                    $t = substr( $t, 1 );
                } else {
                    // paragraph
                    if ( '' == trim($t) ) {
                        if ( $paragraphStack ) {
                            $output .= $paragraphStack.'<br />';
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else {
                            if ($this->mLastSection != 'p' ) {
                                $output .= $this->closeParagraph();
                                $this->mLastSection = '';
                                $paragraphStack = '<p>';
                            } else {
                                $paragraphStack = '</p><p>';
                            }
                        }
                    } else {
                        if ( $paragraphStack ) {
                            $output .= $paragraphStack;
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else if ($this->mLastSection != 'p') {
                            $output .= $this->closeParagraph().'<p>';
                            $this->mLastSection = 'p';
                        }
                    }
                }
            }
        }
        # The line itself is only written when no paragraph markup is
        # pending.
        if ($paragraphStack === false) {
            $output .= $t."\n";
        }
    }
    # Close any lists still open after the last line.
    while ( $prefixLength ) {
        $output .= $this->closeList( $pref2[$prefixLength-1] );
        --$prefixLength;
    }
    if ( '' != $this->mLastSection ) {
        $output .= '</' . $this->mLastSection . '>';
        $this->mLastSection = '';
    }

    wfProfileOut( $fname );
    return $output;
}
/**
* Return value of a magic variable (like PAGENAME)
*
* @access private
*/
function getVariableValue( $index ) {
    global $wgContLang, $wgSitename, $wgServer;

    # Maps a magic-word ID to its current value; IDs not listed here
    # yield NULL.
    $value = NULL;
    switch ( $index ) {
        case MAG_CURRENTMONTH:
            $value = $wgContLang->formatNum( date( 'm' ) );
            break;
        case MAG_CURRENTMONTHNAME:
            $value = $wgContLang->getMonthName( date('n') );
            break;
        case MAG_CURRENTMONTHNAMEGEN:
            $value = $wgContLang->getMonthNameGen( date('n') );
            break;
        case MAG_CURRENTDAY:
            $value = $wgContLang->formatNum( date('j') );
            break;
        case MAG_PAGENAME:
            $value = $this->mTitle->getText();
            break;
        case MAG_PAGENAMEE:
            $value = $this->mTitle->getPartialURL();
            break;
        case MAG_NAMESPACE:
            # Namespace name as given by the content language object
            # (patch by Dori).
            $value = $wgContLang->getNsText( $this->mTitle->getNamespace() );
            break;
        case MAG_CURRENTDAYNAME:
            # date('w') is 0-based; the language object is passed the
            # value plus one.
            $value = $wgContLang->getWeekdayName( date('w')+1 );
            break;
        case MAG_CURRENTYEAR:
            $value = $wgContLang->formatNum( date( 'Y' ) );
            break;
        case MAG_CURRENTTIME:
            $value = $wgContLang->time( wfTimestampNow(), false );
            break;
        case MAG_NUMBEROFARTICLES:
            $value = $wgContLang->formatNum( wfNumberOfArticles() );
            break;
        case MAG_SITENAME:
            $value = $wgSitename;
            break;
        case MAG_SERVER:
            $value = $wgServer;
            break;
    }
    return $value;
}
/**
* initialise the magic variables (like CURRENTMONTHNAME)
*
* @access private
*/
function initialiseVariables() {
    global $wgVariableIDs;

    # Rebuild the variable table from scratch: each magic word listed in
    # $wgVariableIDs adds itself to $this->mVariables together with its
    # current value.
    $this->mVariables = array();
    foreach ( $wgVariableIDs as $variableId ) {
        $magic =& MagicWord::get( $variableId );
        $value = $this->getVariableValue( $variableId );
        $magic->addToArray( $this->mVariables, $value );
    }
}
/**
* Replace magic variables, templates, and template arguments
* with the appropriate text. Templates are substituted recursively,
* taking care to avoid infinite loops.
*
* Note that the substitution depends on value of $mOutputType:
* OT_WIKI: only {{subst:}} templates
* OT_MSG: only magic variables
* OT_HTML: all templates and magic variables
*
* @param string $text The text to transform
* @param array $args Key-value pairs representing template parameters to substitute
* @access private
*/
function replaceVariables( $text, $args = array() ) {
    global $wgLang, $wgScript, $wgArticlePath;

    # Input larger than MAX_INCLUDE_SIZE is returned untouched.
    if ( strlen( $text ) > MAX_INCLUDE_SIZE ) {
        return $text;
    }

    $fname = 'Parser::replaceVariables';
    wfProfileIn( $fname );

    $legal = Title::legalChars();

    # This function recurses (via the substitution callbacks), so the
    # arguments of the current invocation live on a stack.
    array_push( $this->mArgStack, $args );

    # The callbacks are plain functions; they locate this parser through
    # the global, which has to be rebound via the GLOBALS array.
    $GLOBALS['wgCurParser'] =& $this;

    # 1) Magic variables: {{NAME}}
    $variablePattern = '/{{([' . $legal . ']*?)}}/';
    $text = preg_replace_callback( $variablePattern, 'wfVariableSubstitution', $text );

    # 2) Template arguments: {{{name}}} (HTML and wiki output only)
    if ( $this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI ) {
        $argumentPattern = '/{{{([' . $legal . ']*?)}}}/';
        $text = preg_replace_callback( $argumentPattern, 'wfArgSubstitution', $text );
    }

    # 3) Templates: {{name|arg|...}}
    $templatePattern = '/{{([' . $legal . ']*)(\\|.*?|)}}/s';
    $text = preg_replace_callback( $templatePattern, 'wfBraceSubstitution', $text );

    array_pop( $this->mArgStack );

    wfProfileOut( $fname );
    return $text;
}
/**
* Replace magic variables
* @access private
*/
function variableSubstitution( $matches ) {
    # Build the variable table on first use.
    if ( !$this->mVariables ) {
        $this->initialiseVariables();
    }

    $substitute = true;
    if ( $this->mOutputType == OT_WIKI ) {
        # In wiki output only variables prefixed with SUBST are replaced.
        # matchStartAndRemove() is handed $matches[1]; the original full
        # match in $matches[0] stays intact, so when nothing is
        # substituted below the {{subst:}} prefix is preserved (it may
        # belong to a template rather than a magic variable).
        $substMagic =& MagicWord::get( MAG_SUBST );
        $substitute = (bool)$substMagic->matchStartAndRemove( $matches[1] );
    }

    if ( $substitute && array_key_exists( $matches[1], $this->mVariables ) ) {
        $this->mOutput->mContainsOldMagic = true;
        return $this->mVariables[$matches[1]];
    }

    # Not a known variable (or not eligible): leave the text as it was.
    return $matches[0];
}
# Split template arguments
function getTemplateArgs( $argsString ) {
    # $argsString is either empty or starts with the '|' that separates
    # the template name from its first argument.
    if ( $argsString === '' ) {
        return array();
    }

    $args = explode( '|', substr( $argsString, 1 ) );

    # An argument whose '[[' and ']]' counts differ is glued to its
    # successor: the '|' between them belongs to link syntax, not to the
    # template parameter list. The merged argument is re-examined before
    # moving on; the final argument is never examined.
    $pos = 0;
    while ( $pos < count( $args ) - 1 ) {
        if ( substr_count( $args[$pos], '[[' ) == substr_count( $args[$pos], ']]' ) ) {
            $pos++;
            continue;
        }
        $args[$pos] .= '|' . $args[$pos + 1];
        array_splice( $args, $pos + 1, 1 );
    }

    return $args;
}
/**
* Return the text of a template, after recursively
* replacing any variables or templates within the template.
*
* @param array $matches The parts of the template
* $matches[1]: the title, i.e. the part before the |
* $matches[2]: the parameters (including a leading |), if any
* @return string the text of the template
* @access private
*/
function braceSubstitution( $matches ) {
# Resolves one {{...}} match. A chain of handlers is tried in order
# (triple braces, SUBST, MSG/MSGNW/INT, NS, LOCALURL/LOCALURLE, GRAMMAR,
# template cache, article lookup); the first one that sets $found decides
# the replacement text. If nothing is found the original match is returned.
global $wgLinkCache, $wgContLang;
$fname = 'Parser::braceSubstitution';
# State flags shared by the handlers below:
#   $found   - a replacement text has been determined
#   $nowiki  - MSGNW: was requested; result is escaped instead of parsed
#   $noparse - return $text without recursive expansion
#   $itcamefromthedatabase - text was fetched via Article, so section
#                            headers get tagged at the end
$found = false;
$nowiki = false;
$noparse = false;
$itcamefromthedatabase = false;
$title = NULL;
# $part1 is the bit before the first |, and must contain only title characters
# $args is a list of arguments, starting from index 0, not including $part1
$part1 = $matches[1];
# If the second subpattern matched anything, it will start with |
$args = $this->getTemplateArgs($matches[2]);
$argc = count( $args );
# {{{}}}
# A match containing triple braces is passed through unchanged.
if ( strpos( $matches[0], '{{{' ) !== false ) {
$text = $matches[0];
$found = true;
$noparse = true;
}
# SUBST
if ( !$found ) {
$mwSubst =& MagicWord::get( MAG_SUBST );
if ( $mwSubst->matchStartAndRemove( $part1 ) xor ($this->mOutputType == OT_WIKI) ) {
# One of two possibilities is true:
# 1) Found SUBST but not in the PST phase
# 2) Didn't find SUBST and in the PST phase
# In either case, return without further processing
$text = $matches[0];
$found = true;
$noparse = true;
}
}
# MSG, MSGNW and INT
if ( !$found ) {
# Check for MSGNW:
$mwMsgnw =& MagicWord::get( MAG_MSGNW );
if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
$nowiki = true;
} else {
# Remove obsolete MSG:
$mwMsg =& MagicWord::get( MAG_MSG );
$mwMsg->matchStartAndRemove( $part1 );
}
# Check if it is an internal message
$mwInt =& MagicWord::get( MAG_INT );
if ( $mwInt->matchStartAndRemove( $part1 ) ) {
# Internal messages count against the inclusion limit under an
# 'int:'-prefixed key.
if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
$text = wfMsgReal( $part1, $args, true );
$found = true;
}
}
}
# NS
if ( !$found ) {
# Check for NS: (namespace expansion)
$mwNs = MagicWord::get( MAG_NS );
if ( $mwNs->matchStartAndRemove( $part1 ) ) {
# A non-zero integer is used directly as the namespace index; anything
# else (including "0") is looked up as a lower-cased canonical name.
if ( intval( $part1 ) ) {
$text = $wgContLang->getNsText( intval( $part1 ) );
$found = true;
} else {
$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
if ( !is_null( $index ) ) {
$text = $wgContLang->getNsText( $index );
$found = true;
}
}
}
}
# LOCALURL and LOCALURLE
if ( !$found ) {
$mwLocal = MagicWord::get( MAG_LOCALURL );
$mwLocalE = MagicWord::get( MAG_LOCALURLE );
# $func names the Title method to call; '' means neither word matched.
if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
$func = 'getLocalURL';
} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
$func = 'escapeLocalURL';
} else {
$func = '';
}
if ( $func !== '' ) {
$title = Title::newFromText( $part1 );
if ( !is_null( $title ) ) {
# The first template argument, if any, is passed to the URL method.
if ( $argc > 0 ) {
$text = $title->$func( $args[0] );
} else {
$text = $title->$func();
}
$found = true;
}
}
}
# GRAMMAR
# Only considered when exactly one argument was supplied.
if ( !$found && $argc == 1 ) {
$mwGrammar =& MagicWord::get( MAG_GRAMMAR );
if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
$text = $wgContLang->convertGrammar( $args[0], $part1 );
$found = true;
}
}
# Template table test
# Did we encounter this template already? If yes, it is in the cache
# and we need to check for loops.
if ( !$found && isset( $this->mTemplates[$part1] ) ) {
# Infinite loop test
# A template already on the current recursion path is returned
# unparsed.
if ( isset( $this->mTemplatePath[$part1] ) ) {
$noparse = true;
$found = true;
}
# set $text to cached message.
$text = $this->mTemplates[$part1];
$found = true;
}
# Load from database
if ( !$found ) {
$title = Title::newFromText( $part1, NS_TEMPLATE );
if ( !is_null( $title ) && !$title->isExternal() ) {
# Check for excessive inclusion
$dbk = $title->getPrefixedDBkey();
if ( $this->incrementIncludeCount( $dbk ) ) {
# Still under the inclusion limit: fetch the article content.
# NOTE(review): the comment previously here read "This should never be
# reached", which does not match the visible control flow.
$article = new Article( $title );
$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
if ( $articleContent !== false ) {
$found = true;
$text = $articleContent;
$itcamefromthedatabase = true;
}
}
# If the title is valid but undisplayable, make a link to it
if ( $this->mOutputType == OT_HTML && !$found ) {
$text = '[['.$title->getPrefixedText().']]';
$found = true;
}
# Template cache array insertion
# NOTE(review): $text appears to be unassigned here when no content was
# found and the output type is not OT_HTML - confirm.
$this->mTemplates[$part1] = $text;
}
}
# Recursive parsing, escaping and link table handling
# Only for HTML output
if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
$text = wfEscapeWikiText( $text );
} elseif ( ($this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI) && $found && !$noparse) {
# Clean up argument array
# Arguments without '=' are numbered from 1; "name=value" arguments are
# keyed by the trimmed name.
$assocArgs = array();
$index = 1;
foreach( $args as $arg ) {
$eqpos = strpos( $arg, '=' );
if ( $eqpos === false ) {
$assocArgs[$index++] = $arg;
} else {
$name = trim( substr( $arg, 0, $eqpos ) );
$value = trim( substr( $arg, $eqpos+1 ) );
if ( $value === false ) {
$value = '';
}
if ( $name !== false ) {
$assocArgs[$name] = $value;
}
}
}
# Add a new element to the template recursion path
$this->mTemplatePath[$part1] = 1;
$text = $this->strip( $text, $this->mStripState );
$text = $this->removeHTMLtags( $text );
$text = $this->replaceVariables( $text, $assocArgs );
# Resume the link cache and register the inclusion as a link
if ( $this->mOutputType == OT_HTML && !is_null( $title ) ) {
$wgLinkCache->addLinkObj( $title );
}
}
# Empties the template path
$this->mTemplatePath = array();
if ( !$found ) {
return $matches[0];
} else {
# replace ==section headers==
# XXX this needs to go away once we have a better parser.
if ( $this->mOutputType != OT_WIKI && $itcamefromthedatabase ) {
# Each header line from the included text gets an HTML comment of the
# form <!--MWTEMPLATESECTION=base64(title)&base64(section index)-->
# inserted before its closing '=' run.
if( !is_null( $title ) )
$encodedname = base64_encode($title->getPrefixedDBkey());
else
$encodedname = base64_encode("");
# With PREG_SPLIT_DELIM_CAPTURE the result alternates between body text
# (even indexes) and header lines (odd indexes). Note that $matches is
# reused here and no longer holds the regex match passed in.
$matches = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
PREG_SPLIT_DELIM_CAPTURE);
$text = '';
$nsec = 0;
for( $i = 0; $i < count($matches); $i += 2 ) {
$text .= $matches[$i];
if (!isset($matches[$i + 1]) || $matches[$i + 1] == "") continue;
$hl = $matches[$i + 1];
# Headers that already carry a marker are copied through unchanged.
if( strstr($hl, "<!--MWTEMPLATESECTION") ) {
$text .= $hl;
continue;
}
preg_match('/^(={1,6})(.*?)(={1,6})\s*?$/m', $hl, $m2);
$text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION="
. $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3];
$nsec++;
}
}
return $text;
}
}
/**
* Triple brace replacement -- used for template arguments
* @access private
*/
function argSubstitution( $matches ) {
    # The argument name is the trimmed text between the triple braces.
    $name = trim( $matches[1] );

    # The arguments of the innermost template being expanded sit at the
    # end of the argument stack.
    $currentArgs = end( $this->mArgStack );

    if ( array_key_exists( $name, $currentArgs ) ) {
        return $currentArgs[$name];
    }
    # Unknown argument: leave the {{{...}}} text untouched.
    return $matches[0];
}
/**
* Returns true if the function is allowed to include this entity
* @access private
*/
function incrementIncludeCount( $dbk ) {
    # First inclusion of this key: start counting from zero.
    if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
        $this->mIncludeCount[$dbk] = 0;
    }
    # The counter is bumped on every call, including calls that end up
    # being refused.
    $this->mIncludeCount[$dbk]++;

    # Allowed while the count has not exceeded MAX_INCLUDE_REPEAT.
    return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
/**
* Cleans up HTML, removes dangerous tags and attributes, and
* removes HTML comments
* @access private
*/
function removeHTMLtags( $text ) {
# Strips HTML comments, then walks the text tag by tag: tags on the
# whitelist are re-emitted with their attributes passed through
# fixTagAttributes(); everything else has its '<' and '>' escaped.
# Without $wgUseTidy the nesting is also checked and unclosed tags are
# closed at the end.
global $wgUseTidy, $wgUserHtml;
$fname = 'Parser::removeHTMLtags';
wfProfileIn( $fname );
if( $wgUserHtml ) {
$htmlpairs = array( # Tags that must be closed
'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
'strike', 'strong', 'tt', 'var', 'div', 'center',
'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
'ruby', 'rt' , 'rb' , 'rp', 'p'
);
$htmlsingle = array(
'br', 'hr', 'li', 'dt', 'dd'
);
$htmlnest = array( # Tags that can be nested--??
'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
'dl', 'font', 'big', 'small', 'sub', 'sup'
);
$tabletags = array( # Can only appear inside table
'td', 'th', 'tr'
);
} else {
# User HTML disabled: with empty whitelists every tag gets escaped.
$htmlpairs = array();
$htmlsingle = array();
$htmlnest = array();
$tabletags = array();
}
# Table cell/row tags are treated like single tags (no closing-tag
# bookkeeping on the stack).
$htmlsingle = array_merge( $tabletags, $htmlsingle );
$htmlelements = array_merge( $htmlsingle, $htmlpairs );
# NOTE(review): $htmlattrs is not used in the visible body.
$htmlattrs = $this->getHTMLattrs () ;
# Remove HTML comments
$text = $this->removeHTMLcomments( $text );
# Split on '<': the first piece is plain text, every later piece begins
# with what may be a tag.
$bits = explode( '<', $text );
$text = array_shift( $bits );
if(!$wgUseTidy) {
# $tagstack holds the currently open paired tags; $tablestack saves the
# outer $tagstack while inside a <table>.
$tagstack = array(); $tablestack = array();
foreach ( $bits as $x ) {
# Notices and warnings are silenced around the match and list()
# assignment below, which may operate on an empty $regs when the piece
# is not a tag.
$prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
$x, $regs );
list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
error_reporting( $prev );
$badtag = 0 ;
if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
# Check our stack
if ( $slash ) {
# Closing a tag...
# For paired tags the closing tag must match the top of the stack;
# otherwise the popped entry is pushed back and the tag is rejected.
if ( ! in_array( $t, $htmlsingle ) &&
( $ot = @array_pop( $tagstack ) ) != $t ) {
@array_push( $tagstack, $ot );
$badtag = 1;
} else {
if ( $t == 'table' ) {
# Leaving a table: restore the stack saved when it was opened.
$tagstack = array_pop( $tablestack );
}
$newparams = '';
}
} else {
# Keep track for later
if ( in_array( $t, $tabletags ) &&
! in_array( 'table', $tagstack ) ) {
$badtag = 1;
} else if ( in_array( $t, $tagstack ) &&
! in_array ( $t , $htmlnest ) ) {
$badtag = 1 ;
} else if ( ! in_array( $t, $htmlsingle ) ) {
if ( $t == 'table' ) {
# Entering a table: save the outer stack and start a fresh one.
array_push( $tablestack, $tagstack );
$tagstack = array();
}
array_push( $tagstack, $t );
}
# Strip non-approved attributes from the tag
$newparams = $this->fixTagAttributes($params);
}
if ( ! $badtag ) {
$rest = str_replace( '>', '&gt;', $rest );
$text .= "<$slash$t $newparams$brace$rest";
continue;
}
}
# Not an accepted tag: escape it so it is shown literally.
$text .= '&lt;' . str_replace( '>', '&gt;', $x);
}
# Close off any remaining tags
while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
$text .= "</$t>\n";
if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
}
} else {
# this might be possible using tidy itself
# With $wgUseTidy set, no nesting checks are done here: whitelisted
# tags are passed through, the rest escaped.
foreach ( $bits as $x ) {
preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
$x, $regs );
@list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
$newparams = $this->fixTagAttributes($params);
$rest = str_replace( '>', '&gt;', $rest );
$text .= "<$slash$t $newparams$brace$rest";
} else {
$text .= '&lt;' . str_replace( '>', '&gt;', $x);
}
}
}
wfProfileOut( $fname );
return $text;
}
/**
* Remove '<!--', '-->', and everything between.
* To avoid leaving blank lines, when a comment is both preceded
* and followed by a newline (ignoring spaces), trim leading and
* trailing spaces and one of the newlines.
*
* @access private
*/
function removeHTMLcomments( $text ) {
    $fname='Parser::removeHTMLcomments';
    wfProfileIn( $fname );

    for (;;) {
        $open = strpos( $text, '<!--' );
        if ( $open === false ) {
            # No (more) comments.
            break;
        }
        $close = strpos( $text, '-->', $open + 4 );
        if ( $close === false ) {
            # Unterminated comment; leave the rest of the text alone.
            break;
        }
        # Index of the first character after the comment.
        $after = $close + 3;

        # Walk left from the character before the comment over any run
        # of spaces (never moving below index 0)...
        $before = max( $open - 1, 0 );
        while ( $before > 0 && substr( $text, $before, 1 ) === ' ' ) {
            $before--;
        }
        # ...and right from the end of the comment over trailing spaces.
        while ( substr( $text, $after, 1 ) === ' ' ) {
            $after++;
        }

        $onOwnLine = substr( $text, $before, 1 ) === "\n"
            && substr( $text, $after, 1 ) === "\n";
        if ( $onOwnLine ) {
            # The comment sits alone between two newlines: replace both
            # newlines, the padding and the comment with one newline.
            $text = substr_replace( $text, "\n", $before, $after - $before + 1 );
        } else {
            # Otherwise drop only the comment itself.
            $text = substr_replace( $text, '', $open, $close + 3 - $open );
        }
    }

    wfProfileOut( $fname );
    return $text;
}
/**
* This function accomplishes several tasks:
* 1) Auto-number headings if that option is enabled
* 2) Add an [edit] link to sections for logged in users who have enabled the option
* 3) Add a Table of contents on the top for users who have enabled the option
* 4) Auto-anchor headings
*
* It loops through all headlines, collects the necessary data, then splits up the
* string and re-inserts the newly formatted headlines.
* @access private
*/
/* private */ function formatHeadings( $text, $isMain=true ) {
    # Collects every <h1>..<h6> in $text, builds numbering, anchors, edit
    # links and the table of contents for them, then splits $text at the
    # headings and re-assembles it with the rebuilt headings.
    #
    # @param string $text   HTML to process
    # @param bool   $isMain when true the TOC may be inserted after the
    #                       first block of text
    # @return string the re-assembled HTML
    global $wgInputEncoding, $wgMaxTocLevel, $wgContLang, $wgLinkHolders;

    $doNumberHeadings = $this->mOptions->getNumberHeadings();
    $doShowToc = $this->mOptions->getShowToc();
    $forceTocHere = false;
    if( !$this->mTitle->userCanEdit() ) {
        $showEditLink = 0;
        $rightClickHack = 0;
    } else {
        $showEditLink = $this->mOptions->getEditSection();
        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
    }

    # Inhibit editsection links if requested in the page
    $esw =& MagicWord::get( MAG_NOEDITSECTION );
    if( $esw->matchAndRemove( $text ) ) {
        $showEditLink = 0;
    }
    # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
    # do not add TOC
    $mw =& MagicWord::get( MAG_NOTOC );
    if( $mw->matchAndRemove( $text ) ) {
        $doShowToc = 0;
    }

    # never add the TOC to the Main Page. This is an entry page that should not
    # be more than 1-2 screens large anyway
    if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
        $doShowToc = 0;
    }

    # Get all headlines for numbering them and adding funky stuff like [edit]
    # links - this is for later, but we need the number of headlines right now
    # $matches[1] = level digit, $matches[2] = rest of the opening tag
    # (attributes and '>'), $matches[3] = heading contents.
    $numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

    # if there are fewer than 4 headlines in the article, do not show TOC
    if( $numMatches < 4 ) {
        $doShowToc = 0;
    }

    # if the string __TOC__ (not case-sensitive) occurs in the HTML,
    # override above conditions and always show TOC at that place
    $mw =& MagicWord::get( MAG_TOC );
    if ($mw->match( $text ) ) {
        $doShowToc = 1;
        $forceTocHere = true;
    } else {
        # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
        # override above conditions and always show TOC above first header
        $mw =& MagicWord::get( MAG_FORCETOC );
        if ($mw->matchAndRemove( $text ) ) {
            $doShowToc = 1;
        }
    }

    # We need this to perform operations on the HTML
    $sk =& $this->mOptions->getSkin();

    # headline counter
    $headlineCount = 0;
    $sectionCount = 0; # headlineCount excluding template sections

    # Ugh .. the TOC should have neat indentation levels which can be
    # passed to the skin functions. These are determined here
    $toclevel = 0;
    $toc = '';
    $full = '';
    $head = array();
    $sublevelCount = array();
    $level = 0;
    $prevlevel = 0;
    foreach( $matches[3] as $headline ) {
        # Headings carrying an MWTEMPLATESECTION marker get edit links
        # pointing at the title and section number decoded from it.
        $istemplate = 0;
        $templatetitle = "";
        $templatesection = 0;

        if (preg_match("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", $headline, $mat)) {
            $istemplate = 1;
            $templatetitle = base64_decode($mat[1]);
            $templatesection = 1 + (int)base64_decode($mat[2]);
            $headline = preg_replace("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", "", $headline);
        }

        $numbering = '';
        if( $level ) {
            $prevlevel = $level;
        }
        $level = $matches[1][$headlineCount];
        if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
            # reset when we enter a new level
            $sublevelCount[$level] = 0;
            $toc .= $sk->tocIndent( $level - $prevlevel );
            $toclevel += $level - $prevlevel;
        }
        if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
            # reset when we step back a level
            $sublevelCount[$level+1]=0;
            $toc .= $sk->tocUnindent( $prevlevel - $level );
            $toclevel -= $prevlevel - $level;
        }
        # count number of headlines for each level
        @$sublevelCount[$level]++;
        if( $doNumberHeadings || $doShowToc ) {
            # Join the non-empty per-level counters with dots, e.g. "2.1".
            $dot = 0;
            for( $i = 1; $i <= $level; $i++ ) {
                if( !empty( $sublevelCount[$i] ) ) {
                    if( $dot ) {
                        $numbering .= '.';
                    }
                    $numbering .= $wgContLang->formatNum( $sublevelCount[$i] );
                    $dot = 1;
                }
            }
        }

        # The canonized header is a version of the header text safe to use for links
        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
        # The second call must operate on the result of the first;
        # feeding it $headline again would discard the unstrip() output.
        $canonized_headline = $this->unstrip( $headline, $this->mStripState );
        $canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

        # Remove link placeholders by the link text.
        #     <!--LINK number-->
        # turns into
        #     link text with suffix
        # NOTE(review): the /e modifier is deprecated since PHP 5.5 and
        # removed in PHP 7; left as is to stay compatible with the PHP
        # version this file targets.
        $canonized_headline = preg_replace( '/<!--LINK ([0-9]*)-->/e',
            "\$wgLinkHolders['texts'][\$1]",
            $canonized_headline );

        # strip out HTML
        $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
        $tocline = trim( $canonized_headline );
        $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
        # After URL-encoding, turn %3A back into ':' and every other '%'
        # into '.'.
        $replacearray = array(
            '%3A' => ':',
            '%' => '.'
        );
        $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
        $refer[$headlineCount] = $canonized_headline;

        # count how many in assoc. array so we can track dupes in anchors
        @$refers[$canonized_headline]++;
        $refcount[$headlineCount]=$refers[$canonized_headline];

        # Prepend the number to the heading text

        if( $doNumberHeadings || $doShowToc ) {
            $tocline = $numbering . ' ' . $tocline;

            # Don't number the heading if it is the only one (looks silly)
            if( $doNumberHeadings && count( $matches[3] ) > 1) {
                # the two are different if the line contains a link
                $headline=$numbering . ' ' . $headline;
            }
        }

        # Create the anchor for linking from the TOC to the section
        # Repeated headings get a "_<n>" suffix to keep anchors unique.
        $anchor = $canonized_headline;
        if($refcount[$headlineCount] > 1 ) {
            $anchor .= '_' . $refcount[$headlineCount];
        }
        if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
            $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
        }
        if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) {
            if ( empty( $head[$headlineCount] ) ) {
                $head[$headlineCount] = '';
            }
            if( $istemplate )
                $head[$headlineCount] .= $sk->editSectionLinkForOther($templatetitle, $templatesection);
            else
                $head[$headlineCount] .= $sk->editSectionLink($sectionCount+1);
        }

        # Add the edit section span
        if( $rightClickHack ) {
            if( $istemplate )
                $headline = $sk->editSectionScriptForOther($templatetitle, $templatesection, $headline);
            else
                $headline = $sk->editSectionScript($sectionCount+1,$headline);
        }

        # give headline the correct <h#> tag
        @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';

        $headlineCount++;
        if( !$istemplate )
            $sectionCount++;
    }

    if( $doShowToc ) {
        $toclines = $headlineCount;
        $toc .= $sk->tocUnindent( $toclevel );
        $toc = $sk->tocTable( $toc );
    }

    # split up and insert constructed headlines

    $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
    $i = 0;

    foreach( $blocks as $block ) {
        if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
            # This is the [edit] link that appears for the top block of text when
            # section editing is enabled

            # Disabled because it broke block formatting
            # For example, a bullet point in the top line
            # $full .= $sk->editSectionLink(0);
        }
        $full .= $block;
        if( $doShowToc && !$i && $isMain && !$forceTocHere) {
            # Top anchor now in skin
            $full = $full.$toc;
        }

        # $head[$i] is the rebuilt heading that followed block $i.
        if( !empty( $head[$i] ) ) {
            $full .= $head[$i];
        }
        $i++;
    }
    if($forceTocHere) {
        # __TOC__ present: the TOC replaces the magic word in place.
        $mw =& MagicWord::get( MAG_TOC );
        return $mw->replace( $toc, $full );
    } else {
        return $full;
    }
}
/**
* Return an HTML link for the "ISBN 123456" text
* @access private
*/
function magicISBN( $text ) {
	# Replace every "ISBN <number>" occurrence in $text with an HTML link to
	# Special:Booksources and return the resulting text. Occurrences that are
	# not followed by a usable number are copied through unchanged.
	$fname = 'Parser::magicISBN';
	wfProfileIn( $fname );

	# The delimiter is a literal string, so explode() is sufficient (no regex
	# engine needed). A blank is prepended so that a keyword at the very start
	# of the text still yields a leading chunk; it is removed again below.
	$chunks = explode( 'ISBN ', ' ' . $text );
	if ( count( $chunks ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = (string)substr( array_shift( $chunks ), 1 );

	# Characters accepted as part of an ISBN
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $chunks as $x ) {
		# Split off the run of blanks following the keyword. strspn() copes
		# with an empty chunk, unlike reading offset 0 of the string.
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		if ( $x === '' ) { # blank isbn
			$text .= "ISBN $blank";
			continue;
		}

		# Split off the longest prefix made of valid ISBN characters
		$isbnLen = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLen );
		$x = (string)substr( $x, $isbnLen );

		# The number passed to the book sources page carries no dashes
		$num = str_replace( '-', '', $isbn );

		if ( $num === '' ) {
			# Not a number (e.g. only dashes): emit the text exactly as it
			# was, including the characters consumed into $isbn.
			$text .= "ISBN $blank$isbn$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'isbn='.$num ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}

	wfProfileOut( $fname );
	return $text;
}
/**
* Return an HTML link for the "GEO ..." text
* @access private
*/
function magicGEO( $text ) {
	# Replace every "GEO <coordinates>" occurrence in $text with an HTML link
	# to Special:Geo and return the resulting text. Occurrences that do not
	# carry at least two colon-separated parts are copied through unchanged.
	$fname = 'Parser::magicGEO';
	wfProfileIn( $fname );

	# The next lines are only for the ~35000 U.S. Census Rambot pages...
	# They rewrite  D&deg;M'S" North, D&deg;M'S" West  style coordinates into
	# the (GEO ...) form handled below. South and West are negative.
	$latitudeSigns = array( 'North' => '+', 'South' => '-' );
	$longitudeSigns = array( 'West' => '-', 'East' => '+' );
	foreach ( $latitudeSigns as $latName => $latSign ) {
		foreach ( $longitudeSigns as $lonName => $lonSign ) {
			$text = preg_replace(
				"/(\d+)&deg;(\d+)'(\d+)\" {$latName}, (\d+)&deg;(\d+)'(\d+)\" {$lonName}/",
				"(GEO {$latSign}\$1.\$2.\$3:{$lonSign}\$4.\$5.\$6)",
				$text );
		}
	}

	# Literal delimiter, so explode() is sufficient. A blank is prepended so
	# that a keyword at the very start of the text still yields a leading
	# chunk; it is removed again below.
	$chunks = explode( 'GEO ', ' ' . $text );
	if ( count( $chunks ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = (string)substr( array_shift( $chunks ), 1 );

	# Characters accepted as part of a coordinate specification
	$valid = '0123456789.+-:';

	foreach ( $chunks as $x ) {
		# Split off the run of blanks following the keyword. strspn() copes
		# with an empty chunk, unlike reading offset 0 of the string.
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		# Split off the longest prefix made of valid coordinate characters
		$geoLen = strspn( $x, $valid );
		$geo = (string)substr( $x, 0, $geoLen );
		$x = (string)substr( $x, $geoLen );

		# Plus signs are dropped from the value sent to the special page
		$num = str_replace( '+', '', $geo );

		if ( $num === '' || count( explode( ':', $num, 3 ) ) < 2 ) {
			# Not a coordinate pair: emit the text exactly as it was,
			# including the characters consumed into $geo.
			$text .= "GEO $blank$geo$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'coordinates='.$num ) .
				"\" class=\"internal\">GEO $geo</a>";
			$text .= $x;
		}
	}

	wfProfileOut( $fname );
	return $text;
}
/**
* Return an HTML link for the "RFC 1234" text
* @access private
* @param string $text text to be processed
*/
function magicRFC( $text ) {
	# Replace every "RFC <number>" occurrence in $text with an external link
	# built from the 'rfcurl' message and return the resulting text.
	# Occurrences directly preceded by "[[" are left as plain text so that
	# they keep working as ordinary internal links.
	$valid = '0123456789';

	# Literal delimiter, so explode() is sufficient. A blank is prepended so
	# that a keyword at the very start of the text still yields a leading
	# chunk; it is removed again below.
	$chunks = explode( 'RFC ', ' ' . $text );
	if ( count( $chunks ) < 2 ) return $text;
	$text = (string)substr( array_shift( $chunks ), 1 );

	/* Check if the first RFC keyword is preceded by [[.
	 * This test is made here because the array_shift above
	 * prevents it from being done inside the foreach.
	 */
	$internal = ( substr( $text, -2 ) == '[[' );

	foreach ( $chunks as $x ) {
		/* chunk might be empty if we have RFC RFC 1234 */
		if ( $x == '' ) {
			$text .= 'RFC ';
			continue;
		}

		/* remove and save whitespace in $blank; strspn() copes with a
		 * string that becomes empty, unlike reading offset 0 of it */
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		/* remove and save the rfc number in $rfc */
		$rfcLen = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLen );
		$x = (string)substr( $x, $rfcLen );

		if ( $rfc === '' ) {
			/* no number: put the stripped spaces back */
			$text .= "RFC $blank$x";
		} elseif ( $internal ) {
			/* inside [[...]]: keep the original text, blanks included */
			$text .= "RFC $blank$rfc$x";
		} else {
			/* build the external link */
			$url = wfmsg( 'rfcurl' );
			$url = str_replace( '$1', $rfc, $url );
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, 'RFC '.$rfc );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}

		/* Check if the next RFC keyword is preceded by [[ */
		$internal = ( substr( $x, -2 ) == '[[' );
	}
	return $text;
}
/**
* Transform wiki markup when saving a page by doing \r\n -> \n
* conversion, substituting signatures, {{subst:}} templates, etc.
*
* @param string $text the text to transform
* @param Title &$title the Title object for the current article
* @param User &$user the User object describing the current user
* @param ParserOptions $options parsing options
* @param bool $clearState whether to clear the parser state first
* @return string the altered wiki markup
* @access public
*/
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
	# Put the parser into wikitext-output mode for this title and options
	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_WIKI;
	if ( $clearState ) {
		$this->clearState();
	}

	# Normalise Windows line endings
	$text = str_replace( "\r\n", "\n", $text );

	# (A regex pass that rewrote <br> tags used to sit here; it is disabled.)

	# Strip protected regions, run the pre-save pass on what remains, then
	# put the stripped regions back.
	$stripState = false;
	$stripped = $this->strip( $text, $stripState, false );
	$transformed = $this->pstPass2( $stripped, $user );
	$restored = $this->unstrip( $transformed, $stripState );
	return $this->unstripNoWiki( $restored, $stripState );
}
/**
* Pre-save transform helper function
* @access private
*/
function pstPass2( $text, &$user ) {
	# Second stage of the pre-save transform: expands {{subst:}} tags,
	# replaces ~~~ / ~~~~ / ~~~~~ signatures, completes "pipe trick" links
	# and trims trailing whitespace. Returns the transformed wikitext.
	global $wgContLang, $wgLocaltimezone;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) { $k = $n; }

	/* Note: this is an ugly timezone hack for the European wikis */
	# Switch TZ to the configured local timezone while formatting the
	# timestamp, then put the previous value back.
	if ( isset( $wgLocaltimezone ) ) {
		$oldtz = getenv( 'TZ' );
		putenv( 'TZ=' . $wgLocaltimezone );
	}
	$d = $wgContLang->timeanddate( date( 'YmdHis' ), false ) .
		' (' . date( 'T' ) . ')';
	if ( isset( $wgLocaltimezone ) ) {
		putenv( 'TZ=' . $oldtz );
	}

	# Plain string replacement is used on purpose: the user name and nickname
	# are user-supplied and must not be interpreted as regex replacement
	# templates (where "$1" or "\1" would be treated as backreferences).
	# Longest tilde run first, so that ~~~~~ is not eaten by ~~~~ or ~~~.
	$userLink = '[[' . $wgContLang->getNsText( NS_USER ) . ":$n|$k]]";
	$text = str_replace( '~~~~~', $d, $text );
	$text = str_replace( '~~~~', "$userLink $d", $text );
	$text = str_replace( '~~~', $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]] and [[:namespace:page|]]
	$p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]

	# If the current title looks like "Page (context)", remember the context
	# so that [[|page]] can be completed with it.
	$context = '';
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}

	# Most specific pattern first
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
/**
* Set up some variables which are usually set up in parse()
* so that an external function can call some class members with confidence
* @access public
*/
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	# Store the parsing context on the parser instance
	$this->mTitle =& $title;
	$this->mOptions = $options;
	$this->mOutputType = $outputType;

	# Leave the existing state alone unless the caller asked for a reset
	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
/**
* Transform a MediaWiki message by replacing magic variables.
*
* @param string $text the text to transform
* @param ParserOptions $options options
* @return string the text with variables substituted
* @access public
*/
function transformMsg( $text, $options ) {
	global $wgTitle;

	# Re-entrancy flag shared by all calls: while one transformation is
	# running, nested calls hand the text back untouched (guards against
	# infinite recursion).
	static $executing = false;

	if ( !$executing ) {
		$executing = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}

	return $text;
}
/**
* Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
* Callback will be called with the text within
* Transform and return the text within
* @access public
*/
function setHook( $tag, $callback ) {
	# Remember whatever callback was registered for this tag before (null
	# when there was none), install the new one and hand back the old one.
	$previous = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;
	return $previous;
}
}
/**
 * Container for the result of a parse: the output text plus the language
 * links, category links and "contains old magic" flag collected on the way.
 *
 * @package MediaWiki
 */
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	/**
	 * @param string $text             output text
	 * @param array  $languageLinks    language links
	 * @param array  $categoryLinks    category links
	 * @param bool   $containsOldMagic value of the "contains old magic" flag
	 */
	function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = '';
	}

	# Accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators; each one returns whatever wfSetVar() returns
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	/**
	 * Fold the link lists and the old-magic flag of another ParserOutput
	 * into this one. The text and the cache time are left untouched.
	 *
	 * @param ParserOutput $other the output to merge in
	 */
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Category links come from $other's category links, not from the
		# (already merged) language links of this object.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}
}
/**
 * Bundle of settings that steer a Parser run. Values are read through the
 * get*() accessors and changed through the set*() mutators; a fully
 * populated instance for a given user comes from newFromUser().
 *
 * @package MediaWiki
 */
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# --- Accessors ---

	function getUseTeX() {
		return $this->mUseTeX;
	}

	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}

	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}

	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}

	function getSkin() {
		return $this->mSkin;
	}

	function getDateFormat() {
		return $this->mDateFormat;
	}

	function getEditSection() {
		return $this->mEditSection;
	}

	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}

	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}

	function getShowToc() {
		return $this->mShowToc;
	}

	# --- Mutators; each one returns whatever wfSetVar() returns ---

	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}

	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}

	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}

	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}

	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}

	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}

	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}

	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}

	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# The skin is stored by reference and nothing is returned
	function setSkin( &$x ) {
		$this->mSkin =& $x;
	}

	# Get parser options
	/* static */ function newFromUser( &$user ) {
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Get user options
	function initialiseFromUser( &$userInput ) {
		global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		$fname = 'ParserOptions::initialiseFromUser';
		wfProfileIn( $fname );

		# Fall back to a fresh User object, marked as loaded, when no
		# user was supplied
		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site-wide settings
		$this->mUseTeX = $wgUseTeX;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Skin lookup is profiled separately
		wfProfileIn( $fname.'-skin' );
		$this->mSkin =& $user->getSkin();
		wfProfileOut( $fname.'-skin' );

		# Per-user preferences
		$this->mDateFormat = $user->getOption( 'date' );
		$this->mEditSection = $user->getOption( 'editsection' );
		$this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
		$this->mNumberHeadings = $user->getOption( 'numberheadings' );
		$this->mShowToc = $user->getOption( 'showtoc' );

		wfProfileOut( $fname );
	}
}
# Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches ) {
	# Plain-function trampoline: forwards the regex match array to the
	# braceSubstitution() method of the parser held in $wgCurParser.
	return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
function wfArgSubstitution( $matches ) {
	# Plain-function trampoline: forwards the regex match array to the
	# argSubstitution() method of the parser held in $wgCurParser.
	return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
function wfVariableSubstitution( $matches ) {
	# Plain-function trampoline: forwards the regex match array to the
	# variableSubstitution() method of the parser held in $wgCurParser.
	return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
/**
* Return the total number of articles
*/
function wfNumberOfArticles() {
	global $wgNumberOfArticles;

	# Give wfLoadSiteStats() the chance to fill in the global, then report
	# whatever value it holds afterwards.
	wfLoadSiteStats();
	$articleCount = $wgNumberOfArticles;
	return $articleCount;
}
/**
* Get various statistics from the database
* @private
*/
function wfLoadSiteStats() {
	global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

	# Only query while the article count still holds -1 (presumably the
	# "not loaded yet" default set elsewhere); otherwise there is nothing
	# to do.
	if ( $wgNumberOfArticles != -1 ) {
		return;
	}

	$db =& wfGetDB( DB_SLAVE );
	$columns = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
	$row = $db->getArray( 'site_stats', $columns, array( 'ss_row_id' => 1 ), 'wfLoadSiteStats' );

	# Leave the globals untouched when the lookup yields no row
	if ( $row !== false ) {
		$wgTotalViews = $row->ss_total_views;
		$wgTotalEdits = $row->ss_total_edits;
		$wgNumberOfArticles = $row->ss_good_articles;
	}
}
function wfEscapeHTMLTagsOnly( $in ) {
	# Escape only the characters that can open or close a tag or an
	# attribute value; ampersands are deliberately left alone.
	$entities = array(
		'"' => '&quot;',
		'>' => '&gt;',
		'<' => '&lt;',
	);
	return str_replace( array_keys( $entities ), array_values( $entities ), $in );
}
?>