Implicitly marking parameter $... as nullable is deprecated in php8.4, the explicit nullable type must be used instead Created with autofix from Ide15839e98a6229c22584d1c1c88c690982e1d7a Break one long line in SpecialPage.php Bug: T376276 Change-Id: I807257b2ba1ab2744ab74d9572c9c3d3ac2a968e
545 lines
17 KiB
PHP
545 lines
17 KiB
PHP
<?php
|
|
|
|
namespace MediaWiki\CommentFormatter;
|
|
|
|
use File;
|
|
use HtmlArmor;
|
|
use MediaWiki\Cache\LinkBatch;
|
|
use MediaWiki\Cache\LinkBatchFactory;
|
|
use MediaWiki\Cache\LinkCache;
|
|
use MediaWiki\HookContainer\HookContainer;
|
|
use MediaWiki\HookContainer\HookRunner;
|
|
use MediaWiki\Html\Html;
|
|
use MediaWiki\Language\Language;
|
|
use MediaWiki\Linker\Linker;
|
|
use MediaWiki\Linker\LinkRenderer;
|
|
use MediaWiki\Linker\LinkTarget;
|
|
use MediaWiki\Parser\Parser;
|
|
use MediaWiki\Parser\Sanitizer;
|
|
use MediaWiki\SpecialPage\SpecialPage;
|
|
use MediaWiki\Title\MalformedTitleException;
|
|
use MediaWiki\Title\NamespaceInfo;
|
|
use MediaWiki\Title\Title;
|
|
use MediaWiki\Title\TitleParser;
|
|
use MediaWiki\Title\TitleValue;
|
|
use MediaWiki\WikiMap\WikiMap;
|
|
use RepoGroup;
|
|
use StringUtils;
|
|
|
|
/**
|
|
* The text processing backend for CommentFormatter.
|
|
*
|
|
* CommentParser objects should be discarded after the comment batch is
|
|
* complete, in order to reduce memory usage.
|
|
*
|
|
* @internal
|
|
*/
|
|
class CommentParser {
|
|
/** @var LinkRenderer Builds the HTML for page, section and foreign-wiki links */
|
|
private $linkRenderer;
|
|
/** @var LinkBatchFactory Creates the LinkBatch used to defer page existence checks */
|
|
private $linkBatchFactory;
|
|
/** @var RepoGroup Resolves the files queued in $fileBatch when batches are flushed */
|
|
private $repoGroup;
|
|
/** @var Language Supplies the arrow and text direction used when rendering section links */
|
|
private $userLang;
|
|
/** @var Language Supplies the localized Media namespace name and the link trail regex */
|
|
private $contLang;
|
|
/** @var TitleParser Turns link target text into title objects */
|
|
private $titleParser;
|
|
/** @var NamespaceInfo Supplies canonical namespace names */
|
|
private $namespaceInfo;
|
|
/** @var HookRunner Used to run the FormatAutocomments hook */
|
|
private $hookRunner;
|
|
/** @var LinkCache Consulted for links already known to be good or bad before deferring them */
|
|
private $linkCache;
|
|
|
|
/** @var callable[] Deferred link renderers; the array index is the marker ID */
|
|
private $links = [];
|
|
/** @var LinkBatch|null Pending page existence batch, or null when nothing is queued */
|
|
private $linkBatch;
|
|
|
|
/** @var array|null Input to RepoGroup::findFiles(); stays null until the first file link is queued */
|
|
private $fileBatch;
|
|
/** @var File[] Resolved File objects indexed by DB key */
|
|
private $files = [];
|
|
|
|
/** @var int The maximum number of digits in a marker ID; IDs are zero-padded to this width */
|
|
private const MAX_ID_SIZE = 7;
|
|
/** @var string Prefix for marker. ' and " included to break attributes (T355538) */
|
|
private const MARKER_PREFIX = "\x1B\"'";
|
|
|
|
/**
 * Store the services needed to turn comment text into HTML.
 *
 * @param LinkRenderer $linkRenderer
 * @param LinkBatchFactory $linkBatchFactory
 * @param LinkCache $linkCache
 * @param RepoGroup $repoGroup
 * @param Language $userLang
 * @param Language $contLang
 * @param TitleParser $titleParser
 * @param NamespaceInfo $namespaceInfo
 * @param HookContainer $hookContainer
 */
public function __construct(
	LinkRenderer $linkRenderer,
	LinkBatchFactory $linkBatchFactory,
	LinkCache $linkCache,
	RepoGroup $repoGroup,
	Language $userLang,
	Language $contLang,
	TitleParser $titleParser,
	NamespaceInfo $namespaceInfo,
	HookContainer $hookContainer
) {
	// The container is only ever used through a HookRunner, so wrap it once here.
	$this->hookRunner = new HookRunner( $hookContainer );

	// Link rendering and existence lookups
	$this->linkRenderer = $linkRenderer;
	$this->linkCache = $linkCache;
	$this->linkBatchFactory = $linkBatchFactory;
	$this->repoGroup = $repoGroup;

	// Languages
	$this->contLang = $contLang;
	$this->userLang = $userLang;

	// Title handling
	$this->namespaceInfo = $namespaceInfo;
	$this->titleParser = $titleParser;
}
|
|
|
|
/**
 * Convert a plain-text comment to HTML in which links are represented by
 * markers. The markers are turned into real links later, by finalize().
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks
 * @return string
 */
public function preprocess( string $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	// Plain-text input: preprocessInternal() must HTML-escape it.
	$unsafe = false;

	return $this->preprocessInternal(
		$comment,
		$unsafe,
		$selfLinkTarget,
		$samePage,
		$wikiId,
		$enableSectionLinks
	);
}
|
|
|
|
/**
 * Convert a comment that is already in pseudo-HTML format to HTML in which
 * links are represented by markers. Unlike preprocess(), the input is not
 * HTML-escaped.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks
 * @return string
 */
public function preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	// Pseudo-HTML input: tell preprocessInternal() to skip the escaping step.
	$unsafe = true;

	return $this->preprocessInternal(
		$comment,
		$unsafe,
		$selfLinkTarget,
		$samePage,
		$wikiId,
		$enableSectionLinks
	);
}
|
|
|
|
/**
 * Run any pending batch queries, then swap every marker found in the given
 * string(s) for the link it stands for.
 *
 * @param string|string[] $comments
 * @return string|string[]
 */
public function finalize( $comments ) {
	$this->flushLinkBatches();

	// A marker is the prefix followed by exactly MAX_ID_SIZE digits.
	$markerRegex = '/' . self::MARKER_PREFIX . '([0-9]{' . self::MAX_ID_SIZE . '})/';

	$resolveMarker = function ( $m ) {
		$id = (int)$m[1];
		$render = $this->links[$id] ?? null;
		if ( !$render ) {
			// No link was registered under this ID
			return '<!-- MISSING -->';
		}
		return $render();
	};

	return preg_replace_callback( $markerRegex, $resolveMarker, $comments );
}
|
|
|
|
/**
 * Shared implementation of preprocess() and preprocessUnsafe().
 *
 * Replaces newlines and ESC characters with spaces, HTML-escapes the text
 * unless $unsafe is set, optionally converts section links, and finally
 * converts wiki links.
 *
 * @param string $comment
 * @param bool $unsafe If true, $comment is not HTML-escaped
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks
 * @return string
 */
private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
	$enableSectionLinks
) {
	// Sanitize text a bit
	// \x1b needs to be stripped because it is used for link markers.
	// An explicit map is used instead of the three-argument form of strtr():
	// that form silently ignores surplus "from" characters when the "to"
	// string is shorter, which would leave \x1b in place.
	$comment = strtr( $comment, [
		"\n" => ' ',
		"\x1b" => ' ',
	] );
	// Allow HTML entities (for T15815)
	if ( !$unsafe ) {
		$comment = Sanitizer::escapeHtmlAllowEntities( $comment );
	}
	if ( $enableSectionLinks ) {
		$comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
	}
	return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
}
|
|
|
|
/**
 * Converts C-style comments in edit summaries into section links.
 *
 * Too many things are called "comments", so these are mostly now called
 * section links rather than autocomments.
 *
 * We look for all comments, match any text before and after the comment,
 * add a separator where needed and format the comment itself with CSS.
 *
 * The FormatAutocomments hook runs first for every match; if it sets a
 * non-null result, that result replaces the match as is.
 *
 * @param string $comment Comment text
 * @param LinkTarget|null $selfLinkTarget An optional LinkTarget object used to link to sections
 * @param bool $samePage Whether section links should refer to local page
 * @param string|false|null $wikiId Id of the wiki to link to (if not the local wiki),
 *  as used by WikiMap.
 * @return string Preprocessed comment
 */
private function doSectionLinks(
	$comment,
	$selfLinkTarget = null,
	$samePage = false,
	$wikiId = false
) {
	$comment = preg_replace_callback(
		// To detect the presence of content before or after the
		// auto-comment, we use capturing groups inside optional zero-width
		// assertions. But older versions of PCRE can't directly make
		// zero-width assertions optional, so wrap them in a non-capturing
		// group.
		'!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!',
		function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
			// Ensure all match positions are defined
			$match += [ '', '', '', '' ];

			$pre = $match[1] !== '';
			$auto = $match[2];
			$post = $match[3] !== '';
			$comment = null;

			$this->hookRunner->onFormatAutocomments(
				$comment, $pre, $auto, $post,
				Title::castFromLinkTarget( $selfLinkTarget ),
				$samePage,
				$wikiId );
			if ( $comment !== null ) {
				return $comment;
			}

			if ( $selfLinkTarget ) {
				$section = $auto;
				# Remove links that a user may have manually put in the autosummary
				# This could be improved by copying as much of Parser::stripSectionName as desired.
				$section = str_replace( [
					'[[:',
					'[[',
					']]'
				], '', $section );

				// We don't want any links in the auto text to be linked, but we still
				// want to show any [[ ]]. Encoding the first bracket as a character
				// reference keeps the later wiki link pass from matching "[[",
				// while the text still displays as "[[".
				$sectionText = str_replace( '[[', '&#91;[', $auto );

				$section = substr( Parser::guessSectionNameFromStrippedText( $section ), 1 );
				if ( $section !== '' ) {
					if ( $samePage ) {
						$sectionTitle = new TitleValue( NS_MAIN, '', $section );
					} else {
						$sectionTitle = $selfLinkTarget->createFragmentTarget( $section );
					}
					$auto = $this->makeSectionLink(
						$sectionTitle,
						$this->userLang->getArrow() .
							Html::rawElement( 'bdi', [ 'dir' => $this->userLang->getDir() ], $sectionText ),
						$wikiId,
						$selfLinkTarget
					);
				}
			}
			if ( $pre ) {
				# written summary $presep autocomment (summary /* section */)
				$pre = wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped();
			}
			if ( $post ) {
				# autocomment $postsep written summary (/* section */ summary)
				$auto .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
			}
			if ( $auto ) {
				$auto = Html::rawElement( 'span', [ 'class' => 'autocomment' ], $auto );
			}
			return $pre . $auto;
		},
		$comment
	);
	return $comment;
}
|
|
|
|
/**
 * Build the HTML for a section link. Section links are rendered right away
 * rather than queued in the LinkBatch, because their link class does not
 * depend on whether the target is known.
 *
 * @param LinkTarget $target
 * @param string $text
 * @param string|false|null $wikiId Id of the wiki to link to (if not the local wiki),
 *  as used by WikiMap.
 * @param LinkTarget $contextTitle
 *
 * @return string HTML link
 */
private function makeSectionLink(
	LinkTarget $target, $text, $wikiId, LinkTarget $contextTitle
) {
	$isLocal = $wikiId === null || $wikiId === false || $target->isExternal();
	if ( $isLocal ) {
		return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
	}

	// Foreign wiki: build the prefixed page name by hand and link to it by URL.
	if ( $target->getNamespace() === 0 ) {
		$pageName = $target->getDBkey();
	} else {
		$pageName = $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
			':' . $target->getDBkey();
	}
	$url = WikiMap::getForeignURL( $wikiId, $pageName, $target->getFragment() );

	// $text is already escaped
	$label = new HtmlArmor( $text );

	return $this->linkRenderer->makeExternalLink( $url, $label, $contextTitle );
}
|
|
|
|
/**
 * Formats wiki links and media links in text; all other wiki formatting
 * is ignored
 *
 * @todo FIXME: Doesn't handle sub-links as in image thumb texts like the main parser
 *
 * @param string $comment Text to format links in. WARNING! Since the output of this
 *  function is html, $comment must be sanitized for use as html. You probably want
 *  to pass $comment through Sanitizer::escapeHtmlAllowEntities() before calling
 *  this function.
 * @param LinkTarget|null $selfLinkTarget An optional LinkTarget object used to link to sections
 * @param bool $samePage Whether section links should refer to local page
 * @param string|false|null $wikiId Id of the wiki to link to (if not the local wiki),
 *  as used by WikiMap.
 *
 * @return string HTML
 */
private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
	return preg_replace_callback(
		'/
			\[\[
			\s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
			:? # ignore optional leading colon
			([^[\]|]+) # 1. link target; page names cannot include [, ] or |
			(?:\|
				# 2. link text
				# Stop matching at ]] without relying on backtracking.
				((?:]?[^\]])*+)
			)?
			\]\]
			([^[]*) # 3. link trail (the text up until the next link)
		/x',
		function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
			// Matches the canonical or the content-language Media namespace prefix
			$medians = '(?:';
			$medians .= preg_quote(
				$this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' );
			$medians .= '|';
			$medians .= preg_quote(
					$this->contLang->getNsText( NS_MEDIA ),
					'/'
				) . '):';

			// Returned unchanged unless a link marker can be produced below
			$comment = $match[0];

			// Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
			if ( strpos( $match[1], '%' ) !== false ) {
				// Decoding may produce raw angle brackets (%3C, %3E). The result
				// can end up as link text that is treated as already-escaped
				// HTML, so they must be re-encoded as entities here.
				$match[1] = strtr(
					rawurldecode( $match[1] ),
					[ '<' => '&lt;', '>' => '&gt;' ]
				);
			}

			// Handle link renaming [[foo|text]] will show link as "text"
			if ( $match[2] != "" ) {
				$text = $match[2];
			} else {
				$text = $match[1];
			}
			$submatch = [];
			$linkMarker = null;
			if ( preg_match( '/^' . $medians . '(.*)$/i', $match[1], $submatch ) ) {
				// Media link; trail not supported.
				$linkRegexp = '/\[\[(.*?)\]\]/';
				$linkTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $submatch[1] );
				if ( $linkTarget ) {
					$linkMarker = $this->addFileLink( $linkTarget, $text );
				}
			} else {
				// Other kind of link
				// Make sure its target is non-empty
				if ( isset( $match[1][0] ) && $match[1][0] == ':' ) {
					$match[1] = substr( $match[1], 1 );
				}
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				if ( $match[1] !== false && $match[1] !== null && $match[1] !== '' ) {
					if ( preg_match(
						$this->contLang->linkTrail(),
						$match[3],
						$submatch
					) ) {
						$trail = $submatch[1];
					} else {
						$trail = "";
					}
					$linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
					[ $inside, $trail ] = Linker::splitTrail( $trail );

					$linkText = $text;
					$linkTarget = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

					try {
						$target = $this->titleParser->parseTitle( $linkTarget );

						// A fragment-only link on another page points at a section of
						// $selfLinkTarget.
						if ( $target->getText() == '' && !$target->isExternal()
							&& !$samePage && $selfLinkTarget
						) {
							$target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
						}

						// We should deprecate `null` as a valid value for
						// $selfLinkTarget to ensure that we can use it as
						// the title context for the external link.
						// phpcs:ignore MediaWiki.Usage.DeprecatedGlobalVariables.Deprecated$wgTitle
						global $wgTitle;
						$linkMarker = $this->addPageLink(
							$target,
							$linkText . $inside,
							$wikiId,
							$selfLinkTarget ?? $wgTitle ?? SpecialPage::getTitleFor( 'Badtitle' )
						);
						$linkMarker .= $trail;
					} catch ( MalformedTitleException $e ) {
						// Fall through: the original text is kept, unlinked
					}
				}
			}
			if ( $linkMarker ) {
				// If the link is still valid, go ahead and replace it in!
				$comment = preg_replace(
					// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable linkRegexp set when used
					// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal linkRegexp set when used
					$linkRegexp,
					StringUtils::escapeRegexReplacement( $linkMarker ),
					$comment,
					1
				);
			}

			return $comment;
		},
		$comment
	);
}
|
|
|
|
/**
 * Register a deferred link renderer and return the marker that stands in
 * for it until finalize() runs.
 *
 * @param callable $callback
 * @return string
 */
private function addLinkMarker( $callback ) {
	// IDs are simply positions in the list of deferred links
	$id = count( $this->links );
	$digits = strlen( (string)$id );
	if ( $digits > self::MAX_ID_SIZE ) {
		throw new \RuntimeException( 'Too many links in comment batch' );
	}

	$this->links[] = $callback;

	// Zero-pad the ID to a fixed width
	$format = self::MARKER_PREFIX . "%0" . self::MAX_ID_SIZE . 'd';
	return sprintf( $format, $id );
}
|
|
|
|
/**
 * Link to a LinkTarget. The result is finished HTML when the link can be
 * rendered immediately, or a marker when the existence check has to be
 * deferred to the link batch.
 *
 * @param LinkTarget $target
 * @param string $text
 * @param string|false|null $wikiId
 * @param LinkTarget $contextTitle
 * @return string
 */
private function addPageLink( LinkTarget $target, $text, $wikiId, LinkTarget $contextTitle ) {
	$isForeign = $wikiId !== null && $wikiId !== false && !$target->isExternal();
	if ( $isForeign ) {
		// Handle links from a foreign wiki ID
		if ( $target->getNamespace() === 0 ) {
			$pageName = $target->getDBkey();
		} else {
			$pageName = $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
				':' . $target->getDBkey();
		}
		$url = WikiMap::getForeignURL( $wikiId, $pageName, $target->getFragment() );
		// $text is already escaped
		return $this->linkRenderer->makeExternalLink( $url, new HtmlArmor( $text ), $contextTitle );
	}

	$isKnown = $this->linkCache->getGoodLinkID( $target ) ||
		Title::newFromLinkTarget( $target )->isAlwaysKnown();
	if ( $isKnown ) {
		// Already known
		return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
	}

	if ( $this->linkCache->isBadLink( $target ) ) {
		// Already cached as unknown
		return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
	}

	// Defer page link: queue the target and render it once the batch has run
	$batch = $this->linkBatch;
	if ( !$batch ) {
		$batch = $this->linkBatchFactory->newLinkBatch();
		$this->linkBatch = $batch;
		$batch->setCaller( __METHOD__ );
	}
	$batch->addObj( $target );

	$renderLater = function () use ( $target, $text ) {
		return $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) );
	};
	return $this->addLinkMarker( $renderLater );
}
|
|
|
|
/**
 * Queue a file lookup for the target and return a marker for the media link.
 *
 * @param LinkTarget $target The name of the file.
 * @param string $html The inner HTML of the link
 * @return string
 */
private function addFileLink( LinkTarget $target, $html ) {
	$lookup = [ 'title' => $target ];
	$this->fileBatch[] = $lookup;

	$renderLater = function () use ( $target, $html ) {
		// false when no file was resolved under this DB key
		$file = $this->files[$target->getDBkey()] ?? false;
		return Linker::makeMediaLinkFile( $target, $file, $html );
	};
	return $this->addLinkMarker( $renderLater );
}
|
|
|
|
/**
 * Run the pending link batch and the pending file batch, if there are any,
 * and clear them.
 */
private function flushLinkBatches() {
	$pendingLinks = $this->linkBatch;
	if ( $pendingLinks ) {
		$pendingLinks->execute();
		$this->linkBatch = null;
	}

	$pendingFiles = $this->fileBatch;
	if ( $pendingFiles ) {
		// Keep files that were already resolved; only add new keys
		$this->files += $this->repoGroup->findFiles( $pendingFiles );
		$this->fileBatch = [];
	}
}
|
|
|
|
}
|