CommentParser: * Move comment formatting backend from Linker to a CommentParser service. Allow link existence and file existence to be batched. * Rename $local to $samePage since I think that is clearer. * Rename $title to $selfLinkTarget since it was unclear what the title was used for. * Rename the "autocomment" concept to "section link" in public interfaces, although the old term remains in CSS classes. * Keep unsafe HTML pass-through in separate "unsafe" methods, for easier static analysis and code review. CommentFormatter: * Add CommentFormatter and RowCommentFormatter services as a usable frontend for comment batches, and to replace the Linker static methods. * Provide fluent and parametric interfaces. Linker: * Remove Linker::makeCommentLink() without deprecation -- nothing calls it and it is obviously an internal helper. * Soft-deprecate Linker methods formatComment(), formatLinksInComment(), commentBlock() and revComment(). Caller migration: * CommentFormatter single: Linker, RollbackAction, ApiComparePages, ApiParse * CommentFormatter parametric batch: ImageHistoryPseudoPager * CommentFormatter fluent batch: ApiQueryFilearchive * RowCommentFormatter sequential: History feed, BlocklistPager, ProtectedPagesPager, ApiQueryProtectedTitles * RowCommentFormatter with index: ChangesFeed, ChangesList, ApiQueryDeletedrevs, ApiQueryLogEvents, ApiQueryRecentChanges * RevisionCommentBatch: HistoryPager, ContribsPager Bug: T285917 Change-Id: Ia3fd50a4a13138ba5003d884962da24746d562d0
528 lines
16 KiB
PHP
528 lines
16 KiB
PHP
<?php
|
|
|
|
namespace MediaWiki\CommentFormatter;
|
|
|
|
use File;
|
|
use HtmlArmor;
|
|
use Language;
|
|
use LinkBatch;
|
|
use LinkCache;
|
|
use Linker;
|
|
use MalformedTitleException;
|
|
use MediaWiki\Cache\LinkBatchFactory;
|
|
use MediaWiki\HookContainer\HookContainer;
|
|
use MediaWiki\HookContainer\HookRunner;
|
|
use MediaWiki\Linker\LinkRenderer;
|
|
use MediaWiki\Linker\LinkTarget;
|
|
use NamespaceInfo;
|
|
use Parser;
|
|
use RepoGroup;
|
|
use Title;
|
|
use TitleParser;
|
|
use TitleValue;
|
|
|
|
/**
 * The text processing backend for CommentFormatter.
 *
 * Work is split into two phases so that existence checks can be batched:
 *
 *  - preprocess() / preprocessUnsafe() convert one comment to HTML. Links
 *    whose appearance depends on a page or file lookup are replaced by a
 *    marker (an ESC byte followed by a fixed-width decimal ID) and queued.
 *  - finalize() runs the queued lookups and then substitutes every marker
 *    with the finished link HTML.
 *
 * CommentParser objects should be discarded after the comment batch is
 * complete, in order to reduce memory usage.
 *
 * @internal
 */
class CommentParser {
	/** @var LinkRenderer */
	private $linkRenderer;
	/** @var LinkBatchFactory */
	private $linkBatchFactory;
	/** @var RepoGroup */
	private $repoGroup;
	/** @var Language */
	private $userLang;
	/** @var Language */
	private $contLang;
	/** @var TitleParser */
	private $titleParser;
	/** @var NamespaceInfo */
	private $namespaceInfo;
	/** @var HookRunner */
	private $hookRunner;
	/** @var LinkCache */
	private $linkCache;

	/** @var callable[] Deferred link generators, indexed by marker ID */
	private $links = [];
	/** @var LinkBatch|null Pending page existence checks, created on first use */
	private $linkBatch;

	/** @var array Input to RepoGroup::findFiles() */
	private $fileBatch = [];
	/** @var File[] Resolved File objects indexed by DB key */
	private $files = [];

	/** @var int The maximum number of digits in a marker ID */
	private const MAX_ID_SIZE = 7;

	/**
	 * @param LinkRenderer $linkRenderer
	 * @param LinkBatchFactory $linkBatchFactory
	 * @param LinkCache $linkCache
	 * @param RepoGroup $repoGroup
	 * @param Language $userLang
	 * @param Language $contLang
	 * @param TitleParser $titleParser
	 * @param NamespaceInfo $namespaceInfo
	 * @param HookContainer $hookContainer Wrapped in a HookRunner internally
	 */
	public function __construct(
		LinkRenderer $linkRenderer,
		LinkBatchFactory $linkBatchFactory,
		LinkCache $linkCache,
		RepoGroup $repoGroup,
		Language $userLang,
		Language $contLang,
		TitleParser $titleParser,
		NamespaceInfo $namespaceInfo,
		HookContainer $hookContainer
	) {
		$this->linkRenderer = $linkRenderer;
		$this->linkBatchFactory = $linkBatchFactory;
		$this->linkCache = $linkCache;
		$this->repoGroup = $repoGroup;
		$this->userLang = $userLang;
		$this->contLang = $contLang;
		$this->titleParser = $titleParser;
		$this->namespaceInfo = $namespaceInfo;
		$this->hookRunner = new HookRunner( $hookContainer );
	}

	/**
	 * Convert a comment to HTML, but replace links with markers which are
	 * resolved later.
	 *
	 * The comment is HTML-escaped (entities are allowed through) before any
	 * link processing takes place.
	 *
	 * @param string $comment
	 * @param LinkTarget|null $selfLinkTarget
	 * @param bool $samePage
	 * @param string|false|null $wikiId
	 * @param bool $enableSectionLinks
	 * @return string HTML which may contain markers; pass it to finalize()
	 */
	public function preprocess( string $comment, ?LinkTarget $selfLinkTarget = null,
		$samePage = false, $wikiId = false, $enableSectionLinks = true
	) {
		return $this->preprocessInternal( $comment, false, $selfLinkTarget,
			$samePage, $wikiId, $enableSectionLinks );
	}

	/**
	 * Convert a comment in pseudo-HTML format to HTML, replacing links with markers.
	 *
	 * Unlike preprocess(), the input is NOT HTML-escaped, so the caller is
	 * responsible for making sure it is safe to output.
	 *
	 * @param string $comment
	 * @param LinkTarget|null $selfLinkTarget
	 * @param bool $samePage
	 * @param string|false|null $wikiId
	 * @param bool $enableSectionLinks
	 * @return string HTML which may contain markers; pass it to finalize()
	 */
	public function preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget = null,
		$samePage = false, $wikiId = false, $enableSectionLinks = true
	) {
		return $this->preprocessInternal( $comment, true, $selfLinkTarget,
			$samePage, $wikiId, $enableSectionLinks );
	}

	/**
	 * Execute pending batch queries and replace markers in the specified
	 * string(s) with actual links.
	 *
	 * A marker whose ID has no registered callback is replaced with the
	 * HTML comment "<!-- MISSING -->".
	 *
	 * @param string|string[] $comments
	 * @return string|string[] Same shape as the input
	 */
	public function finalize( $comments ) {
		$this->flushLinkBatches();
		return preg_replace_callback(
			'/\x1b([0-9]{' . self::MAX_ID_SIZE . '})/',
			function ( $m ) {
				$callback = $this->links[(int)$m[1]] ?? null;
				return $callback ? $callback() : '<!-- MISSING -->';
			},
			$comments
		);
	}

	/**
	 * Shared implementation of preprocess() and preprocessUnsafe().
	 *
	 * @param string $comment
	 * @param bool $unsafe If true, skip HTML escaping of the input
	 * @param LinkTarget|null $selfLinkTarget
	 * @param bool $samePage
	 * @param string|false|null $wikiId
	 * @param bool $enableSectionLinks
	 * @return string
	 */
	private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
		$enableSectionLinks
	) {
		// Sanitize text a bit
		// \x1b needs to be stripped because it is used for link markers.
		// The replacement must be exactly as long as the character list:
		// strtr() silently ignores unpaired characters, so a shorter
		// replacement would leave \x1b in place and let user input forge
		// markers.
		$comment = strtr( $comment, "\n\x1b", '  ' );
		// Allow HTML entities (for T15815)
		if ( !$unsafe ) {
			$comment = \Sanitizer::escapeHtmlAllowEntities( $comment );
		}
		if ( $enableSectionLinks ) {
			$comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
		}
		return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
	}

	/**
	 * Converts C-style comments in edit summaries into section links.
	 *
	 * Too many things are called "comments", so these are mostly now called
	 * section links rather than autocomments.
	 *
	 * We look for all comments, match any text before and after the comment,
	 * add a separator where needed and format the comment itself with CSS.
	 *
	 * The FormatAutocomments hook is run for every match and may supply
	 * replacement HTML, in which case the default formatting is skipped.
	 *
	 * @param string $comment Comment text
	 * @param LinkTarget|null $selfLinkTarget An optional LinkTarget object used to link to sections
	 * @param bool $samePage Whether section links should refer to local page
	 * @param string|false|null $wikiId Id of the wiki to link to (if not the local wiki),
	 *  as used by WikiMap.
	 * @return string Preprocessed comment
	 */
	private function doSectionLinks(
		$comment,
		$selfLinkTarget = null,
		$samePage = false,
		$wikiId = false
	) {
		// @todo $append here is something of a hack to preserve the status
		// quo. Someone who knows more about bidi and such should decide
		// (1) what sane rendering even *is* for an LTR edit summary on an RTL
		// wiki, both when autocomments exist and when they don't, and
		// (2) what markup will make that actually happen.
		$append = '';
		$comment = preg_replace_callback(
			// To detect the presence of content before or after the
			// auto-comment, we use capturing groups inside optional zero-width
			// assertions. But older versions of PCRE can't directly make
			// zero-width assertions optional, so wrap them in a non-capturing
			// group.
			'!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!',
			function ( $match ) use ( &$append, $selfLinkTarget, $samePage, $wikiId ) {
				// Ensure all match positions are defined
				$match += [ '', '', '', '' ];

				$pre = $match[1] !== '';
				$auto = $match[2];
				$post = $match[3] !== '';
				$comment = null;

				$this->hookRunner->onFormatAutocomments(
					$comment, $pre, $auto, $post,
					Title::castFromLinkTarget( $selfLinkTarget ),
					$samePage,
					$wikiId );
				if ( $comment !== null ) {
					// A hook handler produced the final HTML
					return $comment;
				}

				if ( $selfLinkTarget ) {
					# Remove links that a user may have manually put in the autosummary
					# This could be improved by copying as much of Parser::stripSectionName as desired.
					$section = str_replace( [ '[[:', '[[', ']]' ], '', $auto );

					// We don't want any links in the auto text to be linked, but we still
					// want to show any [[ ]]. Encoding the first bracket as an entity
					// keeps it visible while hiding it from doWikiLinks().
					$sectionText = str_replace( '[[', '&#91;[', $auto );

					// Drop the first character of the guessed section name
					$section = substr( Parser::guessSectionNameFromStrippedText( $section ), 1 );
					if ( $section !== '' ) {
						if ( $samePage ) {
							// Fragment-only link
							$sectionTitle = new TitleValue( NS_MAIN, '', $section );
						} else {
							$sectionTitle = $selfLinkTarget->createFragmentTarget( $section );
						}
						$auto = $this->makeSectionLink(
							$sectionTitle,
							$this->userLang->getArrow() . $this->userLang->getDirMark() . $sectionText,
							$wikiId
						);
					}
				}
				if ( $pre ) {
					# written summary $presep autocomment (summary /* section */)
					$pre = wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped();
				}
				if ( $post ) {
					# autocomment $postsep written summary (/* section */ summary)
					$auto .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
				}
				if ( $auto ) {
					// The outer span is deliberately left open here; it is closed
					// by $append after the whole comment.
					$auto = '<span dir="auto"><span class="autocomment">' . $auto . '</span>';
					$append .= '</span>';
				}
				return $pre . $auto;
			},
			$comment
		);
		return $comment . $append;
	}

	/**
	 * Make a section link. These don't need to go into the LinkBatch, since
	 * the link class does not depend on whether the link is known.
	 *
	 * @param LinkTarget $target
	 * @param string $text Link HTML, already escaped
	 * @param string|false|null $wikiId Id of the wiki to link to (if not the local wiki),
	 *  as used by WikiMap.
	 *
	 * @return string HTML link
	 */
	private function makeSectionLink(
		LinkTarget $target, $text, $wikiId
	) {
		if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
			return $this->makeForeignLink( $target, $text, $wikiId );
		}
		return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
	}

	/**
	 * Build an external-style link to a page on another wiki.
	 *
	 * Pages outside the main namespace are addressed with the canonical
	 * namespace name as prefix.
	 *
	 * @param LinkTarget $target
	 * @param string $text Link HTML, already escaped
	 * @param string $wikiId Id of the foreign wiki, as used by WikiMap
	 * @return string HTML link
	 */
	private function makeForeignLink( LinkTarget $target, $text, $wikiId ) {
		$page = $target->getDBkey();
		if ( $target->getNamespace() !== 0 ) {
			$page = $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
				':' . $page;
		}
		return Linker::makeExternalLink(
			\WikiMap::getForeignURL( $wikiId, $page, $target->getFragment() ),
			$text,
			/* escape = */ false // Already escaped
		);
	}

	/**
	 * Formats wiki links and media links in text; all other wiki formatting
	 * is ignored
	 *
	 * @todo FIXME: Doesn't handle sub-links as in image thumb texts like the main parser
	 *
	 * @param string $comment Text to format links in. WARNING! Since the output of this
	 *  function is html, $comment must be sanitized for use as html. You probably want
	 *  to pass $comment through Sanitizer::escapeHtmlAllowEntities() before calling
	 *  this function.
	 * @param LinkTarget|null $selfLinkTarget An optional LinkTarget object used to link to sections
	 * @param bool $samePage Whether section links should refer to local page
	 * @param string|false|null $wikiId Id of the wiki to link to (if not the local wiki),
	 *  as used by WikiMap.
	 *
	 * @return string HTML
	 */
	private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
		return preg_replace_callback(
			'/
				\[\[
				\s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
				:? # ignore optional leading colon
				([^[\]|]+) # 1. link target; page names cannot include [, ] or |
				(?:\|
					# 2. link text
					# Stop matching at ]] without relying on backtracking.
					((?:]?[^\]])*+)
				)?
				\]\]
				([^[]*) # 3. link trail (the text up until the next link)
			/x',
			function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
				// Matches a "Media:" prefix, either canonical or in the content language
				$medians = '(?:';
				$medians .= preg_quote(
					$this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' );
				$medians .= '|';
				$medians .= preg_quote(
					$this->contLang->getNsText( NS_MEDIA ),
					'/'
				) . '):';

				$comment = $match[0];

				// Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
				if ( strpos( $match[1], '%' ) !== false ) {
					// The comment was HTML-escaped before decoding, so angle
					// brackets produced by %3C / %3E must be escaped again here.
					$match[1] = strtr(
						rawurldecode( $match[1] ),
						[ '<' => '&lt;', '>' => '&gt;' ]
					);
				}

				// Handle link renaming [[foo|text]] will show link as "text"
				$text = $match[2] !== '' ? $match[2] : $match[1];

				$submatch = [];
				$linkMarker = null;
				if ( preg_match( '/^' . $medians . '(.*)$/i', $match[1], $submatch ) ) {
					// Media link; trail not supported.
					$linkRegexp = '/\[\[(.*?)\]\]/';
					$linkTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $submatch[1] );
					if ( $linkTarget ) {
						$linkMarker = $this->addFileLink( $linkTarget, $text );
					}
				} else {
					// Other kind of link
					// Make sure its target is non-empty
					if ( isset( $match[1][0] ) && $match[1][0] === ':' ) {
						$match[1] = substr( $match[1], 1 );
					}
					if ( $match[1] !== false && $match[1] !== '' ) {
						if ( preg_match(
							$this->contLang->linkTrail(),
							$match[3],
							$submatch
						) ) {
							$trail = $submatch[1];
						} else {
							$trail = "";
						}
						$linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
						[ $inside, $trail ] = Linker::splitTrail( $trail );

						$linkText = $text;
						$linkTarget = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

						try {
							$target = $this->titleParser->parseTitle( $linkTarget );

							// A fragment-only link refers to a section of the self-link target
							if ( $target->getText() === '' && !$target->isExternal()
								&& !$samePage && $selfLinkTarget
							) {
								$target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
							}

							$linkMarker = $this->addPageLink( $target, $linkText . $inside, $wikiId );
							$linkMarker .= $trail;
						} catch ( MalformedTitleException $e ) {
							// Fall through: the original text is left unchanged
						}
					}
				}
				if ( $linkMarker ) {
					// If the link is still valid, go ahead and replace it in!
					// $linkMarker may be finished HTML containing user-supplied
					// text, so neutralise "$1" / "\1" style backreferences before
					// using it as a replacement string.
					$comment = preg_replace(
						$linkRegexp,
						strtr( $linkMarker, [ '\\' => '\\\\', '$' => '\\$' ] ),
						$comment,
						1
					);
				}

				return $comment;
			},
			$comment
		);
	}

	/**
	 * Add a deferred link to the list and return its marker.
	 *
	 * The marker is an ESC byte followed by the callback's list index,
	 * zero-padded to MAX_ID_SIZE digits.
	 *
	 * @param callable $callback Called without arguments by finalize(); returns link HTML
	 * @return string
	 * @throws \RuntimeException If the ID would need more than MAX_ID_SIZE digits
	 */
	private function addLinkMarker( $callback ) {
		$nextId = count( $this->links );
		if ( strlen( (string)$nextId ) > self::MAX_ID_SIZE ) {
			throw new \RuntimeException( 'Too many links in comment batch' );
		}
		$this->links[] = $callback;
		return sprintf( "\x1b%0" . self::MAX_ID_SIZE . 'd', $nextId );
	}

	/**
	 * Link to a LinkTarget. Return either HTML or a marker depending on whether
	 * existence checks are deferred.
	 *
	 * HTML is returned immediately for links to a foreign wiki and for
	 * targets whose state is already present in the LinkCache. Everything
	 * else is added to the pending LinkBatch and represented by a marker.
	 *
	 * @param LinkTarget $target
	 * @param string $text Link HTML, already escaped
	 * @param string|false|null $wikiId
	 * @return string
	 */
	private function addPageLink( LinkTarget $target, $text, $wikiId ) {
		// Handle external links (not including interwiki links)
		if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
			return $this->makeForeignLink( $target, $text, $wikiId );
		}

		if ( $this->linkCache->getGoodLinkID( $target ) ) {
			// Already known
			return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
		} elseif ( $this->linkCache->isBadLink( $target ) ) {
			// Already cached as unknown
			return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
		}

		// Defer page link
		if ( !$this->linkBatch ) {
			$this->linkBatch = $this->linkBatchFactory->newLinkBatch();
		}
		$this->linkBatch->addObj( $target );
		return $this->addLinkMarker( function () use ( $target, $text ) {
			return $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) );
		} );
	}

	/**
	 * Link to a file, returning a marker.
	 *
	 * The file is queued for lookup; the link itself is built when
	 * finalize() resolves the marker. False is passed as the file when the
	 * lookup found nothing under the target's DB key.
	 *
	 * @param LinkTarget $target The name of the file.
	 * @param string $html The inner HTML of the link
	 * @return string
	 */
	private function addFileLink( LinkTarget $target, $html ) {
		$this->fileBatch[] = [
			'title' => $target
		];
		return $this->addLinkMarker( function () use ( $target, $html ) {
			return Linker::makeMediaLinkFile(
				$target,
				$this->files[$target->getDBkey()] ?? false,
				$html
			);
		} );
	}

	/**
	 * Execute any pending link batch or file batch
	 *
	 * Both queues are emptied afterwards. File lookup results are merged
	 * into $this->files without overwriting existing entries.
	 */
	private function flushLinkBatches() {
		if ( $this->linkBatch ) {
			$this->linkBatch->execute();
			$this->linkBatch = null;
		}
		if ( $this->fileBatch ) {
			$this->files += $this->repoGroup->findFiles( $this->fileBatch );
			$this->fileBatch = [];
		}
	}

}
|