Expand local URLs to absolute URLs in ParserOutput

A new option, 'absoluteURLs', was added to the getText method
of the ParserOutput object. When enabled, it replaces all link
targets in the page HTML with absolute URLs.

Removing the action=render special case from Title
seems safe because we will end up replacing the result
with an absolute URL if we're in a render action, no matter
where Title::getLocalURL was called from.

This change is safely revertable from the perspective
of ParserCache.

Bug: T263581
Change-Id: Id660e1026192f40181587199d3418568f0fdb6d3
This commit is contained in:
Petr Pchelko 2021-09-21 07:34:30 -07:00
parent 629c5e28ac
commit d334de960a
5 changed files with 64 additions and 22 deletions

View file

@ -25,7 +25,6 @@
use MediaWiki\DAO\WikiAwareEntityTrait;
use MediaWiki\Interwiki\InterwikiLookup;
use MediaWiki\Linker\LinkTarget;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\ExistingPageRecord;
use MediaWiki\Page\PageIdentity;
@ -2267,7 +2266,7 @@ class Title implements LinkTarget, PageIdentity, IDBAccessObject {
* @return string
*/
public function getLocalURL( $query = '', $query2 = false ) {
global $wgArticlePath, $wgScript, $wgServer, $wgRequest, $wgMainPageIsDomainRoot;
global $wgArticlePath, $wgScript, $wgMainPageIsDomainRoot;
$query = self::fixUrlQueryArgs( $query, $query2 );
@ -2337,20 +2336,6 @@ class Title implements LinkTarget, PageIdentity, IDBAccessObject {
}
}
Hooks::runner()->onGetLocalURL__Internal( $this, $url, $query );
// @todo FIXME: This causes breakage in various places when we
// actually expected a local URL and end up with dupe prefixes.
if ( $wgRequest->getRawVal( 'action' ) == 'render' ) {
LoggerFactory::getInstance( 'T263581' )
->debug(
"Title::getLocalURL called from render action",
[
'title' => $this->getPrefixedDBkey(),
'exception' => new Exception()
]
);
$url = $wgServer . $url;
}
}
Hooks::runner()->onGetLocalURL( $this, $url, $query );

View file

@ -502,6 +502,9 @@ class Article implements Page {
$this->showRedirectedFromHeader();
$this->showNamespaceHeader();
if ( $this->viewIsRenderAction ) {
$poOptions += [ 'absoluteURLs' => true ];
}
$continue =
$this->generateContentOutput( $user, $parserOptions, $oldid, $outputPage, $poOptions );

View file

@ -302,17 +302,15 @@ class ParserCache {
ParserOptions $options,
array $usedOptions = null
): string {
global $wgRequest;
$usedOptions = $usedOptions ?? ParserOptions::allCacheVaryingOptions();
// idhash seem to mean 'page id' + 'rendering hash' (r3710)
$pageid = $page->getId( PageRecord::LOCAL );
// TODO: remove the split T263581
$renderkey = (int)( $wgRequest->getRawVal( 'action' ) == 'render' );
$title = $this->titleFactory->castFromPageIdentity( $page );
$hash = $options->optionsHash( $usedOptions, $title );
return $this->cache->makeKey( $this->name, 'idhash', "{$pageid}-{$renderkey}!{$hash}" );
// Before T263581 ParserCache was split between normal page views
// and action=render. -0 is left in the key to avoid invalidating the entire
// cache when removing the cache split.
return $this->cache->makeKey( $this->name, 'idhash', "{$pageid}-0!{$hash}" );
}
/**

View file

@ -1,5 +1,6 @@
<?php
use HtmlFormatter\HtmlFormatter;
use MediaWiki\Json\JsonUnserializable;
use MediaWiki\Json\JsonUnserializableTrait;
use MediaWiki\Json\JsonUnserializer;
@ -344,6 +345,7 @@ class ParserOutput extends CacheTime {
* rel="mw-deduplicated-inline-style" href="mw-data:..."/>` tags, where
* the scheme-specific-part of the href is the (percent-encoded) value
* of the `data-mw-deduplicate` attribute.
* - absoluteURLs: (bool) use absolute URLs in all links. Default: false
* @return string HTML
* @return-taint escaped
*/
@ -441,6 +443,22 @@ class ParserOutput extends CacheTime {
);
}
// Expand all relative URLs
if ( ( $options['absoluteURLs'] ?? false ) && $text ) {
$formatter = new HtmlFormatter( $text );
$doc = $formatter->getDoc();
$xpath = new DOMXPath( $doc );
$nodes = $xpath->query( '//a[@href]' );
/** @var DOMElement $node */
foreach ( $nodes as $node ) {
$node->setAttribute(
'href',
wfExpandUrl( $node->getAttribute( 'href' ), PROTO_RELATIVE )
);
}
$text = $formatter->getText( 'html' );
}
// Hydrate slot section header placeholders generated by RevisionRenderer.
$text = preg_replace_callback(
'#<mw:slotheader>(.*?)</mw:slotheader>#',

View file

@ -383,6 +383,44 @@ EOF
$po->getText();
}
/**
 * Data provider for testGetText_absoluteURLs().
 *
 * Every dataset holds the HTML the ParserOutput is constructed with ('text')
 * and the HTML expected back from getText() when the 'absoluteURLs' option
 * is enabled ('expectedText'). The expected values assume the server is
 * configured as '//TEST_SERVER', which the consuming test sets up.
 *
 * Declared static because it needs no instance state; PHPUnit accepts static
 * providers in all supported versions and deprecates non-static ones.
 *
 * @return iterable Dataset name => [ 'text' => string, 'expectedText' => string ]
 */
public static function provideGetText_absoluteURLs(): iterable {
	// Nothing to expand: the input must come back unchanged.
	yield 'empty' => [
		'text' => '',
		'expectedText' => '',
	];
	yield 'no-links' => [
		'text' => '<p>test</p>',
		'expectedText' => '<p>test</p>',
	];
	// A local link gets the server prefix.
	yield 'simple link' => [
		'text' => '<a href="/wiki/Test">test</a>',
		'expectedText' => '<a href="//TEST_SERVER/wiki/Test">test</a>',
	];
	// Links that already carry a host must not be modified.
	yield 'already absolute, relative' => [
		'text' => '<a href="//TEST_SERVER/wiki/Test">test</a>',
		'expectedText' => '<a href="//TEST_SERVER/wiki/Test">test</a>',
	];
	yield 'already absolute, https' => [
		'text' => '<a href="https://TEST_SERVER/wiki/Test">test</a>',
		'expectedText' => '<a href="https://TEST_SERVER/wiki/Test">test</a>',
	];
	yield 'external' => [
		'text' => '<a href="https://en.wikipedia.org/wiki/Test">test</a>',
		'expectedText' => '<a href="https://en.wikipedia.org/wiki/Test">test</a>',
	];
}
/**
 * Check that getText() rewrites link targets to absolute URLs when the
 * 'absoluteURLs' option is enabled, and returns the provider's expected
 * HTML for each input.
 *
 * @covers ParserOutput::getText
 * @dataProvider provideGetText_absoluteURLs
 *
 * @param string $text HTML the ParserOutput is constructed with
 * @param string $expectedText HTML expected from getText()
 */
public function testGetText_absoluteURLs( string $text, string $expectedText ): void {
	// Pin the server name so the expanded URLs in the provider are deterministic.
	$this->setMwGlobals( [
		'wgServer' => '//TEST_SERVER'
	] );
	$parserOutput = new ParserOutput( $text );
	$this->assertSame( $expectedText, $parserOutput->getText( [ 'absoluteURLs' => true ] ) );
}
/**
* @covers ParserOutput::getRawText
*/