[parsoid] Fix Parsoid relative links
Bug: T350952
Change-Id: I60165a9946a35cfb42a78ed2f833c34570fefffc
This commit is contained in:
parent
7328f958cd
commit
0e1b889a0f
4 changed files with 27 additions and 8 deletions
|
|
@ -7,6 +7,7 @@ use Linker;
|
|||
use MediaWiki\HookContainer\HookContainer;
|
||||
use MediaWiki\HookContainer\HookRunner;
|
||||
use MediaWiki\Html\Html;
|
||||
use MediaWiki\Html\HtmlHelper;
|
||||
use MediaWiki\Languages\LanguageFactory;
|
||||
use MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter;
|
||||
use MediaWiki\Tidy\TidyDriverBase;
|
||||
|
|
@ -18,6 +19,7 @@ use RequestContext;
|
|||
use Sanitizer;
|
||||
use Skin;
|
||||
use Title;
|
||||
use Wikimedia\RemexHtml\Serializer\SerializerNode;
|
||||
|
||||
/**
|
||||
* This class contains the default output transformation pipeline for wikitext. It is a postprocessor for
|
||||
|
|
@ -177,11 +179,34 @@ class DefaultOutputTransform {
|
|||
*/
|
||||
private function extractBody( string $text ): string {
	// $text is a full HTML document, generated by Parsoid. The return value
	// is the content found between <body ...> and </body></html>, with every
	// <a> whose href starts with "./" rewritten via the document's <base href>.

	// T350952: temporary fix for subpage paths: use Parsoid's
	// <base href> to expand relative links.
	// The tail is `[^>]*` rather than `[^>]+` so that a tag closed directly
	// after the attribute value (<base href="...">) is recognised as well as
	// the self-closing form (<base href="..."/>).
	$baseHref = '';
	if ( preg_match( '{<base href=["\']([^"\']+)["\'][^>]*>}', $text, $matches ) === 1 ) {
		$baseHref = $matches[1];
	}

	// Strip everything but the <body>
	// Probably would be better to process this as a DOM.
	// preg_replace() yields null on a PCRE error (e.g. the backtrack limit
	// being hit on a very large document); keep the unmodified text in that
	// case so a string is always passed on and returned.
	$text = preg_replace( '!^.*?<body[^>]*>!s', '', $text, 1 ) ?? $text;
	$text = preg_replace( '!</body>\s*</html>\s*$!', '', $text, 1 ) ?? $text;

	// T350952: Expand relative links
	// What we should be doing here is parsing as a title and then
	// using Title::getLocalURL()
	return HtmlHelper::modifyElements(
		$text,
		// Select only anchors carrying a "./"-relative href.
		static function ( SerializerNode $node ): bool {
			return $node->name === 'a' &&
				str_starts_with( $node->attrs['href'] ?? '', './' );
		},
		// Prefix the href with the base and let wfExpandUrl() normalise it.
		static function ( SerializerNode $node ) use ( $baseHref ): SerializerNode {
			$expanded = wfExpandUrl( $baseHref . $node->attrs['href'], PROTO_RELATIVE );
			// Leave the original href untouched if expansion did not produce
			// a string, rather than writing a non-string value into the
			// attribute.
			if ( is_string( $expanded ) ) {
				$node->attrs['href'] = $expanded;
			}
			return $node;
		}
	);
}
|
||||
|
||||
|
|
|
|||
|
|
@ -181,12 +181,6 @@ class ParsoidParser /* eventually this will extend \Parser */ {
|
|||
|
||||
$this->makeLimitReport( $options, $parserOutput );
|
||||
|
||||
// T350952: temporary fix for subpage paths: copy over Parsoid's
|
||||
// <base href> into ParserOutput's head items.
|
||||
if ( preg_match( '{<base href=[^>]+>}', $parserOutput->getRawText(), $matches ) === 1 ) {
|
||||
$parserOutput->addHeadItem( $matches[0] );
|
||||
}
|
||||
|
||||
// Record Parsoid version in extension data; this allows
|
||||
// us to use the onRejectParserCacheValue hook to selectively
|
||||
// expire "bad" generated content in the event of a rollback.
|
||||
|
|
|
|||
|
|
@ -593,7 +593,7 @@ class ApiParseTest extends ApiTestCase {
|
|||
yield [ false, false, $expected ];
|
||||
yield [ false, true, $expected ];
|
||||
// Parsoid parses, with and without pre-existing content.
|
||||
$expected = '!^<section[^>]*><p[^>]*><a rel="mw:WikiLink" href="./Foo" title="Foo"[^>]*>Foo</a></p></section>!';
|
||||
$expected = '!^<section[^>]*><p[^>]*><a rel="mw:WikiLink" href="[^"]*Foo" title="Foo"[^>]*>Foo</a></p></section>!';
|
||||
yield [ true, false, $expected ];
|
||||
yield [ true, true, $expected ];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ class WikitextContentHandlerIntegrationTest extends TextContentHandlerIntegratio
|
|||
'title' => 'WikitextContentTest_testGetParserOutput',
|
||||
'model' => CONTENT_MODEL_WIKITEXT,
|
||||
'text' => "#REDIRECT [[Main Page]]",
|
||||
'expectedHtml' => '<div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr">' . "<div class=\"redirectMsg\"><p>Redirect to:</p><ul class=\"redirectText\"><li><a href=\"/index.php?title=Main_Page&action=edit&redlink=1\" class=\"new\" title=\"Main Page (page does not exist)\">Main Page</a></li></ul></div><section data-mw-section-id=\"0\" id=\"mwAQ\"><link rel=\"mw:PageProp/redirect\" href=\"./Main_Page\" id=\"mwAg\"/></section></div>",
|
||||
'expectedHtml' => '<div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr">' . "<div class=\"redirectMsg\"><p>Redirect to:</p><ul class=\"redirectText\"><li><a href=\"/index.php?title=Main_Page&action=edit&redlink=1\" class=\"new\" title=\"Main Page (page does not exist)\">Main Page</a></li></ul></div><section data-mw-section-id=\"0\" id=\"mwAQ\"><link rel=\"mw:PageProp/redirect\" href=\"./Main_Page\" id=\"mwAg\"></section></div>",
|
||||
'expectedFields' => [
|
||||
'Links' => [
|
||||
[ 'Main_Page' => 0 ],
|
||||
|
|
|
|||
Loading…
Reference in a new issue