Allow setting a ParserOption to generate Parsoid HTML

This is an initial quick-and-dirty implementation.  The
ParsoidParser class will eventually inherit from \Parser,
but this is an initial placeholder to unblock other Parsoid
read views work.

Currently Parsoid does not fully implement all the ParserOutput
metadata set by the legacy parser, but we're working on it.

This patch also addresses T300325 by ensuring the Page HTML
APIs use ParserOutput::getRawText(), which will return the entire
Parsoid HTML document without post-processing.  This is what
the Parsoid team refers to as "edit mode" HTML. The
ParserOutput::getText() method returns only the <body> contents
of the HTML, and applies several transformations, including
inserting Table of Contents and style deduplication; this is
the "read views" flavor of the Parsoid HTML.

We need to be careful of the interaction of the `useParsoid` flag with
the ParserCacheMetadata.  Effectively `useParsoid` should *always* be
marked as "used" or else the ParserCache will assume its value doesn't
matter and will serve legacy content for parsoid requests and
vice-versa.  T330677 is a follow up to address this more thoroughly by
splitting the parser cache in ParserOutputAccess; the stop gap in this
patch is fragile and, because it doesn't fork the ParserCacheMetadata
cache, may corrupt the ParserCacheMetadata in the case when Parsoid
and the legacy parser consult different sets of options to render a
page.

Bug: T300191
Bug: T330677
Bug: T300325
Change-Id: Ica09a4284c00d7917f8b6249e946232b2fb38011
This commit is contained in:
C. Scott Ananian 2022-05-27 12:38:32 -04:00
parent b5684282b3
commit cfd9c516e1
26 changed files with 546 additions and 32 deletions

View file

@ -1609,6 +1609,8 @@ $wgAutoloadLocalClasses = [
'MediaWiki\\Parser\\Parsoid\\PageBundleJsonTrait' => __DIR__ . '/includes/parser/Parsoid/PageBundleJsonTrait.php',
'MediaWiki\\Parser\\Parsoid\\PageBundleParserOutputConverter' => __DIR__ . '/includes/parser/Parsoid/PageBundleParserOutputConverter.php',
'MediaWiki\\Parser\\Parsoid\\ParsoidOutputAccess' => __DIR__ . '/includes/parser/Parsoid/ParsoidOutputAccess.php',
'MediaWiki\\Parser\\Parsoid\\ParsoidParser' => __DIR__ . '/includes/parser/Parsoid/ParsoidParser.php',
'MediaWiki\\Parser\\Parsoid\\ParsoidParserFactory' => __DIR__ . '/includes/parser/Parsoid/ParsoidParserFactory.php',
'MediaWiki\\Parser\\Parsoid\\ParsoidRenderID' => __DIR__ . '/includes/parser/Parsoid/ParsoidRenderID.php',
'MediaWiki\\Parser\\Parsoid\\ParsoidServices' => __DIR__ . '/includes/parser/Parsoid/ParsoidServices.php',
'MediaWiki\\Parser\\RemexRemoveTagHandler' => __DIR__ . '/includes/parser/RemexRemoveTagHandler.php',

View file

@ -1984,7 +1984,7 @@ config-schema:
- 1.40: Added
ContentHandlers:
default:
wikitext: { class: WikitextContentHandler, services: [TitleFactory, ParserFactory, GlobalIdGenerator, LanguageNameUtils, MagicWordFactory] }
wikitext: { class: WikitextContentHandler, services: [TitleFactory, ParserFactory, GlobalIdGenerator, LanguageNameUtils, MagicWordFactory, ParsoidParserFactory] }
javascript: JavaScriptContentHandler
json: JsonContentHandler
css: CssContentHandler

View file

@ -3269,6 +3269,7 @@ class MainConfigSchema {
'GlobalIdGenerator',
'LanguageNameUtils',
'MagicWordFactory',
'ParsoidParserFactory',
],
],
// dumb version, no syntax highlighting

View file

@ -108,6 +108,7 @@ use MediaWiki\Parser\ParserCacheFactory;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
use MediaWiki\Parser\Parsoid\HtmlTransformFactory;
use MediaWiki\Parser\Parsoid\ParsoidOutputAccess;
use MediaWiki\Parser\Parsoid\ParsoidParserFactory;
use MediaWiki\Permissions\GrantsInfo;
use MediaWiki\Permissions\GrantsLocalization;
use MediaWiki\Permissions\GroupPermissionsLookup;
@ -1572,6 +1573,15 @@ class MediaWikiServices extends ServiceContainer {
return $this->getService( 'ParsoidPageConfigFactory' );
}
/**
* Fetch the 'ParsoidParserFactory' service, which creates
* Parsoid-backed parser instances.
*
* @return ParsoidParserFactory
* @since 1.41
* @internal
*/
public function getParsoidParserFactory(): ParsoidParserFactory {
return $this->getService( 'ParsoidParserFactory' );
}
/**
* @return SiteConfig
* @since 1.39

View file

@ -114,9 +114,7 @@ class PageHTMLHandler extends SimpleHandler {
}
$parserOutput = $this->htmlHelper->getHtml();
// Do not de-duplicate styles, Parsoid already does it in a slightly different way (T300325)
$parserOutputHtml = $parserOutput->getText( [ 'deduplicateStyles' => false ] );
$parserOutputHtml = $parserOutput->getRawText();
$outputMode = $this->getOutputMode();
switch ( $outputMode ) {

View file

@ -85,12 +85,12 @@ class RevisionHTMLHandler extends SimpleHandler {
$response->setHeader( 'Content-Type', 'text/html' );
$this->htmlHelper->putHeaders( $response, $setContentLanguageHeader );
$this->contentHelper->setCacheControl( $response, $parserOutput->getCacheExpiry() );
$response->setBody( new StringStream( $parserOutput->getText() ) );
$response->setBody( new StringStream( $parserOutput->getRawText() ) );
break;
case 'with_html':
$parserOutput = $this->htmlHelper->getHtml();
$body = $this->contentHelper->constructMetadata();
$body['html'] = $parserOutput->getText();
$body['html'] = $parserOutput->getRawText();
$response = $this->getResponseFactory()->createJson( $body );
// For JSON content, it doesn't make sense to set content language header
$this->htmlHelper->putHeaders( $response, !$setContentLanguageHeader );

View file

@ -128,6 +128,7 @@ use MediaWiki\Parser\Parsoid\Config\PageConfigFactory as MWPageConfigFactory;
use MediaWiki\Parser\Parsoid\Config\SiteConfig as MWSiteConfig;
use MediaWiki\Parser\Parsoid\HtmlTransformFactory;
use MediaWiki\Parser\Parsoid\ParsoidOutputAccess;
use MediaWiki\Parser\Parsoid\ParsoidParserFactory;
use MediaWiki\Permissions\GrantsInfo;
use MediaWiki\Permissions\GrantsLocalization;
use MediaWiki\Permissions\GroupPermissionsLookup;
@ -1426,6 +1427,16 @@ return [
);
},
'ParsoidParserFactory' => static function ( MediaWikiServices $services ): ParsoidParserFactory {
return new ParsoidParserFactory(
$services->getParsoidSiteConfig(),
$services->getParsoidDataAccess(),
$services->getParsoidPageConfigFactory(),
$services->getLanguageConverterFactory(),
$services->getParserFactory()
);
},
'ParsoidSiteConfig' => static function ( MediaWikiServices $services ): SiteConfig {
$mainConfig = $services->getMainConfig();
$parsoidSettings = $mainConfig->get( MainConfigNames::ParsoidSettings );

View file

@ -395,6 +395,7 @@ return [
2 => 'GlobalIdGenerator',
3 => 'LanguageNameUtils',
4 => 'MagicWordFactory',
5 => 'ParsoidParserFactory',
],
],
'javascript' => 'JavaScriptContentHandler',

View file

@ -29,6 +29,7 @@ use MediaWiki\Content\Transform\PreSaveTransformParams;
use MediaWiki\Languages\LanguageNameUtils;
use MediaWiki\Parser\MagicWordFactory;
use MediaWiki\Parser\ParserOutputFlags;
use MediaWiki\Parser\Parsoid\ParsoidParserFactory;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Title\Title;
use MediaWiki\Title\TitleFactory;
@ -56,6 +57,9 @@ class WikitextContentHandler extends TextContentHandler {
/** @var MagicWordFactory */
private $magicWordFactory;
/** @var ParsoidParserFactory */
private $parsoidParserFactory;
/**
* @param string $modelId
* @param TitleFactory $titleFactory
@ -63,6 +67,7 @@ class WikitextContentHandler extends TextContentHandler {
* @param GlobalIdGenerator $globalIdGenerator
* @param LanguageNameUtils $languageNameUtils
* @param MagicWordFactory $magicWordFactory
* @param ParsoidParserFactory $parsoidParserFactory
*/
public function __construct(
string $modelId,
@ -70,7 +75,8 @@ class WikitextContentHandler extends TextContentHandler {
ParserFactory $parserFactory,
GlobalIdGenerator $globalIdGenerator,
LanguageNameUtils $languageNameUtils,
MagicWordFactory $magicWordFactory
MagicWordFactory $magicWordFactory,
ParsoidParserFactory $parsoidParserFactory
) {
// $modelId should always be CONTENT_MODEL_WIKITEXT
parent::__construct( $modelId, [ CONTENT_FORMAT_WIKITEXT ] );
@ -79,6 +85,7 @@ class WikitextContentHandler extends TextContentHandler {
$this->globalIdGenerator = $globalIdGenerator;
$this->languageNameUtils = $languageNameUtils;
$this->magicWordFactory = $magicWordFactory;
$this->parsoidParserFactory = $parsoidParserFactory;
}
protected function getContentClass() {
@ -173,7 +180,8 @@ class WikitextContentHandler extends TextContentHandler {
$this->parserFactory,
$this->globalIdGenerator,
$this->languageNameUtils,
$this->magicWordFactory
$this->magicWordFactory,
$this->parsoidParserFactory
);
}
@ -340,10 +348,21 @@ class WikitextContentHandler extends TextContentHandler {
$revId = $cpoParams->getRevId();
[ $redir, $text ] = $content->getRedirectTargetAndText();
if ( $parserOptions->getUseParsoid() && !$redir ) {
$parser = $this->parsoidParserFactory->create();
} else {
$parser = $this->parserFactory->getInstance();
}
$parserOutput = $parser
// @phan-suppress-next-line PhanTypeMismatchArgumentNullable castFrom does not return null here
->parse( $text, $title, $parserOptions, true, true, $revId );
$parser = $this->parserFactory->getInstance();
// @phan-suppress-next-line PhanTypeMismatchArgumentNullable castFrom does not return null here
$parserOutput = $parser->parse( $text, $title, $parserOptions, true, true, $revId );
// T330677: Record the fact that we used the value of
// 'useParsoid' to influence this parse. Note that
// ::getUseParsoid() has a side-effect on $parserOutput here
// which didn't occur when we called ::getUseParsoid() earlier
// because $parserOutput didn't exist at that time.
$parserOptions->getUseParsoid();
// Add redirect indicator at the top
if ( $redir ) {

View file

@ -85,6 +85,7 @@ class ParserOptions {
'thumbsize' => true,
'printable' => true,
'userlang' => true,
'useParsoid' => true,
];
/**
@ -666,6 +667,27 @@ class ParserOptions {
return $this->setOptionLegacy( 'preSaveTransform', $x );
}
/**
* Parsoid-format HTML output, or legacy wikitext parser HTML?
*
* Reads the 'useParsoid' option, which defaults to false and is one of
* the cache-varying options.
*
* NOTE(review): WikitextContentHandler calls this a second time after
* parsing purely so that the read is recorded against the ParserOutput;
* that recording presumably happens inside getOption() -- confirm there
* before replacing this call with a cached value.
*
* @see T300191
* @unstable
* @since 1.41
* @return bool True if Parsoid HTML was requested, false for legacy HTML
*/
public function getUseParsoid(): bool {
return $this->getOption( 'useParsoid' );
}
/**
* Request Parsoid-format HTML output.
*
* Takes no argument: it sets the 'useParsoid' option to true and
* offers no way to switch it back off.
*
* @see T300191
* @unstable
* @since 1.41
*/
public function setUseParsoid() {
$this->setOption( 'useParsoid', true );
}
/**
* Date format index
* @return string
@ -1180,6 +1202,7 @@ class ParserOptions {
'speculativeRevId' => null,
'speculativePageIdCallback' => null,
'speculativePageId' => null,
'useParsoid' => false,
];
self::$cacheVaryingOptionsHash = self::$initialCacheVaryingOptionsHash;

View file

@ -12,6 +12,7 @@ use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageReference;
use MediaWiki\Parser\ParserOutputFlags;
use MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter;
use MediaWiki\Title\Title;
use Wikimedia\Bcp47Code\Bcp47Code;
use Wikimedia\Bcp47Code\Bcp47CodeValue;
@ -418,8 +419,19 @@ class ParserOutput extends CacheTime implements ContentMetadataCollector {
'deduplicateStyles' => true,
'wrapperDivClass' => $this->getWrapperDivClass(),
'includeDebugInfo' => false,
'bodyOnly' => true,
];
$text = $this->getRawText();
if (
$options['bodyOnly'] &&
PageBundleParserOutputConverter::hasPageBundle( $this )
) {
// This is a full HTML document, generated by Parsoid.
// Strip everything but the <body>
// Probably would be better to process this as a DOM.
$text = preg_replace( '/^.*?<body[^>]*>/s', '', $text, 1 );
$text = preg_replace( '(</body>\s*</html>\s*$)', '', $text, 1 );
}
if ( $options['includeDebugInfo'] ) {
$text .= $this->renderDebugInfo();

View file

@ -83,4 +83,7 @@ final class PageBundleParserOutputConverter {
);
}
/**
 * Check whether a ParserOutput carries page bundle data.
 *
 * The check is purely on the presence of extension data stored under
 * self::PARSOID_PAGE_BUNDLE_KEY; the contents of that data are not
 * inspected.
 *
 * @param ParserOutput $parserOutput
 * @return bool True if extension data is present under the page bundle key
 */
public static function hasPageBundle( ParserOutput $parserOutput ): bool {
	return $parserOutput->getExtensionData( self::PARSOID_PAGE_BUNDLE_KEY ) !== null;
}
}

View file

@ -0,0 +1,178 @@
<?php
namespace MediaWiki\Parser\Parsoid;
use MediaWiki\Languages\LanguageConverterFactory;
use MediaWiki\Page\PageReference;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
use MediaWiki\Revision\MutableRevisionRecord;
use MediaWiki\Revision\SlotRecord;
use MediaWiki\Title\Title;
use ParserFactory;
use ParserOptions;
use ParserOutput;
use Wikimedia\Assert\Assert;
use Wikimedia\Parsoid\Config\DataAccess;
use Wikimedia\Parsoid\Config\SiteConfig;
use Wikimedia\Parsoid\Parsoid;
use WikitextContent;
/**
 * Parsoid-backed counterpart of the legacy wikitext parser.
 *
 * Only part of the legacy parser's behaviour is available so far;
 * T236809 tracks the long-term plan.
 *
 * @since 1.41
 * @unstable since 1.41; see T236809 for plan.
 */
class ParsoidParser /* eventually this will extend \Parser */ {

	/** @var Parsoid */
	private $parsoid;

	/** @var PageConfigFactory */
	private $pageConfigFactory;

	/** @var LanguageConverterFactory */
	private $languageConverterFactory;

	/** @var ParserFactory */
	private $legacyParserFactory;

	/**
	 * @param SiteConfig $siteConfig Handed to the Parsoid instance
	 * @param DataAccess $dataAccess Handed to the Parsoid instance
	 * @param PageConfigFactory $pageConfigFactory Builds the per-parse page config
	 * @param LanguageConverterFactory $languageConverterFactory Supplies the
	 *   converter whose preferred variant is requested from Parsoid
	 * @param ParserFactory $legacyParserFactory Stored but not consulted by
	 *   any method of this class yet
	 */
	public function __construct(
		SiteConfig $siteConfig,
		DataAccess $dataAccess,
		PageConfigFactory $pageConfigFactory,
		LanguageConverterFactory $languageConverterFactory,
		ParserFactory $legacyParserFactory
	) {
		$this->legacyParserFactory = $legacyParserFactory;
		$this->languageConverterFactory = $languageConverterFactory;
		$this->pageConfigFactory = $pageConfigFactory;
		$this->parsoid = new Parsoid( $siteConfig, $dataAccess );
	}

	/**
	 * Render wikitext to HTML through Parsoid.
	 * This method must not be re-entered while a parse is in progress.
	 *
	 * @param string $text Wikitext to render
	 * @param-taint $text escapes_htmlnoent
	 * @param PageReference $page Page the text belongs to
	 * @param ParserOptions $options
	 * @param bool $linestart Must be true; false is rejected by an assertion
	 * @param bool $clearState Must be true; false is rejected by an assertion
	 * @param int|null $revId ID of the revision being rendered, used for the
	 *  REVISION* magic words. 0 means that any current revision will be used.
	 *  Null means that {{REVISIONID}}/{{REVISIONUSER}} will be empty and
	 *  {{REVISIONTIMESTAMP}} will use the current timestamp.
	 * @return ParserOutput
	 * @return-taint escaped
	 * @unstable since 1.41
	 */
	public function parse(
		string $text, PageReference $page, ParserOptions $options,
		bool $linestart = true, bool $clearState = true, ?int $revId = null
	): ParserOutput {
		Assert::invariant( $linestart, '$linestart=false is not yet supported' );
		Assert::invariant( $clearState, '$clearState=false is not yet supported' );

		$title = Title::castFromPageReference( $page );
		if ( $title === null ) {
			// Not expected in practice: the cast does not yield null here.
			$title = Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );
		}

		// Explicit target language first; otherwise the user language for
		// interface messages and the page language for everything else.
		$targetLang = $options->getTargetLanguage()
			?? ( $options->getInterfaceMessage()
				? $options->getUserLangObj()
				: $title->getPageLanguage() );

		$converter = $this->languageConverterFactory->getLanguageConverter( $targetLang );

		$pageConfig = null;
		if ( $revId !== null ) {
			$pageConfig = $this->pageConfigFactory->create(
				$title,
				$options->getUserIdentity(),
				$revId,
				null, // unused
				$targetLang
			);
		}

		$matchesRevision = $pageConfig !== null
			&& $pageConfig->getPageMainContent() === $text;
		if ( !$matchesRevision ) {
			// Awkward, but $text is what has to be parsed, whether or not it
			// is the content of the revision identified by $revId. T332928
			// proposes separating callers that have a revision from callers
			// that only have bare text.
			$fakeRevision = new MutableRevisionRecord( $title );
			if ( $revId !== null ) {
				$fakeRevision->setId( $revId );
			}
			$mainSlot = SlotRecord::newUnsaved(
				SlotRecord::MAIN,
				new WikitextContent( $text )
			);
			$fakeRevision->setSlot( $mainSlot );
			$pageConfig = $this->pageConfigFactory->create(
				$title,
				$options->getUserIdentity(),
				$fakeRevision,
				null, // unused
				$targetLang
			);
		}

		$output = new ParserOutput();
		// T331148: These settings should be checked for consistency with the
		// REST interfaces for Parsoid output
		$parsoidOptions = [
			'pageBundle' => true,
			'wrapSections' => true,
			'htmlVariantLanguage' => $converter->getPreferredVariant(),
			'outputContentVersion' => Parsoid::defaultHTMLVersion(),
		];
		$bundle = $this->parsoid->wikitext2html( $pageConfig, $parsoidOptions, $headers, $output );
		$output = PageBundleParserOutputConverter::parserOutputFromPageBundle( $bundle, $output );

		$this->makeLimitReport( $options, $output );
		return $output;
	}

	/**
	 * Fill in the limit report entries of the given ParserOutput.
	 * Ported from Parser::makeLimitReport(); the intent is to use the
	 * superclass method directly once this class has one.
	 *
	 * @param ParserOptions $parserOptions
	 * @param ParserOutput $parserOutput Output to annotate in place
	 */
	protected function makeLimitReport(
		ParserOptions $parserOptions, ParserOutput $parserOutput
	) {
		// The option is still read as in the original port, although the
		// value is not consumed by anything below.
		$parserOptions->getMaxIncludeSize();

		$cpuSeconds = $parserOutput->getTimeSinceStart( 'cpu' );
		if ( $cpuSeconds !== null ) {
			$parserOutput->setLimitReportData(
				'limitreport-cputime',
				sprintf( "%.3f", $cpuSeconds )
			);
		}

		$wallSeconds = $parserOutput->getTimeSinceStart( 'wall' );
		$parserOutput->setLimitReportData(
			'limitreport-walltime',
			sprintf( "%.3f", $wallSeconds )
		);

		$parserOutput->setLimitReportData( 'limitreport-timingprofile', [ 'not yet supported' ] );

		// Cache-related metadata
		$cacheTimestamp = $parserOutput->getCacheTime();
		$parserOutput->setLimitReportData( 'cachereport-timestamp', $cacheTimestamp );

		$cacheTtl = $parserOutput->getCacheExpiry();
		$parserOutput->setLimitReportData( 'cachereport-ttl', $cacheTtl );

		$isTransient = $parserOutput->hasReducedExpiry();
		$parserOutput->setLimitReportData( 'cachereport-transientcontent', $isTransient );
	}
}

View file

@ -0,0 +1,77 @@
<?php
namespace MediaWiki\Parser\Parsoid;
use MediaWiki\Languages\LanguageConverterFactory;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
use ParserFactory;
use Wikimedia\Parsoid\Config\DataAccess;
use Wikimedia\Parsoid\Config\SiteConfig;
/**
* ParserFactory which uses a ParsoidParser.
*
* This is similar to \ParserFactory, but simplified since we don't need
* to try to reuse parser objects. Eventually we'll be able to simplify
* \ParserFactory the same way.
*
* The factory only stores its constructor arguments and hands all of
* them to each ParsoidParser it creates.
*
* @since 1.41
* @internal May be combined with \ParserFactory or otherwise refactored
*
* @ingroup Parser
*/
class ParsoidParserFactory /* eventually this may extend \ParserFactory */ {
/** @var SiteConfig */
private $siteConfig;
/** @var DataAccess */
private $dataAccess;
/** @var PageConfigFactory */
private $pageConfigFactory;
/** @var LanguageConverterFactory */
private $languageConverterFactory;
/** @var ParserFactory */
private $legacyParserFactory;
/**
* @param SiteConfig $siteConfig
* @param DataAccess $dataAccess
* @param PageConfigFactory $pageConfigFactory
* @param LanguageConverterFactory $languageConverterFactory
* @param ParserFactory $legacyParserFactory
*/
public function __construct(
SiteConfig $siteConfig,
DataAccess $dataAccess,
PageConfigFactory $pageConfigFactory,
LanguageConverterFactory $languageConverterFactory,
ParserFactory $legacyParserFactory
) {
$this->siteConfig = $siteConfig;
$this->dataAccess = $dataAccess;
$this->pageConfigFactory = $pageConfigFactory;
$this->languageConverterFactory = $languageConverterFactory;
$this->legacyParserFactory = $legacyParserFactory;
}
/**
* Creates a new Parsoid parser.
*
* Every call constructs a fresh ParsoidParser; no instance is kept
* or reused by the factory.
*
* @return ParsoidParser
* @since 1.41
* @unstable
*/
public function create(): ParsoidParser {
return new ParsoidParser(
$this->siteConfig,
$this->dataAccess,
$this->pageConfigFactory,
$this->languageConverterFactory,
$this->legacyParserFactory
);
}
}

View file

@ -977,6 +977,9 @@ class RevisionStoreDbTest extends MediaWikiIntegrationTestCase {
try {
$this->setService( 'DBLoadBalancer', $localLoadBalancerMock );
// There may be other code which indirectly uses the RevisionStore
// service; make sure it picks up the external store as well.
$this->setService( 'RevisionStore', $store );
$callback( $store );
} finally {
// Restore the original load balancer to make test teardown work

View file

@ -601,6 +601,7 @@ class DerivedPageDataUpdaterTest extends MediaWikiIntegrationTestCase {
'GlobalIdGenerator',
'LanguageNameUtils',
'MagicWordFactory',
'ParsoidParserFactory',
],
],
'testing' => DummyContentHandlerForTesting::class,

View file

@ -6,6 +6,7 @@ use MediaWiki\MainConfigNames;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Page\PageIdentityValue;
use MediaWiki\Parser\MagicWordFactory;
use MediaWiki\Parser\Parsoid\ParsoidParserFactory;
use MediaWiki\Title\Title;
use MediaWiki\Title\TitleFactory;
use Wikimedia\TestingAccessWrapper;
@ -39,6 +40,7 @@ class ContentHandlerTest extends MediaWikiIntegrationTestCase {
'GlobalIdGenerator',
'LanguageNameUtils',
'MagicWordFactory',
'ParsoidParserFactory',
],
],
CONTENT_MODEL_JAVASCRIPT => JavaScriptContentHandler::class,
@ -507,7 +509,8 @@ class ContentHandlerTest extends MediaWikiIntegrationTestCase {
$this->createMock( ParserFactory::class ),
$this->createMock( GlobalIdGenerator::class ),
$this->createMock( LanguageNameUtils::class ),
$this->createMock( MagicWordFactory::class )
$this->createMock( MagicWordFactory::class ),
$this->createMock( ParsoidParserFactory::class )
);
$slotDiffRenderer = $contentHandler->getSlotDiffRenderer( RequestContext::getMain() );
$this->assertInstanceOf( TextSlotDiffRenderer::class, $slotDiffRenderer );

View file

@ -21,6 +21,7 @@ class RegistrationContentHandlerFactoryToMediaWikiServicesTest extends MediaWiki
'GlobalIdGenerator',
'LanguageNameUtils',
'MagicWordFactory',
'ParsoidParserFactory',
],
],
CONTENT_MODEL_JAVASCRIPT => JavaScriptContentHandler::class,

View file

@ -51,8 +51,8 @@ class ParsoidCachePrewarmJobTest extends MediaWikiIntegrationTestCase {
);
// Ensure we have the parsoid output in parser cache as an HTML document
$this->assertStringContainsString( '<html', $parsoidOutput->getText() );
$this->assertStringContainsString( self::NON_JOB_QUEUE_EDIT, $parsoidOutput->getText() );
$this->assertStringContainsString( '<html', $parsoidOutput->getRawText() );
$this->assertStringContainsString( self::NON_JOB_QUEUE_EDIT, $parsoidOutput->getRawText() );
$rev2 = $this->editPage( $page, self::JOB_QUEUE_EDIT )->getNewRevision();
$parsoidPrewarmJob = new ParsoidCachePrewarmJob(
@ -81,8 +81,8 @@ class ParsoidCachePrewarmJobTest extends MediaWikiIntegrationTestCase {
);
// Ensure we have the parsoid output in parser cache as an HTML document
$this->assertStringContainsString( '<html', $parsoidOutput->getText() );
$this->assertStringContainsString( self::JOB_QUEUE_EDIT, $parsoidOutput->getText() );
$this->assertStringContainsString( '<html', $parsoidOutput->getRawText() );
$this->assertStringContainsString( self::JOB_QUEUE_EDIT, $parsoidOutput->getRawText() );
// Check that the causeAction was looped through as the render reason
$this->assertStringContainsString(

View file

@ -11,6 +11,7 @@ use Wikimedia\TestingAccessWrapper;
/**
* @covers \Article::view()
* @group Database
*/
class ArticleViewTest extends MediaWikiIntegrationTestCase {
@ -224,7 +225,7 @@ class ArticleViewTest extends MediaWikiIntegrationTestCase {
/** @var MockObject|WikiPage $page */
$page = $this->getMockBuilder( WikiPage::class )
->onlyMethods( [ 'getRevisionRecord', 'getLatest' ] )
->onlyMethods( [ 'getRevisionRecord', 'getLatest', 'getContentHandler' ] )
->setConstructorArgs( [ $title ] )
->getMock();
@ -232,6 +233,8 @@ class ArticleViewTest extends MediaWikiIntegrationTestCase {
->willReturn( $rev );
$page->method( 'getLatest' )
->willReturn( $rev->getId() );
$page->method( 'getContentHandler' )
->willReturn( $mockHandler );
$article = Article::newFromWikiPage( $page, RequestContext::getMain() );
$article->getContext()->getOutput()->setTitle( $page->getTitle() );
@ -695,4 +698,46 @@ class ArticleViewTest extends MediaWikiIntegrationTestCase {
$services->getRevisionStore()
);
}
/**
* Test the "useParsoid" parser option and the ArticleParserOptions
* hook.
*
* The same wikitext page is viewed twice: first with default parser
* options, where the output must not contain the `rel="mw:WikiLink"`
* marker, and then with a temporary ArticleParserOptions hook that
* calls setUseParsoid(), where the marker must be present.
*/
public function testUseParsoid() {
// Create an appropriate test page.
$title = Title::makeTitle( NS_MAIN, 'UseParsoidTest' );
$article = new Article( $title );
$page = $this->getExistingTestPage( $title );
$page->doUserEditContent(
ContentHandler::makeContent(
'[[Foo]]',
$title,
// Force this page to be wikitext
CONTENT_MODEL_WIKITEXT
),
static::getTestSysop()->getUser(),
'UTPageSummary',
EDIT_SUPPRESS_RC
);
$article->view();
$html = $this->getHtml( $article->getContext()->getOutput() );
// Confirm that this is NOT parsoid-generated HTML
$this->assertStringNotContainsString(
'rel="mw:WikiLink"',
$html
);
// Now enable Parsoid via the ArticleParserOptions hook
// (a fresh Article object is used for the second view)
$article = new Article( $title );
$this->setTemporaryHook( 'ArticleParserOptions', static function ( $article, $popts ) {
$popts->setUseParsoid();
} );
$article->view();
$html = $this->getHtml( $article->getContext()->getOutput() );
// Look for a marker that this is Parsoid-generated HTML
$this->assertStringContainsString(
'rel="mw:WikiLink"',
$html
);
}
}

View file

@ -331,7 +331,7 @@ class ParserOptionsTest extends MediaWikiLangTestCase {
$this->setTemporaryHook( 'ParserOptionsRegister', null );
$this->assertSame( [
'dateformat', 'printable',
'thumbsize', 'userlang'
'thumbsize', 'useParsoid', 'userlang',
], ParserOptions::allCacheVaryingOptions() );
ParserOptions::clearStaticCache();
@ -349,7 +349,7 @@ class ParserOptionsTest extends MediaWikiLangTestCase {
} );
$this->assertSame( [
'dateformat', 'foo', 'printable',
'thumbsize', 'userlang'
'thumbsize', 'useParsoid', 'userlang',
], ParserOptions::allCacheVaryingOptions() );
}

View file

@ -506,7 +506,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
/** @var ParserOutput $parserOutput */
$parserOutput = $status->getValue();
$this->assertStringContainsString( __METHOD__, $parserOutput->getText() );
$this->assertStringContainsString( __METHOD__, $parserOutput->getRawText() );
$this->assertNotEmpty( $parserOutput->getExtensionData( 'parsoid-render-id' ) );
$this->assertNotEmpty( $parserOutput->getCacheRevisionId() );
$this->assertNotEmpty( $parserOutput->getCacheTime() );
@ -540,7 +540,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
/** @var ParserOutput $parserOutput */
$parserOutput = $status->getValue();
$this->assertStringContainsString( __METHOD__, $parserOutput->getText() );
$this->assertStringContainsString( __METHOD__, $parserOutput->getRawText() );
$this->assertNotEmpty( $parserOutput->getExtensionData( 'parsoid-render-id' ) );
$this->assertNotEmpty( $parserOutput->getCacheRevisionId() );
$this->assertNotEmpty( $parserOutput->getCacheTime() );
@ -564,7 +564,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
/** @var ParserOutput $parserOutput */
$parserOutput = $status->getValue();
$this->assertStringContainsString( __METHOD__, $parserOutput->getText() );
$this->assertStringContainsString( __METHOD__, $parserOutput->getRawText() );
$this->assertNotEmpty( $parserOutput->getExtensionData( 'parsoid-render-id' ) );
$this->assertNotEmpty( $parserOutput->getCacheRevisionId() );
$this->assertNotEmpty( $parserOutput->getCacheTime() );
@ -596,7 +596,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
/** @var ParserOutput $parserOutput */
$parserOutput = $status->getValue();
$this->assertStringContainsString( __METHOD__, $parserOutput->getText() );
$this->assertStringContainsString( __METHOD__, $parserOutput->getRawText() );
$this->assertNotEmpty( $parserOutput->getExtensionData( 'parsoid-render-id' ) );
// The revision ID is set to 0, so that's what is in the cache.
$this->assertSame( 0, $parserOutput->getCacheRevisionId() );
@ -659,7 +659,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
$this->assertTrue( $status->isOK() );
// assert dummy content in parsoid output HTML
$html = $status->getValue()->getText();
$html = $status->getValue()->getRawText();
$this->assertStringContainsString( 'UTContent', $html );
if ( $parserOptions->getTargetLanguage() !== null ) {

View file

@ -7,6 +7,7 @@ use FormatJson;
use HashConfig;
use MediaWiki\Languages\LanguageNameUtils;
use MediaWiki\Parser\MagicWordFactory;
use MediaWiki\Parser\Parsoid\ParsoidParserFactory;
use MediaWiki\Rest\Handler\UpdateHandler;
use MediaWiki\Rest\LocalizedHttpException;
use MediaWiki\Rest\RequestData;
@ -47,7 +48,8 @@ class UpdateHandlerTest extends \MediaWikiLangTestCase {
$this->createMock( ParserFactory::class ),
$this->createMock( GlobalIdGenerator::class ),
$this->createMock( LanguageNameUtils::class ),
$this->createMock( MagicWordFactory::class )
$this->createMock( MagicWordFactory::class ),
$this->createMock( ParsoidParserFactory::class )
);
// Only wikitext is defined, returns specific handler instance

View file

@ -0,0 +1,39 @@
<?php
namespace MediaWiki\Tests\Parser\Parsoid;
use MediaWiki\Title\Title;
use MediaWikiIntegrationTestCase;
use ParserOptions;
/**
 * @covers \MediaWiki\Parser\Parsoid\ParsoidParser::parse
 */
class ParsoidParserTest extends MediaWikiIntegrationTestCase {

	/**
	 * Parse wikitext with a ParsoidParser from the service container and
	 * check that the resulting HTML contains an expected fragment.
	 *
	 * @dataProvider provideParsoidParserHtml
	 * @param array $args Positional arguments for ParsoidParser::parse().
	 *   Index 1 may be a page title string (or absent, meaning 'Main Page')
	 *   and index 2 may be absent/null (meaning anonymous ParserOptions).
	 * @param string $expected Fragment that must occur in the HTML
	 * @param array $getTextOpts Options passed on to ParserOutput::getText()
	 */
	public function testParsoidParserHtml( $args, $expected, $getTextOpts = [] ) {
		$parsoidParser = $this->getServiceContainer()
			->getParsoidParserFactory()->create();
		if ( is_string( $args[1] ?? '' ) ) {
			// Make a PageReference from a string
			$args[1] = Title::newFromText( $args[1] ?? 'Main Page' );
		}
		if ( ( $args[2] ?? null ) === null ) {
			// Make default ParserOptions if none are provided
			$args[2] = ParserOptions::newFromAnon();
		}
		$output = $parsoidParser->parse( ...$args );
		$html = $output->getText( $getTextOpts );
		$this->assertStringContainsString( $expected, $html );
	}

	/**
	 * Data sets for testParsoidParserHtml().
	 *
	 * Declared static because the provider uses no instance state and
	 * PHPUnit 10 and later require data providers to be static; static
	 * providers are also accepted by older PHPUnit versions.
	 *
	 * @return array[] Each entry is [ parse() arguments, expected fragment ]
	 */
	public static function provideParsoidParserHtml() {
		return [
			'plain text' => [ [ 'Hello, World' ], 'Hello, World' ],
			'__NOTOC__ page property' => [ [ '__NOTOC__' ], '<meta property="mw:PageProp/notoc"' ],
			// Once we support $linestart and other parser options we
			// can extend these tests.
		];
	}
}

View file

@ -5,6 +5,7 @@ namespace MediaWiki\Tests\Unit;
use FileContentHandler;
use MediaWiki\Languages\LanguageNameUtils;
use MediaWiki\Parser\MagicWordFactory;
use MediaWiki\Parser\Parsoid\ParsoidParserFactory;
use MediaWiki\Title\TitleFactory;
use MediaWikiUnitTestCase;
use ParserFactory;
@ -33,7 +34,8 @@ class FileContentHandlerTest extends MediaWikiUnitTestCase {
$this->createMock( ParserFactory::class ),
$this->createMock( GlobalIdGenerator::class ),
$this->createMock( LanguageNameUtils::class ),
$this->createMock( MagicWordFactory::class )
$this->createMock( MagicWordFactory::class ),
$this->createMock( ParsoidParserFactory::class )
);
}

View file

@ -2,15 +2,22 @@
namespace MediaWiki\Tests\Unit;
use MediaWiki\Content\Renderer\ContentParseParams;
use MediaWiki\Languages\LanguageNameUtils;
use MediaWiki\Parser\MagicWordFactory;
use MediaWiki\Parser\Parsoid\ParsoidParser;
use MediaWiki\Parser\Parsoid\ParsoidParserFactory;
use MediaWiki\Revision\SlotRecord;
use MediaWiki\Revision\SlotRenderingProvider;
use MediaWiki\Title\Title;
use MediaWiki\Title\TitleFactory;
use MediaWikiUnitTestCase;
use MWException;
use Parser;
use ParserFactory;
use ParserOptions;
use ParserOutput;
use ReflectionClass;
use Wikimedia\UUID\GlobalIdGenerator;
use WikitextContent;
use WikitextContentHandler;
@ -23,14 +30,15 @@ use WikitextContentHandler;
*/
class WikitextContentHandlerTest extends MediaWikiUnitTestCase {
private function newWikitextContentHandler(): WikitextContentHandler {
private function newWikitextContentHandler( $overrides = [] ): WikitextContentHandler {
return new WikitextContentHandler(
CONTENT_MODEL_WIKITEXT,
$this->createMock( TitleFactory::class ),
$this->createMock( ParserFactory::class ),
$this->createMock( GlobalIdGenerator::class ),
$this->createMock( LanguageNameUtils::class ),
$this->createMock( MagicWordFactory::class )
$overrides[TitleFactory::class] ?? $this->createMock( TitleFactory::class ),
$overrides[ParserFactory::class] ?? $this->createMock( ParserFactory::class ),
$overrides[GlobalIdGenerator::class] ?? $this->createMock( GlobalIdGenerator::class ),
$overrides[LanguageNameUtils::class] ?? $this->createMock( LanguageNameUtils::class ),
$overrides[MagicWordFactory::class] ?? $this->createMock( MagicWordFactory::class ),
$overrides[ParsoidParserFactory::class] ?? $this->createMock( ParsoidParserFactory::class )
);
}
@ -128,4 +136,79 @@ class WikitextContentHandlerTest extends MediaWikiUnitTestCase {
$this->assertEquals( [], $updates );
}
/**
* Verify that fillParserOutput() picks the parser according to the
* value reported by ParserOptions::getUseParsoid(): exactly one of the
* legacy parser and the Parsoid parser is created and asked to parse,
* and the other is never touched. The content used is not a redirect.
*
* @covers ::fillParserOutput
* @dataProvider provideFillParserOutput
* @param bool $useParsoid Value the mocked getUseParsoid() returns
*/
public function testFillParserOutput( $useParsoid = true ) {
$parserOptions = $this->createMock( ParserOptions::class );
$parserOptions
->method( 'getUseParsoid' )
->willReturn( $useParsoid );
// This is the core of the test: if the useParsoid option is NOT
// present, we expect ParserFactory->getInstance()->parse()
// to be called exactly once, otherwise never.
$parser = $this->createMock( Parser::class );
$parser
->expects( $useParsoid ? $this->never() : $this->once() )
->method( 'parse' );
$parserFactory = $this->createMock( ParserFactory::class );
$parserFactory
->expects( $useParsoid ? $this->never() : $this->once() )
->method( 'getInstance' )
->willReturn( $parser );
// If the useParsoid option is present, we expect
// ParsoidParserFactory()->create()->parse() to be called
// exactly once, otherwise never.
$parsoidParser = $this->createMock( ParsoidParser::class );
$parsoidParser
->expects( $useParsoid ? $this->once() : $this->never() )
->method( 'parse' );
$parsoidParserFactory = $this->createMock( ParsoidParserFactory::class );
$parsoidParserFactory
->expects( $useParsoid ? $this->once() : $this->never() )
->method( 'create' )
->willReturn( $parsoidParser );
// Set up the rest of the mocks
$content = $this->createMock( WikitextContent::class );
// First element false: the content is not a redirect
$content
->method( 'getRedirectTargetAndText' )
->willReturn( [ false, '* Hello, world!' ] );
$content
->method( 'getPreSaveTransformFlags' )
->willReturn( [] );
$title = $this->createMock( Title::class );
$titleFactory = $this->createMock( TitleFactory::class );
$titleFactory
->method( 'castFromPageReference' )
->willReturn( $title );
$cpoParams = new ContentParseParams( $title, 42, $parserOptions );
$parserOutput = $this->createMock( ParserOutput::class );
// The method we'd like to test, fillParserOutput, is protected;
// make it public
$class = new ReflectionClass( WikitextContentHandler::class );
$method = $class->getMethod( 'fillParserOutput' );
$method->setAccessible( true );
$handler = $this->newWikitextContentHandler( [
TitleFactory::class => $titleFactory,
ParserFactory::class => $parserFactory,
ParsoidParserFactory::class => $parsoidParserFactory,
] );
// Okay, invoke fillParserOutput() and verify that the assertions
// above about the parse() invocations are correct.
// $parserOutput is handed over by reference -- presumably because
// fillParserOutput() declares it as a by-reference parameter; confirm.
$method->invokeArgs( $handler, [ $content, $cpoParams, &$parserOutput ] );
}
/**
 * Data sets for testFillParserOutput(): one run per value of the
 * useParsoid option.
 *
 * Declared static because the provider uses no instance state and
 * PHPUnit 10 and later require data providers to be static; static
 * providers are also accepted by older PHPUnit versions.
 *
 * @return array[] Each entry is [ bool $useParsoid ]
 */
public static function provideFillParserOutput() {
	return [
		'legacy parser' => [ false ],
		'Parsoid' => [ true ],
	];
}
}