Make ParsoidOutputAccess a wrapper over ParserOutputAccess

* Updated ParserOutput to set Parsoid render ids that REST API
  functionality expects in ParserOutput objects.
* CacheThresholdTime functionality no longer exists since it was
  implemented in ParsoidOutputAccess and ParserOutputAccess doesn't
  support it. This is tracked in T346765.
* Enforce the constraint that uncacheable parses are only for fake or
  mutable revisions. Updated tests that violated this constraint to
  use 'getParseOutput' instead of calling the parse method directly.
* Had to make some changes in ParsoidParser around use of preferredVariant
  passed to Parsoid. I also left some TODO comments for future fixes.
  T267067 is also relevant here.

PARSOID-SPECIFIC OPTIONS:
* logLinterData: linter data is always logged by default -- removed
  support to disable it. Linter extension handles stale lints properly
  and it is better to let it handle it rather than add special cases
  to the API.
* offsetType: Moved this support to ParsoidHandler as a post-processing
  of byte-offset output. This eliminates the need to support this
  Parsoid-specific options in the ContentHandler hierarchies.
* body_only / wrapSections: Handled this in HtmlOutputRendererHelper
  as a post-processing of regular output by removing sections and
  returning the body content only. This does result in some useless
  section-wrapping work with Parsoid, but the simplification is probably
  worth it. If in the future, we support Parsoid-specific options in
  the ContentHandler hierarchy, we could re-introduce this. But, in any
  case, this "fragment" flavor options is likely to get moved out of
  core into the VisualEditor extension code.

DEPLOYMENT:
* This patch changes the cache key by setting the useParsoid option
  in ParserOptions. The parent patch handles this to ensure we don't
  encounter a cold cache on deploy.

TESTS:
* Updated tests and mocks to reflect new reality.
* Do we need any new tests?

Bug: T332931
Change-Id: Ic9b7cc0fcf365e772b7d080d76a065e3fd585f80
This commit is contained in:
Subramanya Sastry 2023-08-29 15:13:43 -05:00
parent c82c20f1a7
commit c8d0470f4b
20 changed files with 664 additions and 567 deletions

View file

@ -31,6 +31,7 @@ use MediaWiki\Edit\SelserContext;
use MediaWiki\Languages\LanguageFactory;
use MediaWiki\MainConfigNames;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Page\ParserOutputAccess;
use MediaWiki\Parser\Parsoid\HtmlTransformFactory;
use MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter;
use MediaWiki\Parser\Parsoid\ParsoidOutputAccess;
@ -55,9 +56,12 @@ use Wikimedia\Bcp47Code\Bcp47CodeValue;
use Wikimedia\Message\MessageValue;
use Wikimedia\ParamValidator\ParamValidator;
use Wikimedia\Parsoid\Core\PageBundle;
use Wikimedia\Parsoid\DOM\Element;
use Wikimedia\Parsoid\Parsoid;
use Wikimedia\Parsoid\Utils\ContentUtils;
use Wikimedia\Parsoid\Utils\DOMCompat;
use Wikimedia\Parsoid\Utils\DOMUtils;
use Wikimedia\Parsoid\Utils\WTUtils;
/**
* Helper for getting output of a given wikitext page rendered by parsoid.
@ -240,22 +244,6 @@ class HtmlOutputRendererHelper implements HtmlOutputHelper {
}
}
/**
* Set the desired offset type for data-parsoid attributes.
*
* @note Will disable caching if the given offset type is different from the default.
*
* @param string $offsetType One of the offset types accepted by Parsoid::wikitext2html.
*/
public function setOffsetType( $offsetType ) {
// Only set the option if the value isn't the default (see Wikimedia\Parsoid\Config\Env)!
// See Parsoid::wikitext2html for possible values.
if ( $offsetType !== 'byte' ) {
$this->parsoidOptions['offsetType'] = $offsetType;
$this->isCacheable = false;
}
}
/**
* Controls how the parser cache is used.
*
@ -264,8 +252,8 @@ class HtmlOutputRendererHelper implements HtmlOutputHelper {
*/
public function setUseParserCache( bool $read, bool $write ) {
$this->parsoidOutputAccessOptions =
( $read ? 0 : ParsoidOutputAccess::OPT_FORCE_PARSE ) |
( $write ? 0 : ParsoidOutputAccess::OPT_NO_UPDATE_CACHE );
( $read ? 0 : ParserOutputAccess::OPT_FORCE_PARSE ) |
( $write ? 0 : ParserOutputAccess::OPT_NO_UPDATE_CACHE );
}
/**
@ -716,38 +704,54 @@ class HtmlOutputRendererHelper implements HtmlOutputHelper {
return (int)$this->revisionOrId;
}
/**
* Strip Parsoid's section wrappers
*
* TODO: Should we move this to Parsoid's ContentUtils class?
* There already is a stripUnnecessaryWrappersAndSyntheticNodes but
* it targets html2wt and does a lot more than just section unwrapping.
*
* @param Element $elt
*/
private function stripParsoidSectionTags( Element $elt ): void {
$n = $elt->firstChild;
while ( $n ) {
$next = $n->nextSibling;
if ( $n instanceof Element ) {
// Recurse into subtree before stripping this
$this->stripParsoidSectionTags( $n );
// Strip <section> tags and synthetic extended-annotation-region wrappers
if ( WTUtils::isParsoidSectionTag( $n ) ) {
$parent = $n->parentNode;
// Help out phan
'@phan-var Element $parent';
DOMUtils::migrateChildren( $n, $parent, $n );
$parent->removeChild( $n );
}
}
$n = $next;
}
}
/**
* @param ParserOptions $parserOptions
*
* @return Status
*/
private function getParserOutputInternal( ParserOptions $parserOptions ): Status {
// XXX: $parsoidOptions are really parser options, and they should be integrated with
// the ParserOptions class. That would allow us to use the ParserCache with
// various flavors.
$parsoidOptions = $this->parsoidOptions;
// NOTE: VisualEditor would set this flavor when transforming from Wikitext to HTML
// for the purpose of editing when doing parsefragment (in body only mode).
if ( $this->flavor === 'fragment' ) {
$parsoidOptions += [
'body_only' => true,
'wrapSections' => false
];
}
// NOTE: ParsoidOutputAccess::getParserOutput() should be used for revisions
// that comes from the database. Either this revision is null to indicate
// the current revision or the revision must have an ID.
// If we have a revision and the ID is 0 or null, then it's a fake revision
// representing a preview.
$isFakeRevision = $this->getRevisionId() === null;
if ( !$isFakeRevision && !$parsoidOptions && $this->isCacheable ) {
// Always log lint info when generating cacheable output.
// We are not really interested in lint data for old revisions, but
// we don't have a good way to tell at this point.
$flags = $this->parsoidOutputAccessOptions | ParsoidOutputAccess::OPT_LOG_LINT_DATA;
$parsoidOptions = $this->parsoidOptions;
// NOTE: VisualEditor would set this flavor when transforming from Wikitext to HTML
// for the purpose of editing when doing parsefragment (in body only mode).
if ( $this->flavor === 'fragment' || $this->getRevisionId() === null ) {
$this->isCacheable = false;
}
if ( $this->isCacheable ) {
$flags = $this->parsoidOutputAccessOptions;
$status = $this->parsoidOutputAccess->getParserOutput(
$this->page,
$parserOptions,
@ -771,17 +775,29 @@ class HtmlOutputRendererHelper implements HtmlOutputHelper {
$this->page,
$parserOptions,
$this->revisionOrId,
$flags | ParsoidOutputAccess::OPT_FORCE_PARSE
$flags | ParserOutputAccess::OPT_FORCE_PARSE
);
}
}
} else {
$status = $this->parsoidOutputAccess->parse(
$status = $this->parsoidOutputAccess->parseUncacheable(
$this->page,
$parserOptions,
$parsoidOptions,
$this->revisionOrId
);
// @phan-suppress-next-line PhanSuspiciousValueComparison
if ( $status->isOK() && $this->flavor === 'fragment' ) {
// Unwrap sections and return body_only content
// NOTE: This introduces an extra html -> dom -> html roundtrip
// This will get addressed once HtmlHolder work is complete
$parserOutput = $status->getValue();
$body = DOMCompat::getBody( DOMUtils::parseHTML( $parserOutput->getRawText() ) );
if ( $body ) {
$this->stripParsoidSectionTags( $body );
$parserOutput->setText( DOMCompat::getInnerHTML( $body ) );
}
}
}
return $status;

View file

@ -277,7 +277,6 @@ abstract class ParsoidHandler extends Handler {
// by the parsoid extension. Will be null for core endpoints.
'domain' => $request->getPathParam( 'domain' ),
'pageName' => $attribs['pageName'],
'offsetType' => $attribs['offsetType'],
'cookie' => $request->getHeaderLine( 'Cookie' ),
'reqId' => $request->getHeaderLine( 'X-Request-Id' ),
'userAgent' => $request->getHeaderLine( 'User-Agent' ),
@ -368,10 +367,6 @@ abstract class ParsoidHandler extends Handler {
$helper->setOutputProfileVersion( $attribs['envOptions']['outputContentVersion'] );
}
if ( isset( $attribs['envOptions']['offsetType'] ) ) {
$helper->setOffsetType( $attribs['envOptions']['offsetType'] );
}
if ( isset( $attribs['pagelanguage'] ) ) {
$helper->setPageLanguage( $attribs['pagelanguage'] );
}
@ -886,6 +881,20 @@ abstract class ParsoidHandler extends Handler {
if ( $needsPageBundle ) {
$pb = $helper->getPageBundle();
// Handle custom offset requests as a pb2pb transform
if ( $attribs['offsetType'] !== 'byte' ) {
$parsoid = $this->newParsoid();
$pb = $parsoid->pb2pb(
$pageConfig,
'convertoffsets',
$pb,
[
'inputOffsetType' => 'byte',
'outputOffsetType' => $attribs['offsetType']
]
);
}
$response = $this->getResponseFactory()->createJson( $pb->responseData() );
$helper->putHeaders( $response, false );
@ -897,6 +906,9 @@ abstract class ParsoidHandler extends Handler {
} else {
$out = $helper->getHtml();
// TODO: offsetType conversion isn't supported right now for non-pagebundle endpoints
// Once the OutputTransform framework lands, we might revisit this.
$response = $this->getResponseFactory()->create();
$response->getBody()->write( $out->getRawText() );

View file

@ -1505,14 +1505,11 @@ return [
$services->getMainConfig(),
[ 'ParsoidWikiID' => WikiMap::getCurrentWikiId() ]
),
$services->getParserCacheFactory(),
$services->getParsoidParserFactory(),
$services->getParserOutputAccess(),
$services->getPageStore(),
$services->getRevisionLookup(),
$services->getGlobalIdGenerator(),
$services->getStatsdDataFactory(),
$services->getService( '_Parsoid' ),
$services->getParsoidSiteConfig(),
$services->getParsoidPageConfigFactory(),
$services->getContentHandlerFactory()
);
},
@ -1546,7 +1543,8 @@ return [
$services->getParsoidDataAccess(),
$services->getParsoidPageConfigFactory(),
$services->getLanguageConverterFactory(),
$services->getParserFactory()
$services->getParserFactory(),
$services->getGlobalIdGenerator()
);
},

View file

@ -39,7 +39,7 @@ use MediaWiki\HookContainer\HookContainer;
use MediaWiki\HookContainer\HookRunner;
use MediaWiki\MainConfigNames;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Parser\Parsoid\ParsoidOutputAccess;
use MediaWiki\Page\ParserOutputAccess;
use MediaWiki\Permissions\PermissionManager;
use MediaWiki\ResourceLoader as RL;
use MediaWiki\Revision\MutableRevisionRecord;
@ -1873,7 +1873,7 @@ class DerivedPageDataUpdater implements IDBAccessObject, LoggerAwareInterface, P
// Use OPT_FORCE_PARSE to avoid a useless cache lookup.
if ( $this->warmParsoidParserCache ) {
$cacheWarmingParams = $this->getCause();
$cacheWarmingParams['options'] = ParsoidOutputAccess::OPT_FORCE_PARSE;
$cacheWarmingParams['options'] = ParserOutputAccess::OPT_FORCE_PARSE;
$this->jobQueueGroup->lazyPush(
ParsoidCachePrewarmJob::newSpec(

View file

@ -64,7 +64,7 @@ class ParsoidCachePrewarmJob extends Job {
* @param array $params Additional options for the job. Known keys:
* - causeAction: Indicate what action caused the job to be scheduled. Used for monitoring.
* - options: Flags to be passed to ParsoidOutputAccess:getParserOutput.
* May be set to ParsoidOutputAccess::OPT_FORCE_PARSE to force a parsing even if there
* May be set to ParserOutputAccess::OPT_FORCE_PARSE to force a parsing even if there
* already is cached output.
*
* @return JobSpecification
@ -135,12 +135,7 @@ class ParsoidCachePrewarmJob extends Job {
$options = $this->params['options'] ?? 0;
// getParserOutput() will write to ParserCache.
$status = $this->parsoidOutputAccess->getParserOutput(
$page,
$parserOpts,
$rev,
$options | ParsoidOutputAccess::OPT_LOG_LINT_DATA
);
$status = $this->parsoidOutputAccess->getParserOutput( $page, $parserOpts, $rev, $options );
if ( !$status->isOK() ) {
$this->logger->error( __METHOD__ . ': Parsoid error', [

View file

@ -33,6 +33,7 @@ use MediaWiki\Output\OutputPage;
use MediaWiki\Parser\ParserOutputFlags;
use MediaWiki\Parser\ParserOutputStringSets;
use MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter;
use MediaWiki\Parser\Parsoid\ParsoidRenderID;
use MediaWiki\Parser\Sanitizer;
use MediaWiki\Title\Title;
use Wikimedia\Bcp47Code\Bcp47Code;
@ -93,6 +94,12 @@ class ParserOutput extends CacheTime implements ContentMetadataCollector {
*/
public const MW_MERGE_STRATEGY_UNION = 'union';
/**
* @internal
* String Key used to store the parsoid render ID in ParserOutput
*/
public const PARSOID_RENDER_ID_KEY = 'parsoid-render-id';
/**
* @var string|null The output text
*/
@ -1435,9 +1442,30 @@ class ParserOutput extends CacheTime implements ContentMetadataCollector {
}
/**
* @internal
*
* Store a unique rendering id for this output. This is used by the REST API
* for stashing content to support editing use cases.
*
* /**
* @param ParsoidRenderID $parsoidRenderId
*/
public function setParsoidRenderId( ParsoidRenderID $parsoidRenderId ): void {
$this->setExtensionData( self::PARSOID_RENDER_ID_KEY, $parsoidRenderId->getKey() );
}
/**
* @internal
*
* Return the Parsoid rendering id for this ParserOutput. This is only set
* where the ParserOutput has been generated by Parsoid.
*
* @return string|null
*/
public function getParsoidRenderId(): ?string {
return $this->getExtensionData( self::PARSOID_RENDER_ID_KEY );
}
/**
* Attach a flag to the output so that it can be checked later to handle special cases
*
* @param string $flag

View file

@ -135,6 +135,9 @@ class LanguageVariantConverter {
$languageConverter = $this->languageConverterFactory->getLanguageConverter( $baseLanguage );
$targetVariantCode = $this->languageFactory->getLanguage( $targetVariant )->getCode();
if ( $languageConverter->hasVariant( $targetVariantCode ) ) {
// NOTE: This is not a convert() because we have the exact desired variant
// and don't need to compute a preferred variant based on a base language.
// Also see T267067 for why convert() should be avoided.
$convertedHtml = $languageConverter->convertTo( $pageBundle->html, $targetVariantCode );
} else {
// No conversion possible - pass through original HTML.

View file

@ -19,36 +19,25 @@
namespace MediaWiki\Parser\Parsoid;
use IBufferingStatsdDataFactory;
use InvalidArgumentException;
use Liuggio\StatsdClient\Factory\StatsdDataFactory;
use MediaWiki\Config\Config;
use MediaWiki\Config\HashConfig;
use MediaWiki\Config\ServiceOptions;
use MediaWiki\Content\IContentHandlerFactory;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MainConfigNames;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Page\PageLookup;
use MediaWiki\Page\PageRecord;
use MediaWiki\Parser\ParserCacheFactory;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
use MediaWiki\Parser\RevisionOutputCache;
use MediaWiki\Page\ParserOutputAccess;
use MediaWiki\Revision\RevisionAccessException;
use MediaWiki\Revision\RevisionLookup;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;
use MediaWiki\Status\Status;
use MWUnknownContentModelException;
use ParserCache;
use ParserOptions;
use ParserOutput;
use Wikimedia\Parsoid\Config\PageConfig;
use Wikimedia\Parsoid\Config\SiteConfig;
use Wikimedia\Parsoid\Core\ClientError;
use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
use Wikimedia\Parsoid\Parsoid;
use Wikimedia\UUID\GlobalIdGenerator;
/**
* MediaWiki service for getting Parsoid Output objects.
@ -63,19 +52,16 @@ class ParsoidOutputAccess {
public const PARSOID_PARSER_CACHE_NAME = 'parsoid';
/**
* @var string Key used to store the parsoid render ID in ParserOutput
* @deprecated since 1.42 use ParserOutputAccess::OPT_NO_UPDATE_CACHE instead
* Temporarily needed while we migrate extensions and other codebases
* to use the ParserOutputAccess constant directly
*/
private const RENDER_ID_KEY = 'parsoid-render-id';
/** @var int Do not check the cache before parsing (force parse) */
public const OPT_FORCE_PARSE = 1;
/**
* @var int Do not update the cache after parsing.
*/
public const OPT_NO_UPDATE_CACHE = 2;
public const OPT_NO_UPDATE_CACHE = ParserOutputAccess::OPT_NO_UPDATE_CACHE;
/**
* @deprecated Parsoid will always lint at this point. This option
* has no effect and will be removed once all callers are fixed.
*
* @var int Collect linter data for the ParserLogLinterData hook.
*/
public const OPT_LOG_LINT_DATA = 64;
@ -85,26 +71,8 @@ class ParsoidOutputAccess {
'ParsoidWikiID'
];
/** @var RevisionOutputCache */
private $revisionOutputCache;
/** @var ParserCache */
private $parserCache;
/** @var GlobalIdGenerator */
private $globalIdGenerator;
/** @var StatsdDataFactory */
private $stats;
/** @var Config */
private $parsoidCacheConfig;
/** @var Parsoid */
private $parsoid;
/** @var PageConfigFactory */
private $parsoidPageConfigFactory;
/** @var ParsoidParserFactory */
private $parsoidParserFactory;
/** @var PageLookup */
private $pageLookup;
@ -112,6 +80,9 @@ class ParsoidOutputAccess {
/** @var RevisionLookup */
private $revisionLookup;
/** @var ParserOutputAccess */
private $parserOutputAccess;
/** @var SiteConfig */
private $siteConfig;
@ -126,41 +97,29 @@ class ParsoidOutputAccess {
/**
* @param ServiceOptions $options
* @param ParserCacheFactory $parserCacheFactory
* @param ParsoidParserFactory $parsoidParserFactory
* @param ParserOutputAccess $parserOutputAccess
* @param PageLookup $pageLookup
* @param RevisionLookup $revisionLookup
* @param GlobalIdGenerator $globalIdGenerator
* @param IBufferingStatsdDataFactory $stats
* @param Parsoid $parsoid
* @param SiteConfig $siteConfig
* @param PageConfigFactory $parsoidPageConfigFactory
* @param IContentHandlerFactory $contentHandlerFactory
*/
public function __construct(
ServiceOptions $options,
ParserCacheFactory $parserCacheFactory,
ParsoidParserFactory $parsoidParserFactory,
ParserOutputAccess $parserOutputAccess,
PageLookup $pageLookup,
RevisionLookup $revisionLookup,
GlobalIdGenerator $globalIdGenerator,
IBufferingStatsdDataFactory $stats,
Parsoid $parsoid,
SiteConfig $siteConfig,
PageConfigFactory $parsoidPageConfigFactory,
IContentHandlerFactory $contentHandlerFactory
) {
$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
$this->options = $options;
$this->parsoidCacheConfig = new HashConfig( $options->get( MainConfigNames::ParsoidCacheConfig ) );
$this->revisionOutputCache = $parserCacheFactory
->getRevisionOutputCache( self::PARSOID_PARSER_CACHE_NAME );
$this->parserCache = $parserCacheFactory->getParserCache( self::PARSOID_PARSER_CACHE_NAME );
$this->parsoidParserFactory = $parsoidParserFactory;
$this->parserOutputAccess = $parserOutputAccess;
$this->pageLookup = $pageLookup;
$this->revisionLookup = $revisionLookup;
$this->globalIdGenerator = $globalIdGenerator;
$this->stats = $stats;
$this->parsoid = $parsoid;
$this->siteConfig = $siteConfig;
$this->parsoidPageConfigFactory = $parsoidPageConfigFactory;
$this->contentHandlerFactory = $contentHandlerFactory;
// NOTE: This is passed as the "prefix" option to parsoid, which it uses
@ -196,6 +155,43 @@ class ParsoidOutputAccess {
return $this->siteConfig->getContentModelHandler( $model ) !== null;
}
/**
* @internal
*
* @param ParserOutput $parserOutput
*
* @return ParsoidRenderID
*/
public function getParsoidRenderID( ParserOutput $parserOutput ): ParsoidRenderID {
// XXX: ParserOutput may be coming from the parser cache, so we need to be careful
// when we change how we store the render key in the ParserOutput object.
$renderId = $parserOutput->getParsoidRenderId();
if ( !$renderId ) {
throw new InvalidArgumentException( 'ParserOutput does not have a render ID' );
}
return ParsoidRenderID::newFromKey( $renderId );
}
private function handleUnsupportedContentModel( RevisionRecord $revision ): ?Status {
$mainSlot = $revision->getSlot( SlotRecord::MAIN );
$contentModel = $mainSlot->getModel();
if ( $this->supportsContentModel( $contentModel ) ) {
return null;
} else {
// This is a messy fix for T324711. The real solution is T311648.
// For now, just return dummy parser output.
return $this->makeDummyParserOutput( $contentModel );
// TODO: go back to throwing, once RESTbase no longer expects to get a parsoid rendering for
//any kind of content (T324711).
/*
// TODO: throw an internal exception here, convert to HttpError in HtmlOutputRendererHelper.
throw new HttpException( 'Parsoid does not support content model ' . $mainSlot->getModel(), 400 );
}
*/
}
}
/**
* @param PageIdentity $page
* @param ParserOptions $parserOpts
@ -211,133 +207,26 @@ class ParsoidOutputAccess {
int $options = 0
): Status {
[ $page, $revision ] = $this->resolveRevision( $page, $revision );
$isOld = $revision->getId() !== $page->getLatest();
$statsKey = $isOld ? 'ParsoidOutputAccess.Cache.revision' : 'ParsoidOutputAccess.Cache.parser';
if ( !( $options & self::OPT_FORCE_PARSE ) ) {
$parserOutput = $this->getCachedParserOutputInternal(
$page,
$parserOpts,
$revision,
$isOld,
$statsKey
);
if ( $parserOutput ) {
return Status::newGood( $parserOutput );
}
$status = $this->handleUnsupportedContentModel( $revision );
if ( $status ) {
return $status;
}
$parsoidOptions = [];
if ( $options & self::OPT_LOG_LINT_DATA ) {
$parsoidOptions += [
'logLinterData' => true
];
}
$mainSlot = $revision->getSlot( SlotRecord::MAIN );
$startTime = microtime( true );
$status = $this->parse( $page, $parserOpts, $parsoidOptions, $revision );
$time = microtime( true ) - $startTime;
if ( !$status->isOK() ) {
$this->stats->increment( $statsKey . '.save.notok' );
} elseif ( $options & self::OPT_NO_UPDATE_CACHE ) {
$this->stats->increment( $statsKey . '.save.disabled' );
} elseif ( !$this->supportsContentModel( $mainSlot->getModel() ) ) {
// TODO: We really want to cache for all supported content models.
// But supportsContentModels() lies, because of T324711.
// This causes us to render garbage output for all content models, which we shouldn't cache.
// NOTE: this will become irrelevant when we implement T311648.
$this->stats->increment( $statsKey . '.save.badmodel' );
} else {
if ( $time > $this->parsoidCacheConfig->get( 'CacheThresholdTime' ) ) {
$parserOutput = $status->getValue();
$now = $parserOutput->getCacheTime();
if ( $isOld ) {
$this->revisionOutputCache->save( $parserOutput, $revision, $parserOpts, $now );
} else {
$this->parserCache->save( $parserOutput, $page, $parserOpts, $now );
}
$this->stats->increment( $statsKey . '.save.ok' );
} else {
$this->stats->increment( $statsKey . '.save.skipfast' );
}
}
return $status;
}
/**
* @param PageConfig $pageConfig
* @param array $parsoidOptions
*
* @return Status
*/
private function parseInternal(
PageConfig $pageConfig,
array $parsoidOptions
): Status {
$defaultOptions = [
'pageBundle' => true,
'wrapSections' => true,
// Defaults to page title language unless the REST API
// sets a target language in ParserOptions which it does if:
// (a) a content-language header is passed in which is usually
// a language variant conversion request
// (b) user language (for interface messages)
'htmlVariantLanguage' => $pageConfig->getPageLanguageBcp47(),
'outputContentVersion' => Parsoid::defaultHTMLVersion(),
];
try {
$startTime = microtime( true );
$parserOutput = new ParserOutput();
$pageBundle = $this->parsoid->wikitext2html(
$pageConfig,
$parsoidOptions + $defaultOptions,
$headers,
$parserOutput
// Since we know we are generating Parsoid output, explicitly
// call ParserOptions::setUseParsoid. This ensures that when
// we query the parser-cache, the right cache key is called.
// This is an optional transition step to using ParserOutputAccess.
$parserOpts->setUseParsoid();
$status = $this->parserOutputAccess->getParserOutput(
$page, $parserOpts, $revision, $options
);
$parserOutput = PageBundleParserOutputConverter::parserOutputFromPageBundle( $pageBundle, $parserOutput );
$time = microtime( true ) - $startTime;
if ( $time > 3 ) {
LoggerFactory::getInstance( 'slow-parsoid' )
->info( 'Parsing {title} was slow, took {time} seconds', [
'time' => number_format( $time, 2 ),
'title' => $pageConfig->getTitle(),
] );
}
return Status::newGood( $parserOutput );
} catch ( ClientError $e ) {
return Status::newFatal( 'parsoid-client-error', $e->getMessage() );
$status = Status::newFatal( 'parsoid-client-error', $e->getMessage() );
} catch ( ResourceLimitExceededException $e ) {
return Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() );
$status = Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() );
}
}
/**
* NOTE: This needs to be ParserOutput returned by ->getParserOutput()
* in this class.
*
* @param ParserOutput $parserOutput
*
* @return ParsoidRenderID
*/
public function getParsoidRenderID( ParserOutput $parserOutput ): ParsoidRenderID {
// XXX: ParserOutput may be coming from the parser cache, so we need to be careful
// when we change how we store the render key in the ParserOutput object.
$renderId = $parserOutput->getExtensionData( self::RENDER_ID_KEY );
if ( !$renderId ) {
throw new InvalidArgumentException( 'ParserOutput does not have a render ID' );
}
return ParsoidRenderID::newFromKey( $renderId );
return $status;
}
/**
@ -353,58 +242,16 @@ class ParsoidOutputAccess {
$revision = null
): ?ParserOutput {
[ $page, $revision ] = $this->resolveRevision( $page, $revision );
$isOld = $revision->getId() !== $page->getLatest();
$statsKey = $isOld ? 'ParsoidOutputAccess.Cache.revision' : 'ParsoidOutputAccess.Cache.parser';
return $this->getCachedParserOutputInternal(
$page,
$parserOpts,
$revision,
$isOld,
$statsKey
);
}
/**
* @param PageRecord $page
* @param ParserOptions $parserOpts
* @param RevisionRecord|null $revision
* @param bool $isOld
* @param string $statsKey
*
* @return ?ParserOutput
*/
protected function getCachedParserOutputInternal(
PageRecord $page,
ParserOptions $parserOpts,
?RevisionRecord $revision,
bool $isOld,
string $statsKey
): ?ParserOutput {
if ( $isOld ) {
$parserOutput = $this->revisionOutputCache->get( $revision, $parserOpts );
} else {
$parserOutput = $this->parserCache->get( $page, $parserOpts );
}
if ( $parserOutput ) {
// Ignore cached ParserOutput if it is incomplete,
// because it was stored by an old version of the code.
if ( !$parserOutput->getExtensionData( PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY )
|| !$parserOutput->getExtensionData( self::RENDER_ID_KEY )
) {
$parserOutput = null;
}
}
if ( $parserOutput ) {
$this->stats->increment( $statsKey . '.get.hit' );
return $parserOutput;
} else {
$this->stats->increment( $statsKey . '.get.miss' );
return null;
$mainSlot = $revision->getSlot( SlotRecord::MAIN );
$contentModel = $mainSlot->getModel();
if ( $this->supportsContentModel( $contentModel ) ) {
// Since we know Parsoid supports this content model, explicitly
// call ParserOptions::setUseParsoid. This ensures that when
// we query the parser-cache, the right cache key is called.
// This is an optional transition step to using ParserOutputAccess.
$parserOpts->setUseParsoid();
}
return $this->parserOutputAccess->getCachedParserOutput( $page, $parserOpts, $revision );
}
private function makeDummyParserOutput( string $contentModel ): Status {
@ -414,7 +261,7 @@ class ParsoidOutputAccess {
// This is fast to generate so it's fine not to write this to parser cache.
$output->updateCacheExpiry( 0 );
// The render ID is required for rendering of dummy output: T311728.
$output->setExtensionData( self::RENDER_ID_KEY, '0/dummy-output' );
$output->setExtensionData( ParserOutput::PARSOID_RENDER_ID_KEY, '0/dummy-output' );
// Required in HtmlOutputRendererHelper::putHeaders when $forHtml
$output->setExtensionData(
PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY,
@ -427,17 +274,18 @@ class ParsoidOutputAccess {
}
/**
* This is to be called only for parsing posted wikitext that is actually
* not part of any real revision.
*
* @param PageIdentity $page
* @param ParserOptions $parserOpts
* @param array $parsoidOptions
* @param RevisionRecord|int|null $revision
*
* @return Status
*/
public function parse(
public function parseUncacheable(
PageIdentity $page,
ParserOptions $parserOpts,
array $parsoidOptions,
$revision
): Status {
// NOTE: If we have a RevisionRecord already, just use it, there is no need to resolve $page to
@ -446,55 +294,33 @@ class ParsoidOutputAccess {
[ $page, $revision ] = $this->resolveRevision( $page, $revision );
}
try {
$mainSlot = $revision->getSlot( SlotRecord::MAIN );
$contentModel = $mainSlot->getModel();
if ( !$this->supportsContentModel( $contentModel ) ) {
// This is a messy fix for T324711. The real solution is T311648.
// For now, just return dummy parser output.
return $this->makeDummyParserOutput( $contentModel );
// TODO: go back to throwing, once RESTbase no longer expects to get a parsoid rendering for
//any kind of content (T324711).
/*
// TODO: throw an internal exception here, convert to HttpError in HtmlOutputRendererHelper.
throw new HttpException( 'Parsoid does not support content model ' . $mainSlot->getModel(), 400 );
}
*/
}
$pageConfig = $this->parsoidPageConfigFactory->create(
$page,
null,
$revision,
null,
$parserOpts->getTargetLanguage(),
true // ensureAccessibleContent
);
} catch ( RevisionAccessException $e ) {
return Status::newFatal( 'parsoid-revision-access', $e->getMessage() );
// Enforce caller expectation
$revId = $revision->getId();
if ( $revId !== 0 && $revId !== null ) {
return Status::newFatal( 'parsoid-revision-access',
"parseUncacheable should not called for a real revision" );
}
$status = $this->parseInternal( $pageConfig, $parsoidOptions );
if ( !$status->isOK() ) {
$status = $this->handleUnsupportedContentModel( $revision );
if ( $status ) {
return $status;
}
$parserOutput = $status->getValue();
// TODO: when we make tighter integration with Parsoid, render ID should become
// a standard ParserOutput property. Nothing else needs it now, so don't generate
// it in ParserCache just yet.
$revId = $revision->getId();
$parsoidRenderId = new ParsoidRenderID( $revId, $this->globalIdGenerator->newUUIDv1() );
$parserOutput->setExtensionData( self::RENDER_ID_KEY, $parsoidRenderId->getKey() );
// XXX: ParserOutput should just always record the revision ID and timestamp
$now = wfTimestampNow();
$parserOutput->setCacheRevisionId( $revId );
$parserOutput->setCacheTime( $now );
try {
// Since we aren't caching this output, there is no need to
// call setUseParsoid() here.
$parser = $this->parsoidParserFactory->create();
$parserOutput = $this->parsoidParserFactory->create()->parseFakeRevision(
$revision, $page, $parserOpts );
$parserOutput->updateCacheExpiry( 0 ); // Ensure this isn't accidentally cached
$status = Status::newGood( $parserOutput );
} catch ( RevisionAccessException $e ) {
return Status::newFatal( 'parsoid-revision-access', $e->getMessage() );
} catch ( ClientError $e ) {
$status = Status::newFatal( 'parsoid-client-error', $e->getMessage() );
} catch ( ResourceLimitExceededException $e ) {
$status = Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() );
}
return $status;
}

View file

@ -7,13 +7,16 @@ use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageReference;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
use MediaWiki\Revision\MutableRevisionRecord;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;
use MediaWiki\Title\Title;
use ParserFactory;
use ParserOptions;
use ParserOutput;
use Wikimedia\Assert\Assert;
use Wikimedia\Parsoid\Config\PageConfig;
use Wikimedia\Parsoid\Parsoid;
use Wikimedia\UUID\GlobalIdGenerator;
use WikitextContent;
/**
@ -25,7 +28,6 @@ use WikitextContent;
* @unstable since 1.41; see T236809 for plan.
*/
class ParsoidParser /* eventually this will extend \Parser */ {
/** @var Parsoid */
private $parsoid;
@ -38,22 +40,149 @@ class ParsoidParser /* eventually this will extend \Parser */ {
/** @var ParserFactory */
private $legacyParserFactory;
/** @var GlobalIdGenerator */
private $globalIdGenerator;
/**
* @param Parsoid $parsoid
* @param PageConfigFactory $pageConfigFactory
* @param LanguageConverterFactory $languageConverterFactory
* @param ParserFactory $legacyParserFactory
* @param GlobalIdGenerator $globalIdGenerator
*/
public function __construct(
Parsoid $parsoid,
PageConfigFactory $pageConfigFactory,
LanguageConverterFactory $languageConverterFactory,
ParserFactory $legacyParserFactory
ParserFactory $legacyParserFactory,
GlobalIdGenerator $globalIdGenerator
) {
$this->parsoid = $parsoid;
$this->pageConfigFactory = $pageConfigFactory;
$this->languageConverterFactory = $languageConverterFactory;
$this->legacyParserFactory = $legacyParserFactory;
$this->globalIdGenerator = $globalIdGenerator;
}
/**
* API users expect a ParsoidRenderID value set in the parser output's extension data.
* @param int $revId
* @param ParserOutput $parserOutput
*/
private function setParsoidRenderID( int $revId, ParserOutput $parserOutput ): void {
$parserOutput->setParsoidRenderId(
new ParsoidRenderID( $revId, $this->globalIdGenerator->newUUIDv1() )
);
$now = wfTimestampNow();
$parserOutput->setCacheRevisionId( $revId );
$parserOutput->setCacheTime( $now );
}
/**
* Internal helper to avoid code deuplication across two methods
*
* @param PageConfig $pageConfig
* @param ParserOptions $options
* @return ParserOutput
*/
private function genParserOutput(
PageConfig $pageConfig, ParserOptions $options
): ParserOutput {
$parserOutput = new ParserOutput();
// The enable/disable logic here matches that in Parser::internalParseHalfParsed(),
// although __NOCONTENTCONVERT__ is handled internal to Parsoid.
//
// TODO: It might be preferable to handle __NOCONTENTCONVERT__ here rather than
// by instpecting the DOM inside Parsoid. That will come in a separate patch.
$htmlVariantLanguage = null;
if ( !( $options->getDisableContentConversion() || $options->getInterfaceMessage() ) ) {
// NOTES (some of are TODOs for read views integration)
// 1. This html variant conversion is a pre-cache transform. HtmlOutputRendererHelper
// has another variant conversion that is a post-cache transform based on the
// 'Accept-Language' header. If that header is set, there is really no reason to
// do this conversion here. So, eventually, we are likely to either not pass in
// the htmlVariantLanguage option below OR disable language conversion from the
// wt2html path in Parsoid and this and the Accept-Language variant conversion
// both would have to be handled as post-cache transforms.
//
// 2. Parser.php calls convert() which computes a preferred variant from the
// target language. But, we cannot do that unconditionally here because REST API
// requests specifcy the exact variant via the 'Content-Language' header.
//
// For Parsoid page views, either the callers will have to compute the
// preferred variant and set it in ParserOptions OR the REST API will have
// to set some other flag indicating that the preferred variant should not
// be computed. For now, I am adding a temporary hack, but this should be
// replaced with something more sensible.
//
// 3. Additionally, Parsoid's callers will have to set targetLanguage in ParserOptiosn
// to mimic the logic in Parser.php (missing right now).
$langCode = $pageConfig->getPageLanguageBcp47();
if ( $options->getRenderReason() === 'page-view' ) { // TEMPORARY HACK
$langFactory = MediaWikiServices::getInstance()->getLanguageFactory();
$lang = $langFactory->getLanguage( $langCode );
$langConv = $this->languageConverterFactory->getLanguageConverter( $lang );
$htmlVariantLanguage = $langFactory->getLanguage( $langConv->getPreferredVariant() );
} else {
$htmlVariantLanguage = $langCode;
}
}
// NOTE: This is useless until the time Parsoid uses the
// $options ParserOptions object. But if/when it does, this
// will ensure that we track used options correctly.
$options->registerWatcher( [ $parserOutput, 'recordOption' ] );
$defaultOptions = [
'pageBundle' => true,
'wrapSections' => true,
'logLinterData' => true,
'body_only' => false,
'htmlVariantLanguage' => $htmlVariantLanguage,
'offsetType' => 'byte',
'outputContentVersion' => Parsoid::defaultHTMLVersion()
];
// This can throw ClientError or ResourceLimitExceededException.
// Callers are responsible for figuring out how to handle them.
$pageBundle = $this->parsoid->wikitext2html(
$pageConfig,
$defaultOptions,
$headers,
$parserOutput );
$parserOutput = PageBundleParserOutputConverter::parserOutputFromPageBundle( $pageBundle, $parserOutput );
// Register a watcher again because the $parserOuptut arg
// and $parserOutput return value above are different objects!
$options->registerWatcher( [ $parserOutput, 'recordOption' ] );
$revId = $pageConfig->getRevisionId();
if ( $revId !== null ) {
$this->setParsoidRenderID( $revId, $parserOutput );
}
// Copied from Parser.php::parse and should probably be abstracted
// into the parent base class (probably as part of T236809)
// Wrap non-interface parser output in a <div> so it can be targeted
// with CSS (T37247)
$class = $options->getWrapOutputClass();
if ( $class !== false && !$options->getInterfaceMessage() ) {
$parserOutput->addWrapperDivClass( $class );
}
$this->makeLimitReport( $options, $parserOutput );
// Record Parsoid version in extension data; this allows
// us to use the onRejectParserCacheValue hook to selectively
// expire "bad" generated content in the event of a rollback.
$parserOutput->setExtensionData(
'core:parsoid-version', Parsoid::version()
);
return $parserOutput;
}
/**
@ -116,59 +245,38 @@ class ParsoidParser /* eventually this will extend \Parser */ {
);
}
// FIXME: Right now, ParsoidOutputAccess uses $lang and does not compute a
// $preferredVariant for $lang. So, when switching over to ParserOutputAccess,
// we need to reconcile that difference.
//
// The REST interfaces will disable content conversion here
// via ParserOptions. The enable/disable logic here matches
// that in Parser::internalParseHalfParsed(), although
// __NOCONTENTCONVERT__ is handled internal to Parsoid.
$preferredVariant = null;
if ( !( $options->getDisableContentConversion() || $options->getInterfaceMessage() ) ) {
$langFactory = MediaWikiServices::getInstance()->getLanguageFactory();
$lang = $langFactory->getLanguage( $pageConfig->getPageLanguageBcp47() );
$langConv = $this->languageConverterFactory->getLanguageConverter( $lang );
$preferredVariant = $langFactory->getLanguage( $langConv->getPreferredVariant() );
return $this->genParserOutput( $pageConfig, $options );
}
/**
* @internal
*
* Convert custom wikitext (stored in main slot of the $fakeRev arg) to HTML.
* Callers are expected NOT to stuff the result into ParserCache.
*
* @param RevisionRecord $fakeRev Revision to parse
* @param PageReference $page
* @param ParserOptions $options
* @return ParserOutput
* @unstable since 1.41
*/
public function parseFakeRevision(
RevisionRecord $fakeRev, PageReference $page, ParserOptions $options
): ParserOutput {
$title = Title::newFromPageReference( $page );
$lang = $options->getTargetLanguage();
if ( $lang === null && $options->getInterfaceMessage() ) {
$lang = $options->getUserLangObj();
}
$parserOutput = new ParserOutput();
// NOTE: This is useless until the time Parsoid uses the
// $options ParserOptions object. But if/when it does, this
// will ensure that we track used options correctly.
$options->registerWatcher( [ $parserOutput, 'recordOption' ] );
$pageBundle = $this->parsoid->wikitext2html( $pageConfig, [
'pageBundle' => true,
'wrapSections' => true,
'htmlVariantLanguage' => $preferredVariant,
'outputContentVersion' => Parsoid::defaultHTMLVersion(),
'logLinterData' => true
], $headers, $parserOutput );
$parserOutput = PageBundleParserOutputConverter::parserOutputFromPageBundle( $pageBundle, $parserOutput );
// Register a watcher again because the $parserOuptut arg
// and $parserOutput return value above are different objects!
$options->registerWatcher( [ $parserOutput, 'recordOption' ] );
# Copied from Parser.php::parse and should probably be abstracted
# into the parent base class (probably as part of T236809)
# Wrap non-interface parser output in a <div> so it can be targeted
# with CSS (T37247)
$class = $options->getWrapOutputClass();
if ( $class !== false && !$options->getInterfaceMessage() ) {
$parserOutput->addWrapperDivClass( $class );
}
$this->makeLimitReport( $options, $parserOutput );
// Record Parsoid version in extension data; this allows
// us to use the onRejectParserCacheValue hook to selectively
// expire "bad" generated content in the event of a rollback.
$parserOutput->setExtensionData(
'core:parsoid-version', Parsoid::version()
$pageConfig = $this->pageConfigFactory->create(
$title,
$options->getUserIdentity(),
$fakeRev,
null, // unused
$lang // defaults to title page language if null
);
return $parserOutput;
return $this->genParserOutput( $pageConfig, $options );
}
/**

View file

@ -8,6 +8,7 @@ use ParserFactory;
use Wikimedia\Parsoid\Config\DataAccess;
use Wikimedia\Parsoid\Config\SiteConfig;
use Wikimedia\Parsoid\Parsoid;
use Wikimedia\UUID\GlobalIdGenerator;
/**
* ParserFactory which uses a ParsoidParser.
@ -39,6 +40,9 @@ class ParsoidParserFactory /* eventually this may extend \ParserFactory */ {
/** @var ParserFactory */
private $legacyParserFactory;
/** @var GlobalIdGenerator */
private $globalIdGenerator;
/**
* @param SiteConfig $siteConfig
* @param DataAccess $dataAccess
@ -51,13 +55,15 @@ class ParsoidParserFactory /* eventually this may extend \ParserFactory */ {
DataAccess $dataAccess,
PageConfigFactory $pageConfigFactory,
LanguageConverterFactory $languageConverterFactory,
ParserFactory $legacyParserFactory
ParserFactory $legacyParserFactory,
GlobalIdGenerator $globalIdGenerator
) {
$this->siteConfig = $siteConfig;
$this->dataAccess = $dataAccess;
$this->pageConfigFactory = $pageConfigFactory;
$this->languageConverterFactory = $languageConverterFactory;
$this->legacyParserFactory = $legacyParserFactory;
$this->globalIdGenerator = $globalIdGenerator;
}
/**
@ -71,7 +77,8 @@ class ParsoidParserFactory /* eventually this may extend \ParserFactory */ {
new Parsoid( $this->siteConfig, $this->dataAccess ),
$this->pageConfigFactory,
$this->languageConverterFactory,
$this->legacyParserFactory
$this->legacyParserFactory,
$this->globalIdGenerator
);
}
}

View file

@ -1,6 +1,7 @@
<?php
use MediaWiki\Page\PageIdentity;
use MediaWiki\Page\PageLookup;
use MediaWiki\Page\ParserOutputAccess;
use MediaWiki\Parser\Parsoid\ParsoidOutputAccess;
use MediaWiki\Revision\RevisionLookup;
use MediaWiki\Revision\RevisionRecord;
@ -74,7 +75,7 @@ class PrewarmParsoidParserCache extends Maintenance {
$page,
ParserOptions::newFromAnon(),
$revision,
$this->forceParse | ParsoidOutputAccess::OPT_LOG_LINT_DATA
$this->forceParse
);
}
@ -110,7 +111,7 @@ class PrewarmParsoidParserCache extends Maintenance {
if ( $force !== null ) {
// If --force is supplied, for a parse for supported pages or supported
// pages in the specified batch.
$this->forceParse = ParsoidOutputAccess::OPT_FORCE_PARSE;
$this->forceParse = ParserOutputAccess::OPT_FORCE_PARSE;
}
$startFrom = (int)$startFrom;

View file

@ -1312,7 +1312,7 @@ class DerivedPageDataUpdaterTest extends MediaWikiIntegrationTestCase {
// Assert cache update after edit ----------
$parserCacheFactory = $this->getServiceContainer()->getParserCacheFactory();
$parserCache = $parserCacheFactory->getParserCache( ParserCacheFactory::DEFAULT_NAME );
$parsoidCache = $parserCacheFactory->getParserCache( ParsoidOutputAccess::PARSOID_PARSER_CACHE_NAME );
$parsoidCache = $parserCacheFactory->getParserCache( "parsoid-" . ParserCacheFactory::DEFAULT_NAME );
$parserCache->deleteOptionsKey( $page );
$parsoidCache->deleteOptionsKey( $page );
@ -1331,7 +1331,7 @@ class DerivedPageDataUpdaterTest extends MediaWikiIntegrationTestCase {
// Parsoid cache should have an entry
$parserOptions = ParserOptions::newFromAnon();
$parserOptions->setUseParsoid();
$parsoidCached = $parsoidCache->get( $page, $parserOptions, true );
$this->assertIsObject( $parsoidCached );
$this->assertStringContainsString( 'first', $parsoidCached->getRawText() );
@ -1344,6 +1344,8 @@ class DerivedPageDataUpdaterTest extends MediaWikiIntegrationTestCase {
$this->getServiceContainer()->getParsoidOutputAccess()->getParsoidRenderID( $parsoidCached );
// The cached ParserOutput should not use the revision timestamp
// Create nwe ParserOptions object since we setUseParsoid() above
$parserOptions = ParserOptions::newFromAnon();
$cached = $parserCache->get( $page, $parserOptions, true );
$this->assertIsObject( $cached );
$this->assertNotSame( $parsoidCached, $cached );

View file

@ -55,6 +55,12 @@ class ParsoidCachePrewarmJobTest extends MediaWikiIntegrationTestCase {
$this->assertStringContainsString( self::NON_JOB_QUEUE_EDIT, $parsoidOutput->getRawText() );
$rev2 = $this->editPage( $page, self::JOB_QUEUE_EDIT )->getNewRevision();
// Post-edit, reset all services!
// ParserOutputAccess has a localCache which can incorrectly return stale
// content for the previous revision! Resetting ensures that ParsoidCachePrewarmJob
// gets a fresh copy of ParserOutputAccess and ParsoidOutputAccess.
$this->resetServices();
$parsoidPrewarmJob = new ParsoidCachePrewarmJob(
[ 'revId' => $rev2->getId(), 'pageId' => $page->getId(), 'causeAction' => 'just for testing' ],
$this->getServiceContainer()->getParsoidOutputAccess(),

View file

@ -13,13 +13,17 @@ use MediaWiki\Config\ServiceOptions;
use MediaWiki\Content\IContentHandlerFactory;
use MediaWiki\Edit\SimpleParsoidOutputStash;
use MediaWiki\Hook\ParserLogLinterDataHook;
use MediaWiki\Logger\Spi as LoggerSpi;
use MediaWiki\MainConfigNames;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Page\PageIdentityValue;
use MediaWiki\Page\PageRecord;
use MediaWiki\Page\ParserOutputAccess;
use MediaWiki\Parser\ParserCacheFactory;
use MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter;
use MediaWiki\Parser\Parsoid\ParsoidOutputAccess;
use MediaWiki\Parser\Parsoid\ParsoidParser;
use MediaWiki\Parser\Parsoid\ParsoidParserFactory;
use MediaWiki\Parser\Parsoid\ParsoidRenderID;
use MediaWiki\Parser\RevisionOutputCache;
use MediaWiki\Rest\Handler\Helper\HtmlOutputRendererHelper;
@ -40,6 +44,7 @@ use ParserOptions;
use ParserOutput;
use PHPUnit\Framework\MockObject\MockObject;
use PHPUnit\Framework\MockObject\Rule\InvocationOrder;
use Psr\Log\NullLogger;
use Wikimedia\Bcp47Code\Bcp47CodeValue;
use Wikimedia\Message\MessageValue;
use Wikimedia\Parsoid\Core\ClientError;
@ -82,13 +87,25 @@ class HtmlOutputRendererHelperTest extends MediaWikiIntegrationTestCase {
return new ParsoidRenderID( $pout->getCacheRevisionId(), $pout->getCacheTime() );
}
/**
* @param LoggerInterface|null $logger
*
* @return LoggerSpi
*/
private function getLoggerSpi( $logger = null ) {
$logger = $logger ?: new NullLogger();
$spi = $this->createNoOpMock( LoggerSpi::class, [ 'getLogger' ] );
$spi->method( 'getLogger' )->willReturn( $logger );
return $spi;
}
/**
* @return MockObject|ParsoidOutputAccess
*/
public function newMockParsoidOutputAccess(): ParsoidOutputAccess {
$expectedCalls = [
'getParserOutput' => null,
'parse' => null,
'parseUncacheable' => null,
'getParsoidRenderID' => null
];
@ -114,13 +131,13 @@ class HtmlOutputRendererHelperTest extends MediaWikiIntegrationTestCase {
$parsoid->method( 'getParsoidRenderID' )
->willReturnCallback( [ $this, 'getParsoidRenderID' ] );
$parsoid->expects( $this->exactlyOrAny( $expectedCalls[ 'parse' ] ) )
->method( 'parse' )
$parsoid->expects( $this->exactlyOrAny( $expectedCalls[ 'parseUncacheable' ] ) )
->method( 'parseUncacheable' )
->willReturnCallback( function (
PageIdentity $page,
ParserOptions $parserOpts,
array $envOptions,
$rev
$rev,
array $envOptions = []
) {
$html = $this->getMockHtml( $rev, $envOptions );
@ -366,14 +383,13 @@ class HtmlOutputRendererHelperTest extends MediaWikiIntegrationTestCase {
// Calling setParsoidOptions must disable caching and force the ETag to null
$helper->setOutputProfileVersion( '999.0.0' );
$helper->setOffsetType( 'ucs2' );
$pb = $helper->getPageBundle();
// NOTE: Check that the options are present in the HTML.
// We don't do real parsing, so this is how they are represented in the output.
$this->assertStringContainsString( '"outputContentVersion":"999.0.0"', $pb->html );
$this->assertStringContainsString( '"offsetType":"ucs2"', $pb->html );
$this->assertStringContainsString( '"offsetType":"byte"', $pb->html );
$response = new Response();
$helper->putHeaders( $response, true );
@ -512,9 +528,9 @@ class HtmlOutputRendererHelperTest extends MediaWikiIntegrationTestCase {
$htmlresult = $helper->getHtml()->getRawText();
$this->assertStringContainsString( 'fragment', $helper->getETag() );
$this->assertStringContainsString( self::MOCK_HTML, $htmlresult );
$this->assertStringContainsString( '"body_only":true', $htmlresult );
$this->assertStringNotContainsString( "<body", $htmlresult );
$this->assertStringNotContainsString( "<section", $htmlresult );
}
public function testGetHtmlForEdit() {
@ -589,18 +605,18 @@ class HtmlOutputRendererHelperTest extends MediaWikiIntegrationTestCase {
/**
* @covers \MediaWiki\Rest\Handler\Helper\HtmlOutputRendererHelper::init
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::parse
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::parseUncacheable
*/
public function testEtagLastModifiedWithPageIdentity() {
[ $fakePage, $fakeRevision ] = $this->getNonExistingPageWithFakeRevision( __METHOD__ );
$poa = $this->createMock( ParsoidOutputAccess::class );
$poa->expects( $this->once() )
->method( 'parse' )
->method( 'parseUncacheable' )
->willReturnCallback( function (
PageIdentity $page,
ParserOptions $parserOpts,
array $envOptions,
$rev = null
$rev,
array $envOptions = []
) use ( $fakePage, $fakeRevision ) {
self::assertSame( $page, $fakePage, '$page and $fakePage should be the same' );
self::assertSame( $rev, $fakeRevision, '$rev and $fakeRevision should be the same' );
@ -705,15 +721,36 @@ class HtmlOutputRendererHelperTest extends MediaWikiIntegrationTestCase {
];
}
private function newRealParsoidOutputAccess( $overrides = [] ) {
if ( isset( $overrides['parsoid'] ) ) {
$parsoid = $overrides['parsoid'];
} else {
$parsoid = $this->createNoOpMock( Parsoid::class, [ 'wikitext2html' ] );
$parsoid->method( 'wikitext2html' )
private function resetServicesWithMockedParsoid( ?Parsoid $mockParsoid = null ): void {
$services = $this->getServiceContainer();
// Init mock Parsoid object
if ( !$mockParsoid ) {
$mockParsoid = $this->createNoOpMock( Parsoid::class, [ 'wikitext2html' ] );
$mockParsoid->method( 'wikitext2html' )
->willReturn( new PageBundle( 'This is HTML' ) );
}
// Install it in the ParsoidParser object
$parsoidParser = new ParsoidParser(
$mockParsoid,
$services->getParsoidPageConfigFactory(),
$services->getLanguageConverterFactory(),
$services->getParserFactory(),
$services->getGlobalIdGenerator()
);
// Create a mock Parsoid factory that returns the ParsoidParser object
// with the mocked Parsoid object.
$mockParsoidParserFactory = $this->createNoOpMock( ParsoidParserFactory::class, [ 'create' ] );
$mockParsoidParserFactory->method( 'create' )->willReturn( $parsoidParser );
$this->setService( 'ParsoidParserFactory', $mockParsoidParserFactory );
}
private function newRealParsoidOutputAccess( $overrides = [] ): ParsoidOutputAccess {
$services = $this->getServiceContainer();
if ( isset( $overrides['parserCache'] ) ) {
$parserCache = $overrides['parserCache'];
} else {
@ -736,8 +773,17 @@ class HtmlOutputRendererHelperTest extends MediaWikiIntegrationTestCase {
);
$parserCacheFactory->method( 'getParserCache' )->willReturn( $parserCache );
$parserCacheFactory->method( 'getRevisionOutputCache' )->willReturn( $revisionCache );
$services = $this->getServiceContainer();
$parserOutputAccess = new ParserOutputAccess(
$parserCacheFactory,
$services->getRevisionLookup(),
$services->getRevisionRenderer(),
new NullStatsdDataFactory(),
$services->getDBLoadBalancerFactory(),
$services->getChronologyProtector(),
$this->getLoggerSpi(),
$services->getWikiPageFactory(),
$services->getTitleFormatter()
);
return new ParsoidOutputAccess(
new ServiceOptions(
@ -745,14 +791,11 @@ class HtmlOutputRendererHelperTest extends MediaWikiIntegrationTestCase {
$services->getMainConfig(),
[ 'ParsoidWikiID' => 'MyWiki' ]
),
$parserCacheFactory,
$services->getParsoidParserFactory(),
$parserOutputAccess,
$services->getPageStore(),
$services->getRevisionLookup(),
$services->getGlobalIdGenerator(),
new NullStatsdDataFactory(),
$parsoid,
$services->getParsoidSiteConfig(),
$services->getParsoidPageConfigFactory(),
$services->getContentHandlerFactory()
);
}
@ -770,10 +813,13 @@ class HtmlOutputRendererHelperTest extends MediaWikiIntegrationTestCase {
$parsoid->method( 'wikitext2html' )
->willThrowException( $parsoidException );
/** @var ParsoidOutputAccess|MockObject $access */
$access = $this->newRealParsoidOutputAccess( [
'parsoid' => $parsoid
] );
$parserCache = $this->createNoOpMock( ParserCache::class, [ 'get', 'makeParserOutputKey', 'getMetadata' ] );
$parserCache->method( 'get' )->willReturn( false );
$parserCache->expects( $this->once() )->method( 'getMetadata' );
$parserCache->expects( $this->atLeastOnce() )->method( 'makeParserOutputKey' );
$this->resetServicesWithMockedParsoid( $parsoid );
$access = $this->newRealParsoidOutputAccess( [ 'parserCache' => $parserCache ] );
$helper = $this->newHelper( null, $access );
$helper->init( $page, self::PARAM_DEFAULTS, $this->newUser() );
@ -790,13 +836,18 @@ class HtmlOutputRendererHelperTest extends MediaWikiIntegrationTestCase {
$page = PageIdentityValue::localIdentity( $page->getId(), $page->getNamespace(), $page->getDBkey() );
// This is the key assertion in this test case: get() and save() are both called.
$parserCache = $this->createNoOpMock( ParserCache::class, [ 'get', 'save' ] );
$parserCache->expects( $this->once() )->method( 'get' )->willReturn( false );
$parserCache = $this->createNoOpMock( ParserCache::class, [ 'get', 'save', 'getMetadata', 'makeParserOutputKey' ] );
// Second call is for the fallback cache for hacks added to ParserCache
// (T347632 tracks removal of this hack)
$parserCache->expects( $this->exactly( 2 ) )->method( 'get' )->willReturn( false );
$parserCache->expects( $this->once() )->method( 'save' );
$parserCache->expects( $this->once() )->method( 'getMetadata' );
$parserCache->expects( $this->atLeastOnce() )->method( 'makeParserOutputKey' );
$this->resetServicesWithMockedParsoid();
$access = $this->newRealParsoidOutputAccess( [
'parserCache' => $parserCache,
'revisionCache' => $this->createNoOpMock( RevisionOutputCache::class ),
'revisionCache' => $this->createNoOpMock( RevisionOutputCache::class )
] );
$helper = $this->newHelper( null, $access );
@ -810,10 +861,12 @@ class HtmlOutputRendererHelperTest extends MediaWikiIntegrationTestCase {
// NOTE: The save() method is not supported and will throw!
// The point of this test case is asserting that save() isn't called.
$parserCache = $this->createNoOpMock( ParserCache::class, [ 'get' ] );
$parserCache = $this->createNoOpMock( ParserCache::class, [ 'get', 'getMetadata', 'makeParserOutputKey' ] );
$parserCache->method( 'get' )->willReturn( false );
$parserCache->expects( $this->once() )->method( 'getMetadata' );
$parserCache->expects( $this->atLeastOnce() )->method( 'makeParserOutputKey' );
/** @var ParsoidOutputAccess|MockObject $access */
$this->resetServicesWithMockedParsoid();
$access = $this->newRealParsoidOutputAccess( [
'parserCache' => $parserCache,
'revisionCache' => $this->createNoOpMock( RevisionOutputCache::class ),
@ -833,10 +886,12 @@ class HtmlOutputRendererHelperTest extends MediaWikiIntegrationTestCase {
// NOTE: The get() method is not supported and will throw!
// The point of this test case is asserting that get() isn't called.
// We also check that save() is still called.
$parserCache = $this->createNoOpMock( ParserCache::class, [ 'save' ] );
$parserCache = $this->createNoOpMock( ParserCache::class, [ 'save', 'getMetadata', 'makeParserOutputKey' ] );
$parserCache->expects( $this->once() )->method( 'save' );
$parserCache->expects( $this->once() )->method( 'getMetadata' );
$parserCache->expects( $this->atLeastOnce() )->method( 'makeParserOutputKey' );
/** @var ParsoidOutputAccess|MockObject $access */
$this->resetServicesWithMockedParsoid();
$access = $this->newRealParsoidOutputAccess( [
'parserCache' => $parserCache,
'revisionCache' => $this->createNoOpMock( RevisionOutputCache::class ),
@ -1057,13 +1112,17 @@ class HtmlOutputRendererHelperTest extends MediaWikiIntegrationTestCase {
public static function provideFlavorsForBadModelOutput() {
yield 'view' => [ 'view' ];
yield 'edit' => [ 'edit' ];
yield 'fragment' => [ 'fragment' ];
// fragment mode is only for posted wikitext fragments not part of a revision
// and should not be used with real revisions
//
// yield 'fragment' => [ 'fragment' ];
}
/**
* @dataProvider provideFlavorsForBadModelOutput
*/
public function testDummyContentForBadModel( string $flavor ) {
$this->resetServicesWithMockedParsoid();
$helper = $this->newHelper( new HashBagOStuff(), $this->newRealParsoidOutputAccess() );
$page = $this->getNonexistingTestPage( __METHOD__ );
@ -1098,7 +1157,7 @@ class HtmlOutputRendererHelperTest extends MediaWikiIntegrationTestCase {
$first = false;
} else {
$version = Parsoid::defaultHTMLVersion();
$this->assertGreaterThan( 0, $options & ParsoidOutputAccess::OPT_FORCE_PARSE );
$this->assertGreaterThan( 0, $options & ParserOutputAccess::OPT_FORCE_PARSE );
}
$html = $this->getMockHtml( $revision );
$pout = $this->makeParserOutput( $parserOpts, $html, $revision, $page, $version );

View file

@ -60,7 +60,10 @@ class PageHTMLHandlerTest extends MediaWikiIntegrationTestCase {
* @return PageHTMLHandler
*/
private function newHandler( ?Parsoid $parsoid = null ): PageHTMLHandler {
return $this->newPageHtmlHandler( $parsoid );
if ( $parsoid ) {
$this->resetServicesWithMockedParsoid( $parsoid );
}
return $this->newPageHtmlHandler();
}
public function testExecuteWithHtml() {

View file

@ -48,9 +48,9 @@ class PageRedirectHandlerTest extends MediaWikiIntegrationTestCase {
case 'bare':
return $this->newPageSourceHandler();
case 'html':
return $this->newPageHtmlHandler( null, $request );
return $this->newPageHtmlHandler( $request );
case 'with_html':
return $this->newPageHtmlHandler( null, $request );
return $this->newPageHtmlHandler( $request );
case 'history':
return $this->newPageHistoryHandler();
case 'history_count':

View file

@ -1906,12 +1906,10 @@ class ParsoidHandlerTest extends MediaWikiIntegrationTestCase {
$attribs = [
'oldid' => 1, // will be replaced by a real revision id
'opts' => [ 'format' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE ],
'envOptions' => [
// Ensure this is ucs2 so we have a ucs2 offsetType test since
// Parsoid's rt-testing script is node.js based and hence needs
// ucs2 offsets to function correctly!
'offsetType' => 'ucs2', // make sure this is looped through to data-parsoid attribute
]
// Ensure this is ucs2 so we have a ucs2 offsetType test since
// Parsoid's rt-testing script is node.js based and hence needs
// ucs2 offsets to function correctly!
'offsetType' => 'ucs2', // make sure this is looped through to data-parsoid attribute
];
yield 'should get from a title and revision (pagebundle)' => [
$attribs,
@ -2090,12 +2088,15 @@ class ParsoidHandlerTest extends MediaWikiIntegrationTestCase {
$page = $this->getExistingTestPage();
$pageConfig = $this->getPageConfig( $page );
$parserCache = $this->createNoOpMock( ParserCache::class, [ 'save', 'get' ] );
$parserCache = $this->createNoOpMock( ParserCache::class, [ 'save', 'get', 'makeParserOutputKey', 'getMetadata' ] );
// This is the critical assertion in this test case: the save() method should
// be called exactly once!
$parserCache->expects( $this->once() )->method( 'save' );
$parserCache->method( 'get' )->willReturn( false );
// These methods will be called by ParserOutputAccess:qa
$parserCache->expects( $this->atLeastOnce() )->method( 'makeParserOutputKey' );
$parserCache->expects( $this->atLeastOnce() )->method( 'getMetadata' );
$parserCacheFactory = $this->createNoOpMock(
ParserCacheFactory::class,

View file

@ -1,13 +1,14 @@
<?php
use MediaWiki\Config\ServiceOptions;
use MediaWiki\Hook\ParserLogLinterDataHook;
use MediaWiki\Json\JsonCodec;
use MediaWiki\MainConfigNames;
use MediaWiki\Page\ParserOutputAccess;
use MediaWiki\Parser\ParserCacheFactory;
use MediaWiki\Parser\ParserOutputFlags;
use MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter;
use MediaWiki\Parser\Parsoid\ParsoidOutputAccess;
use MediaWiki\Parser\Parsoid\ParsoidParser;
use MediaWiki\Parser\Parsoid\ParsoidParserFactory;
use MediaWiki\Rest\HttpException;
use MediaWiki\Revision\MutableRevisionRecord;
use MediaWiki\Revision\RevisionAccessException;
@ -68,12 +69,11 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
* @return ParsoidOutputAccess
* @throws Exception
*/
private function getParsoidOutputAccessWithCache(
private function resetServicesWithMockedParsoid(
$expectedParses,
$parsoidCacheConfig = [],
?BagOStuff $parserCacheBag = null
) {
$stats = new NullStatsdDataFactory();
): void {
$services = $this->getServiceContainer();
$parsoidCacheConfig += [
@ -97,22 +97,41 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
$services->getWikiPageFactory()
);
$mockParsoid = $this->newMockParsoid( $expectedParses );
$parsoidParser = new ParsoidParser(
$mockParsoid,
$services->getParsoidPageConfigFactory(),
$services->getLanguageConverterFactory(),
$services->getParserFactory(),
$services->getGlobalIdGenerator()
);
// Create a mock Parsoid factory that returns the ParsoidParser object
// with the mocked Parsoid object.
$mockParsoidParserFactory = $this->createNoOpMock( ParsoidParserFactory::class, [ 'create' ] );
$mockParsoidParserFactory->expects( $this->exactly( $expectedParses ) )
->method( 'create' )
->willReturn( $parsoidParser );
$this->setService( 'ParsoidParserFactory', $mockParsoidParserFactory );
}
/**
* @return ParsoidOutputAccess
*/
private function getParsoidOutputAccessWithCache(): ParsoidOutputAccess {
$services = $this->getServiceContainer();
return new ParsoidOutputAccess(
new ServiceOptions(
ParsoidOutputAccess::CONSTRUCTOR_OPTIONS,
[
'ParsoidCacheConfig' => $parsoidCacheConfig,
'ParsoidWikiID' => 'MyWiki'
]
$services->getMainConfig(),
[ 'ParsoidWikiID' => 'MyWiki' ]
),
$parserCacheFactory,
$services->getParsoidParserFactory(),
$services->getParserOutputAccess(),
$services->getPageStore(),
$services->getRevisionLookup(),
$services->getGlobalIdGenerator(),
$stats,
$this->newMockParsoid( $expectedParses ),
$services->getParsoidSiteConfig(),
$services->getParsoidPageConfigFactory(),
$services->getContentHandlerFactory()
);
}
@ -153,7 +172,8 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::getParserOutput
*/
public function testGetParserOutputThrowsIfRevisionNotFound() {
$access = $this->getParsoidOutputAccessWithCache( 0 );
$this->resetServicesWithMockedParsoid( 0 );
$access = $this->getParsoidOutputAccessWithCache();
$parserOptions = $this->getParserOptions();
$page = $this->getNonexistingTestPage( __METHOD__ );
@ -168,7 +188,8 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
public function testGetParserOutputThrowsIfNotWikitext() {
$this->markTestSkipped( 'Broken by fix for T324711. Restore once we have T311728.' );
$access = $this->getParsoidOutputAccessWithCache( 0 );
$this->resetServicesWithMockedParsoid( 0 );
$access = $this->getParsoidOutputAccessWithCache();
$parserOptions = $this->getParserOptions();
$page = $this->getNonexistingTestPage( __METHOD__ );
@ -190,7 +211,8 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::getParsoidRenderID
*/
public function testGetParserOutput() {
$access = $this->getParsoidOutputAccessWithCache( 1 );
$this->resetServicesWithMockedParsoid( 1 );
$access = $this->getParsoidOutputAccessWithCache();
$parserOptions = $this->getParserOptions();
$page = $this->getNonexistingTestPage( __METHOD__ );
@ -223,10 +245,10 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
* in the parsoid parser cache.
*
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::getParserOutput
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::getCachedParserOutputInternal
*/
public function testLatestRevisionIsCached() {
$access = $this->getParsoidOutputAccessWithCache( 1 );
$this->resetServicesWithMockedParsoid( 1 );
$access = $this->getParsoidOutputAccessWithCache();
$parserOptions = $this->getParserOptions();
$page = $this->getNonexistingTestPage( __METHOD__ );
@ -259,7 +281,8 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::getParserOutput
*/
public function testLatestRevisionWithForceParse() {
$access = $this->getParsoidOutputAccessWithCache( 2 );
$this->resetServicesWithMockedParsoid( 2 );
$access = $this->getParsoidOutputAccessWithCache();
$parserOptions = $this->getParserOptions();
$page = $this->getNonexistingTestPage( __METHOD__ );
@ -275,41 +298,12 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
$page,
$parserOptions,
null,
ParsoidOutputAccess::OPT_FORCE_PARSE
ParserOutputAccess::OPT_FORCE_PARSE
);
$this->assertContainsHtml( self::MOCKED_HTML . ' of ' . self::WIKITEXT, $status );
$this->checkMetadata( $status );
}
/**
* Tests that the ParserLogLinterData hook is triggered when the OPT_LOG_LINT_DATA
* flag is set.
*
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::getParserOutput
*/
public function testLatestRevisionWithLogLint() {
$this->overrideConfigValue( MainConfigNames::ParsoidSettings, [
'linting' => true
] );
$mockHandler = $this->createMock( ParserLogLinterDataHook::class );
$mockHandler->expects( $this->once() ) // this is the critical assertion in this test case!
->method( 'onParserLogLinterData' );
$this->setTemporaryHook(
'ParserLogLinterData',
$mockHandler
);
// Use the real ParsoidOutputAccess, so we use the real hook container.
$access = $this->getServiceContainer()->getParsoidOutputAccess();
$parserOptions = $this->getParserOptions();
$page = $this->getExistingTestPage( __METHOD__ );
$access->getParserOutput( $page, $parserOptions, null, ParsoidOutputAccess::OPT_LOG_LINT_DATA );
}
/**
* Tests that getParserOutput() will force a parse since we know that
* the revision is not in the cache.
@ -317,7 +311,15 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::getParserOutput
*/
public function testLatestRevisionWithNoUpdateCache() {
$access = $this->getParsoidOutputAccessWithCache( 2 );
$cacheBag = $this->getMockBuilder( HashBagOStuff::class )
->onlyMethods( [ 'set', 'setMulti' ] )
->getMock();
$cacheBag->expects( $this->never() )->method( 'set' );
$cacheBag->expects( $this->never() )->method( 'setMulti' );
// ParserCache should not get anything stored in it.
$this->resetServicesWithMockedParsoid( 1, [], $cacheBag );
$access = $this->getParsoidOutputAccessWithCache();
$parserOptions = $this->getParserOptions();
$page = $this->getNonexistingTestPage( __METHOD__ );
@ -327,16 +329,10 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
$page,
$parserOptions,
null,
ParsoidOutputAccess::OPT_NO_UPDATE_CACHE
ParserOutputAccess::OPT_NO_UPDATE_CACHE
);
$this->assertContainsHtml( self::MOCKED_HTML . ' of ' . self::WIKITEXT, $status );
$this->checkMetadata( $status );
// Get the ParserOutput again, this should trigger a new parse
// since we suppressed caching above.
$status = $access->getParserOutput( $page, $parserOptions );
$this->assertContainsHtml( self::MOCKED_HTML . ' of ' . self::WIKITEXT, $status );
$this->checkMetadata( $status );
}
/**
@ -354,7 +350,8 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
$cacheBag->expects( $this->never() )->method( 'setMulti' );
// Expect no calls to parsoid!
$access = $this->getParsoidOutputAccessWithCache( 0, [], $cacheBag );
$this->resetServicesWithMockedParsoid( 0, [], $cacheBag );
$access = $this->getParsoidOutputAccessWithCache();
$parserOptions = $this->getParserOptions();
$page = $this->getNonexistingTestPage( __METHOD__ );
@ -376,6 +373,9 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
$this->assertContainsHtml( 'Dummy output', $status );
}
/**
* Unsupported functionality at this point
*/
public static function provideCacheThresholdData() {
return [
yield "fast parse" => [ 1, 2 ], // high threshold, no caching
@ -390,13 +390,16 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
$cacheThresholdTime,
$expectedCalls
) {
$this->markTestSkipped( 'Supported removed. Will be fixed by T346398.' );
$page = $this->getExistingTestPage( __METHOD__ );
$parsoidCacheConfig = [
'CacheThresholdTime' => $cacheThresholdTime
];
$parserOptions = $this->getParserOptions();
$access = $this->getParsoidOutputAccessWithCache( $expectedCalls, $parsoidCacheConfig );
$this->resetServicesWithMockedParsoid( $expectedCalls, $parsoidCacheConfig );
$access = $this->getParsoidOutputAccessWithCache();
$status = $access->getParserOutput( $page, $parserOptions );
$this->assertContainsHtml( self::MOCKED_HTML, $status );
$this->checkMetadata( $status );
@ -407,7 +410,8 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
}
public function testOldRevisionIsCached() {
$access = $this->getParsoidOutputAccessWithCache( 1 );
$this->resetServicesWithMockedParsoid( 1 );
$access = $this->getParsoidOutputAccessWithCache();
$parserOptions = $this->getParserOptions();
$page = $this->getNonexistingTestPage( __METHOD__ );
@ -437,7 +441,8 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
}
public function testGetParserOutputWithOldRevision() {
$access = $this->getParsoidOutputAccessWithCache( 2 );
$this->resetServicesWithMockedParsoid( 2 );
$access = $this->getParsoidOutputAccessWithCache();
$parserOptions = $this->getParserOptions();
$page = $this->getNonexistingTestPage( __METHOD__ );
@ -483,20 +488,20 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
} ],
] + $contentHandlers );
$access = $this->getParsoidOutputAccessWithCache( 0 );
$this->resetServicesWithMockedParsoid( 0 );
$access = $this->getParsoidOutputAccessWithCache();
$this->assertSame( $expected, $access->supportsContentModel( $model ) );
}
/**
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::parse
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::parseInternal
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::getParserOutput
*/
public function testParseWithPageRecordAndNoRevision() {
$pageRecord = $this->getExistingTestPage( __METHOD__ )->toPageRecord();
$pOpts = ParserOptions::newFromAnon();
$parsoidOutputAccess = $this->getServiceContainer()->getParsoidOutputAccess();
$status = $parsoidOutputAccess->parse( $pageRecord, $pOpts, self::ENV_OPTS, null );
$status = $parsoidOutputAccess->getParserOutput( $pageRecord, $pOpts, null );
$this->assertInstanceOf( Status::class, $status );
$this->assertTrue( $status->isOK() );
@ -520,8 +525,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
}
/**
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::parse
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::parseInternal
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::getParserOutput
*/
public function testParseWithPageRecordAndRevision() {
$page = $this->getExistingTestPage( __METHOD__ );
@ -530,7 +534,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
$revRecord = $page->getRevisionRecord();
$parsoidOutputAccess = $this->getServiceContainer()->getParsoidOutputAccess();
$status = $parsoidOutputAccess->parse( $pageRecord, $pOpts, self::ENV_OPTS, $revRecord );
$status = $parsoidOutputAccess->getParserOutput( $pageRecord, $pOpts, $revRecord );
$this->assertInstanceOf( Status::class, $status );
$this->assertTrue( $status->isOK() );
@ -545,8 +549,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
}
/**
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::parse
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::parseInternal
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::getParserOutput
*/
public function testParseWithPageIdentityAndRevisionId() {
$page = $this->getExistingTestPage( __METHOD__ );
@ -554,7 +557,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
$revId = $page->getLatest();
$parsoidOutputAccess = $this->getServiceContainer()->getParsoidOutputAccess();
$status = $parsoidOutputAccess->parse( $page->getTitle(), $pOpts, self::ENV_OPTS, $revId );
$status = $parsoidOutputAccess->getParserOutput( $page->getTitle(), $pOpts, $revId );
$this->assertInstanceOf( Status::class, $status );
$this->assertTrue( $status->isOK() );
@ -569,8 +572,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
}
/**
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::parse
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::parseInternal
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::parseUncacheable
*/
public function testParseWithNonExistingPageAndFakeRevision() {
$page = $this->getNonexistingTestPage( __METHOD__ );
@ -586,7 +588,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
);
$parsoidOutputAccess = $this->getServiceContainer()->getParsoidOutputAccess();
$status = $parsoidOutputAccess->parse( $page->getTitle(), $pOpts, self::ENV_OPTS, $revRecord );
$status = $parsoidOutputAccess->parseUncacheable( $page->getTitle(), $pOpts, $revRecord );
$this->assertInstanceOf( Status::class, $status );
$this->assertTrue( $status->isOK() );
@ -602,7 +604,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
}
/**
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::parse
* @covers \MediaWiki\Parser\Parsoid\ParsoidOutputAccess::parseUncacheable
*/
public function testParseDeletedRevision() {
$page = $this->getNonexistingTestPage( __METHOD__ );
@ -620,7 +622,7 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
$revRecord->setVisibility( RevisionRecord::DELETED_TEXT );
$parsoidOutputAccess = $this->getServiceContainer()->getParsoidOutputAccess();
$status = $parsoidOutputAccess->parse( $page->getTitle(), $pOpts, self::ENV_OPTS, $revRecord );
$status = $parsoidOutputAccess->parseUncacheable( $page->getTitle(), $pOpts, $revRecord );
$this->assertStatusError( 'parsoid-revision-access', $status );
$this->assertSame(
@ -653,18 +655,24 @@ class ParsoidOutputAccessTest extends MediaWikiIntegrationTestCase {
/** @return Generator */
public function provideParserOptionsWithLanguageOverride() {
$parserOptions = $this->createMock( ParserOptions::class );
$parserOptions->method( 'optionsHash' )->willReturn( '' );
$parserOptions->method( 'getUseParsoid' )->willReturn( true );
$parserOptions->method( 'getTargetLanguage' )
->willReturn( null );
yield 'ParserOptions with no language' => [ $parserOptions, 'en' ];
$langCode = 'de';
$parserOptions = $this->createMock( ParserOptions::class );
$parserOptions->method( 'optionsHash' )->willReturn( '' );
$parserOptions->method( 'getUseParsoid' )->willReturn( true );
$parserOptions->method( 'getTargetLanguage' )
->willReturn( $this->getLanguageMock( $langCode ) );
yield 'ParserOptions for "de" language' => [ $parserOptions, $langCode ];
$langCode = 'ar';
$parserOptions = $this->createMock( ParserOptions::class );
$parserOptions->method( 'optionsHash' )->willReturn( '' );
$parserOptions->method( 'getUseParsoid' )->willReturn( true );
$parserOptions->method( 'getTargetLanguage' )
->willReturn( $this->getLanguageMock( $langCode ) );
yield 'ParserOptions for "ar" language' => [ $parserOptions, $langCode ];

View file

@ -6,11 +6,11 @@ use DeferredUpdates;
use Exception;
use HashBagOStuff;
use MediaWiki\Config\ServiceOptions;
use MediaWiki\Json\JsonCodec;
use MediaWiki\MainConfigNames;
use MediaWiki\MainConfigSchema;
use MediaWiki\Parser\ParserCacheFactory;
use MediaWiki\Parser\Parsoid\ParsoidOutputAccess;
use MediaWiki\Parser\Parsoid\ParsoidParserFactory;
use MediaWiki\Rest\Handler\Helper\HtmlOutputRendererHelper;
use MediaWiki\Rest\Handler\Helper\PageRestHelperFactory;
use MediaWiki\Rest\Handler\Helper\RevisionContentHelper;
@ -20,10 +20,8 @@ use MediaWiki\Rest\RequestData;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Utils\MWTimestamp;
use MediaWikiIntegrationTestCase;
use NullStatsdDataFactory;
use Psr\Http\Message\StreamInterface;
use Psr\Log\NullLogger;
use WANObjectCache;
use ReflectionClass;
use Wikimedia\Message\MessageValue;
use Wikimedia\Parsoid\Core\ClientError;
use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
@ -63,29 +61,15 @@ class RevisionHTMLHandlerTest extends MediaWikiIntegrationTestCase {
}
/**
* @param ?Parsoid $parsoid
*
* @return RevisionHTMLHandler
*/
private function newHandler( ?Parsoid $parsoid = null ): RevisionHTMLHandler {
private function newHandler(): RevisionHTMLHandler {
$parserCacheFactoryOptions = new ServiceOptions( ParserCacheFactory::CONSTRUCTOR_OPTIONS, [
'CacheEpoch' => '20200202112233',
'OldRevisionParserCacheExpireTime' => 60 * 60,
] );
$services = $this->getServiceContainer();
$parserCacheFactory = new ParserCacheFactory(
$this->parserCacheBagOStuff,
new WANObjectCache( [ 'cache' => $this->parserCacheBagOStuff, ] ),
$this->createHookContainer(),
new JsonCodec(),
new NullStatsdDataFactory(),
new NullLogger(),
$parserCacheFactoryOptions,
$services->getTitleFactory(),
$services->getWikiPageFactory()
);
$config = [
'RightsUrl' => 'https://example.com/rights',
'RightsText' => 'some rights',
@ -99,17 +83,11 @@ class RevisionHTMLHandlerTest extends MediaWikiIntegrationTestCase {
$services->getMainConfig(),
[ 'ParsoidWikiID' => 'MyWiki' ]
),
$parserCacheFactory,
$services->getParsoidParserFactory(),
$services->getParserOutputAccess(),
$services->getPageStore(),
$services->getRevisionLookup(),
$services->getGlobalIdGenerator(),
$services->getStatsdDataFactory(),
$parsoid ?? new Parsoid(
$services->get( 'ParsoidSiteConfig' ),
$services->get( 'ParsoidDataAccess' )
),
$services->getParsoidSiteConfig(),
$services->getParsoidPageConfigFactory(),
$services->getContentHandlerFactory()
);
@ -286,12 +264,38 @@ class RevisionHTMLHandlerTest extends MediaWikiIntegrationTestCase {
[ 'pathParams' => [ 'id' => $revisions['first']->getId() ] ]
);
$parsoid = $this->createNoOpMock( Parsoid::class, [ 'wikitext2html' ] );
$parsoid->expects( $this->once() )
$services = $this->getServiceContainer();
$parsoidParser = $services->getParsoidParserFactory()->create();
// Mock Parsoid
$mockParsoid = $this->createNoOpMock( Parsoid::class, [ 'wikitext2html' ] );
$mockParsoid->expects( $this->once() )
->method( 'wikitext2html' )
->willThrowException( $parsoidException );
$handler = $this->newHandler( $parsoid );
// Install it in the ParsoidParser object
$reflector = new ReflectionClass( 'MediaWiki\Parser\Parsoid\ParsoidParser' );
$prop = $reflector->getProperty( 'parsoid' );
$prop->setAccessible( true );
$prop->setValue( $parsoidParser, $mockParsoid );
$this->assertEquals( $prop->getValue( $parsoidParser ), $mockParsoid );
// Create a mock Parsoid factory that returns the ParsoidParser object
// with the mocked Parsoid object.
$mockParsoidParserFactory = $this->createNoOpMock( ParsoidParserFactory::class, [ 'create' ] );
$mockParsoidParserFactory->expects( $this->once() )
->method( 'create' )
->willReturn( $parsoidParser );
// Ensure WiktiextContentHandler has the mock ParsoidParserFactory
$wtHandler = $services->getContentHandlerFactory()->getContentHandler( 'wikitext' );
$reflector = new ReflectionClass( 'WikitextContentHandler' );
$prop = $reflector->getProperty( 'parsoidParserFactory' );
$prop->setAccessible( true );
$prop->setValue( $wtHandler, $mockParsoidParserFactory );
$this->assertEquals( $prop->getValue( $wtHandler ), $mockParsoidParserFactory );
$handler = $this->newHandler();
$this->expectExceptionObject( $expectedException );
$this->executeHandler( $handler, $request, [
'format' => 'html'

View file

@ -8,6 +8,8 @@ use MediaWiki\MainConfigNames;
use MediaWiki\MainConfigSchema;
use MediaWiki\Parser\ParserCacheFactory;
use MediaWiki\Parser\Parsoid\ParsoidOutputAccess;
use MediaWiki\Parser\Parsoid\ParsoidParser;
use MediaWiki\Parser\Parsoid\ParsoidParserFactory;
use MediaWiki\Rest\Handler\Helper\HtmlMessageOutputHelper;
use MediaWiki\Rest\Handler\Helper\HtmlOutputRendererHelper;
use MediaWiki\Rest\Handler\Helper\PageContentHelper;
@ -61,11 +63,35 @@ trait PageHandlerTestTrait {
}
/**
* @param Parsoid|MockObject|null $parsoid
*
* @param Parsoid|MockObject $mockParsoid
*/
public function resetServicesWithMockedParsoid( $mockParsoid ): void {
$services = $this->getServiceContainer();
$parsoidParser = new ParsoidParser(
$mockParsoid,
$services->getParsoidPageConfigFactory(),
$services->getLanguageConverterFactory(),
$services->getParserFactory(),
$services->getGlobalIdGenerator()
);
// Create a mock Parsoid factory that returns the ParsoidParser object
// with the mocked Parsoid object.
$mockParsoidParserFactory = $this->createNoOpMock( ParsoidParserFactory::class, [ 'create' ] );
$mockParsoidParserFactory->method( 'create' )->willReturn( $parsoidParser );
$this->setService( 'ParsoidParserFactory', $mockParsoidParserFactory );
}
/**
* @return PageHTMLHandler
*/
public function newPageHtmlHandler( ?Parsoid $parsoid = null, ?RequestInterface $request = null ) {
public function newPageHtmlHandler( ?RequestInterface $request = null ) {
// ParserOutputAccess has a localCache which can return stale content.
// Resetting ensures that ParsoidCachePrewarmJob gets a fresh copy
// of ParserOutputAccess and ParsoidOutputAccess without these problems!
$this->resetServices();
$parserCacheFactoryOptions = new ServiceOptions( ParserCacheFactory::CONSTRUCTOR_OPTIONS, [
'CacheEpoch' => '20200202112233',
'OldRevisionParserCacheExpireTime' => 60 * 60,
@ -97,17 +123,11 @@ trait PageHandlerTestTrait {
$services->getMainConfig(),
[ 'ParsoidWikiID' => 'MyWiki' ]
),
$parserCacheFactory,
$services->getParsoidParserFactory(),
$services->getParserOutputAccess(),
$services->getPageStore(),
$services->getRevisionLookup(),
$services->getGlobalIdGenerator(),
$services->getStatsdDataFactory(),
$parsoid ?? new Parsoid(
$services->get( 'ParsoidSiteConfig' ),
$services->get( 'ParsoidDataAccess' )
),
$services->getParsoidSiteConfig(),
$services->getParsoidPageConfigFactory(),
$services->getContentHandlerFactory()
);