[Re-apply] ParsoidHandler: use HtmlOutputRendererHelper in wt2html

This restores change Ie430acd0753880d88370bb9f22bb40a0f9ded917.
This reverts commit ab6baad1a5.

NOTE: This also needs the patch that fixes the original reason for the
revert: Ief721c23ed9a57d781cfdac625a62113f22f87a5

Change-Id: Ic48db1b5fdff1dfd4f2d2643d64252e5fc721e79
This commit is contained in:
Daniel Kinzler 2022-12-05 18:39:37 +00:00
parent ab6baad1a5
commit 5cb388455b
8 changed files with 330 additions and 165 deletions

View file

@ -174,8 +174,6 @@ class HtmlOutputRendererHelper {
* Flavors may influence parser options, parsoid options, and DOM transformations.
* They will be reflected by the ETag returned by getETag().
*
* Flavors cannot be combined. For more fine-grained control, use setOption
*
* @param string $flavor
*
* @return void
@ -577,14 +575,20 @@ class HtmlOutputRendererHelper {
* Set the HTTP headers based on the response generated
*
* @param ResponseInterface $response
* @param bool $setContentLanguageHeader
* @param bool $forHtml Whether the response will be HTML (rather than JSON)
*
* @return void
*/
public function putHeaders( ResponseInterface $response, bool $setContentLanguageHeader ) {
public function putHeaders( ResponseInterface $response, bool $forHtml = true ) {
if ( $forHtml ) {
// For HTML we want to set the Content-Language. For JSON, we probably don't.
$response->setHeader( 'Content-Language', $this->getHtmlOutputContentLanguage() );
$pb = $this->getPageBundle();
ParsoidFormatHelper::setContentType( $response, ParsoidFormatHelper::FORMAT_HTML, $pb->version );
}
if ( $this->targetLanguageCode ) {
if ( $setContentLanguageHeader ) {
$response->setHeader( 'Content-Language', $this->getHtmlOutputContentLanguage() );
}
$response->addHeader( 'Vary', 'Accept-Language' );
}
@ -593,6 +597,12 @@ class HtmlOutputRendererHelper {
if ( !$this->isCacheable ) {
$response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' );
}
// TODO: cache control for stable HTML? See ContentHelper::setCacheControl
if ( $this->getRevisionId() ) {
$response->setHeader( 'Content-Revision-Id', (string)$this->getRevisionId() );
}
}
/**

View file

@ -109,7 +109,6 @@ class PageHTMLHandler extends SimpleHandler {
switch ( $outputMode ) {
case 'html':
$response = $this->getResponseFactory()->create();
$response->setHeader( 'Content-Type', 'text/html' );
$this->contentHelper->setCacheControl( $response, $parserOutput->getCacheExpiry() );
$response->setBody( new StringStream( $parserOutputHtml ) );
break;

View file

@ -24,9 +24,11 @@ use ExtensionRegistry;
use InvalidArgumentException;
use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
use LogicException;
use MediaWiki\Linker\LinkTarget;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\ExistingPageRecord;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Rest\Handler;
use MediaWiki\Rest\HttpException;
@ -37,7 +39,6 @@ use MediaWiki\Revision\MutableRevisionRecord;
use MediaWiki\Revision\RevisionAccessException;
use MediaWiki\Revision\SlotRecord;
use MobileContext;
use ParserOutput;
use RequestContext;
use Title;
use WikiMap;
@ -62,6 +63,7 @@ use WikitextContent;
* Base class for Parsoid handlers.
*/
abstract class ParsoidHandler extends Handler {
// TODO logging, timeouts(?), CORS
// TODO content negotiation (routes.js routes.acceptable)
// TODO handle MaxConcurrentCallsError (pool counter?)
@ -279,6 +281,70 @@ abstract class ParsoidHandler extends Handler {
return $this->requestAttributes;
}
/**
 * Create an HtmlOutputRendererHelper for the given page and configure it
 * from the request attributes.
 *
 * @param array $attribs Request attributes; the 'opts', 'envOptions' and
 *   'pagelanguage' entries are consulted.
 * @param ?string $source Content to render. When null, no content source
 *   is set on the helper.
 * @param PageConfig|PageIdentity $page
 *
 * @return HtmlOutputRendererHelper
 */
private function getHtmlOutputRendererHelper(
	array $attribs,
	?string $source,
	$page
): HtmlOutputRendererHelper {
	$services = MediaWikiServices::getInstance();

	// A PageConfig is still accepted for backwards compatibility; it is
	// resolved to a page through its title. Creating the PageConfig should
	// really be left to lower level code.
	if ( $page instanceof PageConfig ) {
		$titleText = $page->getTitle();
		$page = $services->getPageStore()->getPageByText( $titleText );
		if ( !$page ) {
			throw new HttpException( "Bad title: $titleText", 400 );
		}
	}

	$renderer = new HtmlOutputRendererHelper(
		$services->getParsoidOutputStash(),
		$services->getStatsdDataFactory(),
		$services->getParsoidOutputAccess(),
		$services->getHtmlTransformFactory(),
		$services->getContentHandlerFactory(),
		$services->getLanguageFactory()
	);

	$renderer->init( $page, [], RequestContext::getMain()->getUser() );

	$envOptions = $attribs['envOptions'] ?? [];

	if ( $source !== null ) {
		// XXX: should default to the page's content model?
		$contentModel = $attribs['opts']['contentmodel']
			?? $envOptions['contentmodel']
			?? CONTENT_MODEL_WIKITEXT;
		$renderer->setContentSource( $source, $contentModel );
	}

	// Only pin the output profile version when it differs from Parsoid's default.
	$requestedVersion = $envOptions['outputContentVersion'] ?? null;
	if ( $requestedVersion !== null && $requestedVersion !== Parsoid::defaultHTMLVersion() ) {
		$renderer->setOutputProfileVersion( $requestedVersion );
	}

	$pageLanguage = $attribs['pagelanguage'] ?? null;
	if ( $pageLanguage !== null ) {
		$renderer->setPageLanguage( $pageLanguage );
	}

	$variantLanguage = $envOptions['htmlVariantLanguage'] ?? null;
	if ( $variantLanguage !== null ) {
		$renderer->setVariantConversionLanguage( $variantLanguage );
	}

	return $renderer;
}
/**
* @param array $attribs
* @param string $html
@ -617,6 +683,43 @@ abstract class ParsoidHandler extends Handler {
return '/v1/revision/{revision}/html';
}
/**
 * Redirect the client to the target of a wiki redirect.
 *
 * The redirect is only issued when the target resolves to an existing page.
 * For any other target this method returns normally without redirecting.
 *
 * @param LinkTarget $redirectTarget
 * @param string $domain
 * @param string $format
 *
 * @throws ResponseException carrying the redirect response, if the target exists
 */
private function followWikiRedirect( $redirectTarget, $domain, $format ): void {
	$services = MediaWikiServices::getInstance();
	$targetPage = $services->getPageStore()->getPageForLink( $redirectTarget );

	if ( !( $targetPage instanceof ExistingPageRecord ) ) {
		// NOTE(review): redirects to non-existing pages are not followed.
		return;
	}

	$pathParams = [
		'domain' => $domain,
		'format' => $format,
		'title' => $services->getTitleFormatter()->getPrefixedDBkey( $targetPage ),
		'revision' => $targetPage->getLatest()
	];

	// ExistingPageRecord::exists() is documented to always return true, so the
	// revision endpoint is the only one that can apply here.
	// NOTE: Core doesn't have REST endpoints that return raw wikitext,
	// so the below will fail unless the methods are overwritten.
	$redirectPath = $this->getRevisionContentEndpoint( ParsoidFormatHelper::FORMAT_WIKITEXT );

	throw new ResponseException(
		$this->createRedirectResponse(
			$redirectPath,
			$pathParams,
			$this->getRequest()->getQueryParams()
		)
	);
}
/**
* Expand the current URL with the latest revision number and redirect there.
*
@ -654,6 +757,37 @@ abstract class ParsoidHandler extends Handler {
return $this->createRedirectResponse( $newPath, $pathParams, $this->getRequest()->getQueryParams() );
}
/**
 * Run Parsoid's linter for the given page configuration.
 *
 * @param PageConfig $pageConfig
 * @param array $attribs Request attributes; 'envOptions' and 'oldid' are read.
 * @param ?string $wikitext Wikitext supplied with the request, if any. Linter
 *   data logging is only enabled when this is null and an oldid is given.
 *
 * @return mixed Whatever Parsoid::wikitext2lint() returns
 * @throws HttpException 400 on a ClientError, 413 when a resource limit is exceeded
 */
public function wtLint( PageConfig $pageConfig, array $attribs, ?string $wikitext = null ) {
	$lintOptions = $attribs['envOptions'];
	$revisionId = $attribs['oldid'];

	$isStoredRevision = ( $wikitext === null && $revisionId !== null );
	if ( $isStoredRevision ) {
		$lintOptions['logLinterData'] = true;
	}

	try {
		return $this->newParsoid()->wikitext2lint( $pageConfig, $lintOptions );
	} catch ( ClientError | ResourceLimitExceededException $e ) {
		// ClientError takes precedence, mirroring the order of separate catch clauses.
		$statusCode = ( $e instanceof ClientError ) ? 400 : 413;
		throw new HttpException( $e->getMessage(), $statusCode );
	}
}
/**
 * Decide whether the current request may write to the parser cache.
 *
 * @return bool Always true when no write ratio is configured; otherwise true
 *   when a random draw falls below the configured ratio.
 */
private function allowParserCacheWrite() {
	$ratioSetting = 'TemporaryParsoidHandlerParserCacheWriteRatio';
	$config = RequestContext::getMain()->getConfig();

	if ( !$config->has( $ratioSetting ) ) {
		return true;
	}

	// HACK: remove before the release of MW 1.40 / early 2023.
	// We need to be careful about ramping up the cache writes,
	// so we don't run out of disk space.
	return wfRandom() < $config->get( $ratioSetting );
}
/**
* Wikitext -> HTML helper.
* Spec'd in https://phabricator.wikimedia.org/T75955 and the API tests.
@ -662,6 +796,7 @@ abstract class ParsoidHandler extends Handler {
* @param array $attribs Request attributes from getRequestAttributes()
* @param ?string $wikitext Wikitext to transform (or null to use the
* page specified in the request attributes).
*
* @return Response
*/
protected function wt2html(
@ -672,75 +807,67 @@ abstract class ParsoidHandler extends Handler {
$format = $opts['format'];
$oldid = $attribs['oldid'];
$needsPageBundle = ( $format === ParsoidFormatHelper::FORMAT_PAGEBUNDLE );
if ( $format === ParsoidFormatHelper::FORMAT_LINT ) {
$lints = $this->wtLint( $pageConfig, $attribs, $wikitext );
$response = $this->getResponseFactory()->createJson( $lints );
return $response;
}
// Performance Timing options
// init refers to time elapsed before parsing begins
$metrics = $this->metrics;
$timing = Timing::start( $metrics );
if ( Semver::satisfies( $attribs['envOptions']['outputContentVersion'],
'!=' . Parsoid::defaultHTMLVersion() ) ) {
$metrics->increment( 'wt2html.parse.version.notdefault' );
}
$helper = $this->getHtmlOutputRendererHelper(
$attribs,
$wikitext,
$pageConfig
);
$parsoid = $this->newParsoid();
if ( !$this->allowParserCacheWrite() ) {
// NOTE: In theory, we want to always write to the parser cache. However,
// the ParserCache takes a lot of disk space, and we need to have fine grained control
// over when we write to it, so we can avoid running out of disc space.
$helper->setUseParserCache( true, false );
}
if (
!empty( $this->parsoidSettings['devAPI'] ) &&
( $request->getQueryParams()['follow_redirects'] ?? false )
) {
$content = $pageConfig->getRevisionContent();
$revisionStore = MediaWikiServices::getInstance()->getRevisionStore();
$revision = $revisionStore->getRevisionById( $helper->getRevisionId() );
$content = $revision->getContent( SlotRecord::MAIN );
$redirectTarget = $content ? $content->getRedirectTarget() : null;
if ( $redirectTarget ) {
$redirectInfo = $this->dataAccess->getPageInfo(
$pageConfig, [ $redirectTarget ]
);
$pathParams = [
'domain' => $attribs['envOptions']['domain'],
'format' => $format,
'title' => $redirectTarget,
'revision' => $redirectInfo['revId']
];
// NOTE: Core doesn't have REST endpoints that return raw wikitext,
// so the below will fail unless the methods are overwritten.
if ( $redirectInfo['revId'] ) {
$redirectPath = $this->getRevisionContentEndpoint( ParsoidFormatHelper::FORMAT_WIKITEXT );
} else {
$redirectPath = $this->getPageContentEndpoint( ParsoidFormatHelper::FORMAT_WIKITEXT );
}
throw new ResponseException(
$this->createRedirectResponse( $redirectPath, $pathParams, $request->getQueryParams() )
$this->followWikiRedirect(
$redirectTarget,
$attribs['envOptions']['domain'],
$format
);
}
}
$reqOpts = $attribs['envOptions'] + [
'pageBundle' => $needsPageBundle,
'contentmodel' => $opts['contentmodel'] ?? null,
];
$needsPageBundle = ( $format === ParsoidFormatHelper::FORMAT_PAGEBUNDLE );
if ( Semver::satisfies( $attribs['envOptions']['outputContentVersion'],
'!=' . Parsoid::defaultHTMLVersion() ) ) {
$metrics->increment( 'wt2html.parse.version.notdefault' );
}
// VE, the only client using body_only property,
// doesn't want section tags when this flag is set.
// (T181226)
if ( $attribs['body_only'] ) {
$reqOpts['wrapSections'] = false;
$reqOpts['body_only'] = true;
$helper->setFlavor( 'fragment' );
} elseif ( !$needsPageBundle ) {
// Inline data-parsoid. This will happen when no special params are set.
$helper->setFlavor( 'edit' );
}
if ( $wikitext === null && $oldid !== null ) {
$reqOpts['logLinterData'] = true;
$mstr = 'pageWithOldid';
} else {
$mstr = 'wt';
}
// XXX: Not necessary, since it's in the pageConfig
// if ( isset( $attribs['pagelanguage'] ) ) {
// $reqOpts['pagelanguage'] = $attribs['pagelanguage'];
// }
$timing->end( "wt2html.$mstr.init" );
$metrics->timing(
"wt2html.$mstr.size.input",
@ -748,107 +875,89 @@ abstract class ParsoidHandler extends Handler {
);
$parseTiming = Timing::start( $metrics );
if ( $format === ParsoidFormatHelper::FORMAT_LINT ) {
try {
$lints = $parsoid->wikitext2lint( $pageConfig, $reqOpts );
} catch ( ClientError $e ) {
throw new HttpException( $e->getMessage(), 400 );
} catch ( ResourceLimitExceededException $e ) {
throw new HttpException( $e->getMessage(), 413 );
}
$response = $this->getResponseFactory()->createJson( $lints );
if ( $needsPageBundle ) {
$pb = $helper->getPageBundle();
$response = $this->getResponseFactory()->createJson( $pb->responseData() );
$helper->putHeaders( $response, false );
ParsoidFormatHelper::setContentType(
$response,
ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
$pb->version
);
} else {
$parserOutput = new ParserOutput();
try {
$out = $parsoid->wikitext2html(
$pageConfig, $reqOpts, $headers, $parserOutput
);
} catch ( ClientError $e ) {
throw new HttpException( $e->getMessage(), 400 );
} catch ( ResourceLimitExceededException $e ) {
throw new HttpException( $e->getMessage(), 413 );
$out = $helper->getHtml();
$response = $this->getResponseFactory()->create();
$response->getBody()->write( $out->getRawText() );
$helper->putHeaders( $response, true );
// XXX: Since we don't enable stashing, the ETag is not really useful.
$eTag = $helper->getETag();
if ( $eTag ) {
$response->setHeader( 'ETag', $eTag );
}
if ( $needsPageBundle ) {
$response = $this->getResponseFactory()->createJson( $out->responseData() );
ParsoidFormatHelper::setContentType( $response, ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
$out->version );
} else {
$response = $this->getResponseFactory()->create();
ParsoidFormatHelper::setContentType( $response, ParsoidFormatHelper::FORMAT_HTML,
$attribs['envOptions']['outputContentVersion'] );
$response->getBody()->write( $out );
// @phan-suppress-next-next-line PhanTypeArraySuspiciousNullable $headers can't be null after the
// method call, but the docblock of wikitext2html doesn't say that.
$response->setHeader( 'Content-Language', $headers['content-language'] );
// @phan-suppress-next-line PhanTypeArraySuspiciousNullable Same.
$response->addHeader( 'Vary', $headers['vary'] );
}
// XXX: For pagebundle requests, this can be somewhat inflated
// because of pagebundle json-encoding overheads
$outSize = $response->getBody()->getSize();
$parseTime = $parseTiming->end( "wt2html.$mstr.parse" );
$timing->end( 'wt2html.total' );
$metrics->timing( "wt2html.$mstr.size.output", $outSize );
// Ignore slow parse metrics for non-oldid parses
if ( $mstr === 'pageWithOldid' ) {
if ( $parseTime > 3000 ) {
LoggerFactory::getInstance( 'slow-parsoid' )
->info( 'Parsing {title} was slow, took {time} seconds', [
'time' => number_format( $parseTime / 1000, 2 ),
'title' => $pageConfig->getTitle(),
] );
}
// NOTE: We used to generate an ETag here, but since it was random every time and the
// output wasn't stored anywhere, it could not possibly match anything, ever.
if ( $parseTime > 10 && $outSize > 100 ) {
// * Don't bother with this metric for really small parse times
// p99 for initialization time is ~7ms according to grafana.
// So, 10ms ensures that startup overheads don't skew the metrics
// * For body_only=false requests, <head> section isn't generated
// and if the output is small, per-request overheads can skew
// the timePerKB metrics.
// FIXME: For pagebundle requests, this can be somewhat inflated
// because of pagebundle json-encoding overheads
$outSize = $response->getBody()->getSize();
$parseTime = $parseTiming->end( "wt2html.$mstr.parse" );
$timing->end( 'wt2html.total' );
$metrics->timing( "wt2html.$mstr.size.output", $outSize );
// NOTE: This is slightly misleading since there are fixed costs
// for generating output like the <head> section and should be factored in,
// but this is good enough for now as a useful first degree of approximation.
$timePerKB = $parseTime * 1024 / $outSize;
$metrics->timing( 'wt2html.timePerKB', $timePerKB );
// Ignore slow parse metrics for non-oldid parses
if ( $mstr === 'pageWithOldid' ) {
if ( $parseTime > 3000 ) {
if ( $timePerKB > 500 ) {
// At 100ms/KB, even a 100KB page which isn't that large will take 10s.
// So, we probably want to shoot for a threshold under 100ms.
// But, let's start with 500ms+ outliers first and see what we uncover.
LoggerFactory::getInstance( 'slow-parsoid' )
->info( 'Parsing {title} was slow, took {time} seconds', [
->info( 'Parsing {title} was slow, timePerKB took {timePerKB} ms, total: {time} seconds', [
'time' => number_format( $parseTime / 1000, 2 ),
'timePerKB' => number_format( $timePerKB, 1 ),
'title' => $pageConfig->getTitle(),
] );
}
if ( $parseTime > 10 && $outSize > 100 ) {
// * Don't bother with this metric for really small parse times
// p99 for initialization time is ~7ms according to grafana.
// So, 10ms ensures that startup overheads don't skew the metrics
// * For body_only=false requests, <head> section isn't generated
// and if the output is small, per-request overheads can skew
// the timePerKB metrics.
// FIXME: This is slightly misleading since there are fixed costs
// for generating output like the <head> section and should be factored in,
// but this is good enough for now as a useful first degree of approxmation.
$timePerKB = $parseTime * 1024 / $outSize;
$metrics->timing( 'wt2html.timePerKB', $timePerKB );
if ( $timePerKB > 500 ) {
// At 100ms/KB, even a 100KB page which isn't that large will take 10s.
// So, we probably want to shoot for a threshold under 100ms.
// But, let's start with 500ms+ outliers first and see what we uncover.
LoggerFactory::getInstance( 'slow-parsoid' )
->info( 'Parsing {title} was slow, timePerKB took {timePerKB} ms, total: {time} seconds', [
'time' => number_format( $parseTime / 1000, 2 ),
'timePerKB' => number_format( $timePerKB, 1 ),
'title' => $pageConfig->getTitle(),
] );
}
}
}
}
if ( $wikitext !== null ) {
// Don't cache requests when wt is set in case somebody uses
// GET for wikitext parsing
// XXX: can we just refuse to do wikitext parsing in a GET request?
$response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' );
} elseif ( $oldid !== null ) {
// FIXME this should be handled in core (cf OutputPage::sendCacheControl)
// XXX: can this go away? Parsoid's PageContent class doesn't expose supressed revision content.
if ( $request->getHeaderLine( 'Cookie' ) ||
$request->getHeaderLine( 'Authorization' ) ) {
// Don't cache requests with a session.
$response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' );
}
// Indicate the MediaWiki revision in a header as well for
// ease of extraction in clients.
$response->setHeader( 'Content-Revision-Id', $oldid );
} else {
throw new LogicException( 'Should be unreachable' );
}
return $response;
}

View file

@ -45,7 +45,8 @@ final class PageBundleParserOutputConverter {
'parsoid' => $pageBundle->parsoid,
'mw' => $pageBundle->mw,
'version' => $pageBundle->version,
'headers' => $pageBundle->headers
'headers' => $pageBundle->headers,
'contentmodel' => $pageBundle->contentmodel,
]
);
@ -66,7 +67,8 @@ final class PageBundleParserOutputConverter {
$pageBundleData['parsoid'] ?? [],
$pageBundleData['mw'] ?? [],
$pageBundleData['version'] ?? null,
$pageBundleData['headers'] ?? []
$pageBundleData['headers'] ?? [],
$pageBundleData['contentmodel'] ?? null
);
}

View file

@ -23,7 +23,6 @@ use Config;
use HashConfig;
use IBufferingStatsdDataFactory;
use InvalidArgumentException;
use Language;
use Liuggio\StatsdClient\Factory\StatsdDataFactory;
use MediaWiki\Config\ServiceOptions;
use MediaWiki\Logger\LoggerFactory;
@ -43,6 +42,7 @@ use ParserOptions;
use ParserOutput;
use Status;
use UnexpectedValueException;
use Wikimedia\Parsoid\Config\PageConfig;
use Wikimedia\Parsoid\Config\SiteConfig;
use Wikimedia\Parsoid\Core\ClientError;
use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
@ -238,41 +238,28 @@ class ParsoidOutputAccess {
}
/**
* @param PageIdentity $page
* @param array $envOptions
* @param ?RevisionRecord $revision
* @param Language|null $languageOverride
* @param PageConfig $pageConfig
* @param array $parsoidOptions
*
* @return Status
*/
private function parseInternal(
PageIdentity $page,
array $envOptions,
?RevisionRecord $revision = null,
Language $languageOverride = null
PageConfig $pageConfig,
array $parsoidOptions
): Status {
$defaultOptions = [
'pageBundle' => true,
'prefix' => $this->parsoidWikiId,
'pageName' => $page,
'htmlVariantLanguage' => $languageOverride ? $languageOverride->getCode() : null,
'pageName' => $pageConfig->getTitle(),
'htmlVariantLanguage' => $pageConfig->getPageLanguage(),
'outputContentVersion' => Parsoid::defaultHTMLVersion(),
];
try {
$langCode = $languageOverride ? $languageOverride->getCode() : null;
$pageConfig = $this->parsoidPageConfigFactory->create(
$page,
null,
$revision,
null,
$langCode,
$this->options->get( MainConfigNames::ParsoidSettings )
);
$startTime = microtime( true );
$pageBundle = $this->parsoid->wikitext2html(
$pageConfig,
$envOptions + $defaultOptions
$parsoidOptions + $defaultOptions
);
$parserOutput = PageBundleParserOutputConverter::parserOutputFromPageBundle( $pageBundle );
@ -281,7 +268,7 @@ class ParsoidOutputAccess {
LoggerFactory::getInstance( 'slow-parsoid' )
->info( 'Parsing {title} was slow, took {time} seconds', [
'time' => number_format( $time, 2 ),
'title' => (string)$page,
'title' => $pageConfig->getTitle(),
] );
}
return Status::newGood( $parserOutput );
@ -381,7 +368,7 @@ class ParsoidOutputAccess {
/**
* @param PageIdentity $page
* @param ParserOptions $parserOpts
* @param array $envOptions
* @param array $parsoidOptions
* @param RevisionRecord|int|null $revision
*
* @return Status
@ -389,7 +376,7 @@ class ParsoidOutputAccess {
public function parse(
PageIdentity $page,
ParserOptions $parserOpts,
array $envOptions,
array $parsoidOptions,
$revision
): Status {
// NOTE: If we have a RevisionRecord already, just use it, there is no need to resolve $page to
@ -398,9 +385,18 @@ class ParsoidOutputAccess {
[ $page, $revision ] = $this->resolveRevision( $page, $revision );
}
$revId = $revision ? $revision->getId() : $page->getId();
$languageOverride = $parserOpts->getTargetLanguage();
$langCode = $languageOverride ? $languageOverride->getCode() : null;
$pageConfig = $this->parsoidPageConfigFactory->create(
$page,
null,
$revision,
null,
$langCode,
$this->options->get( MainConfigNames::ParsoidSettings )
);
$status = $this->parseInternal( $page, $envOptions, $revision, $parserOpts->getTargetLanguage() );
$status = $this->parseInternal( $pageConfig, $parsoidOptions );
if ( !$status->isOK() ) {
return $status;
@ -411,6 +407,7 @@ class ParsoidOutputAccess {
// TODO: when we make tighter integration with Parsoid, render ID should become
// a standard ParserOutput property. Nothing else needs it now, so don't generate
// it in ParserCache just yet.
$revId = $revision->getId();
$parsoidRenderId = new ParsoidRenderID( $revId, $this->globalIdGenerator->newUUIDv1() );
$parserOutput->setExtensionData( self::RENDER_ID_KEY, $parsoidRenderId->getKey() );

View file

@ -968,6 +968,8 @@ describe( '/transform/ endpoint', function () {
} );
describe( 'Variant conversion', function () {
// NOTE: Continue to accept sr-el for a while, to remain compatible with older versions of the parsoid lib.
const expectedCodeSerbianLatin = /sr-el|sr-Latn/;
it( 'should perform variant conversion for transform given pagelanguage in HTTP header (html)', function ( done ) {
client.req
@ -977,7 +979,7 @@ describe( '/transform/ endpoint', function () {
.send( {
wikitext: 'абвг abcd x'
} )
.expect( 'Content-Language', 'sr-el' )
.expect( 'Content-Language', expectedCodeSerbianLatin )
.expect( 'Vary', /\bAccept-Language\b/i )
.expect( validHtmlResponse( ( doc ) => {
doc.body.textContent.should.equal( 'abvg abcd x' );
@ -1003,7 +1005,7 @@ describe( '/transform/ endpoint', function () {
const headers = res.body.html.headers;
headers.should.have.property( 'content-language' );
headers.should.have.property( 'vary' );
headers[ 'content-language' ].should.equal( 'sr-el' );
// expectedCodeSerbianLatin is a RegExp, so the header has to be matched, not compared for equality.
headers[ 'content-language' ].should.match( expectedCodeSerbianLatin );
headers.vary.should.match( /\bAccept-Language\b/i );
} )
.end( done );
@ -1021,7 +1023,7 @@ describe( '/transform/ endpoint', function () {
body: 'абвг abcd x'
}
} )
.expect( 'Content-Language', 'sr-el' )
.expect( 'Content-Language', expectedCodeSerbianLatin )
.expect( 'Vary', /\bAccept-Language\b/i )
.expect( validHtmlResponse( ( doc ) => {
doc.body.textContent.should.equal( 'abvg abcd x' );
@ -1051,7 +1053,7 @@ describe( '/transform/ endpoint', function () {
const headers = res.body.html.headers;
headers.should.have.property( 'content-language' );
headers.should.have.property( 'vary' );
headers[ 'content-language' ].should.equal( 'sr-el' );
headers[ 'content-language' ].should.match( expectedCodeSerbianLatin );
headers.vary.should.match( /\bAccept-Language\b/i );
} )
.end( done );
@ -1068,7 +1070,7 @@ describe( '/transform/ endpoint', function () {
body: 'абвг abcd x'
}
} )
.expect( 'Content-Language', 'sr-el' )
.expect( 'Content-Language', expectedCodeSerbianLatin )
.expect( 'Vary', /\bAccept-Language\b/i )
.expect( validHtmlResponse( ( doc ) => {
doc.body.textContent.should.equal( 'abvg abcd x' );
@ -1094,7 +1096,7 @@ describe( '/transform/ endpoint', function () {
const headers = res.body.html.headers;
headers.should.have.property( 'content-language' );
headers.should.have.property( 'vary' );
headers[ 'content-language' ].should.equal( 'sr-el' );
headers[ 'content-language' ].should.match( expectedCodeSerbianLatin );
headers.vary.should.match( /\bAccept-Language\b/i );
} )
.end( done );

View file

@ -7,9 +7,11 @@ use Generator;
use MediaWiki\MainConfigNames;
use MediaWiki\MainConfigSchema;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Parser\ParserCacheFactory;
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
use MediaWiki\Parser\Parsoid\HtmlToContentTransform;
use MediaWiki\Parser\Parsoid\HtmlTransformFactory;
use MediaWiki\Parser\RevisionOutputCache;
use MediaWiki\Permissions\UltimateAuthority;
use MediaWiki\Rest\Handler\HtmlInputTransformHelper;
use MediaWiki\Rest\Handler\ParsoidFormatHelper;
@ -26,6 +28,7 @@ use MediaWiki\Tests\Rest\RestTestTrait;
use MediaWiki\User\UserIdentityValue;
use MediaWikiIntegrationTestCase;
use NullStatsdDataFactory;
use ParserCache;
use PHPUnit\Framework\MockObject\MockObject;
use TitleValue;
use Wikimedia\Message\ITextFormatter;
@ -248,12 +251,12 @@ class ParsoidHandlerTest extends MediaWikiIntegrationTestCase {
}
public function wt2html(
PageConfig $pageConfig,
PageConfig $pageConfigConfig,
array $attribs,
?string $wikitext = null
) {
return parent::wt2html(
$pageConfig,
$pageConfigConfig,
$attribs,
$wikitext
);
@ -1910,6 +1913,8 @@ class ParsoidHandlerTest extends MediaWikiIntegrationTestCase {
array $unexpectedHtml,
array $expectedHeaders = []
) {
// $this->overrideConfigValue( 'TemporaryParsoidHandlerParserCacheWriteRatio', 0 );
$hmtlProfileUri = 'https://www.mediawiki.org/wiki/Specs/html/2.6.0';
$expectedHeaders += [
'content-type' => "text/x-wiki; charset=utf-8; profile=\"$hmtlProfileUri\"",
@ -1949,6 +1954,43 @@ class ParsoidHandlerTest extends MediaWikiIntegrationTestCase {
}
}
/**
 * Check that wt2html() honours the temporary parser cache write ratio.
 *
 * wt2html() is called twice on the same handler: once before
 * TemporaryParsoidHandlerParserCacheWriteRatio is overridden, and once after
 * it has been overridden to 0. The mocked ParserCache expects save() to be
 * called exactly once over the whole test.
 */
public function testWt2html_ParserCache() {
$page = $this->getExistingTestPage();
$pageConfig = $this->getPageConfig( $page );
$parserCache = $this->createNoOpMock( ParserCache::class, [ 'save', 'get' ] );
// This is the critical assertion in this test case: the save() method should
// be called exactly once!
$parserCache->expects( $this->once() )->method( 'save' );
// Always report a cache miss.
$parserCache->method( 'get' )->willReturn( false );
$parserCacheFactory = $this->createNoOpMock(
ParserCacheFactory::class,
[ 'getParserCache', 'getRevisionOutputCache' ]
);
$parserCacheFactory->method( 'getParserCache' )->willReturn( $parserCache );
$parserCacheFactory->method( 'getRevisionOutputCache' )->willReturn(
$this->createNoOpMock( RevisionOutputCache::class )
);
// Install the mocked factory before the handler is created below.
$this->setService( 'ParserCacheFactory', $parserCacheFactory );
$attribs = self::DEFAULT_ATTRIBS;
$attribs['opts']['from'] = 'wikitext';
$attribs['opts']['format'] = 'html';
$handler = $this->newParsoidHandler();
// This should trigger a parser cache write, because we didn't set a write-ratio
$handler->wt2html( $pageConfig, $attribs );
$this->overrideConfigValue( 'TemporaryParsoidHandlerParserCacheWriteRatio', 0 );
// This should not trigger a parser cache write, because we set the write-ratio to 0
$handler->wt2html( $pageConfig, $attribs );
}
// TODO: test wt2html failure modes
// TODO: test redlinks

View file

@ -21,6 +21,7 @@ class PageBundleParserOutputConverterTest extends MediaWikiUnitTestCase {
$this->assertSame( $pageBundle->headers, $extensionData['headers'] );
$this->assertSame( $pageBundle->headers['content-language'], $extensionData['headers']['content-language'] );
$this->assertSame( $pageBundle->version, $extensionData['version'] );
$this->assertSame( $pageBundle->contentmodel, $extensionData['contentmodel'] );
}
public function provideParserOutputFromPageBundle() {
@ -30,7 +31,8 @@ class PageBundleParserOutputConverterTest extends MediaWikiUnitTestCase {
[ 'ids' => '1.33' ],
[ 'ids' => '1.33' ],
'1.x',
[ 'content-language' => 'abc' ]
[ 'content-language' => 'abc' ],
'testing'
)
];
@ -59,6 +61,7 @@ class PageBundleParserOutputConverterTest extends MediaWikiUnitTestCase {
$this->assertSame( $extensionData['version'] ?? null, $pageBundle->version );
$this->assertSame( $extensionData['headers'] ?? [], $pageBundle->headers );
$this->assertSame( $extensionData['headers']['content-language'], $pageBundle->headers['content-language'] );
$this->assertSame( $extensionData['contentmodel'] ?? null, $pageBundle->contentmodel );
}
public function providePageBundleFromParserOutput() {
@ -69,7 +72,8 @@ class PageBundleParserOutputConverterTest extends MediaWikiUnitTestCase {
'parsoid' => [ 'ids' => '1.22' ],
'mw' => [],
'version' => '2.x',
'headers' => [ 'content-language' => 'xyz' ]
'headers' => [ 'content-language' => 'xyz' ],
'contentmodel' => 'testing'
]
)
];