ParsoidHandler: use HtmlOutputRendererHelper in wt2html
NOTE: This causes Parsoid output to be written to the parser cache.
This should be unconditional in the future, but for now it is
controlled by wgTemporaryParsoidHandlerParserCacheWriteRatio.
This change affects the following endpoints that use the wt2html method:
* /coredev/v0/transform/wikitext/to/html in core
* /{domain}/v3/transform/wikitext/to/html from parsoid
* /{domain}/v3/page/html/{title} from parsoid
The /v1/page/{title}/html endpoint is not affected, since it
doesn't use wt2html, but has always been using HtmlOutputRendererHelper
directly.
Bug: T322672
Depends-On: Ic37f606bb51504c8164d005af55ca9a65f595041
Change-Id: Ie430acd0753880d88370bb9f22bb40a0f9ded917
This commit is contained in:
parent
f66b4dcc82
commit
e82f11c246
8 changed files with 330 additions and 165 deletions
|
|
@ -174,8 +174,6 @@ class HtmlOutputRendererHelper {
|
|||
* Flavors may influence parser options, parsoid options, and DOM transformations.
|
||||
* They will be reflected by the ETag returned by getETag().
|
||||
*
|
||||
* Flavors cannot be combined. For more fine-grained control, use setOption
|
||||
*
|
||||
* @param string $flavor
|
||||
*
|
||||
* @return void
|
||||
|
|
@ -577,14 +575,20 @@ class HtmlOutputRendererHelper {
|
|||
* Set the HTTP headers based on the response generated
|
||||
*
|
||||
* @param ResponseInterface $response
|
||||
* @param bool $setContentLanguageHeader
|
||||
* @param bool $forHtml Whether the response will be HTML (rather than JSON)
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function putHeaders( ResponseInterface $response, bool $setContentLanguageHeader ) {
|
||||
public function putHeaders( ResponseInterface $response, bool $forHtml = true ) {
|
||||
if ( $forHtml ) {
|
||||
// For HTML we want to set the Content-Language. For JSON, we probably don't.
|
||||
$response->setHeader( 'Content-Language', $this->getHtmlOutputContentLanguage() );
|
||||
|
||||
$pb = $this->getPageBundle();
|
||||
ParsoidFormatHelper::setContentType( $response, ParsoidFormatHelper::FORMAT_HTML, $pb->version );
|
||||
}
|
||||
|
||||
if ( $this->targetLanguageCode ) {
|
||||
if ( $setContentLanguageHeader ) {
|
||||
$response->setHeader( 'Content-Language', $this->getHtmlOutputContentLanguage() );
|
||||
}
|
||||
$response->addHeader( 'Vary', 'Accept-Language' );
|
||||
}
|
||||
|
||||
|
|
@ -593,6 +597,12 @@ class HtmlOutputRendererHelper {
|
|||
if ( !$this->isCacheable ) {
|
||||
$response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' );
|
||||
}
|
||||
|
||||
// TODO: cache control for stable HTML? See ContentHelper::setCacheControl
|
||||
|
||||
if ( $this->getRevisionId() ) {
|
||||
$response->setHeader( 'Content-Revision-Id', (string)$this->getRevisionId() );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -109,7 +109,6 @@ class PageHTMLHandler extends SimpleHandler {
|
|||
switch ( $outputMode ) {
|
||||
case 'html':
|
||||
$response = $this->getResponseFactory()->create();
|
||||
$response->setHeader( 'Content-Type', 'text/html' );
|
||||
$this->contentHelper->setCacheControl( $response, $parserOutput->getCacheExpiry() );
|
||||
$response->setBody( new StringStream( $parserOutputHtml ) );
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -24,9 +24,11 @@ use ExtensionRegistry;
|
|||
use InvalidArgumentException;
|
||||
use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
|
||||
use LogicException;
|
||||
use MediaWiki\Linker\LinkTarget;
|
||||
use MediaWiki\Logger\LoggerFactory;
|
||||
use MediaWiki\MainConfigNames;
|
||||
use MediaWiki\MediaWikiServices;
|
||||
use MediaWiki\Page\ExistingPageRecord;
|
||||
use MediaWiki\Page\PageIdentity;
|
||||
use MediaWiki\Rest\Handler;
|
||||
use MediaWiki\Rest\HttpException;
|
||||
|
|
@ -37,7 +39,6 @@ use MediaWiki\Revision\MutableRevisionRecord;
|
|||
use MediaWiki\Revision\RevisionAccessException;
|
||||
use MediaWiki\Revision\SlotRecord;
|
||||
use MobileContext;
|
||||
use ParserOutput;
|
||||
use RequestContext;
|
||||
use Title;
|
||||
use WikiMap;
|
||||
|
|
@ -62,6 +63,7 @@ use WikitextContent;
|
|||
* Base class for Parsoid handlers.
|
||||
*/
|
||||
abstract class ParsoidHandler extends Handler {
|
||||
|
||||
// TODO logging, timeouts(?), CORS
|
||||
// TODO content negotiation (routes.js routes.acceptable)
|
||||
// TODO handle MaxConcurrentCallsError (pool counter?)
|
||||
|
|
@ -279,6 +281,70 @@ abstract class ParsoidHandler extends Handler {
|
|||
return $this->requestAttributes;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $attribs
|
||||
* @param ?string $source
|
||||
* @param PageConfig|PageIdentity $page
|
||||
*
|
||||
* @return HtmlOutputRendererHelper
|
||||
*/
|
||||
private function getHtmlOutputRendererHelper(
|
||||
array $attribs,
|
||||
?string $source,
|
||||
$page
|
||||
): HtmlOutputRendererHelper {
|
||||
$services = MediaWikiServices::getInstance();
|
||||
|
||||
// Support PageConfig for backwards compatibility.
|
||||
// We should leave it to lower level code to create it.
|
||||
if ( $page instanceof PageConfig ) {
|
||||
$title = $page->getTitle();
|
||||
$page = $services->getPageStore()->getPageByText( $title );
|
||||
|
||||
if ( !$page ) {
|
||||
throw new HttpException( "Bad title: $title", 400 );
|
||||
}
|
||||
}
|
||||
|
||||
$helper = new HtmlOutputRendererHelper(
|
||||
$services->getParsoidOutputStash(),
|
||||
$services->getStatsdDataFactory(),
|
||||
$services->getParsoidOutputAccess(),
|
||||
$services->getHtmlTransformFactory(),
|
||||
$services->getContentHandlerFactory(),
|
||||
$services->getLanguageFactory()
|
||||
);
|
||||
|
||||
$user = RequestContext::getMain()->getUser();
|
||||
|
||||
$params = [];
|
||||
$helper->init( $page, $params, $user );
|
||||
|
||||
// XXX: should default to the page's content model?
|
||||
$model = $attribs['opts']['contentmodel']
|
||||
?? ( $attribs['envOptions']['contentmodel'] ?? CONTENT_MODEL_WIKITEXT );
|
||||
|
||||
if ( $source !== null ) {
|
||||
$helper->setContentSource( $source, $model );
|
||||
}
|
||||
|
||||
if ( isset( $attribs['envOptions']['outputContentVersion'] )
|
||||
&& $attribs['envOptions']['outputContentVersion'] !== Parsoid::defaultHTMLVersion()
|
||||
) {
|
||||
$helper->setOutputProfileVersion( $attribs['envOptions']['outputContentVersion'] );
|
||||
}
|
||||
|
||||
if ( isset( $attribs['pagelanguage'] ) ) {
|
||||
$helper->setPageLanguage( $attribs['pagelanguage'] );
|
||||
}
|
||||
|
||||
if ( isset( $attribs['envOptions']['htmlVariantLanguage'] ) ) {
|
||||
$helper->setVariantConversionLanguage( $attribs['envOptions']['htmlVariantLanguage'] );
|
||||
}
|
||||
|
||||
return $helper;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $attribs
|
||||
* @param string $html
|
||||
|
|
@ -617,6 +683,43 @@ abstract class ParsoidHandler extends Handler {
|
|||
return '/v1/revision/{revision}/html';
|
||||
}
|
||||
|
||||
/**
|
||||
* @param LinkTarget $redirectTarget
|
||||
* @param string $domain
|
||||
* @param string $format
|
||||
*
|
||||
* @throws ResponseException
|
||||
*/
|
||||
private function followWikiRedirect( $redirectTarget, $domain, $format ): void {
|
||||
$pageStore = MediaWikiServices::getInstance()->getPageStore();
|
||||
$titleFormatter = MediaWikiServices::getInstance()->getTitleFormatter();
|
||||
$redirectTarget = $pageStore->getPageForLink( $redirectTarget );
|
||||
|
||||
if ( $redirectTarget instanceof ExistingPageRecord ) {
|
||||
$pathParams = [
|
||||
'domain' => $domain,
|
||||
'format' => $format,
|
||||
'title' => $titleFormatter->getPrefixedDBkey( $redirectTarget ),
|
||||
'revision' => $redirectTarget->getLatest()
|
||||
];
|
||||
|
||||
// NOTE: Core doesn't have REST endpoints that return raw wikitext,
|
||||
// so the below will fail unless the methods are overwritten.
|
||||
if ( $redirectTarget->exists() ) {
|
||||
$redirectPath = $this->getRevisionContentEndpoint( ParsoidFormatHelper::FORMAT_WIKITEXT );
|
||||
} else {
|
||||
$redirectPath = $this->getPageContentEndpoint( ParsoidFormatHelper::FORMAT_WIKITEXT );
|
||||
}
|
||||
throw new ResponseException(
|
||||
$this->createRedirectResponse(
|
||||
$redirectPath,
|
||||
$pathParams,
|
||||
$this->getRequest()->getQueryParams()
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Expand the current URL with the latest revision number and redirect there.
|
||||
*
|
||||
|
|
@ -654,6 +757,37 @@ abstract class ParsoidHandler extends Handler {
|
|||
return $this->createRedirectResponse( $newPath, $pathParams, $this->getRequest()->getQueryParams() );
|
||||
}
|
||||
|
||||
public function wtLint( PageConfig $pageConfig, array $attribs, ?string $wikitext = null ) {
|
||||
$envOptions = $attribs['envOptions'];
|
||||
$oldid = $attribs['oldid'];
|
||||
|
||||
if ( $wikitext === null && $oldid !== null ) {
|
||||
$envOptions['logLinterData'] = true;
|
||||
}
|
||||
|
||||
try {
|
||||
$parsoid = $this->newParsoid();
|
||||
return $parsoid->wikitext2lint( $pageConfig, $envOptions );
|
||||
} catch ( ClientError $e ) {
|
||||
throw new HttpException( $e->getMessage(), 400 );
|
||||
} catch ( ResourceLimitExceededException $e ) {
|
||||
throw new HttpException( $e->getMessage(), 413 );
|
||||
}
|
||||
}
|
||||
|
||||
private function allowParserCacheWrite() {
|
||||
$config = RequestContext::getMain()->getConfig();
|
||||
|
||||
// HACK: remove before the release of MW 1.40 / early 2023.
|
||||
if ( $config->has( 'TemporaryParsoidHandlerParserCacheWriteRatio' ) ) {
|
||||
// We need to be careful about ramping up the cache writes,
|
||||
// so we don't run out of disk space.
|
||||
return wfRandom() < $config->get( 'TemporaryParsoidHandlerParserCacheWriteRatio' );
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wikitext -> HTML helper.
|
||||
* Spec'd in https://phabricator.wikimedia.org/T75955 and the API tests.
|
||||
|
|
@ -662,6 +796,7 @@ abstract class ParsoidHandler extends Handler {
|
|||
* @param array $attribs Request attributes from getRequestAttributes()
|
||||
* @param ?string $wikitext Wikitext to transform (or null to use the
|
||||
* page specified in the request attributes).
|
||||
*
|
||||
* @return Response
|
||||
*/
|
||||
protected function wt2html(
|
||||
|
|
@ -672,75 +807,67 @@ abstract class ParsoidHandler extends Handler {
|
|||
$format = $opts['format'];
|
||||
$oldid = $attribs['oldid'];
|
||||
|
||||
$needsPageBundle = ( $format === ParsoidFormatHelper::FORMAT_PAGEBUNDLE );
|
||||
if ( $format === ParsoidFormatHelper::FORMAT_LINT ) {
|
||||
$lints = $this->wtLint( $pageConfig, $attribs, $wikitext );
|
||||
$response = $this->getResponseFactory()->createJson( $lints );
|
||||
return $response;
|
||||
}
|
||||
|
||||
// Performance Timing options
|
||||
// init refers to time elapsed before parsing begins
|
||||
$metrics = $this->metrics;
|
||||
$timing = Timing::start( $metrics );
|
||||
|
||||
if ( Semver::satisfies( $attribs['envOptions']['outputContentVersion'],
|
||||
'!=' . Parsoid::defaultHTMLVersion() ) ) {
|
||||
$metrics->increment( 'wt2html.parse.version.notdefault' );
|
||||
}
|
||||
$helper = $this->getHtmlOutputRendererHelper(
|
||||
$attribs,
|
||||
$wikitext,
|
||||
$pageConfig
|
||||
);
|
||||
|
||||
$parsoid = $this->newParsoid();
|
||||
if ( !$this->allowParserCacheWrite() ) {
|
||||
// NOTE: In theory, we want to always write to the parser cache. However,
|
||||
// the ParserCache takes a lot of disk space, and we need to have fine grained control
|
||||
// over when we write to it, so we can avoid running out of disc space.
|
||||
$helper->setUseParserCache( true, false );
|
||||
}
|
||||
|
||||
if (
|
||||
!empty( $this->parsoidSettings['devAPI'] ) &&
|
||||
( $request->getQueryParams()['follow_redirects'] ?? false )
|
||||
) {
|
||||
$content = $pageConfig->getRevisionContent();
|
||||
$revisionStore = MediaWikiServices::getInstance()->getRevisionStore();
|
||||
$revision = $revisionStore->getRevisionById( $helper->getRevisionId() );
|
||||
$content = $revision->getContent( SlotRecord::MAIN );
|
||||
$redirectTarget = $content ? $content->getRedirectTarget() : null;
|
||||
if ( $redirectTarget ) {
|
||||
$redirectInfo = $this->dataAccess->getPageInfo(
|
||||
$pageConfig, [ $redirectTarget ]
|
||||
);
|
||||
$pathParams = [
|
||||
'domain' => $attribs['envOptions']['domain'],
|
||||
'format' => $format,
|
||||
'title' => $redirectTarget,
|
||||
'revision' => $redirectInfo['revId']
|
||||
];
|
||||
|
||||
// NOTE: Core doesn't have REST endpoints that return raw wikitext,
|
||||
// so the below will fail unless the methods are overwritten.
|
||||
if ( $redirectInfo['revId'] ) {
|
||||
$redirectPath = $this->getRevisionContentEndpoint( ParsoidFormatHelper::FORMAT_WIKITEXT );
|
||||
} else {
|
||||
$redirectPath = $this->getPageContentEndpoint( ParsoidFormatHelper::FORMAT_WIKITEXT );
|
||||
}
|
||||
throw new ResponseException(
|
||||
$this->createRedirectResponse( $redirectPath, $pathParams, $request->getQueryParams() )
|
||||
$this->followWikiRedirect(
|
||||
$redirectTarget,
|
||||
$attribs['envOptions']['domain'],
|
||||
$format
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
$reqOpts = $attribs['envOptions'] + [
|
||||
'pageBundle' => $needsPageBundle,
|
||||
'contentmodel' => $opts['contentmodel'] ?? null,
|
||||
];
|
||||
$needsPageBundle = ( $format === ParsoidFormatHelper::FORMAT_PAGEBUNDLE );
|
||||
|
||||
if ( Semver::satisfies( $attribs['envOptions']['outputContentVersion'],
|
||||
'!=' . Parsoid::defaultHTMLVersion() ) ) {
|
||||
$metrics->increment( 'wt2html.parse.version.notdefault' );
|
||||
}
|
||||
|
||||
// VE, the only client using body_only property,
|
||||
// doesn't want section tags when this flag is set.
|
||||
// (T181226)
|
||||
if ( $attribs['body_only'] ) {
|
||||
$reqOpts['wrapSections'] = false;
|
||||
$reqOpts['body_only'] = true;
|
||||
$helper->setFlavor( 'fragment' );
|
||||
} elseif ( !$needsPageBundle ) {
|
||||
// Inline data-parsoid. This will happen when no special params are set.
|
||||
$helper->setFlavor( 'edit' );
|
||||
}
|
||||
|
||||
if ( $wikitext === null && $oldid !== null ) {
|
||||
$reqOpts['logLinterData'] = true;
|
||||
$mstr = 'pageWithOldid';
|
||||
} else {
|
||||
$mstr = 'wt';
|
||||
}
|
||||
|
||||
// XXX: Not necessary, since it's in the pageConfig
|
||||
// if ( isset( $attribs['pagelanguage'] ) ) {
|
||||
// $reqOpts['pagelanguage'] = $attribs['pagelanguage'];
|
||||
// }
|
||||
|
||||
$timing->end( "wt2html.$mstr.init" );
|
||||
$metrics->timing(
|
||||
"wt2html.$mstr.size.input",
|
||||
|
|
@ -748,107 +875,89 @@ abstract class ParsoidHandler extends Handler {
|
|||
);
|
||||
$parseTiming = Timing::start( $metrics );
|
||||
|
||||
if ( $format === ParsoidFormatHelper::FORMAT_LINT ) {
|
||||
try {
|
||||
$lints = $parsoid->wikitext2lint( $pageConfig, $reqOpts );
|
||||
} catch ( ClientError $e ) {
|
||||
throw new HttpException( $e->getMessage(), 400 );
|
||||
} catch ( ResourceLimitExceededException $e ) {
|
||||
throw new HttpException( $e->getMessage(), 413 );
|
||||
}
|
||||
$response = $this->getResponseFactory()->createJson( $lints );
|
||||
if ( $needsPageBundle ) {
|
||||
$pb = $helper->getPageBundle();
|
||||
|
||||
$response = $this->getResponseFactory()->createJson( $pb->responseData() );
|
||||
$helper->putHeaders( $response, false );
|
||||
|
||||
ParsoidFormatHelper::setContentType(
|
||||
$response,
|
||||
ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
|
||||
$pb->version
|
||||
);
|
||||
} else {
|
||||
$parserOutput = new ParserOutput();
|
||||
try {
|
||||
$out = $parsoid->wikitext2html(
|
||||
$pageConfig, $reqOpts, $headers, $parserOutput
|
||||
);
|
||||
} catch ( ClientError $e ) {
|
||||
throw new HttpException( $e->getMessage(), 400 );
|
||||
} catch ( ResourceLimitExceededException $e ) {
|
||||
throw new HttpException( $e->getMessage(), 413 );
|
||||
$out = $helper->getHtml();
|
||||
|
||||
$response = $this->getResponseFactory()->create();
|
||||
$response->getBody()->write( $out->getRawText() );
|
||||
|
||||
$helper->putHeaders( $response, true );
|
||||
|
||||
// XXX: Since we don't enable stashing, the ETag is not really useful.
|
||||
$eTag = $helper->getETag();
|
||||
if ( $eTag ) {
|
||||
$response->setHeader( 'ETag', $eTag );
|
||||
}
|
||||
if ( $needsPageBundle ) {
|
||||
$response = $this->getResponseFactory()->createJson( $out->responseData() );
|
||||
ParsoidFormatHelper::setContentType( $response, ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
|
||||
$out->version );
|
||||
} else {
|
||||
$response = $this->getResponseFactory()->create();
|
||||
ParsoidFormatHelper::setContentType( $response, ParsoidFormatHelper::FORMAT_HTML,
|
||||
$attribs['envOptions']['outputContentVersion'] );
|
||||
$response->getBody()->write( $out );
|
||||
// @phan-suppress-next-next-line PhanTypeArraySuspiciousNullable $headers can't be null after the
|
||||
// method call, but the docblock of wikitext2html doesn't say that.
|
||||
$response->setHeader( 'Content-Language', $headers['content-language'] );
|
||||
// @phan-suppress-next-line PhanTypeArraySuspiciousNullable Same.
|
||||
$response->addHeader( 'Vary', $headers['vary'] );
|
||||
}
|
||||
|
||||
// XXX: For pagebundle requests, this can be somewhat inflated
|
||||
// because of pagebundle json-encoding overheads
|
||||
$outSize = $response->getBody()->getSize();
|
||||
$parseTime = $parseTiming->end( "wt2html.$mstr.parse" );
|
||||
$timing->end( 'wt2html.total' );
|
||||
$metrics->timing( "wt2html.$mstr.size.output", $outSize );
|
||||
|
||||
// Ignore slow parse metrics for non-oldid parses
|
||||
if ( $mstr === 'pageWithOldid' ) {
|
||||
if ( $parseTime > 3000 ) {
|
||||
LoggerFactory::getInstance( 'slow-parsoid' )
|
||||
->info( 'Parsing {title} was slow, took {time} seconds', [
|
||||
'time' => number_format( $parseTime / 1000, 2 ),
|
||||
'title' => $pageConfig->getTitle(),
|
||||
] );
|
||||
}
|
||||
|
||||
// NOTE: We used to generate an ETag here, but since it was random every time and the
|
||||
// output wasn't stored anywhere, it could not possibly match anything, ever.
|
||||
if ( $parseTime > 10 && $outSize > 100 ) {
|
||||
// * Don't bother with this metric for really small parse times
|
||||
// p99 for initialization time is ~7ms according to grafana.
|
||||
// So, 10ms ensures that startup overheads don't skew the metrics
|
||||
// * For body_only=false requests, <head> section isn't generated
|
||||
// and if the output is small, per-request overheads can skew
|
||||
// the timePerKB metrics.
|
||||
|
||||
// FIXME: For pagebundle requests, this can be somewhat inflated
|
||||
// because of pagebundle json-encoding overheads
|
||||
$outSize = $response->getBody()->getSize();
|
||||
$parseTime = $parseTiming->end( "wt2html.$mstr.parse" );
|
||||
$timing->end( 'wt2html.total' );
|
||||
$metrics->timing( "wt2html.$mstr.size.output", $outSize );
|
||||
// NOTE: This is slightly misleading since there are fixed costs
|
||||
// for generating output like the <head> section and should be factored in,
|
||||
// but this is good enough for now as a useful first degree of approximation.
|
||||
$timePerKB = $parseTime * 1024 / $outSize;
|
||||
$metrics->timing( 'wt2html.timePerKB', $timePerKB );
|
||||
|
||||
// Ignore slow parse metrics for non-oldid parses
|
||||
if ( $mstr === 'pageWithOldid' ) {
|
||||
if ( $parseTime > 3000 ) {
|
||||
if ( $timePerKB > 500 ) {
|
||||
// At 100ms/KB, even a 100KB page which isn't that large will take 10s.
|
||||
// So, we probably want to shoot for a threshold under 100ms.
|
||||
// But, let's start with 500ms+ outliers first and see what we uncover.
|
||||
LoggerFactory::getInstance( 'slow-parsoid' )
|
||||
->info( 'Parsing {title} was slow, took {time} seconds', [
|
||||
->info( 'Parsing {title} was slow, timePerKB took {timePerKB} ms, total: {time} seconds', [
|
||||
'time' => number_format( $parseTime / 1000, 2 ),
|
||||
'timePerKB' => number_format( $timePerKB, 1 ),
|
||||
'title' => $pageConfig->getTitle(),
|
||||
] );
|
||||
}
|
||||
|
||||
if ( $parseTime > 10 && $outSize > 100 ) {
|
||||
// * Don't bother with this metric for really small parse times
|
||||
// p99 for initialization time is ~7ms according to grafana.
|
||||
// So, 10ms ensures that startup overheads don't skew the metrics
|
||||
// * For body_only=false requests, <head> section isn't generated
|
||||
// and if the output is small, per-request overheads can skew
|
||||
// the timePerKB metrics.
|
||||
|
||||
// FIXME: This is slightly misleading since there are fixed costs
|
||||
// for generating output like the <head> section and should be factored in,
|
||||
// but this is good enough for now as a useful first degree of approximation.
|
||||
$timePerKB = $parseTime * 1024 / $outSize;
|
||||
$metrics->timing( 'wt2html.timePerKB', $timePerKB );
|
||||
|
||||
if ( $timePerKB > 500 ) {
|
||||
// At 100ms/KB, even a 100KB page which isn't that large will take 10s.
|
||||
// So, we probably want to shoot for a threshold under 100ms.
|
||||
// But, let's start with 500ms+ outliers first and see what we uncover.
|
||||
LoggerFactory::getInstance( 'slow-parsoid' )
|
||||
->info( 'Parsing {title} was slow, timePerKB took {timePerKB} ms, total: {time} seconds', [
|
||||
'time' => number_format( $parseTime / 1000, 2 ),
|
||||
'timePerKB' => number_format( $timePerKB, 1 ),
|
||||
'title' => $pageConfig->getTitle(),
|
||||
] );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( $wikitext !== null ) {
|
||||
// Don't cache requests when wt is set in case somebody uses
|
||||
// GET for wikitext parsing
|
||||
// XXX: can we just refuse to do wikitext parsing in a GET request?
|
||||
$response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' );
|
||||
} elseif ( $oldid !== null ) {
|
||||
// FIXME this should be handled in core (cf OutputPage::sendCacheControl)
|
||||
// XXX: can this go away? Parsoid's PageContent class doesn't expose suppressed revision content.
|
||||
if ( $request->getHeaderLine( 'Cookie' ) ||
|
||||
$request->getHeaderLine( 'Authorization' ) ) {
|
||||
// Don't cache requests with a session.
|
||||
$response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' );
|
||||
}
|
||||
// Indicate the MediaWiki revision in a header as well for
|
||||
// ease of extraction in clients.
|
||||
$response->setHeader( 'Content-Revision-Id', $oldid );
|
||||
} else {
|
||||
throw new LogicException( 'Should be unreachable' );
|
||||
}
|
||||
return $response;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,7 +45,8 @@ final class PageBundleParserOutputConverter {
|
|||
'parsoid' => $pageBundle->parsoid,
|
||||
'mw' => $pageBundle->mw,
|
||||
'version' => $pageBundle->version,
|
||||
'headers' => $pageBundle->headers
|
||||
'headers' => $pageBundle->headers,
|
||||
'contentmodel' => $pageBundle->contentmodel,
|
||||
]
|
||||
);
|
||||
|
||||
|
|
@ -66,7 +67,8 @@ final class PageBundleParserOutputConverter {
|
|||
$pageBundleData['parsoid'] ?? [],
|
||||
$pageBundleData['mw'] ?? [],
|
||||
$pageBundleData['version'] ?? null,
|
||||
$pageBundleData['headers'] ?? []
|
||||
$pageBundleData['headers'] ?? [],
|
||||
$pageBundleData['contentmodel'] ?? null
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ use Config;
|
|||
use HashConfig;
|
||||
use IBufferingStatsdDataFactory;
|
||||
use InvalidArgumentException;
|
||||
use Language;
|
||||
use Liuggio\StatsdClient\Factory\StatsdDataFactory;
|
||||
use MediaWiki\Config\ServiceOptions;
|
||||
use MediaWiki\Logger\LoggerFactory;
|
||||
|
|
@ -43,6 +42,7 @@ use ParserOptions;
|
|||
use ParserOutput;
|
||||
use Status;
|
||||
use UnexpectedValueException;
|
||||
use Wikimedia\Parsoid\Config\PageConfig;
|
||||
use Wikimedia\Parsoid\Config\SiteConfig;
|
||||
use Wikimedia\Parsoid\Core\ClientError;
|
||||
use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
|
||||
|
|
@ -238,41 +238,28 @@ class ParsoidOutputAccess {
|
|||
}
|
||||
|
||||
/**
|
||||
* @param PageIdentity $page
|
||||
* @param array $envOptions
|
||||
* @param ?RevisionRecord $revision
|
||||
* @param Language|null $languageOverride
|
||||
* @param PageConfig $pageConfig
|
||||
* @param array $parsoidOptions
|
||||
*
|
||||
* @return Status
|
||||
*/
|
||||
private function parseInternal(
|
||||
PageIdentity $page,
|
||||
array $envOptions,
|
||||
?RevisionRecord $revision = null,
|
||||
Language $languageOverride = null
|
||||
PageConfig $pageConfig,
|
||||
array $parsoidOptions
|
||||
): Status {
|
||||
$defaultOptions = [
|
||||
'pageBundle' => true,
|
||||
'prefix' => $this->parsoidWikiId,
|
||||
'pageName' => $page,
|
||||
'htmlVariantLanguage' => $languageOverride ? $languageOverride->getCode() : null,
|
||||
'pageName' => $pageConfig->getTitle(),
|
||||
'htmlVariantLanguage' => $pageConfig->getPageLanguage(),
|
||||
'outputContentVersion' => Parsoid::defaultHTMLVersion(),
|
||||
];
|
||||
|
||||
try {
|
||||
$langCode = $languageOverride ? $languageOverride->getCode() : null;
|
||||
$pageConfig = $this->parsoidPageConfigFactory->create(
|
||||
$page,
|
||||
null,
|
||||
$revision,
|
||||
null,
|
||||
$langCode,
|
||||
$this->options->get( MainConfigNames::ParsoidSettings )
|
||||
);
|
||||
$startTime = microtime( true );
|
||||
$pageBundle = $this->parsoid->wikitext2html(
|
||||
$pageConfig,
|
||||
$envOptions + $defaultOptions
|
||||
$parsoidOptions + $defaultOptions
|
||||
);
|
||||
|
||||
$parserOutput = PageBundleParserOutputConverter::parserOutputFromPageBundle( $pageBundle );
|
||||
|
|
@ -281,7 +268,7 @@ class ParsoidOutputAccess {
|
|||
LoggerFactory::getInstance( 'slow-parsoid' )
|
||||
->info( 'Parsing {title} was slow, took {time} seconds', [
|
||||
'time' => number_format( $time, 2 ),
|
||||
'title' => (string)$page,
|
||||
'title' => $pageConfig->getTitle(),
|
||||
] );
|
||||
}
|
||||
return Status::newGood( $parserOutput );
|
||||
|
|
@ -381,7 +368,7 @@ class ParsoidOutputAccess {
|
|||
/**
|
||||
* @param PageIdentity $page
|
||||
* @param ParserOptions $parserOpts
|
||||
* @param array $envOptions
|
||||
* @param array $parsoidOptions
|
||||
* @param RevisionRecord|int|null $revision
|
||||
*
|
||||
* @return Status
|
||||
|
|
@ -389,7 +376,7 @@ class ParsoidOutputAccess {
|
|||
public function parse(
|
||||
PageIdentity $page,
|
||||
ParserOptions $parserOpts,
|
||||
array $envOptions,
|
||||
array $parsoidOptions,
|
||||
$revision
|
||||
): Status {
|
||||
// NOTE: If we have a RevisionRecord already, just use it, there is no need to resolve $page to
|
||||
|
|
@ -398,9 +385,18 @@ class ParsoidOutputAccess {
|
|||
[ $page, $revision ] = $this->resolveRevision( $page, $revision );
|
||||
}
|
||||
|
||||
$revId = $revision ? $revision->getId() : $page->getId();
|
||||
$languageOverride = $parserOpts->getTargetLanguage();
|
||||
$langCode = $languageOverride ? $languageOverride->getCode() : null;
|
||||
$pageConfig = $this->parsoidPageConfigFactory->create(
|
||||
$page,
|
||||
null,
|
||||
$revision,
|
||||
null,
|
||||
$langCode,
|
||||
$this->options->get( MainConfigNames::ParsoidSettings )
|
||||
);
|
||||
|
||||
$status = $this->parseInternal( $page, $envOptions, $revision, $parserOpts->getTargetLanguage() );
|
||||
$status = $this->parseInternal( $pageConfig, $parsoidOptions );
|
||||
|
||||
if ( !$status->isOK() ) {
|
||||
return $status;
|
||||
|
|
@ -411,6 +407,7 @@ class ParsoidOutputAccess {
|
|||
// TODO: when we make tighter integration with Parsoid, render ID should become
|
||||
// a standard ParserOutput property. Nothing else needs it now, so don't generate
|
||||
// it in ParserCache just yet.
|
||||
$revId = $revision->getId();
|
||||
$parsoidRenderId = new ParsoidRenderID( $revId, $this->globalIdGenerator->newUUIDv1() );
|
||||
$parserOutput->setExtensionData( self::RENDER_ID_KEY, $parsoidRenderId->getKey() );
|
||||
|
||||
|
|
|
|||
|
|
@ -968,6 +968,8 @@ describe( '/transform/ endpoint', function () {
|
|||
} );
|
||||
|
||||
describe( 'Variant conversion', function () {
|
||||
// NOTE: Continue to accept sr-el for a while, to remain compatible with older versions of the parsoid lib.
|
||||
const expectedCodeSerbianLatin = /sr-el|sr-Latn/;
|
||||
|
||||
it( 'should perform variant conversion for transform given pagelanguage in HTTP header (html)', function ( done ) {
|
||||
client.req
|
||||
|
|
@ -977,7 +979,7 @@ describe( '/transform/ endpoint', function () {
|
|||
.send( {
|
||||
wikitext: 'абвг abcd x'
|
||||
} )
|
||||
.expect( 'Content-Language', 'sr-el' )
|
||||
.expect( 'Content-Language', expectedCodeSerbianLatin )
|
||||
.expect( 'Vary', /\bAccept-Language\b/i )
|
||||
.expect( validHtmlResponse( ( doc ) => {
|
||||
doc.body.textContent.should.equal( 'abvg abcd x' );
|
||||
|
|
@ -1003,7 +1005,7 @@ describe( '/transform/ endpoint', function () {
|
|||
const headers = res.body.html.headers;
|
||||
headers.should.have.property( 'content-language' );
|
||||
headers.should.have.property( 'vary' );
|
||||
headers[ 'content-language' ].should.equal( 'sr-el' );
|
||||
headers[ 'content-language' ].should.equal( expectedCodeSerbianLatin );
|
||||
headers.vary.should.match( /\bAccept-Language\b/i );
|
||||
} )
|
||||
.end( done );
|
||||
|
|
@ -1021,7 +1023,7 @@ describe( '/transform/ endpoint', function () {
|
|||
body: 'абвг abcd x'
|
||||
}
|
||||
} )
|
||||
.expect( 'Content-Language', 'sr-el' )
|
||||
.expect( 'Content-Language', expectedCodeSerbianLatin )
|
||||
.expect( 'Vary', /\bAccept-Language\b/i )
|
||||
.expect( validHtmlResponse( ( doc ) => {
|
||||
doc.body.textContent.should.equal( 'abvg abcd x' );
|
||||
|
|
@ -1051,7 +1053,7 @@ describe( '/transform/ endpoint', function () {
|
|||
const headers = res.body.html.headers;
|
||||
headers.should.have.property( 'content-language' );
|
||||
headers.should.have.property( 'vary' );
|
||||
headers[ 'content-language' ].should.equal( 'sr-el' );
|
||||
headers[ 'content-language' ].should.match( expectedCodeSerbianLatin );
|
||||
headers.vary.should.match( /\bAccept-Language\b/i );
|
||||
} )
|
||||
.end( done );
|
||||
|
|
@ -1068,7 +1070,7 @@ describe( '/transform/ endpoint', function () {
|
|||
body: 'абвг abcd x'
|
||||
}
|
||||
} )
|
||||
.expect( 'Content-Language', 'sr-el' )
|
||||
.expect( 'Content-Language', expectedCodeSerbianLatin )
|
||||
.expect( 'Vary', /\bAccept-Language\b/i )
|
||||
.expect( validHtmlResponse( ( doc ) => {
|
||||
doc.body.textContent.should.equal( 'abvg abcd x' );
|
||||
|
|
@ -1094,7 +1096,7 @@ describe( '/transform/ endpoint', function () {
|
|||
const headers = res.body.html.headers;
|
||||
headers.should.have.property( 'content-language' );
|
||||
headers.should.have.property( 'vary' );
|
||||
headers[ 'content-language' ].should.equal( 'sr-el' );
|
||||
headers[ 'content-language' ].should.match( expectedCodeSerbianLatin );
|
||||
headers.vary.should.match( /\bAccept-Language\b/i );
|
||||
} )
|
||||
.end( done );
|
||||
|
|
|
|||
|
|
@ -7,9 +7,11 @@ use Generator;
|
|||
use MediaWiki\MainConfigNames;
|
||||
use MediaWiki\MainConfigSchema;
|
||||
use MediaWiki\Page\PageIdentity;
|
||||
use MediaWiki\Parser\ParserCacheFactory;
|
||||
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
|
||||
use MediaWiki\Parser\Parsoid\HtmlToContentTransform;
|
||||
use MediaWiki\Parser\Parsoid\HtmlTransformFactory;
|
||||
use MediaWiki\Parser\RevisionOutputCache;
|
||||
use MediaWiki\Permissions\UltimateAuthority;
|
||||
use MediaWiki\Rest\Handler\HtmlInputTransformHelper;
|
||||
use MediaWiki\Rest\Handler\ParsoidFormatHelper;
|
||||
|
|
@ -26,6 +28,7 @@ use MediaWiki\Tests\Rest\RestTestTrait;
|
|||
use MediaWiki\User\UserIdentityValue;
|
||||
use MediaWikiIntegrationTestCase;
|
||||
use NullStatsdDataFactory;
|
||||
use ParserCache;
|
||||
use PHPUnit\Framework\MockObject\MockObject;
|
||||
use TitleValue;
|
||||
use Wikimedia\Message\ITextFormatter;
|
||||
|
|
@ -248,12 +251,12 @@ class ParsoidHandlerTest extends MediaWikiIntegrationTestCase {
|
|||
}
|
||||
|
||||
public function wt2html(
|
||||
PageConfig $pageConfig,
|
||||
PageConfig $pageConfigConfig,
|
||||
array $attribs,
|
||||
?string $wikitext = null
|
||||
) {
|
||||
return parent::wt2html(
|
||||
$pageConfig,
|
||||
$pageConfigConfig,
|
||||
$attribs,
|
||||
$wikitext
|
||||
);
|
||||
|
|
@ -1910,6 +1913,8 @@ class ParsoidHandlerTest extends MediaWikiIntegrationTestCase {
|
|||
array $unexpectedHtml,
|
||||
array $expectedHeaders = []
|
||||
) {
|
||||
// $this->overrideConfigValue( 'TemporaryParsoidHandlerParserCacheWriteRatio', 0 );
|
||||
|
||||
$hmtlProfileUri = 'https://www.mediawiki.org/wiki/Specs/html/2.6.0';
|
||||
$expectedHeaders += [
|
||||
'content-type' => "text/x-wiki; charset=utf-8; profile=\"$hmtlProfileUri\"",
|
||||
|
|
@ -1949,6 +1954,43 @@ class ParsoidHandlerTest extends MediaWikiIntegrationTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public function testWt2html_ParserCache() {
|
||||
$page = $this->getExistingTestPage();
|
||||
$pageConfig = $this->getPageConfig( $page );
|
||||
|
||||
$parserCache = $this->createNoOpMock( ParserCache::class, [ 'save', 'get' ] );
|
||||
|
||||
// This is the critical assertion in this test case: the save() method should
|
||||
// be called exactly once!
|
||||
$parserCache->expects( $this->once() )->method( 'save' );
|
||||
$parserCache->method( 'get' )->willReturn( false );
|
||||
|
||||
$parserCacheFactory = $this->createNoOpMock(
|
||||
ParserCacheFactory::class,
|
||||
[ 'getParserCache', 'getRevisionOutputCache' ]
|
||||
);
|
||||
$parserCacheFactory->method( 'getParserCache' )->willReturn( $parserCache );
|
||||
$parserCacheFactory->method( 'getRevisionOutputCache' )->willReturn(
|
||||
$this->createNoOpMock( RevisionOutputCache::class )
|
||||
);
|
||||
|
||||
$this->setService( 'ParserCacheFactory', $parserCacheFactory );
|
||||
|
||||
$attribs = self::DEFAULT_ATTRIBS;
|
||||
$attribs['opts']['from'] = 'wikitext';
|
||||
$attribs['opts']['format'] = 'html';
|
||||
|
||||
$handler = $this->newParsoidHandler();
|
||||
|
||||
// This should trigger a parser cache write, because we didn't set a write-ratio
|
||||
$handler->wt2html( $pageConfig, $attribs );
|
||||
|
||||
$this->overrideConfigValue( 'TemporaryParsoidHandlerParserCacheWriteRatio', 0 );
|
||||
|
||||
// This should not trigger a parser cache write, because we set the write-ratio to 0
|
||||
$handler->wt2html( $pageConfig, $attribs );
|
||||
}
|
||||
|
||||
// TODO: test wt2html failure modes
|
||||
// TODO: test redlinks
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ class PageBundleParserOutputConverterTest extends MediaWikiUnitTestCase {
|
|||
$this->assertSame( $pageBundle->headers, $extensionData['headers'] );
|
||||
$this->assertSame( $pageBundle->headers['content-language'], $extensionData['headers']['content-language'] );
|
||||
$this->assertSame( $pageBundle->version, $extensionData['version'] );
|
||||
$this->assertSame( $pageBundle->contentmodel, $extensionData['contentmodel'] );
|
||||
}
|
||||
|
||||
public function provideParserOutputFromPageBundle() {
|
||||
|
|
@ -30,7 +31,8 @@ class PageBundleParserOutputConverterTest extends MediaWikiUnitTestCase {
|
|||
[ 'ids' => '1.33' ],
|
||||
[ 'ids' => '1.33' ],
|
||||
'1.x',
|
||||
[ 'content-language' => 'abc' ]
|
||||
[ 'content-language' => 'abc' ],
|
||||
'testing'
|
||||
)
|
||||
];
|
||||
|
||||
|
|
@ -59,6 +61,7 @@ class PageBundleParserOutputConverterTest extends MediaWikiUnitTestCase {
|
|||
$this->assertSame( $extensionData['version'] ?? null, $pageBundle->version );
|
||||
$this->assertSame( $extensionData['headers'] ?? [], $pageBundle->headers );
|
||||
$this->assertSame( $extensionData['headers']['content-language'], $pageBundle->headers['content-language'] );
|
||||
$this->assertSame( $extensionData['contentmodel'] ?? null, $pageBundle->contentmodel );
|
||||
}
|
||||
|
||||
public function providePageBundleFromParserOutput() {
|
||||
|
|
@ -69,7 +72,8 @@ class PageBundleParserOutputConverterTest extends MediaWikiUnitTestCase {
|
|||
'parsoid' => [ 'ids' => '1.22' ],
|
||||
'mw' => [],
|
||||
'version' => '2.x',
|
||||
'headers' => [ 'content-language' => 'xyz' ]
|
||||
'headers' => [ 'content-language' => 'xyz' ],
|
||||
'testing'
|
||||
]
|
||||
)
|
||||
];
|
||||
|
|
|
|||
Loading…
Reference in a new issue