[Re-apply] ParsoidHandler: use HtmlOutputRendererHelper in wt2html
This restores change Ie430acd0753880d88370bb9f22bb40a0f9ded917.
This reverts commit ab6baad1a5.
NOTE: Also needs the patch that fixes the original reason for the
revert: Ief721c23ed9a57d781cfdac625a62113f22f87a5
Change-Id: Ic48db1b5fdff1dfd4f2d2643d64252e5fc721e79
This commit is contained in:
parent
ab6baad1a5
commit
5cb388455b
8 changed files with 330 additions and 165 deletions
|
|
@ -174,8 +174,6 @@ class HtmlOutputRendererHelper {
|
|||
* Flavors may influence parser options, parsoid options, and DOM transformations.
|
||||
* They will be reflected by the ETag returned by getETag().
|
||||
*
|
||||
* Flavors cannot be combined. For more fine-grained control, use setOption
|
||||
*
|
||||
* @param string $flavor
|
||||
*
|
||||
* @return void
|
||||
|
|
@ -577,14 +575,20 @@ class HtmlOutputRendererHelper {
|
|||
* Set the HTTP headers based on the response generated
|
||||
*
|
||||
* @param ResponseInterface $response
|
||||
* @param bool $setContentLanguageHeader
|
||||
* @param bool $forHtml Whether the response will be HTML (rather than JSON)
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function putHeaders( ResponseInterface $response, bool $setContentLanguageHeader ) {
|
||||
public function putHeaders( ResponseInterface $response, bool $forHtml = true ) {
|
||||
if ( $forHtml ) {
|
||||
// For HTML we want to set the Content-Language. For JSON, we probably don't.
|
||||
$response->setHeader( 'Content-Language', $this->getHtmlOutputContentLanguage() );
|
||||
|
||||
$pb = $this->getPageBundle();
|
||||
ParsoidFormatHelper::setContentType( $response, ParsoidFormatHelper::FORMAT_HTML, $pb->version );
|
||||
}
|
||||
|
||||
if ( $this->targetLanguageCode ) {
|
||||
if ( $setContentLanguageHeader ) {
|
||||
$response->setHeader( 'Content-Language', $this->getHtmlOutputContentLanguage() );
|
||||
}
|
||||
$response->addHeader( 'Vary', 'Accept-Language' );
|
||||
}
|
||||
|
||||
|
|
@ -593,6 +597,12 @@ class HtmlOutputRendererHelper {
|
|||
if ( !$this->isCacheable ) {
|
||||
$response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' );
|
||||
}
|
||||
|
||||
// TODO: cache control for stable HTML? See ContentHelper::setCacheControl
|
||||
|
||||
if ( $this->getRevisionId() ) {
|
||||
$response->setHeader( 'Content-Revision-Id', (string)$this->getRevisionId() );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -109,7 +109,6 @@ class PageHTMLHandler extends SimpleHandler {
|
|||
switch ( $outputMode ) {
|
||||
case 'html':
|
||||
$response = $this->getResponseFactory()->create();
|
||||
$response->setHeader( 'Content-Type', 'text/html' );
|
||||
$this->contentHelper->setCacheControl( $response, $parserOutput->getCacheExpiry() );
|
||||
$response->setBody( new StringStream( $parserOutputHtml ) );
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -24,9 +24,11 @@ use ExtensionRegistry;
|
|||
use InvalidArgumentException;
|
||||
use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
|
||||
use LogicException;
|
||||
use MediaWiki\Linker\LinkTarget;
|
||||
use MediaWiki\Logger\LoggerFactory;
|
||||
use MediaWiki\MainConfigNames;
|
||||
use MediaWiki\MediaWikiServices;
|
||||
use MediaWiki\Page\ExistingPageRecord;
|
||||
use MediaWiki\Page\PageIdentity;
|
||||
use MediaWiki\Rest\Handler;
|
||||
use MediaWiki\Rest\HttpException;
|
||||
|
|
@ -37,7 +39,6 @@ use MediaWiki\Revision\MutableRevisionRecord;
|
|||
use MediaWiki\Revision\RevisionAccessException;
|
||||
use MediaWiki\Revision\SlotRecord;
|
||||
use MobileContext;
|
||||
use ParserOutput;
|
||||
use RequestContext;
|
||||
use Title;
|
||||
use WikiMap;
|
||||
|
|
@ -62,6 +63,7 @@ use WikitextContent;
|
|||
* Base class for Parsoid handlers.
|
||||
*/
|
||||
abstract class ParsoidHandler extends Handler {
|
||||
|
||||
// TODO logging, timeouts(?), CORS
|
||||
// TODO content negotiation (routes.js routes.acceptable)
|
||||
// TODO handle MaxConcurrentCallsError (pool counter?)
|
||||
|
|
@ -279,6 +281,70 @@ abstract class ParsoidHandler extends Handler {
|
|||
return $this->requestAttributes;
|
||||
}
|
||||
|
||||
/**
 * Create an HtmlOutputRendererHelper and configure it from the request attributes.
 *
 * The helper is initialised with the page, an empty parameter list and the
 * user of the main request context. Depending on what is present in
 * $attribs, the content source, output profile version, page language and
 * variant conversion language are then applied to it.
 *
 * @param array $attribs Request attributes; the 'opts', 'envOptions' and
 *   'pagelanguage' entries are consulted.
 * @param ?string $source Content to render, or null to leave the helper's
 *   content source untouched.
 * @param PageConfig|PageIdentity $page
 *
 * @return HtmlOutputRendererHelper
 * @throws HttpException with status 400 if a PageConfig's title cannot be
 *   resolved to a page.
 */
private function getHtmlOutputRendererHelper(
	array $attribs,
	?string $source,
	$page
): HtmlOutputRendererHelper {
	$mwServices = MediaWikiServices::getInstance();

	// PageConfig is still accepted for backwards compatibility; ideally the
	// lower level code would be the one creating it. Resolve it to a page
	// via its title.
	if ( $page instanceof PageConfig ) {
		$title = $page->getTitle();
		$page = $mwServices->getPageStore()->getPageByText( $title );

		if ( !$page ) {
			throw new HttpException( "Bad title: $title", 400 );
		}
	}

	$renderer = new HtmlOutputRendererHelper(
		$mwServices->getParsoidOutputStash(),
		$mwServices->getStatsdDataFactory(),
		$mwServices->getParsoidOutputAccess(),
		$mwServices->getHtmlTransformFactory(),
		$mwServices->getContentHandlerFactory(),
		$mwServices->getLanguageFactory()
	);

	$requestUser = RequestContext::getMain()->getUser();
	$initParams = [];
	$renderer->init( $page, $initParams, $requestUser );

	$envOptions = $attribs['envOptions'] ?? [];

	// XXX: should default to the page's content model?
	$contentModel = $attribs['opts']['contentmodel']
		?? $envOptions['contentmodel']
		?? CONTENT_MODEL_WIKITEXT;

	if ( $source !== null ) {
		$renderer->setContentSource( $source, $contentModel );
	}

	// Only request a specific output profile when it deviates from Parsoid's default.
	$requestedVersion = $envOptions['outputContentVersion'] ?? null;
	if ( $requestedVersion !== null && $requestedVersion !== Parsoid::defaultHTMLVersion() ) {
		$renderer->setOutputProfileVersion( $requestedVersion );
	}

	if ( isset( $attribs['pagelanguage'] ) ) {
		$renderer->setPageLanguage( $attribs['pagelanguage'] );
	}

	if ( isset( $envOptions['htmlVariantLanguage'] ) ) {
		$renderer->setVariantConversionLanguage( $envOptions['htmlVariantLanguage'] );
	}

	return $renderer;
}
|
||||
|
||||
/**
|
||||
* @param array $attribs
|
||||
* @param string $html
|
||||
|
|
@ -617,6 +683,43 @@ abstract class ParsoidHandler extends Handler {
|
|||
return '/v1/revision/{revision}/html';
|
||||
}
|
||||
|
||||
/**
 * Redirect the client to the content endpoint of a wiki redirect's target.
 *
 * The link target is resolved to a page through the PageStore. If that
 * yields an existing page record, a ResponseException carrying a redirect
 * response is thrown; otherwise the method returns without doing anything.
 *
 * @param LinkTarget $redirectTarget Target of the wiki redirect
 * @param string $domain Value for the 'domain' path parameter
 * @param string $format Value for the 'format' path parameter
 *
 * @throws ResponseException
 */
private function followWikiRedirect( $redirectTarget, $domain, $format ): void {
	$services = MediaWikiServices::getInstance();
	$pageStore = $services->getPageStore();
	$titleFormatter = $services->getTitleFormatter();

	$targetPage = $pageStore->getPageForLink( $redirectTarget );
	if ( !( $targetPage instanceof ExistingPageRecord ) ) {
		// Nothing we can redirect to.
		return;
	}

	$pathParams = [
		'domain' => $domain,
		'format' => $format,
		'title' => $titleFormatter->getPrefixedDBkey( $targetPage ),
		'revision' => $targetPage->getLatest()
	];

	// NOTE: Core doesn't have REST endpoints that return raw wikitext,
	// so the below will fail unless the methods are overwritten.
	// NOTE(review): exists() is presumably always true for an
	// ExistingPageRecord, which would make the page-content branch dead - confirm.
	$redirectPath = $targetPage->exists()
		? $this->getRevisionContentEndpoint( ParsoidFormatHelper::FORMAT_WIKITEXT )
		: $this->getPageContentEndpoint( ParsoidFormatHelper::FORMAT_WIKITEXT );

	$redirectResponse = $this->createRedirectResponse(
		$redirectPath,
		$pathParams,
		$this->getRequest()->getQueryParams()
	);

	throw new ResponseException( $redirectResponse );
}
|
||||
|
||||
/**
|
||||
* Expand the current URL with the latest revision number and redirect there.
|
||||
*
|
||||
|
|
@ -654,6 +757,37 @@ abstract class ParsoidHandler extends Handler {
|
|||
return $this->createRedirectResponse( $newPath, $pathParams, $this->getRequest()->getQueryParams() );
|
||||
}
|
||||
|
||||
/**
 * Run Parsoid's linter over the given page.
 *
 * When no explicit wikitext is supplied and the request attributes carry
 * a non-null 'oldid', the 'logLinterData' option is switched on.
 *
 * @param PageConfig $pageConfig
 * @param array $attribs Request attributes; 'envOptions' and 'oldid' are read.
 * @param ?string $wikitext Wikitext supplied with the request, if any.
 *
 * @return mixed Whatever Parsoid::wikitext2lint() returns.
 * @throws HttpException 400 on a ClientError, 413 when a resource limit is exceeded.
 */
public function wtLint( PageConfig $pageConfig, array $attribs, ?string $wikitext = null ) {
	$lintOptions = $attribs['envOptions'];
	$revisionId = $attribs['oldid'];

	$lintingStoredRevision = ( $wikitext === null ) && ( $revisionId !== null );
	if ( $lintingStoredRevision ) {
		$lintOptions['logLinterData'] = true;
	}

	try {
		return $this->newParsoid()->wikitext2lint( $pageConfig, $lintOptions );
	} catch ( ClientError $clientError ) {
		throw new HttpException( $clientError->getMessage(), 400 );
	} catch ( ResourceLimitExceededException $limitError ) {
		throw new HttpException( $limitError->getMessage(), 413 );
	}
}
|
||||
|
||||
/**
 * Decide whether this request may write to the parser cache.
 *
 * Without the temporary write-ratio setting, writes are always allowed.
 * With it, a write is allowed only when a random draw falls below the
 * configured ratio.
 *
 * @return bool
 */
private function allowParserCacheWrite() {
	// HACK: remove before the release of MW 1.40 / early 2023.
	$ratioSetting = 'TemporaryParsoidHandlerParserCacheWriteRatio';
	$mainConfig = RequestContext::getMain()->getConfig();

	if ( !$mainConfig->has( $ratioSetting ) ) {
		return true;
	}

	// Cache writes have to be ramped up carefully,
	// otherwise we could run out of disk space.
	return wfRandom() < $mainConfig->get( $ratioSetting );
}
|
||||
|
||||
/**
|
||||
* Wikitext -> HTML helper.
|
||||
* Spec'd in https://phabricator.wikimedia.org/T75955 and the API tests.
|
||||
|
|
@ -662,6 +796,7 @@ abstract class ParsoidHandler extends Handler {
|
|||
* @param array $attribs Request attributes from getRequestAttributes()
|
||||
* @param ?string $wikitext Wikitext to transform (or null to use the
|
||||
* page specified in the request attributes).
|
||||
*
|
||||
* @return Response
|
||||
*/
|
||||
protected function wt2html(
|
||||
|
|
@ -672,75 +807,67 @@ abstract class ParsoidHandler extends Handler {
|
|||
$format = $opts['format'];
|
||||
$oldid = $attribs['oldid'];
|
||||
|
||||
$needsPageBundle = ( $format === ParsoidFormatHelper::FORMAT_PAGEBUNDLE );
|
||||
if ( $format === ParsoidFormatHelper::FORMAT_LINT ) {
|
||||
$lints = $this->wtLint( $pageConfig, $attribs, $wikitext );
|
||||
$response = $this->getResponseFactory()->createJson( $lints );
|
||||
return $response;
|
||||
}
|
||||
|
||||
// Performance Timing options
|
||||
// init refers to time elapsed before parsing begins
|
||||
$metrics = $this->metrics;
|
||||
$timing = Timing::start( $metrics );
|
||||
|
||||
if ( Semver::satisfies( $attribs['envOptions']['outputContentVersion'],
|
||||
'!=' . Parsoid::defaultHTMLVersion() ) ) {
|
||||
$metrics->increment( 'wt2html.parse.version.notdefault' );
|
||||
}
|
||||
$helper = $this->getHtmlOutputRendererHelper(
|
||||
$attribs,
|
||||
$wikitext,
|
||||
$pageConfig
|
||||
);
|
||||
|
||||
$parsoid = $this->newParsoid();
|
||||
if ( !$this->allowParserCacheWrite() ) {
|
||||
// NOTE: In theory, we want to always write to the parser cache. However,
|
||||
// the ParserCache takes a lot of disk space, and we need to have fine grained control
|
||||
// over when we write to it, so we can avoid running out of disc space.
|
||||
$helper->setUseParserCache( true, false );
|
||||
}
|
||||
|
||||
if (
|
||||
!empty( $this->parsoidSettings['devAPI'] ) &&
|
||||
( $request->getQueryParams()['follow_redirects'] ?? false )
|
||||
) {
|
||||
$content = $pageConfig->getRevisionContent();
|
||||
$revisionStore = MediaWikiServices::getInstance()->getRevisionStore();
|
||||
$revision = $revisionStore->getRevisionById( $helper->getRevisionId() );
|
||||
$content = $revision->getContent( SlotRecord::MAIN );
|
||||
$redirectTarget = $content ? $content->getRedirectTarget() : null;
|
||||
if ( $redirectTarget ) {
|
||||
$redirectInfo = $this->dataAccess->getPageInfo(
|
||||
$pageConfig, [ $redirectTarget ]
|
||||
);
|
||||
$pathParams = [
|
||||
'domain' => $attribs['envOptions']['domain'],
|
||||
'format' => $format,
|
||||
'title' => $redirectTarget,
|
||||
'revision' => $redirectInfo['revId']
|
||||
];
|
||||
|
||||
// NOTE: Core doesn't have REST endpoints that return raw wikitext,
|
||||
// so the below will fail unless the methods are overwritten.
|
||||
if ( $redirectInfo['revId'] ) {
|
||||
$redirectPath = $this->getRevisionContentEndpoint( ParsoidFormatHelper::FORMAT_WIKITEXT );
|
||||
} else {
|
||||
$redirectPath = $this->getPageContentEndpoint( ParsoidFormatHelper::FORMAT_WIKITEXT );
|
||||
}
|
||||
throw new ResponseException(
|
||||
$this->createRedirectResponse( $redirectPath, $pathParams, $request->getQueryParams() )
|
||||
$this->followWikiRedirect(
|
||||
$redirectTarget,
|
||||
$attribs['envOptions']['domain'],
|
||||
$format
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
$reqOpts = $attribs['envOptions'] + [
|
||||
'pageBundle' => $needsPageBundle,
|
||||
'contentmodel' => $opts['contentmodel'] ?? null,
|
||||
];
|
||||
$needsPageBundle = ( $format === ParsoidFormatHelper::FORMAT_PAGEBUNDLE );
|
||||
|
||||
if ( Semver::satisfies( $attribs['envOptions']['outputContentVersion'],
|
||||
'!=' . Parsoid::defaultHTMLVersion() ) ) {
|
||||
$metrics->increment( 'wt2html.parse.version.notdefault' );
|
||||
}
|
||||
|
||||
// VE, the only client using body_only property,
|
||||
// doesn't want section tags when this flag is set.
|
||||
// (T181226)
|
||||
if ( $attribs['body_only'] ) {
|
||||
$reqOpts['wrapSections'] = false;
|
||||
$reqOpts['body_only'] = true;
|
||||
$helper->setFlavor( 'fragment' );
|
||||
} elseif ( !$needsPageBundle ) {
|
||||
// Inline data-parsoid. This will happen when no special params are set.
|
||||
$helper->setFlavor( 'edit' );
|
||||
}
|
||||
|
||||
if ( $wikitext === null && $oldid !== null ) {
|
||||
$reqOpts['logLinterData'] = true;
|
||||
$mstr = 'pageWithOldid';
|
||||
} else {
|
||||
$mstr = 'wt';
|
||||
}
|
||||
|
||||
// XXX: Not necessary, since it's in the pageConfig
|
||||
// if ( isset( $attribs['pagelanguage'] ) ) {
|
||||
// $reqOpts['pagelanguage'] = $attribs['pagelanguage'];
|
||||
// }
|
||||
|
||||
$timing->end( "wt2html.$mstr.init" );
|
||||
$metrics->timing(
|
||||
"wt2html.$mstr.size.input",
|
||||
|
|
@ -748,107 +875,89 @@ abstract class ParsoidHandler extends Handler {
|
|||
);
|
||||
$parseTiming = Timing::start( $metrics );
|
||||
|
||||
if ( $format === ParsoidFormatHelper::FORMAT_LINT ) {
|
||||
try {
|
||||
$lints = $parsoid->wikitext2lint( $pageConfig, $reqOpts );
|
||||
} catch ( ClientError $e ) {
|
||||
throw new HttpException( $e->getMessage(), 400 );
|
||||
} catch ( ResourceLimitExceededException $e ) {
|
||||
throw new HttpException( $e->getMessage(), 413 );
|
||||
}
|
||||
$response = $this->getResponseFactory()->createJson( $lints );
|
||||
if ( $needsPageBundle ) {
|
||||
$pb = $helper->getPageBundle();
|
||||
|
||||
$response = $this->getResponseFactory()->createJson( $pb->responseData() );
|
||||
$helper->putHeaders( $response, false );
|
||||
|
||||
ParsoidFormatHelper::setContentType(
|
||||
$response,
|
||||
ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
|
||||
$pb->version
|
||||
);
|
||||
} else {
|
||||
$parserOutput = new ParserOutput();
|
||||
try {
|
||||
$out = $parsoid->wikitext2html(
|
||||
$pageConfig, $reqOpts, $headers, $parserOutput
|
||||
);
|
||||
} catch ( ClientError $e ) {
|
||||
throw new HttpException( $e->getMessage(), 400 );
|
||||
} catch ( ResourceLimitExceededException $e ) {
|
||||
throw new HttpException( $e->getMessage(), 413 );
|
||||
$out = $helper->getHtml();
|
||||
|
||||
$response = $this->getResponseFactory()->create();
|
||||
$response->getBody()->write( $out->getRawText() );
|
||||
|
||||
$helper->putHeaders( $response, true );
|
||||
|
||||
// XXX: Since we don't enable stashing, the ETag is not really useful.
|
||||
$eTag = $helper->getETag();
|
||||
if ( $eTag ) {
|
||||
$response->setHeader( 'ETag', $eTag );
|
||||
}
|
||||
if ( $needsPageBundle ) {
|
||||
$response = $this->getResponseFactory()->createJson( $out->responseData() );
|
||||
ParsoidFormatHelper::setContentType( $response, ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
|
||||
$out->version );
|
||||
} else {
|
||||
$response = $this->getResponseFactory()->create();
|
||||
ParsoidFormatHelper::setContentType( $response, ParsoidFormatHelper::FORMAT_HTML,
|
||||
$attribs['envOptions']['outputContentVersion'] );
|
||||
$response->getBody()->write( $out );
|
||||
// @phan-suppress-next-next-line PhanTypeArraySuspiciousNullable $headers can't be null after the
|
||||
// method call, but the docblock of wikitext2html doesn't say that.
|
||||
$response->setHeader( 'Content-Language', $headers['content-language'] );
|
||||
// @phan-suppress-next-line PhanTypeArraySuspiciousNullable Same.
|
||||
$response->addHeader( 'Vary', $headers['vary'] );
|
||||
}
|
||||
|
||||
// XXX: For pagebundle requests, this can be somewhat inflated
|
||||
// because of pagebundle json-encoding overheads
|
||||
$outSize = $response->getBody()->getSize();
|
||||
$parseTime = $parseTiming->end( "wt2html.$mstr.parse" );
|
||||
$timing->end( 'wt2html.total' );
|
||||
$metrics->timing( "wt2html.$mstr.size.output", $outSize );
|
||||
|
||||
// Ignore slow parse metrics for non-oldid parses
|
||||
if ( $mstr === 'pageWithOldid' ) {
|
||||
if ( $parseTime > 3000 ) {
|
||||
LoggerFactory::getInstance( 'slow-parsoid' )
|
||||
->info( 'Parsing {title} was slow, took {time} seconds', [
|
||||
'time' => number_format( $parseTime / 1000, 2 ),
|
||||
'title' => $pageConfig->getTitle(),
|
||||
] );
|
||||
}
|
||||
|
||||
// NOTE: We used to generate an ETag here, but since it was random every time and the
|
||||
// output wasn't stored anywhere, it could not possibly match anything, ever.
|
||||
if ( $parseTime > 10 && $outSize > 100 ) {
|
||||
// * Don't bother with this metric for really small parse times
|
||||
// p99 for initialization time is ~7ms according to grafana.
|
||||
// So, 10ms ensures that startup overheads don't skew the metrics
|
||||
// * For body_only=false requests, <head> section isn't generated
|
||||
// and if the output is small, per-request overheads can skew
|
||||
// the timePerKB metrics.
|
||||
|
||||
// FIXME: For pagebundle requests, this can be somewhat inflated
|
||||
// because of pagebundle json-encoding overheads
|
||||
$outSize = $response->getBody()->getSize();
|
||||
$parseTime = $parseTiming->end( "wt2html.$mstr.parse" );
|
||||
$timing->end( 'wt2html.total' );
|
||||
$metrics->timing( "wt2html.$mstr.size.output", $outSize );
|
||||
// NOTE: This is slightly misleading since there are fixed costs
|
||||
// for generating output like the <head> section and should be factored in,
|
||||
// but this is good enough for now as a useful first degree of approximation.
|
||||
$timePerKB = $parseTime * 1024 / $outSize;
|
||||
$metrics->timing( 'wt2html.timePerKB', $timePerKB );
|
||||
|
||||
// Ignore slow parse metrics for non-oldid parses
|
||||
if ( $mstr === 'pageWithOldid' ) {
|
||||
if ( $parseTime > 3000 ) {
|
||||
if ( $timePerKB > 500 ) {
|
||||
// At 100ms/KB, even a 100KB page which isn't that large will take 10s.
|
||||
// So, we probably want to shoot for a threshold under 100ms.
|
||||
// But, let's start with 500ms+ outliers first and see what we uncover.
|
||||
LoggerFactory::getInstance( 'slow-parsoid' )
|
||||
->info( 'Parsing {title} was slow, took {time} seconds', [
|
||||
->info( 'Parsing {title} was slow, timePerKB took {timePerKB} ms, total: {time} seconds', [
|
||||
'time' => number_format( $parseTime / 1000, 2 ),
|
||||
'timePerKB' => number_format( $timePerKB, 1 ),
|
||||
'title' => $pageConfig->getTitle(),
|
||||
] );
|
||||
}
|
||||
|
||||
if ( $parseTime > 10 && $outSize > 100 ) {
|
||||
// * Don't bother with this metric for really small parse times
|
||||
// p99 for initialization time is ~7ms according to grafana.
|
||||
// So, 10ms ensures that startup overheads don't skew the metrics
|
||||
// * For body_only=false requests, <head> section isn't generated
|
||||
// and if the output is small, per-request overheads can skew
|
||||
// the timePerKB metrics.
|
||||
|
||||
// FIXME: This is slightly misleading since there are fixed costs
|
||||
// for generating output like the <head> section and should be factored in,
|
||||
// but this is good enough for now as a useful first degree of approximation.
|
||||
$timePerKB = $parseTime * 1024 / $outSize;
|
||||
$metrics->timing( 'wt2html.timePerKB', $timePerKB );
|
||||
|
||||
if ( $timePerKB > 500 ) {
|
||||
// At 100ms/KB, even a 100KB page which isn't that large will take 10s.
|
||||
// So, we probably want to shoot for a threshold under 100ms.
|
||||
// But, let's start with 500ms+ outliers first and see what we uncover.
|
||||
LoggerFactory::getInstance( 'slow-parsoid' )
|
||||
->info( 'Parsing {title} was slow, timePerKB took {timePerKB} ms, total: {time} seconds', [
|
||||
'time' => number_format( $parseTime / 1000, 2 ),
|
||||
'timePerKB' => number_format( $timePerKB, 1 ),
|
||||
'title' => $pageConfig->getTitle(),
|
||||
] );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( $wikitext !== null ) {
|
||||
// Don't cache requests when wt is set in case somebody uses
|
||||
// GET for wikitext parsing
|
||||
// XXX: can we just refuse to do wikitext parsing in a GET request?
|
||||
$response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' );
|
||||
} elseif ( $oldid !== null ) {
|
||||
// FIXME this should be handled in core (cf OutputPage::sendCacheControl)
|
||||
// XXX: can this go away? Parsoid's PageContent class doesn't expose supressed revision content.
|
||||
if ( $request->getHeaderLine( 'Cookie' ) ||
|
||||
$request->getHeaderLine( 'Authorization' ) ) {
|
||||
// Don't cache requests with a session.
|
||||
$response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' );
|
||||
}
|
||||
// Indicate the MediaWiki revision in a header as well for
|
||||
// ease of extraction in clients.
|
||||
$response->setHeader( 'Content-Revision-Id', $oldid );
|
||||
} else {
|
||||
throw new LogicException( 'Should be unreachable' );
|
||||
}
|
||||
return $response;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,7 +45,8 @@ final class PageBundleParserOutputConverter {
|
|||
'parsoid' => $pageBundle->parsoid,
|
||||
'mw' => $pageBundle->mw,
|
||||
'version' => $pageBundle->version,
|
||||
'headers' => $pageBundle->headers
|
||||
'headers' => $pageBundle->headers,
|
||||
'contentmodel' => $pageBundle->contentmodel,
|
||||
]
|
||||
);
|
||||
|
||||
|
|
@ -66,7 +67,8 @@ final class PageBundleParserOutputConverter {
|
|||
$pageBundleData['parsoid'] ?? [],
|
||||
$pageBundleData['mw'] ?? [],
|
||||
$pageBundleData['version'] ?? null,
|
||||
$pageBundleData['headers'] ?? []
|
||||
$pageBundleData['headers'] ?? [],
|
||||
$pageBundleData['contentmodel'] ?? null
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ use Config;
|
|||
use HashConfig;
|
||||
use IBufferingStatsdDataFactory;
|
||||
use InvalidArgumentException;
|
||||
use Language;
|
||||
use Liuggio\StatsdClient\Factory\StatsdDataFactory;
|
||||
use MediaWiki\Config\ServiceOptions;
|
||||
use MediaWiki\Logger\LoggerFactory;
|
||||
|
|
@ -43,6 +42,7 @@ use ParserOptions;
|
|||
use ParserOutput;
|
||||
use Status;
|
||||
use UnexpectedValueException;
|
||||
use Wikimedia\Parsoid\Config\PageConfig;
|
||||
use Wikimedia\Parsoid\Config\SiteConfig;
|
||||
use Wikimedia\Parsoid\Core\ClientError;
|
||||
use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
|
||||
|
|
@ -238,41 +238,28 @@ class ParsoidOutputAccess {
|
|||
}
|
||||
|
||||
/**
|
||||
* @param PageIdentity $page
|
||||
* @param array $envOptions
|
||||
* @param ?RevisionRecord $revision
|
||||
* @param Language|null $languageOverride
|
||||
* @param PageConfig $pageConfig
|
||||
* @param array $parsoidOptions
|
||||
*
|
||||
* @return Status
|
||||
*/
|
||||
private function parseInternal(
|
||||
PageIdentity $page,
|
||||
array $envOptions,
|
||||
?RevisionRecord $revision = null,
|
||||
Language $languageOverride = null
|
||||
PageConfig $pageConfig,
|
||||
array $parsoidOptions
|
||||
): Status {
|
||||
$defaultOptions = [
|
||||
'pageBundle' => true,
|
||||
'prefix' => $this->parsoidWikiId,
|
||||
'pageName' => $page,
|
||||
'htmlVariantLanguage' => $languageOverride ? $languageOverride->getCode() : null,
|
||||
'pageName' => $pageConfig->getTitle(),
|
||||
'htmlVariantLanguage' => $pageConfig->getPageLanguage(),
|
||||
'outputContentVersion' => Parsoid::defaultHTMLVersion(),
|
||||
];
|
||||
|
||||
try {
|
||||
$langCode = $languageOverride ? $languageOverride->getCode() : null;
|
||||
$pageConfig = $this->parsoidPageConfigFactory->create(
|
||||
$page,
|
||||
null,
|
||||
$revision,
|
||||
null,
|
||||
$langCode,
|
||||
$this->options->get( MainConfigNames::ParsoidSettings )
|
||||
);
|
||||
$startTime = microtime( true );
|
||||
$pageBundle = $this->parsoid->wikitext2html(
|
||||
$pageConfig,
|
||||
$envOptions + $defaultOptions
|
||||
$parsoidOptions + $defaultOptions
|
||||
);
|
||||
|
||||
$parserOutput = PageBundleParserOutputConverter::parserOutputFromPageBundle( $pageBundle );
|
||||
|
|
@ -281,7 +268,7 @@ class ParsoidOutputAccess {
|
|||
LoggerFactory::getInstance( 'slow-parsoid' )
|
||||
->info( 'Parsing {title} was slow, took {time} seconds', [
|
||||
'time' => number_format( $time, 2 ),
|
||||
'title' => (string)$page,
|
||||
'title' => $pageConfig->getTitle(),
|
||||
] );
|
||||
}
|
||||
return Status::newGood( $parserOutput );
|
||||
|
|
@ -381,7 +368,7 @@ class ParsoidOutputAccess {
|
|||
/**
|
||||
* @param PageIdentity $page
|
||||
* @param ParserOptions $parserOpts
|
||||
* @param array $envOptions
|
||||
* @param array $parsoidOptions
|
||||
* @param RevisionRecord|int|null $revision
|
||||
*
|
||||
* @return Status
|
||||
|
|
@ -389,7 +376,7 @@ class ParsoidOutputAccess {
|
|||
public function parse(
|
||||
PageIdentity $page,
|
||||
ParserOptions $parserOpts,
|
||||
array $envOptions,
|
||||
array $parsoidOptions,
|
||||
$revision
|
||||
): Status {
|
||||
// NOTE: If we have a RevisionRecord already, just use it, there is no need to resolve $page to
|
||||
|
|
@ -398,9 +385,18 @@ class ParsoidOutputAccess {
|
|||
[ $page, $revision ] = $this->resolveRevision( $page, $revision );
|
||||
}
|
||||
|
||||
$revId = $revision ? $revision->getId() : $page->getId();
|
||||
$languageOverride = $parserOpts->getTargetLanguage();
|
||||
$langCode = $languageOverride ? $languageOverride->getCode() : null;
|
||||
$pageConfig = $this->parsoidPageConfigFactory->create(
|
||||
$page,
|
||||
null,
|
||||
$revision,
|
||||
null,
|
||||
$langCode,
|
||||
$this->options->get( MainConfigNames::ParsoidSettings )
|
||||
);
|
||||
|
||||
$status = $this->parseInternal( $page, $envOptions, $revision, $parserOpts->getTargetLanguage() );
|
||||
$status = $this->parseInternal( $pageConfig, $parsoidOptions );
|
||||
|
||||
if ( !$status->isOK() ) {
|
||||
return $status;
|
||||
|
|
@ -411,6 +407,7 @@ class ParsoidOutputAccess {
|
|||
// TODO: when we make tighter integration with Parsoid, render ID should become
|
||||
// a standard ParserOutput property. Nothing else needs it now, so don't generate
|
||||
// it in ParserCache just yet.
|
||||
$revId = $revision->getId();
|
||||
$parsoidRenderId = new ParsoidRenderID( $revId, $this->globalIdGenerator->newUUIDv1() );
|
||||
$parserOutput->setExtensionData( self::RENDER_ID_KEY, $parsoidRenderId->getKey() );
|
||||
|
||||
|
|
|
|||
|
|
@ -968,6 +968,8 @@ describe( '/transform/ endpoint', function () {
|
|||
} );
|
||||
|
||||
describe( 'Variant conversion', function () {
|
||||
// NOTE: Continue to accept sr-el for a while, to remain compatible with older versions of the parsoid lib.
|
||||
const expectedCodeSerbianLatin = /sr-el|sr-Latn/;
|
||||
|
||||
it( 'should perform variant conversion for transform given pagelanguage in HTTP header (html)', function ( done ) {
|
||||
client.req
|
||||
|
|
@ -977,7 +979,7 @@ describe( '/transform/ endpoint', function () {
|
|||
.send( {
|
||||
wikitext: 'абвг abcd x'
|
||||
} )
|
||||
.expect( 'Content-Language', 'sr-el' )
|
||||
.expect( 'Content-Language', expectedCodeSerbianLatin )
|
||||
.expect( 'Vary', /\bAccept-Language\b/i )
|
||||
.expect( validHtmlResponse( ( doc ) => {
|
||||
doc.body.textContent.should.equal( 'abvg abcd x' );
|
||||
|
|
@ -1003,7 +1005,7 @@ describe( '/transform/ endpoint', function () {
|
|||
const headers = res.body.html.headers;
|
||||
headers.should.have.property( 'content-language' );
|
||||
headers.should.have.property( 'vary' );
|
||||
headers[ 'content-language' ].should.equal( 'sr-el' );
|
||||
// expectedCodeSerbianLatin is a RegExp (/sr-el|sr-Latn/), so the header string
// has to be checked with match(); equal() against a RegExp can never pass.
headers[ 'content-language' ].should.match( expectedCodeSerbianLatin );
|
||||
headers.vary.should.match( /\bAccept-Language\b/i );
|
||||
} )
|
||||
.end( done );
|
||||
|
|
@ -1021,7 +1023,7 @@ describe( '/transform/ endpoint', function () {
|
|||
body: 'абвг abcd x'
|
||||
}
|
||||
} )
|
||||
.expect( 'Content-Language', 'sr-el' )
|
||||
.expect( 'Content-Language', expectedCodeSerbianLatin )
|
||||
.expect( 'Vary', /\bAccept-Language\b/i )
|
||||
.expect( validHtmlResponse( ( doc ) => {
|
||||
doc.body.textContent.should.equal( 'abvg abcd x' );
|
||||
|
|
@ -1051,7 +1053,7 @@ describe( '/transform/ endpoint', function () {
|
|||
const headers = res.body.html.headers;
|
||||
headers.should.have.property( 'content-language' );
|
||||
headers.should.have.property( 'vary' );
|
||||
headers[ 'content-language' ].should.equal( 'sr-el' );
|
||||
headers[ 'content-language' ].should.match( expectedCodeSerbianLatin );
|
||||
headers.vary.should.match( /\bAccept-Language\b/i );
|
||||
} )
|
||||
.end( done );
|
||||
|
|
@ -1068,7 +1070,7 @@ describe( '/transform/ endpoint', function () {
|
|||
body: 'абвг abcd x'
|
||||
}
|
||||
} )
|
||||
.expect( 'Content-Language', 'sr-el' )
|
||||
.expect( 'Content-Language', expectedCodeSerbianLatin )
|
||||
.expect( 'Vary', /\bAccept-Language\b/i )
|
||||
.expect( validHtmlResponse( ( doc ) => {
|
||||
doc.body.textContent.should.equal( 'abvg abcd x' );
|
||||
|
|
@ -1094,7 +1096,7 @@ describe( '/transform/ endpoint', function () {
|
|||
const headers = res.body.html.headers;
|
||||
headers.should.have.property( 'content-language' );
|
||||
headers.should.have.property( 'vary' );
|
||||
headers[ 'content-language' ].should.equal( 'sr-el' );
|
||||
headers[ 'content-language' ].should.match( expectedCodeSerbianLatin );
|
||||
headers.vary.should.match( /\bAccept-Language\b/i );
|
||||
} )
|
||||
.end( done );
|
||||
|
|
|
|||
|
|
@ -7,9 +7,11 @@ use Generator;
|
|||
use MediaWiki\MainConfigNames;
|
||||
use MediaWiki\MainConfigSchema;
|
||||
use MediaWiki\Page\PageIdentity;
|
||||
use MediaWiki\Parser\ParserCacheFactory;
|
||||
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
|
||||
use MediaWiki\Parser\Parsoid\HtmlToContentTransform;
|
||||
use MediaWiki\Parser\Parsoid\HtmlTransformFactory;
|
||||
use MediaWiki\Parser\RevisionOutputCache;
|
||||
use MediaWiki\Permissions\UltimateAuthority;
|
||||
use MediaWiki\Rest\Handler\HtmlInputTransformHelper;
|
||||
use MediaWiki\Rest\Handler\ParsoidFormatHelper;
|
||||
|
|
@ -26,6 +28,7 @@ use MediaWiki\Tests\Rest\RestTestTrait;
|
|||
use MediaWiki\User\UserIdentityValue;
|
||||
use MediaWikiIntegrationTestCase;
|
||||
use NullStatsdDataFactory;
|
||||
use ParserCache;
|
||||
use PHPUnit\Framework\MockObject\MockObject;
|
||||
use TitleValue;
|
||||
use Wikimedia\Message\ITextFormatter;
|
||||
|
|
@ -248,12 +251,12 @@ class ParsoidHandlerTest extends MediaWikiIntegrationTestCase {
|
|||
}
|
||||
|
||||
public function wt2html(
|
||||
PageConfig $pageConfig,
|
||||
PageConfig $pageConfigConfig,
|
||||
array $attribs,
|
||||
?string $wikitext = null
|
||||
) {
|
||||
return parent::wt2html(
|
||||
$pageConfig,
|
||||
$pageConfigConfig,
|
||||
$attribs,
|
||||
$wikitext
|
||||
);
|
||||
|
|
@ -1910,6 +1913,8 @@ class ParsoidHandlerTest extends MediaWikiIntegrationTestCase {
|
|||
array $unexpectedHtml,
|
||||
array $expectedHeaders = []
|
||||
) {
|
||||
// $this->overrideConfigValue( 'TemporaryParsoidHandlerParserCacheWriteRatio', 0 );
|
||||
|
||||
$hmtlProfileUri = 'https://www.mediawiki.org/wiki/Specs/html/2.6.0';
|
||||
$expectedHeaders += [
|
||||
'content-type' => "text/x-wiki; charset=utf-8; profile=\"$hmtlProfileUri\"",
|
||||
|
|
@ -1949,6 +1954,43 @@ class ParsoidHandlerTest extends MediaWikiIntegrationTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * wt2html() should write to the parser cache when no write ratio is
 * configured, and should not write when the ratio is set to 0.
 */
public function testWt2html_ParserCache() {
	$existingPage = $this->getExistingTestPage();
	$pageConfig = $this->getPageConfig( $existingPage );

	$cacheMock = $this->createNoOpMock( ParserCache::class, [ 'save', 'get' ] );

	// The critical expectation of this test: across both wt2html() calls
	// below, save() must be invoked exactly once.
	$cacheMock->expects( $this->once() )->method( 'save' );
	// Always report a cache miss.
	$cacheMock->method( 'get' )->willReturn( false );

	$revisionCacheMock = $this->createNoOpMock( RevisionOutputCache::class );

	$cacheFactoryMock = $this->createNoOpMock(
		ParserCacheFactory::class,
		[ 'getParserCache', 'getRevisionOutputCache' ]
	);
	$cacheFactoryMock->method( 'getParserCache' )->willReturn( $cacheMock );
	$cacheFactoryMock->method( 'getRevisionOutputCache' )->willReturn( $revisionCacheMock );

	$this->setService( 'ParserCacheFactory', $cacheFactoryMock );

	$requestAttribs = self::DEFAULT_ATTRIBS;
	$requestAttribs['opts']['from'] = 'wikitext';
	$requestAttribs['opts']['format'] = 'html';

	$parsoidHandler = $this->newParsoidHandler();

	// No write ratio is configured yet, so this call is expected to write
	// to the parser cache.
	$parsoidHandler->wt2html( $pageConfig, $requestAttribs );

	$this->overrideConfigValue( 'TemporaryParsoidHandlerParserCacheWriteRatio', 0 );

	// With the write ratio at 0, this call must not write to the parser cache.
	$parsoidHandler->wt2html( $pageConfig, $requestAttribs );
}
|
||||
|
||||
// TODO: test wt2html failure modes
|
||||
// TODO: test redlinks
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ class PageBundleParserOutputConverterTest extends MediaWikiUnitTestCase {
|
|||
$this->assertSame( $pageBundle->headers, $extensionData['headers'] );
|
||||
$this->assertSame( $pageBundle->headers['content-language'], $extensionData['headers']['content-language'] );
|
||||
$this->assertSame( $pageBundle->version, $extensionData['version'] );
|
||||
$this->assertSame( $pageBundle->contentmodel, $extensionData['contentmodel'] );
|
||||
}
|
||||
|
||||
public function provideParserOutputFromPageBundle() {
|
||||
|
|
@ -30,7 +31,8 @@ class PageBundleParserOutputConverterTest extends MediaWikiUnitTestCase {
|
|||
[ 'ids' => '1.33' ],
|
||||
[ 'ids' => '1.33' ],
|
||||
'1.x',
|
||||
[ 'content-language' => 'abc' ]
|
||||
[ 'content-language' => 'abc' ],
|
||||
'testing'
|
||||
)
|
||||
];
|
||||
|
||||
|
|
@ -59,6 +61,7 @@ class PageBundleParserOutputConverterTest extends MediaWikiUnitTestCase {
|
|||
$this->assertSame( $extensionData['version'] ?? null, $pageBundle->version );
|
||||
$this->assertSame( $extensionData['headers'] ?? [], $pageBundle->headers );
|
||||
$this->assertSame( $extensionData['headers']['content-language'], $pageBundle->headers['content-language'] );
|
||||
$this->assertSame( $extensionData['contentmodel'] ?? null, $pageBundle->contentmodel );
|
||||
}
|
||||
|
||||
public function providePageBundleFromParserOutput() {
|
||||
|
|
@ -69,7 +72,8 @@ class PageBundleParserOutputConverterTest extends MediaWikiUnitTestCase {
|
|||
'parsoid' => [ 'ids' => '1.22' ],
|
||||
'mw' => [],
|
||||
'version' => '2.x',
|
||||
'headers' => [ 'content-language' => 'xyz' ]
|
||||
'headers' => [ 'content-language' => 'xyz' ],
|
||||
// Must be keyed: the converter reads $pageBundleData['contentmodel'], so a
// positional 'testing' would land at key 0 and leave contentmodel untested.
'contentmodel' => 'testing',
|
||||
]
|
||||
)
|
||||
];
|
||||
|
|
|
|||
Loading…
Reference in a new issue