wiki.techinc.nl/includes/Rest/Handler/Helper/HtmlInputTransformHelper.php
Máté Szabó c4e9f987f1 rest: Return a 400 for invalid render IDs
Why:

- The REST API takes an optional renderid param when converting HTML
  back to source wikitext, which is user-provided and may be invalid.
- Invalid render IDs cause an InvalidArgumentException to be thrown that
  causes a 500 response.

What:

- Introduce a new error message for invalid render IDs in the REST API.
- Return a 400 with this new error message for HTML reverse-parses with
  an invalid render ID.

Bug: T385568
Change-Id: I062419fe8952329a39781a49cdca2e94c3996447
(cherry picked from commit cd1d42a5066e4bcb9b9d4ed9b4f7714fd428fea3)
2025-02-04 14:54:50 +00:00

819 lines
26 KiB
PHP

<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
namespace MediaWiki\Rest\Handler\Helper;
use InvalidArgumentException;
use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
use MediaWiki\Content\Content;
use MediaWiki\Edit\ParsoidOutputStash;
use MediaWiki\Edit\ParsoidRenderID;
use MediaWiki\Edit\SelserContext;
use MediaWiki\Language\LanguageCode;
use MediaWiki\MainConfigNames;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Page\PageLookup;
use MediaWiki\Page\PageRecord;
use MediaWiki\Page\ParserOutputAccess;
use MediaWiki\Parser\ParserOptions;
use MediaWiki\Parser\ParserOutput;
use MediaWiki\Parser\Parsoid\HtmlToContentTransform;
use MediaWiki\Parser\Parsoid\HtmlTransformFactory;
use MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter;
use MediaWiki\Rest\Handler;
use MediaWiki\Rest\HttpException;
use MediaWiki\Rest\LocalizedHttpException;
use MediaWiki\Rest\ResponseInterface;
use MediaWiki\Revision\RevisionAccessException;
use MediaWiki\Revision\RevisionLookup;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Status\Status;
use MWUnknownContentModelException;
use Wikimedia\Bcp47Code\Bcp47Code;
use Wikimedia\Message\MessageValue;
use Wikimedia\ParamValidator\ParamValidator;
use Wikimedia\Parsoid\Core\ClientError;
use Wikimedia\Parsoid\Core\PageBundle;
use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
use Wikimedia\Parsoid\Parsoid;
use Wikimedia\Stats\StatsFactory;
/**
* REST helper for converting HTML to page content source (e.g. wikitext).
*
* @since 1.40
*
* @unstable Pending consolidation of the Parsoid extension with core code.
*/
class HtmlInputTransformHelper {
/**
* Names of the configuration settings associated with this helper.
*
* NOTE(review): this constant is not read anywhere inside this class; presumably
* it is consumed by whatever code constructs the helper — confirm against callers.
*
* @internal
*/
public const CONSTRUCTOR_OPTIONS = [
MainConfigNames::ParsoidCacheConfig
];
/**
* The page the input HTML belongs to. Assigned in initInternal(); stays null
* when the helper is constructed without a page (deprecated usage).
*
* @var PageIdentity|null
*/
private $page = null;
/**
* The transform that turns the input HTML into page content. Created in
* initInternal(), so it is not set until a page has been supplied.
*
* @var HtmlToContentTransform
*/
private $transform;
/**
* Environment options. The constructor fills in 'outputContentVersion' and
* 'offsetType' when the caller did not supply them.
*
* @var array
*/
private $envOptions;
// Metrics sink for the 'html_input_transform_total' counters; replaceable via setMetrics().
private StatsFactory $statsFactory;
// Creates the HtmlToContentTransform in initInternal().
private HtmlTransformFactory $htmlTransformFactory;
// Queried by render ID in fetchSelserContextFromStash().
private ParsoidOutputStash $parsoidOutputStash;
// Source of (cached) Parsoid renderings in fetchParserOutputFromParsoid().
private ParserOutputAccess $parserOutputAccess;
// Resolves a PageIdentity to a PageRecord in fetchParserOutputFromParsoid().
private PageLookup $pageLookup;
// Resolves revision IDs to revision records in fetchParserOutputFromParsoid().
private RevisionLookup $revisionLookup;
/**
 * Store the service dependencies and, if a page is given, set up the transform
 * for the supplied input right away.
 *
 * Constructing the helper without $page is deprecated since 1.43. In that case
 * only the services are stored and no transform is created here.
 *
 * @param StatsFactory $statsFactory
 * @param HtmlTransformFactory $htmlTransformFactory
 * @param ParsoidOutputStash $parsoidOutputStash
 * @param ParserOutputAccess $parserOutputAccess
 * @param PageLookup $pageLookup
 * @param RevisionLookup $revisionLookup
 * @param array $envOptions Overrides for the default environment options
 * @param ?PageIdentity $page
 * @param array|string $body Body structure, or an HTML string
 * @param array $parameters
 * @param RevisionRecord|null $originalRevision
 * @param Bcp47Code|null $pageLanguage
 *
 * @throws HttpException If the supplied input is rejected during initialization
 */
public function __construct(
	StatsFactory $statsFactory,
	HtmlTransformFactory $htmlTransformFactory,
	ParsoidOutputStash $parsoidOutputStash,
	ParserOutputAccess $parserOutputAccess,
	PageLookup $pageLookup,
	RevisionLookup $revisionLookup,
	array $envOptions = [],
	?PageIdentity $page = null,
	$body = '',
	array $parameters = [],
	?RevisionRecord $originalRevision = null,
	?Bcp47Code $pageLanguage = null
) {
	// Services.
	$this->statsFactory = $statsFactory;
	$this->htmlTransformFactory = $htmlTransformFactory;
	$this->parsoidOutputStash = $parsoidOutputStash;
	$this->parserOutputAccess = $parserOutputAccess;
	$this->pageLookup = $pageLookup;
	$this->revisionLookup = $revisionLookup;

	// Caller-supplied options take precedence; the defaults only fill the gaps.
	$defaultEnvOptions = [
		'outputContentVersion' => Parsoid::defaultHTMLVersion(),
		'offsetType' => 'byte',
	];
	$this->envOptions = $envOptions + $defaultEnvOptions;

	if ( $page !== null ) {
		$this->initInternal( $page, $body, $parameters, $originalRevision, $pageLanguage );
		return;
	}

	wfDeprecated( __METHOD__ . ' without $page', '1.43' );
}
/**
 * Describe the path and query parameters this helper understands.
 *
 * Every parameter is optional and has an empty default ('' for strings, 0 for
 * the revision ID).
 *
 * @return array Parameter settings keyed by parameter name
 */
public function getParamSettings(): array {
	// JSON body schema:
	/*
	doc:
		properties:
			headers:
				type: array
				items:
					type: string
			body:
				type: [ string, object ]
		required: [ body ]

	body:
		properties:
			offsetType:
				type: string
			revid:
				type: integer
			renderid:
				type: string
			etag:
				type: string
			html:
				type: [ doc, string ]
			data-mw:
				type: doc
			original:
				properties:
					html:
						type: doc
					source:
						type: doc
					data-mw:
						type: doc
					data-parsoid:
						type: doc
		required: [ html ]
	*/

	// FUTURE: more params
	// - slot (for loading the base content)

	// All parameters share the same shape: optional, with an explicit default.
	$optional = static function ( string $source, string $type, $default ): array {
		return [
			Handler::PARAM_SOURCE => $source,
			ParamValidator::PARAM_TYPE => $type,
			ParamValidator::PARAM_DEFAULT => $default,
			ParamValidator::PARAM_REQUIRED => false,
		];
	};

	return [
		// XXX: should we really declare this here? Or should the endpoint do this?
		//      We are not reading this property...
		'title' => $optional( 'path', 'string', '' ),

		// XXX: Needed for compatibility with the parsoid transform endpoint.
		//      But revid should just be part of the info about the original data
		//      in the body.
		'oldid' => $optional( 'path', 'int', 0 ),

		// XXX: Supported for compatibility with the parsoid transform endpoint.
		//      If given, it should be 'html' or 'pagebundle'.
		'from' => $optional( 'path', 'string', '' ),

		// XXX: Supported for compatibility with the parsoid transform endpoint.
		//      Ignored.
		'format' => $optional( 'path', 'string', '' ),

		// XXX: get this from the Accept header?
		'contentmodel' => $optional( 'query', 'string', '' ),

		// TODO: get this from Accept-Language header?!
		'language' => $optional( 'query', 'string', '' ),
	];
}
/**
 * Rewrite body and parameters in place so that requests shaped for the legacy
 * endpoints look like requests for the current one.
 *
 * @see ParsoidHandler::getRequestAttributes
 *
 * @param array<string,mixed> &$body
 * @param array<string,mixed> &$parameters
 *
 * @throws HttpException If 'from' is given but is neither html nor pagebundle
 *
 * @return void
 */
private static function normalizeParameters( array &$body, array &$parameters ) {
	// A positive revision ID from the path is treated as if it came with the body.
	$pathRevId = (int)( $parameters['oldid'] ?? 0 );
	if ( $pathRevId > 0 ) {
		$body['original']['revid'] = $pathRevId;
	}

	// An etag in the body doubles as the render ID.
	// The renderid field accepts ETag format as well.
	if ( !empty( $body['original']['etag'] ) ) {
		// @phan-suppress-next-line PhanTypeInvalidDimOffset false positive
		$body['original']['renderid'] = $body['original']['etag'];
	}

	// 'wikitext' is an alias for 'source'.
	if ( isset( $body['original']['wikitext'] ) ) {
		// @phan-suppress-next-line PhanTypeInvalidDimOffset false positive
		$body['original']['source'] = $body['original']['wikitext'];
		unset( $body['original']['wikitext'] );
	}

	// Without 'from', both page bundle style input and full HTML are accepted.
	// With 'from', page bundle parts are only honoured for FORMAT_PAGEBUNDLE,
	// and the only other acceptable value is FORMAT_HTML.
	$from = $parameters['from'] ?? '';
	if ( $from !== '' && $from !== ParsoidFormatHelper::FORMAT_PAGEBUNDLE ) {
		unset(
			$body['original']['data-parsoid']['body'],
			$body['original']['data-mw']['body'],
			$body['data-mw']['body']
		);

		if ( $from !== ParsoidFormatHelper::FORMAT_HTML ) {
			throw new LocalizedHttpException(
				new MessageValue( "rest-unsupported-transform-input", [ $from ] ), 400
			);
		}
	}

	// The content model from the body wins over the legacy 'format' path parameter.
	$bodyModel = $body['contentmodel'] ?? '';
	$pathFormat = $parameters['format'] ?? '';
	if ( $bodyModel !== '' ) {
		$parameters['contentmodel'] = $bodyModel;
	} elseif ( $pathFormat !== '' ) {
		$parameters['contentmodel'] = $pathFormat;
	}
}
/**
 * Deprecated entry point for supplying the input after construction.
 * Emits a deprecation warning and then performs the regular initialization.
 *
 * @param PageIdentity $page
 * @param array|string $body Body structure, or an HTML string
 * @param array $parameters
 * @param RevisionRecord|null $originalRevision
 * @param Bcp47Code|null $pageLanguage
 *
 * @throws HttpException
 * @deprecated since 1.43; pass arguments to constructor instead
 */
public function init(
	PageIdentity $page,
	$body,
	array $parameters,
	?RevisionRecord $originalRevision = null,
	?Bcp47Code $pageLanguage = null
) {
	wfDeprecated( __METHOD__, '1.43' );

	$this->initInternal(
		$page,
		$body,
		$parameters,
		$originalRevision,
		$pageLanguage
	);
}
/**
 * Set up the HTML-to-content transform for the given page and request input.
 *
 * The body may be a plain HTML string or a structure with an 'html' field that
 * is either a string or a "doc" array carrying the markup under 'body'.
 * Information about the original rendering (render ID / ETag, inline page
 * bundle, or just a revision ID) is forwarded to setOriginal() when present.
 *
 * @param PageIdentity $page
 * @param array|string $body Body structure, or an HTML string
 * @param array $parameters
 * @param RevisionRecord|null $originalRevision
 * @param Bcp47Code|null $pageLanguage
 *
 * @throws HttpException With status 400 if the HTML is missing or not a string,
 *   or if the render ID / ETag cannot be parsed; other statuses may come from
 *   normalizeParameters() and setOriginal().
 */
private function initInternal(
	PageIdentity $page,
	$body,
	array $parameters,
	?RevisionRecord $originalRevision = null,
	?Bcp47Code $pageLanguage = null
) {
	if ( is_string( $body ) ) {
		$body = [ 'html' => $body ];
	}

	self::normalizeParameters( $body, $parameters );

	$this->page = $page;

	// The markup is either given directly, or wrapped in a "doc" structure
	// that carries it in its 'body' field.
	$html = $body['html'] ?? null;
	if ( is_array( $html ) ) {
		$html = $html['body'] ?? null;
	}

	// A missing or malformed 'html' field is a client error. Report it as a 400
	// rather than falling through to the exception's default status of 500
	// (or failing later with a type error inside the transform factory).
	if ( !is_string( $html ) ) {
		throw new LocalizedHttpException(
			new MessageValue( "rest-missing-body-field", [ 'html' ] ),
			400
		);
	}

	// TODO: validate $body against a proper schema.
	$this->transform = $this->htmlTransformFactory->getHtmlToContentTransform(
		$html,
		$this->page
	);

	$this->transform->setMetrics( $this->statsFactory );

	// NOTE: Env::getContentModel will fall back to the page's recorded content model
	//       if none is set here.
	$this->transform->setOptions( [
		'contentmodel' => $parameters['contentmodel'] ?? null,
		'offsetType' => $body['offsetType'] ?? $this->envOptions['offsetType'],
	] );

	$original = $body['original'] ?? [];
	$originalRendering = null;

	if ( !isset( $original['html'] ) && !empty( $original['renderid'] ) ) {
		// The original rendering is referenced by key, either as an ETag
		// (optionally weak, always quoted) or as a plain render ID.
		$key = $original['renderid'];
		if ( preg_match( '!^(W/)?".*"$!', $key ) ) {
			$originalRendering = ParsoidRenderID::newFromETag( $key );

			if ( !$originalRendering ) {
				throw new LocalizedHttpException( new MessageValue( "rest-bad-etag", [ $key ] ), 400 );
			}
		} else {
			try {
				$originalRendering = ParsoidRenderID::newFromKey( $key );
			} catch ( InvalidArgumentException $e ) {
				throw new LocalizedHttpException(
					new MessageValue( 'rest-parsoid-bad-render-id', [ $key ] ),
					400
				);
			}
		}
	} elseif ( !empty( $original['html'] ) || !empty( $original['data-parsoid'] ) ) {
		// NOTE: We might have an incomplete PageBundle here, with no HTML but with data-parsoid!
		// XXX: Do we need to support that, or can that just be a 400?
		$originalRendering = new PageBundle(
			$original['html']['body'] ?? '',
			$original['data-parsoid']['body'] ?? null,
			$original['data-mw']['body'] ?? null,
			null, // will be derived from $original['html']['headers']['content-type']
			$original['html']['headers'] ?? []
		);
	}

	if ( !$originalRevision && !empty( $original['revid'] ) ) {
		$originalRevision = (int)$original['revid'];
	}

	if ( $originalRevision || $originalRendering ) {
		$this->setOriginal( $originalRevision, $originalRendering );
	} else {
		// Nothing is known about the original; only record whether the page exists.
		$pageExists = $this->page->exists();
		$this->statsFactory
			->getCounter( 'html_input_transform_total' )
			->setLabel( 'original_html_given', 'false' )
			->setLabel( 'page_exists', $pageExists ? 'true' : 'false' )
			->setLabel( 'status', 'unknown' )
			->copyToStatsdAt(
				$pageExists
					? 'html_input_transform.original_html.not_given.page_exists'
					: 'html_input_transform.original_html.not_given.page_not_exist'
			)
			->increment();
	}

	if ( isset( $body['data-mw']['body'] ) ) {
		$this->transform->setModifiedDataMW( $body['data-mw']['body'] );
	}

	// An explicitly passed language object wins over the 'language' parameter.
	if ( $pageLanguage ) {
		$this->transform->setContentLanguage( $pageLanguage );
	} elseif ( isset( $parameters['language'] ) && $parameters['language'] !== '' ) {
		$pageLanguage = LanguageCode::normalizeNonstandardCodeAndWarn(
			$parameters['language']
		);
		$this->transform->setContentLanguage( $pageLanguage );
	}

	if ( isset( $original['source']['body'] ) ) {
		// XXX: do we really have to support wikitext overrides?
		$this->transform->setOriginalText( $original['source']['body'] );
	}
}
/**
 * Expose the underlying transform so that callers can supply additional
 * context through its setters.
 *
 * @return HtmlToContentTransform
 */
public function getTransform(): HtmlToContentTransform {
	$transform = $this->transform;

	return $transform;
}
/**
 * Replace the metrics sink, for this helper and for the transform if one
 * has already been created.
 *
 * @note Passing a StatsdDataFactoryInterface here has been deprecated
 * since 1.43. Such a call only emits a deprecation warning; the current
 * metrics sink is left untouched.
 *
 * @param StatsFactory|StatsdDataFactoryInterface $statsFactory
 */
public function setMetrics( $statsFactory ) {
	if ( $statsFactory instanceof StatsdDataFactoryInterface ) {
		// Legacy statsd factories are not applied: warn and keep the existing sink.
		wfDeprecated( __METHOD__ . ' with StatsdDataFactoryInterface', '1.43' );
	} else {
		$this->statsFactory = $statsFactory;

		// Keep the transform, if there is one yet, reporting to the same sink.
		if ( $this->transform ) {
			$this->transform->setMetrics( $statsFactory );
		}
	}
}
/**
 * Tell the transform which revision and rendering the input HTML was derived
 * from, so that selective serialization (selser) can be applied if possible.
 *
 * Depending on what is given, the original rendering is loaded by render ID,
 * derived from the revision, or used as supplied.
 *
 * @param RevisionRecord|int|null $rev
 * @param ParsoidRenderID|PageBundle|ParserOutput|null $originalRendering
 *
 * @throws HttpException With status 400 if loading by render ID fails with an
 *   invalid argument, 412 if no rendering is available for the render ID;
 *   other statuses may come from loading a rendering for the revision.
 */
public function setOriginal( $rev, $originalRendering ) {
	if ( $originalRendering instanceof ParsoidRenderID ) {
		// The client referenced a specific rendering: load the original data for it.
		$stashKey = $originalRendering;

		try {
			$context = $this->fetchSelserContextFromStash( $stashKey );
		} catch ( InvalidArgumentException $ex ) {
			$this->statsFactory
				->getCounter( 'html_input_transform_total' )
				->setLabel( 'original_html_given', 'as_renderid' )
				->setLabel( 'page_exists', 'unknown' )
				->setLabel( 'status', 'bad_renderid' )
				->copyToStatsdAt( 'html_input_transform.original_html.given.as_renderid.bad' )
				->increment();

			throw new LocalizedHttpException(
				new MessageValue( "rest-bad-stash-key" ),
				400,
				[
					'reason' => $ex->getMessage(),
					'key' => (string)$stashKey
				]
			);
		}

		if ( !$context ) {
			// NOTE: When the client asked for a specific stash key (resp. etag),
			//       we should fail with a 412 if we don't have the specific rendering.
			//       On the other hand, if the client only provided a base revision ID,
			//       we can re-parse and hope for the best.
			// TODO: This class should provide getETag and getLastModified methods for use by
			//       the REST endpoint, to provide proper support for conditionals.
			//       However, that requires some refactoring of how HTTP conditional checks
			//       work in the Handler base class.
			throw new LocalizedHttpException(
				new MessageValue( "rest-no-stashed-content", [ $stashKey->getKey() ] ), 412
			);
		}

		// Fall back to the revision recorded in the render ID.
		if ( !$rev ) {
			$rev = $stashKey->getRevisionID();
		}

		$originalRendering = $context->getPageBundle();

		$originalContent = $context->getContent();
		if ( $originalContent ) {
			$this->transform->setOriginalContent( $originalContent );
		}
	} elseif ( $originalRendering ) {
		// The rendering was supplied with the request; use it as is.
		$this->statsFactory->getCounter( 'html_input_transform_total' )
			->setLabel( 'original_html_given', 'true' )
			->setLabel( 'page_exists', 'unknown' )
			->setLabel( 'status', 'verbatim' )
			->copyToStatsdAt( 'html_input_transform.original_html.given.verbatim' )
			->increment();
	} elseif ( $rev ) {
		// The client provided a revision ID, but no stash key.
		// Try to get a rendering for the given revision, and use it as the basis for selser.
		// Chances are good that the resulting diff will be reasonably clean.
		// NOTE: If we don't have a revision ID, we should not attempt selser!
		$originalRendering = $this->fetchParserOutputFromParsoid( $this->page, $rev, true );

		if ( $originalRendering ) {
			$this->statsFactory->getCounter( 'html_input_transform_total' )
				->setLabel( 'original_html_given', 'as_revid' )
				->setLabel( 'page_exists', 'unknown' )
				->setLabel( 'status', 'found' )
				->copyToStatsdAt( 'html_input_transform.original_html.given.as_revid.found' )
				->increment();
		} else {
			$this->statsFactory->getCounter( 'html_input_transform_total' )
				->setLabel( 'original_html_given', 'as_revid' )
				->setLabel( 'page_exists', 'unknown' )
				->setLabel( 'status', 'not_found' )
				->copyToStatsdAt( 'html_input_transform.original_html.given.as_revid.not_found' )
				->increment();
		}
	}

	if ( $originalRendering instanceof ParserOutput ) {
		$originalRendering = PageBundleParserOutputConverter::pageBundleFromParserOutput( $originalRendering );

		// NOTE: Use the default if we got a ParserOutput object.
		//       Don't apply the default if we got passed a PageBundle,
		//       in that case, we want to require the version to be explicit.
		$hasContentType = isset( $originalRendering->headers['content-type'] );
		if ( $originalRendering->version === null && !$hasContentType ) {
			$originalRendering->version = Parsoid::defaultHTMLVersion();
		}
	}

	// Without a page bundle there is nothing to hand to the transform.
	if ( !$originalRendering instanceof PageBundle ) {
		return;
	}

	// Prefer the explicit version; otherwise try the content-type header.
	if ( $originalRendering->version !== null ) {
		$this->transform->setOriginalSchemaVersion( $originalRendering->version );
	} elseif ( !empty( $originalRendering->headers['content-type'] ) ) {
		$headerVersion = ParsoidFormatHelper::parseContentTypeHeader(
			// @phan-suppress-next-line PhanTypeArraySuspiciousNullable Silly Phan, we just checked.
			$originalRendering->headers['content-type']
		);

		if ( $headerVersion ) {
			$this->transform->setOriginalSchemaVersion( $headerVersion );
		}
	}

	if ( $rev instanceof RevisionRecord ) {
		$this->transform->setOriginalRevision( $rev );
	} elseif ( $rev && is_int( $rev ) ) {
		$this->transform->setOriginalRevisionId( $rev );
	}

	// NOTE: We might have an incomplete PageBundle here, with no HTML.
	//       PageBundle::$html is declared to not be nullable, so it would be set to the empty
	//       string if not given. Note however that it might also be null, since it's a public field.
	$originalHtml = $originalRendering->html;
	if ( $originalHtml !== null && $originalHtml !== '' ) {
		$this->transform->setOriginalHtml( $originalHtml );
	}

	if ( $originalRendering->parsoid !== null ) {
		$this->transform->setOriginalDataParsoid( $originalRendering->parsoid );
	}

	if ( $originalRendering->mw !== null ) {
		$this->transform->setOriginalDataMW( $originalRendering->mw );
	}
}
/**
 * Run the transform and translate its failures into HTTP errors.
 *
 * @return Content the content derived from the input HTML.
 * @throws HttpException With status 400 for client errors and unknown content
 *   models, 413 when a resource limit was exceeded.
 */
public function getContent(): Content {
	try {
		$content = $this->transform->htmlToContent();
	} catch ( ClientError $clientError ) {
		$reason = $clientError->getMessage();
		throw new LocalizedHttpException(
			new MessageValue( 'rest-html-backend-error', [ $reason ] ),
			400,
			[ 'reason' => $reason ]
		);
	} catch ( ResourceLimitExceededException $limitError ) {
		throw new LocalizedHttpException(
			new MessageValue( 'rest-resource-limit-exceeded' ),
			413,
			[ 'reason' => $limitError->getMessage() ]
		);
	} catch ( MWUnknownContentModelException $modelError ) {
		throw new LocalizedHttpException(
			new MessageValue( "rest-unknown-content-model", [ $modelError->getModelId() ] ),
			400
		);
	}

	return $content;
}
/**
 * Write the content derived from the input HTML into the given response,
 * along with a matching Content-Type header.
 *
 * @param ResponseInterface $response
 *
 * @throws HttpException If the transform fails, see getContent()
 */
public function putContent( ResponseInterface $response ) {
	$content = $this->getContent();
	$serialized = $content->serialize();

	try {
		$mimeType = ParsoidFormatHelper::getContentType(
			$content->getModel(),
			$this->envOptions['outputContentVersion']
		);
	} catch ( InvalidArgumentException $unknownToParsoid ) {
		// Parsoid has no content type for this model, so fall back to
		// the content's own default format.
		$mimeType = $content->getDefaultFormat();
	}

	$response->setHeader( 'Content-Type', $mimeType );

	$stream = $response->getBody();
	$stream->write( $serialized );
}
/**
 * Obtain the Parsoid rendering of a revision of a page.
 *
 * @param PageIdentity $page
 * @param RevisionRecord|int $revision A revision record, or a revision ID to look up
 * @param bool $mayParse If false, only a cached rendering is returned (or null);
 *   if true, the rendering is requested through getParserOutput().
 *
 * @return ParserOutput|null
 * @throws HttpException With status 404 if the page or revision cannot be used;
 *   errors reported while parsing are raised via throwHttpExceptionForStatus().
 */
private function fetchParserOutputFromParsoid( PageIdentity $page, $revision, bool $mayParse ): ?ParserOutput {
	$parserOptions = ParserOptions::newFromAnon();
	$parserOptions->setUseParsoid();

	try {
		// Resolve the page identity to a full page record.
		$pageRecord = $page;
		if ( !( $pageRecord instanceof PageRecord ) ) {
			$pageRecord = $this->pageLookup->getPageByReference( $page );
			if ( !$pageRecord ) {
				throw new RevisionAccessException(
					'Page {name} not found',
					[ 'name' => "$page" ]
				);
			}
		}

		// Resolve a bare revision ID to a revision record.
		$revisionRecord = $revision;
		if ( is_int( $revision ) ) {
			$revisionRecord = $this->revisionLookup->getRevisionById( $revision, 0, $pageRecord );
			if ( !$revisionRecord ) {
				throw new RevisionAccessException(
					'Revision {revId} not found',
					[ 'revId' => $revision ]
				);
			}
		}

		if ( $pageRecord->getId() !== $revisionRecord->getPageId() ) {
			throw new RevisionAccessException(
				'Revision {revId} does not belong to page {name}',
				[
					'name' => $pageRecord->getDBkey(),
					'revId' => $revisionRecord->getId()
				]
			);
		}

		if ( !$mayParse ) {
			// Cache lookup only.
			return $this->parserOutputAccess->getCachedParserOutput(
				$pageRecord, $parserOptions, $revisionRecord
			);
		}

		try {
			$status = $this->parserOutputAccess->getParserOutput(
				$pageRecord, $parserOptions, $revisionRecord
			);
		} catch ( ClientError $clientError ) {
			$status = Status::newFatal( 'parsoid-client-error', $clientError->getMessage() );
		} catch ( ResourceLimitExceededException $limitError ) {
			$status = Status::newFatal( 'parsoid-resource-limit-exceeded', $limitError->getMessage() );
		}

		if ( !$status->isOK() ) {
			$this->throwHttpExceptionForStatus( $status );
		}

		return $status->getValue();
	} catch ( RevisionAccessException $accessError ) {
		// The client supplied bad revision ID, or the revision was deleted or suppressed.
		throw new LocalizedHttpException(
			new MessageValue( "rest-specified-revision-unavailable" ),
			404,
			[ 'reason' => $accessError->getMessage() ]
		);
	}
}
/**
 * Load the selser context for a render ID: first from the stash, then, as a
 * fallback, from a cached rendering of the revision named by the render ID.
 * Each outcome is counted.
 *
 * NOTE(review): setOriginal() wraps this call in a catch for
 * InvalidArgumentException; which callee raises it is not visible here — confirm.
 *
 * @param ParsoidRenderID $renderID
 *
 * @return SelserContext|null Null if no matching rendering could be found
 */
private function fetchSelserContextFromStash( $renderID ): ?SelserContext {
	$stashed = $this->parsoidOutputStash->get( $renderID );

	$labels = [
		'original_html_given' => 'as_renderid',
		'page_exists' => 'unknown',
		'status' => 'hit-stashed'
	];
	$counter = $this->statsFactory->getCounter( 'html_input_transform_total' );

	// Record one outcome, overriding only the 'status' label.
	$count = static function ( string $status, string $statsdKey ) use ( $counter, $labels ): void {
		$labels[ 'status' ] = $status;
		$counter->setLabels( $labels )
			->copyToStatsdAt( $statsdKey )
			->increment();
	};

	if ( $stashed ) {
		$count(
			'hit-stashed',
			'html_input_transform.original_html.given.as_renderid.stash_hit.found.hit'
		);
		return $stashed;
	}

	// Looks like the rendering is gone from stash (or the client sent us a bogus key).
	// Try to load it from the parser cache instead.
	// On a wiki with low edit frequency, there is a good chance that it's still there.
	try {
		$revId = $renderID->getRevisionID();
		$cached = $this->fetchParserOutputFromParsoid( $this->page, $revId, false );

		if ( !$cached ) {
			$count(
				'miss-fallback_not_found',
				'html_input_transform.original_html.given.as_renderid.stash_miss_pc_fallback.not_found.miss'
			);
			return null;
		}

		$cachedRenderID = ParsoidRenderID::newFromParserOutput( $cached );
		if ( $cachedRenderID->getKey() !== $renderID->getKey() ) {
			$count(
				'mismatch-fallback_not_found',
				'html_input_transform.original_html.given.as_renderid.' .
					'stash_miss_pc_fallback.not_found.mismatch'
			);
			// It's not the correct rendering.
			return null;
		}

		$count(
			'hit-fallback_found',
			'html_input_transform.original_html.given.as_renderid.' .
				'stash_miss_pc_fallback.found.hit'
		);

		$bundle = PageBundleParserOutputConverter::pageBundleFromParserOutput( $cached );
		return new SelserContext( $bundle, $renderID->getRevisionID() );
	} catch ( HttpException $e ) {
		$count(
			'failed-fallback_not_found',
			'html_input_transform.original_html.given.as_renderid.' .
				'stash_miss_pc_fallback.not_found.failed'
		);
		// If the revision isn't found, don't trigger a 404. Return null to trigger a 412.
		return null;
	}
}
/**
 * Turn a failed status into an HTTP error: 413 if it carries the
 * 'parsoid-resource-limit-exceeded' message, 400 otherwise. The status
 * rendered as HTML is attached as the 'reason'.
 *
 * @param Status $status
 *
 * @return never
 * @throws HttpException Always
 */
private function throwHttpExceptionForStatus( Status $status ) {
	// TODO: make this nicer.
	if ( $status->hasMessage( 'parsoid-resource-limit-exceeded' ) ) {
		$messageKey = "rest-parsoid-resource-exceeded";
		$httpCode = 413;
	} else {
		$messageKey = "rest-parsoid-error";
		$httpCode = 400;
	}

	throw new LocalizedHttpException(
		new MessageValue( $messageKey ),
		$httpCode,
		[ 'reason' => $status->getHTML() ]
	);
}
}