wiki.techinc.nl/includes/Rest/Handler/RevisionHTMLHandler.php
C. Scott Ananian cfd9c516e1 Allow setting a ParserOption to generate Parsoid HTML
This is an initial quick-and-dirty implementation.  The
ParsoidParser class will eventually inherit from \Parser,
but this is an initial placeholder to unblock other Parsoid
read views work.

Currently Parsoid does not fully implement all the ParserOutput
metadata set by the legacy parser, but we're working on it.

This patch also addresses T300325 by ensuring that the Page HTML
APIs use ParserOutput::getRawText(), which will return the entire
Parsoid HTML document without post-processing.  This is what
the Parsoid team refers to as "edit mode" HTML. The
ParserOutput::getText() method returns only the <body> contents
of the HTML, and applies several transformations, including
inserting Table of Contents and style deduplication; this is
the "read views" flavor of the Parsoid HTML.

We need to be careful of the interaction of the `useParsoid` flag with
the ParserCacheMetadata.  Effectively `useParsoid` should *always* be
marked as "used" or else the ParserCache will assume its value doesn't
matter and will serve legacy content for parsoid requests and
vice-versa.  T330677 is a follow-up to address this more thoroughly by
splitting the parser cache in ParserOutputAccess; the stopgap in this
patch is fragile and, because it doesn't fork the ParserCacheMetadata
cache, may corrupt the ParserCacheMetadata in the case when Parsoid
and the legacy parser consult different sets of options to render a
page.

Bug: T300191
Bug: T330677
Bug: T300325
Change-Id: Ica09a4284c00d7917f8b6249e946232b2fb38011
2023-03-26 21:46:05 -04:00

153 lines
4.6 KiB
PHP

<?php
namespace MediaWiki\Rest\Handler;
use LogicException;
use MediaWiki\MediaWikiServices;
use MediaWiki\Rest\Handler\Helper\HtmlOutputRendererHelper;
use MediaWiki\Rest\Handler\Helper\PageRestHelperFactory;
use MediaWiki\Rest\Handler\Helper\RevisionContentHelper;
use MediaWiki\Rest\LocalizedHttpException;
use MediaWiki\Rest\Response;
use MediaWiki\Rest\SimpleHandler;
use MediaWiki\Rest\StringStream;
use Wikimedia\Assert\Assert;
/**
* A handler that returns Parsoid HTML for the following routes:
* - /revision/{revision}/html,
* - /revision/{revision}/with_html
*
* Class RevisionHTMLHandler
* @package MediaWiki\Rest\Handler
*/
class RevisionHTMLHandler extends SimpleHandler {

	/** @var HtmlOutputRendererHelper Supplies the rendered output, its headers, ETag and Last-Modified */
	private $htmlHelper;

	/** @var RevisionContentHelper Supplies the target page/revision, access checks and metadata */
	private $contentHelper;

	/**
	 * @param PageRestHelperFactory $helperFactory Factory used to create both helpers
	 */
	public function __construct( PageRestHelperFactory $helperFactory ) {
		$this->contentHelper = $helperFactory->newRevisionContentHelper();
		$this->htmlHelper = $helperFactory->newHtmlOutputRendererHelper();
	}

	/**
	 * Initialises both helpers from the validated request parameters.
	 *
	 * The HTML helper is only initialised when the content helper could
	 * resolve both a page and a target revision.
	 */
	protected function postValidationSetup() {
		// TODO: Once Authority supports rate limit (T310476), just inject the Authority.
		$userFactory = MediaWikiServices::getInstance()->getUserFactory();
		$performer = $userFactory->newFromUserIdentity( $this->getAuthority()->getUser() );
		$params = $this->getValidatedParams();

		$this->contentHelper->init( $performer, $params );

		$targetPage = $this->contentHelper->getPage();
		$targetRevision = $this->contentHelper->getTargetRevision();
		if ( !$targetPage || !$targetRevision ) {
			// Nothing to render; the HTML helper stays uninitialised.
			return;
		}

		$this->htmlHelper->init( $targetPage, $params, $performer, $targetRevision );

		// Forward a non-empty Accept-Language header as the variant conversion language.
		$variantLanguage = $this->getRequest()->getHeaderLine( 'Accept-Language' );
		if ( $variantLanguage ) {
			$this->htmlHelper->setVariantConversionLanguage( $variantLanguage );
		}
	}

	/**
	 * Builds the response for the configured output mode.
	 *
	 * @return Response
	 * @throws LocalizedHttpException
	 */
	public function run(): Response {
		$this->contentHelper->checkAccess();

		$targetPage = $this->contentHelper->getPage();
		$targetRevision = $this->contentHelper->getTargetRevision();

		// Neither getter is expected to return null once checkAccess() has
		// returned without throwing.
		Assert::invariant( $targetPage !== null, 'Page should be known' );
		Assert::invariant( $targetRevision !== null, 'Revision should be known' );

		$outputMode = $this->getOutputMode();

		if ( $outputMode === 'html' ) {
			return $this->buildBareHtmlResponse();
		}

		if ( $outputMode === 'with_html' ) {
			return $this->buildHtmlWithMetadataResponse();
		}

		throw new LogicException( "Unknown HTML type $outputMode" );
	}

	/**
	 * Builds the 'html' mode response: a text/html response whose body is
	 * the raw text of the parser output.
	 *
	 * @return Response
	 */
	private function buildBareHtmlResponse(): Response {
		$rendered = $this->htmlHelper->getHtml();

		$htmlResponse = $this->getResponseFactory()->create();
		// TODO: need to respect content-type returned by Parsoid.
		$htmlResponse->setHeader( 'Content-Type', 'text/html' );
		// Second argument: also set the content language header.
		$this->htmlHelper->putHeaders( $htmlResponse, true );
		$this->contentHelper->setCacheControl( $htmlResponse, $rendered->getCacheExpiry() );
		$htmlResponse->setBody( new StringStream( $rendered->getRawText() ) );

		return $htmlResponse;
	}

	/**
	 * Builds the 'with_html' mode response: a JSON response made of the
	 * content helper's metadata plus an 'html' entry holding the raw text
	 * of the parser output.
	 *
	 * @return Response
	 */
	private function buildHtmlWithMetadataResponse(): Response {
		$rendered = $this->htmlHelper->getHtml();

		$payload = $this->contentHelper->constructMetadata();
		$payload['html'] = $rendered->getRawText();

		$jsonResponse = $this->getResponseFactory()->createJson( $payload );
		// A content language header makes no sense for a JSON body, so skip it.
		$this->htmlHelper->putHeaders( $jsonResponse, false );
		$this->contentHelper->setCacheControl( $jsonResponse, $rendered->getCacheExpiry() );

		return $jsonResponse;
	}

	/**
	 * Returns the HTML helper's ETag for the current output mode, or null
	 * when the content helper reports the content as not accessible.
	 *
	 * @return string|null
	 */
	protected function getETag(): ?string {
		// The output mode is passed along so the ETag varies with it.
		return $this->contentHelper->isAccessible()
			? $this->htmlHelper->getETag( $this->getOutputMode() )
			: null;
	}

	/**
	 * Returns the HTML helper's last-modified value, or null when the
	 * content helper reports the content as not accessible.
	 *
	 * @return string|null
	 */
	protected function getLastModified(): ?string {
		return $this->contentHelper->isAccessible()
			? $this->htmlHelper->getLastModified()
			: null;
	}

	/**
	 * Reads the output mode from the 'format' entry of the handler config.
	 *
	 * @return string
	 */
	private function getOutputMode(): string {
		$config = $this->getConfig();

		return $config['format'];
	}

	/**
	 * This handler declares that it does not need write access.
	 *
	 * @return bool Always false
	 */
	public function needsWriteAccess(): bool {
		return false;
	}

	/**
	 * Combines the parameter settings of both helpers; on duplicate string
	 * keys the HTML helper's entry wins (array_merge semantics).
	 *
	 * @return array
	 */
	public function getParamSettings(): array {
		$contentSettings = $this->contentHelper->getParamSettings();
		$htmlSettings = $this->htmlHelper->getParamSettings();

		return array_merge( $contentSettings, $htmlSettings );
	}

	/**
	 * Delegates to the content helper's hasContent().
	 *
	 * @return bool
	 */
	protected function hasRepresentation() {
		return $this->contentHelper->hasContent();
	}
}