REST: /page/{title}/{bare,html,with_html} endpoints backed by RESTBase.

Bug: T234377
Bug: T234375
Change-Id: I77709c17e951e3efb542028e5c0d53eedda8c7bf
This commit is contained in:
Petr Pchelko 2020-01-16 15:40:58 -08:00
parent 146d1fee14
commit a136005a35
9 changed files with 456 additions and 144 deletions

View file

@ -0,0 +1,123 @@
<?php
namespace MediaWiki\Rest\Handler;
use Config;
use MediaWiki\Permissions\PermissionManager;
use MediaWiki\Rest\SimpleHandler;
use MediaWiki\Revision\RevisionLookup;
use MediaWiki\Revision\RevisionRecord;
use RequestContext;
use Title;
use TitleFormatter;
use User;
use Wikimedia\ParamValidator\ParamValidator;
/**
 * Shared base for REST handlers that serve data about the latest revision of the
 * page named by the `title` path parameter.
 *
 * It memoizes the Title and latest-revision lookups, checks the 'read' permission
 * and builds the metadata array common to the page endpoints.
 */
abstract class LatestPageContentHandler extends SimpleHandler {

	/** @var Config */
	protected $config;

	/** @var PermissionManager */
	protected $permissionManager;

	/** @var RevisionLookup */
	protected $revisionLookup;

	/** @var TitleFormatter */
	protected $titleFormatter;

	/** @var User */
	protected $user;

	/** @var RevisionRecord|bool|null Memoized lookup result; null until first requested */
	private $latestRevision;

	/** @var Title|bool|null Memoized lookup result; null until first requested */
	private $titleObject;

	/**
	 * @param Config $config
	 * @param PermissionManager $permissionManager
	 * @param RevisionLookup $revisionLookup
	 * @param TitleFormatter $titleFormatter
	 */
	public function __construct(
		Config $config,
		PermissionManager $permissionManager,
		RevisionLookup $revisionLookup,
		TitleFormatter $titleFormatter
	) {
		$this->config = $config;
		$this->permissionManager = $permissionManager;
		$this->revisionLookup = $revisionLookup;
		$this->titleFormatter = $titleFormatter;

		// @todo Inject this, when there is a good way to do that
		$this->user = RequestContext::getMain()->getUser();
	}

	/**
	 * Resolve the validated `title` parameter to a Title, caching the outcome.
	 *
	 * @return Title|bool Title or false if unable to retrieve title
	 */
	protected function getTitle() {
		if ( $this->titleObject !== null ) {
			return $this->titleObject;
		}

		$parsed = Title::newFromText( $this->getValidatedParams()['title'] );
		// A null result is stored as false so the lookup is not repeated.
		$this->titleObject = $parsed === null ? false : $parsed;

		return $this->titleObject;
	}

	/**
	 * Look up the current revision of the requested page, caching the outcome.
	 *
	 * @return RevisionRecord|bool latest revision or false if unable to retrieve revision
	 */
	protected function getLatestRevision() {
		if ( $this->latestRevision !== null ) {
			return $this->latestRevision;
		}

		$page = $this->getTitle();
		$pageExists = $page && $page->getArticleID();
		$this->latestRevision = $pageExists
			? $this->revisionLookup->getKnownCurrentRevision( $page )
			: false;

		return $this->latestRevision;
	}

	/**
	 * Whether the current user has the 'read' permission on the given title.
	 *
	 * @param Title $titleObject
	 * @return bool
	 */
	protected function isAccessible( $titleObject ): bool {
		$canRead = $this->permissionManager->userCan( 'read', $this->user, $titleObject );
		return $canRead;
	}

	/**
	 * Build the metadata fields shared by the page endpoints.
	 *
	 * @param Title $titleObject
	 * @param RevisionRecord $revision
	 * @return array with keys id, key, title, latest, content_model and license
	 */
	protected function constructMetadata(
		Title $titleObject,
		RevisionRecord $revision
	): array {
		$metadata = [
			'id' => $titleObject->getArticleID(),
			'key' => $this->titleFormatter->getPrefixedDbKey( $titleObject ),
			'title' => $this->titleFormatter->getPrefixedText( $titleObject ),
		];
		$metadata['latest'] = [
			'id' => $revision->getId(),
			'timestamp' => wfTimestampOrNull( TS_ISO_8601, $revision->getTimestamp() )
		];
		$metadata['content_model'] = $titleObject->getContentModel();
		$metadata['license'] = [
			'url' => $this->config->get( 'RightsUrl' ),
			'title' => $this->config->get( 'RightsText' )
		];

		return $metadata;
	}

	/**
	 * @return bool always false
	 */
	public function needsWriteAccess(): bool {
		return false;
	}

	/**
	 * @return array parameter settings declaring the required `title` path parameter
	 */
	public function getParamSettings(): array {
		$titleParam = [
			self::PARAM_SOURCE => 'path',
			ParamValidator::PARAM_TYPE => 'string',
			ParamValidator::PARAM_REQUIRED => true,
		];

		return [ 'title' => $titleParam ];
	}
}

View file

@ -0,0 +1,245 @@
<?php
namespace MediaWiki\Rest\Handler;
use Config;
use ConfigException;
use Exception;
use GuzzleHttp\Psr7\Uri;
use LogicException;
use MediaWiki\Linker\LinkTarget;
use MediaWiki\Permissions\PermissionManager;
use MediaWiki\Rest\LocalizedHttpException;
use MediaWiki\Rest\Response;
use MediaWiki\Rest\StringStream;
use MediaWiki\Revision\RevisionLookup;
use RestbaseVirtualRESTService;
use TitleFormatter;
use UIDGenerator;
use VirtualRESTServiceClient;
use WebRequest;
use Wikimedia\Message\MessageValue;
use Wikimedia\ParamValidator\ParamValidator;
/**
 * A handler that returns Parsoid HTML and/or page metadata for the following routes:
 * - /page/{title}/bare
 * - /page/{title}/html
 * - /page/{title}/with_html
 *
 * Currently the HTML is fetched from RESTBase, thus in order to use the routes,
 * RESTBase must be installed and VirtualRESTService for RESTBase needs to be configured.
 *
 * @package MediaWiki\Rest\Handler
 */
class PageHTMLHandler extends LatestPageContentHandler {
	// NOTE(review): this value is emitted as "maxage=<n>" in Cache-Control. The standard
	// directive is spelled "max-age"; the literal is left untouched here because API tests
	// assert on it. Fix both together.
	private const MAX_AGE_200 = 5;

	/** @var VirtualRESTServiceClient */
	private $restClient;

	/** @var array|null RESTBase response memoized for the lifetime of this handler */
	private $htmlResponse;

	/**
	 * PageHTMLHandler constructor.
	 * @param Config $config
	 * @param PermissionManager $permissionManager
	 * @param RevisionLookup $revisionLookup
	 * @param TitleFormatter $titleFormatter
	 * @param VirtualRESTServiceClient $virtualRESTServiceClient
	 */
	public function __construct(
		Config $config,
		PermissionManager $permissionManager,
		RevisionLookup $revisionLookup,
		TitleFormatter $titleFormatter,
		VirtualRESTServiceClient $virtualRESTServiceClient
	) {
		parent::__construct( $config, $permissionManager, $revisionLookup, $titleFormatter );
		$this->restClient = $virtualRESTServiceClient;
	}

	/**
	 * Fetch the HTML of the given page from RESTBase, mounting the RESTBase virtual
	 * service on first use. The raw response (whatever its status) is memoized, so
	 * getETag(), getLastModified() and run() share a single backend request.
	 *
	 * @param LinkTarget $title
	 * @return array Response array as returned by VirtualRESTServiceClient::run()
	 * @throws LocalizedHttpException with status 500 if the RESTBase service cannot be mounted
	 */
	private function fetchHtmlFromRESTBase( LinkTarget $title ): array {
		if ( $this->htmlResponse !== null ) {
			return $this->htmlResponse;
		}

		[ , $service ] = $this->restClient->getMountAndService( '/restbase/' );
		if ( !$service ) {
			try {
				$restConfig = $this->config->get( 'VirtualRestConfig' );
				if ( !isset( $restConfig['modules']['restbase'] ) ) {
					throw new ConfigException(
						__CLASS__ . " requires restbase module configured for VirtualRestConfig"
					);
				}
				$this->restClient->mount( '/restbase/',
					new RestbaseVirtualRESTService( $restConfig['modules']['restbase'] ) );
			} catch ( Exception $e ) {
				// This would usually be config exception, but let's fail on any exception
				throw new LocalizedHttpException( MessageValue::new( 'rest-html-backend-error' ), 500 );
			}
		}

		$this->htmlResponse = $this->restClient->run( [
			'method' => 'GET',
			'url' => '/restbase/local/v1/page/html/' .
				urlencode( $this->titleFormatter->getPrefixedDBkey( $title ) ) .
				'?redirect=false'
		] );
		return $this->htmlResponse;
	}

	/**
	 * Same as fetchHtmlFromRESTBase(), but turns every non-200 backend response into
	 * an error.
	 *
	 * Backend 4xx/5xx codes are passed through to the client. Any other code (for
	 * example 0, or a 3xx) is not a valid error status for our own response, so it is
	 * reported as 500.
	 *
	 * @param LinkTarget $title
	 * @return array RESTBase response whose 'code' is 200
	 * @throws LocalizedHttpException if the backend did not answer with 200
	 */
	private function fetch200HtmlFromRESTBase( LinkTarget $title ): array {
		$restbaseResp = $this->fetchHtmlFromRESTBase( $title );
		$code = $restbaseResp['code'];
		if ( $code !== 200 ) {
			$status = ( $code >= 400 && $code <= 599 ) ? $code : 500;
			throw new LocalizedHttpException(
				MessageValue::new( 'rest-html-backend-error' ),
				$status
			);
		}
		return $restbaseResp;
	}

	/**
	 * Build the URL of the sibling `html` route by replacing the last path segment
	 * of the current request URL.
	 *
	 * @return string
	 */
	private function constructHtmlUrl(): string {
		$wr = new WebRequest();
		$urlParts = wfParseUrl( $wr->getFullRequestURL() );
		$currentPathParts = explode( '/', $urlParts['path'] );
		$currentPathParts[ count( $currentPathParts ) - 1 ] = 'html';
		$urlParts['path'] = implode( '/', $currentPathParts );
		// Uri::fromParts() returns a Uri object; convert explicitly to honour the
		// declared return type instead of relying on implicit coercion.
		return (string)Uri::fromParts( $urlParts );
	}

	/**
	 * @param string $title
	 * @param string $htmlType One of 'bare', 'html' or 'with_html'
	 * @return Response
	 * @throws LocalizedHttpException 404 for a missing page or revision, 403 if the page
	 *  is not readable by the current user, or a backend error when fetching HTML fails
	 */
	public function run( string $title, string $htmlType ): Response {
		$titleObj = $this->getTitle();
		if ( !$titleObj || !$titleObj->getArticleID() ) {
			throw new LocalizedHttpException(
				MessageValue::new( 'rest-nonexistent-title' )->plaintextParams( $title ),
				404
			);
		}

		if ( !$this->isAccessible( $titleObj ) ) {
			throw new LocalizedHttpException(
				MessageValue::new( 'rest-permission-denied-title' )->plaintextParams( $title ),
				403
			);
		}

		$revision = $this->getLatestRevision();
		if ( !$revision ) {
			throw new LocalizedHttpException(
				MessageValue::new( 'rest-no-revision' )->plaintextParams( $title ),
				404
			);
		}

		switch ( $htmlType ) {
			case 'bare':
				// Metadata only, plus a link to the route that serves the HTML
				$body = $this->constructMetadata( $titleObj, $revision );
				$body['html_url'] = $this->constructHtmlUrl();
				$response = $this->getResponseFactory()->createJson( $body );
				break;
			case 'html':
				// Backend body passed through with the backend's content type
				$restbaseResp = $this->fetch200HtmlFromRESTBase( $titleObj );
				$response = $this->getResponseFactory()->create();
				$response->setHeader( 'Content-Type', $restbaseResp[ 'headers' ][ 'content-type' ] );
				$response->setBody( new StringStream( $restbaseResp[ 'body' ] ) );
				break;
			case 'with_html':
				// Metadata with the HTML embedded as a JSON string
				$restbaseResp = $this->fetch200HtmlFromRESTBase( $titleObj );
				$body = $this->constructMetadata( $titleObj, $revision );
				$body['html'] = $restbaseResp['body'];
				$response = $this->getResponseFactory()->createJson( $body );
				break;
			default:
				throw new LogicException( "Unknown HTML type $htmlType" );
		}

		$response->setHeader( 'Cache-Control', 'maxage=' . self::MAX_AGE_200 );
		return $response;
	}

	/**
	 * Returns an ETag for the response. For the `bare` type it is the quoted ID of
	 * the latest revision; for the HTML types it is the `etag` header of the RESTBase
	 * response. No ETag is produced when the page is missing, unreadable, or has no
	 * retrievable revision.
	 *
	 * @return string|null
	 * @throws LocalizedHttpException
	 */
	protected function getETag(): ?string {
		$title = $this->getTitle();
		if ( !$title || !$title->getArticleID() || !$this->isAccessible( $title ) ) {
			return null;
		}

		if ( $this->getValidatedParams()['html_type'] === 'bare' ) {
			// getLatestRevision() may return false even though the title exists
			$revision = $this->getLatestRevision();
			return $revision ? '"' . $revision->getId() . '"' : null;
		}

		$restbaseRes = $this->fetch200HtmlFromRESTBase( $title );
		return $restbaseRes['headers']['etag'] ?? null;
	}

	/**
	 * Returns the last-modified timestamp for the response. For the `bare` type it
	 * is the timestamp of the latest revision; for the HTML types it is derived from
	 * the second component of the RESTBase ETag.
	 *
	 * @return string|null
	 * @throws LocalizedHttpException
	 */
	protected function getLastModified(): ?string {
		$title = $this->getTitle();
		if ( !$title || !$title->getArticleID() || !$this->isAccessible( $title ) ) {
			return null;
		}

		if ( $this->getValidatedParams()['html_type'] === 'bare' ) {
			// getLatestRevision() may return false even though the title exists
			$revision = $this->getLatestRevision();
			return $revision ? $revision->getTimestamp() : null;
		}

		$restbaseRes = $this->fetch200HtmlFromRESTBase( $title );
		$restbaseEtag = $restbaseRes['headers']['etag'] ?? null;
		if ( !$restbaseEtag ) {
			return null;
		}

		// Matches an optionally weak, optionally quoted ETag of the shape <first>/<second>
		// and captures <second>, which is then decoded as a v1 UUID.
		// NOTE(review): presumably <first> is the revision ID and <second> a time-based
		// render ID - confirm against the RESTBase documentation.
		$etagComponents = [];
		if ( !preg_match( '/^(?:W\/)?"?[^"\/]+(?:\/([^"\/]+))"?$/',
			$restbaseEtag, $etagComponents )
		) {
			return null;
		}

		return UIDGenerator::getTimestampFromUUIDv1( $etagComponents[1] ) ?: null;
	}

	/**
	 * @return array the parent's settings plus the required `html_type` path parameter
	 */
	public function getParamSettings(): array {
		$parentSettings = parent::getParamSettings();
		$parentSettings['html_type'] = [
			self::PARAM_SOURCE => 'path',
			ParamValidator::PARAM_TYPE => [ 'bare', 'html', 'with_html' ],
			ParamValidator::PARAM_REQUIRED => true,
		];
		return $parentSettings;
	}
}

View file

@ -2,206 +2,98 @@
namespace MediaWiki\Rest\Handler;
use Config;
use MediaWiki\Permissions\PermissionManager;
use MediaWiki\Rest\LocalizedHttpException;
use MediaWiki\Rest\Response;
use MediaWiki\Rest\SimpleHandler;
use MediaWiki\Revision\RevisionAccessException;
use MediaWiki\Revision\RevisionLookup;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;
use MediaWiki\Revision\SuppressedDataException;
use RequestContext;
use TextContent;
use Title;
use TitleFormatter;
use User;
use Wikimedia\Message\MessageValue;
use Wikimedia\Message\ParamType;
use Wikimedia\Message\ScalarParam;
use Wikimedia\ParamValidator\ParamValidator;
/**
* Handler class for Core REST API Page Source endpoint
*/
class PageSourceHandler extends SimpleHandler {
class PageSourceHandler extends LatestPageContentHandler {
private const MAX_AGE_200 = 5;
/** @var RevisionLookup */
private $revisionLookup;
/** @var TitleFormatter */
private $titleFormatter;
/** @var PermissionManager */
private $permissionManager;
/** @var User */
private $user;
/** @var Config */
private $config;
/** @var RevisionRecord|bool */
private $revision;
/** @var Title */
private $titleObject;
/**
* @param RevisionLookup $revisionLookup
* @param TitleFormatter $titleFormatter
* @param PermissionManager $permissionManager
* @param Config $config
*/
public function __construct(
RevisionLookup $revisionLookup,
TitleFormatter $titleFormatter,
PermissionManager $permissionManager,
Config $config
) {
$this->revisionLookup = $revisionLookup;
$this->titleFormatter = $titleFormatter;
$this->permissionManager = $permissionManager;
$this->config = $config;
// @todo Inject this, when there is a good way to do that
$this->user = RequestContext::getMain()->getUser();
}
// Default to main slot
private function getRole() {
private function getRole(): string {
return SlotRecord::MAIN;
}
/**
* @param string $slotRole
* @param RevisionRecord $revision
* @return TextContent $content
* @throws LocalizedHttpException slot content is not TextContent or Revision/Slot is inaccessible
*/
private function getPageContent( RevisionRecord $revision ) {
protected function getPageContent( string $slotRole, RevisionRecord $revision ): TextContent {
try {
$content = $revision
->getSlot( $this->getRole(), RevisionRecord::FOR_THIS_USER, $this->user )
->getSlot( $slotRole, RevisionRecord::FOR_THIS_USER, $this->user )
->getContent()
->convert( CONTENT_MODEL_TEXT );
if ( !( $content instanceof TextContent ) ) {
throw new LocalizedHttpException( new MessageValue( 'rest-page-source-type-error' ), 400 );
throw new LocalizedHttpException( MessageValue::new( 'rest-page-source-type-error' ), 400 );
}
} catch ( SuppressedDataException $e ) {
throw new LocalizedHttpException(
new MessageValue( 'rest-permission-denied-revision',
[ new ScalarParam( ParamType::NUM, $revision->getId() ) ]
),
MessageValue::new( 'rest-permission-denied-revision' )->numParams( $revision->getId() ),
403
);
} catch ( RevisionAccessException $e ) {
throw new LocalizedHttpException(
new MessageValue( 'rest-nonexistent-revision',
[ new ScalarParam( ParamType::NUM, $revision->getId() ) ]
),
MessageValue::new( 'rest-nonexistent-revision' )->numParams( $revision->getId() ),
404
);
}
return $content;
}
private function isAccessible( $titleObject ) {
return $this->permissionManager->userCan( 'read', $this->user, $titleObject );
}
/**
 * Return the metadata and the text source of the latest revision of a page.
 *
 * @param string $title
 * @return Response JSON response with the page metadata plus a `source` field
 * @throws LocalizedHttpException 404 if the page or its latest revision cannot be
 *  found, 403 if the current user may not read the page, or whatever
 *  getPageContent() throws for inaccessible or non-text content
 */
public function run( string $title ): Response {
	$titleObject = $this->getTitle();
	if ( !$titleObject || !$titleObject->getArticleID() ) {
		throw new LocalizedHttpException(
			MessageValue::new( 'rest-nonexistent-title' )->plaintextParams( $title ),
			404
		);
	}

	if ( !$this->isAccessible( $titleObject ) ) {
		throw new LocalizedHttpException(
			MessageValue::new( 'rest-permission-denied-title' )->plaintextParams( $title ),
			403
		);
	}

	$revision = $this->getLatestRevision();
	if ( !$revision ) {
		throw new LocalizedHttpException(
			// The message text contains a $1 placeholder for the title, so the
			// parameter must be supplied.
			MessageValue::new( 'rest-no-revision' )->plaintextParams( $title ),
			404
		);
	}

	$content = $this->getPageContent( $this->getRole(), $revision );
	$body = $this->constructMetadata( $titleObject, $revision );
	$body['source'] = $content->getText();

	$response = $this->getResponseFactory()->createJson( $body );
	$response->setHeader( 'Cache-Control', 'maxage=' . self::MAX_AGE_200 );
	return $response;
}
public function needsWriteAccess() {
return false;
}
/**
* @return RevisionRecord|bool latest revision or false if unable to retrieve revision
*/
private function getRevision() {
if ( $this->revision === null ) {
$title = $this->getTitle();
if ( $title && $title->getArticleID() ) {
$this->revision = $this->revisionLookup->getKnownCurrentRevision( $title );
} else {
$this->revision = false;
}
}
return $this->revision;
}
/**
* @return Title|bool Title or false if unable to retrieve title
*/
private function getTitle() {
if ( $this->titleObject === null ) {
$this->titleObject = Title::newFromText( $this->getValidatedParams()['title'] ) ?? false;
}
return $this->titleObject;
}
/**
* Returns an ETag representing a page's source. The ETag assumes a page's source has changed
* if the latest revision of a page has been made private, un-readable for another reason,
* or a newer revision exists.
* @return string
*/
protected function getETag() {
$revision = $this->getRevision();
protected function getETag(): string {
$revision = $this->getLatestRevision();
$latestRevision = $revision ? $revision->getID() : 'e0';
$isAccessible = $this->isAccessible( $this->getTitle() );
@ -214,20 +106,11 @@ class PageSourceHandler extends SimpleHandler {
/**
* @return string|null
*/
protected function getLastModified() {
$revision = $this->getRevision();
protected function getLastModified(): ?string {
$revision = $this->getLatestRevision();
if ( $revision ) {
return $this->revision->getTimestamp();
return $revision->getTimestamp();
}
}
public function getParamSettings() {
return [
'title' => [
self::PARAM_SOURCE => 'path',
ParamValidator::PARAM_TYPE => 'string',
ParamValidator::PARAM_REQUIRED => true,
],
];
return null;
}
}

View file

@ -0,0 +1 @@
<?php

View file

@ -65,10 +65,21 @@
"path": "/coredev/v0/page/{title}",
"class": "MediaWiki\\Rest\\Handler\\PageSourceHandler",
"services": [
"MainConfig",
"PermissionManager",
"RevisionLookup",
"TitleFormatter"
]
},
{
"path": "/coredev/v0/page/{title}/{html_type}",
"class": "MediaWiki\\Rest\\Handler\\PageHTMLHandler",
"services": [
"MainConfig",
"PermissionManager",
"RevisionLookup",
"TitleFormatter",
"PermissionManager",
"MainConfig"
"VirtualRESTServiceClient"
]
},
{

View file

@ -28,5 +28,6 @@
"rest-search-error": "Error when returning search results: $1",
"rest-page-source-type-error": "The content type of the page source requested is unsupported",
"rest-no-revision" : "Unable to retrieve revision for title $1",
"rest-media-too-many-links": "Too many media links found on title $1 ($2 allowed)"
"rest-media-too-many-links": "Too many media links found on title $1 ($2 allowed)",
"rest-html-backend-error": "Unable to fetch Parsoid HTML"
}

View file

@ -29,5 +29,6 @@
"rest-search-error": "Error message for REST API debugging, shown when an error occurs from search engine/database while searching for term. Parameters:\n* $1: The error message key.",
"rest-page-source-type-error": "Error message for REST API debugging, shown when trying to retrieve content for a page that has an unsupported content type",
"rest-no-revision": "Error message for REST API debugging, shown when the latest revision of a page cannot be retrieved. Parameters:\n* $1: The title of the page whose revision could not be retrieved",
"rest-media-too-many-links": "Error message for REST API debugging, shown when there are too many media links on a page. Parameters:\n* $1: The page title.\n* $2: The number of links allowed."
"rest-media-too-many-links": "Error message for REST API debugging, shown when there are too many media links on a page. Parameters:\n* $1: The page title.\n* $2: The number of links allowed.",
"rest-html-backend-error": "Error message for REST API debugging, shown when fetching Parsoid HTML from backend has failed."
}

View file

@ -0,0 +1,47 @@
const { action, assert, REST, utils } = require( 'api-testing' );
// API tests for GET /page/{title}/bare: metadata about the latest revision plus a
// link to the HTML route. They need a running wiki that anonymous users can edit.
describe( 'Page HTML Bare', () => {
	const page = utils.title( 'PageHTMLBare ' );
	const client = new REST( 'rest.php/coredev/v0' );
	const anon = action.getAnon();

	before( async () => {
		await anon.edit( page, { text: "''Edit 1'' and '''Edit 2'''" } );
	} );

	describe( 'GET /page/{title}/bare', () => {
		it( 'Should successfully return page bare', async () => {
			const { status, body } = await client.get( `/page/${page}/bare` );
			assert.deepEqual( status, 200 );
			assert.containsAllKeys( body, [ 'latest', 'id', 'key', 'license', 'title', 'content_model', 'html_url' ] );
			assert.nestedPropertyVal( body, 'content_model', 'wikitext' );
			assert.nestedPropertyVal( body, 'title', page );
			assert.nestedPropertyVal( body, 'key', utils.dbkey( page ) );
			assert.match( body.html_url, new RegExp( `/page/${encodeURIComponent( page )}/html$` ) );
		} );

		it( 'Should return 404 error for non-existent page', async () => {
			const dummyPageTitle = utils.title( 'DummyPage_' );
			const { status } = await client.get( `/page/${dummyPageTitle}/bare` );
			assert.deepEqual( status, 404 );
		} );

		it( 'Should have appropriate response headers', async () => {
			const preEditResponse = await client.get( `/page/${page}/bare` );
			const preEditDate = new Date( preEditResponse.body.latest.timestamp );
			const preEditEtag = preEditResponse.headers.etag;

			await anon.edit( page, { text: "'''Edit 3'''" } );

			const postEditResponse = await client.get( `/page/${page}/bare` );
			const postEditDate = new Date( postEditResponse.body.latest.timestamp );
			const postEditHeaders = postEditResponse.headers;
			const postEditEtag = postEditResponse.headers.etag;

			assert.containsAllKeys( postEditHeaders, [ 'etag' ] );
			// toUTCString() is the standard name for the deprecated toGMTString() alias
			assert.deepEqual( postEditHeaders[ 'last-modified' ], postEditDate.toUTCString() );
			assert.match( postEditHeaders[ 'cache-control' ], /^maxage=\d/ );

			assert.strictEqual( isNaN( preEditDate.getTime() ), false );
			assert.strictEqual( isNaN( postEditDate.getTime() ), false );
			// Compare instants, not Date object identity (which always differs).
			// Both edits may land within the same second, so only monotonicity is
			// required here; the ETag check below proves the revision changed.
			assert.isAtLeast( postEditDate.getTime(), preEditDate.getTime() );
			assert.notStrictEqual( preEditEtag, postEditEtag );
		} );
	} );
} );

View file

@ -14,7 +14,7 @@ describe( 'Page Source', () => {
const { status, body } = await client.get( `/page/${page}` );
assert.deepEqual( status, 200 );
assert.containsAllKeys( body, [ 'latest', 'id', 'key', 'license', 'title', 'content_model', 'source' ] );
assert.nestedPropertyVal( body, 'content_model', 'text' );
assert.nestedPropertyVal( body, 'content_model', 'wikitext' );
assert.nestedPropertyVal( body, 'title', page );
assert.nestedPropertyVal( body, 'key', utils.dbkey( page ) );
assert.nestedPropertyVal( body, 'source', "''Edit 1'' and '''Edit 2'''" );