Merge "REST compare: bundle section offset information"

This commit is contained in:
jenkins-bot 2019-11-13 01:18:01 +00:00 committed by Gerrit Code Review
commit 097ad02261
3 changed files with 125 additions and 23 deletions

View file

@ -11,6 +11,7 @@ use MediaWiki\Revision\RevisionLookup;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;
use MediaWiki\Revision\SuppressedDataException;
use Parser;
use RequestContext;
use TextContent;
use User;
@ -24,18 +25,26 @@ class CompareHandler extends Handler {
/** @var PermissionManager */
private $permissionManager;
/** @var Parser */
private $parser;
/** @var User */
private $user;
/** @var RevisionRecord[] */
private $revisions = [];
/** @var string[] */
private $textCache = [];
public function __construct(
RevisionLookup $revisionLookup,
PermissionManager $permissionManager
PermissionManager $permissionManager,
Parser $parser
) {
$this->revisionLookup = $revisionLookup;
$this->permissionManager = $permissionManager;
$this->parser = $parser;
// @todo Inject this, when there is a good way to do that
$this->user = RequestContext::getMain()->getUser();
@ -61,10 +70,12 @@ class CompareHandler extends Handler {
'from' => [
'id' => $fromRev->getId(),
'slot_role' => $this->getRole(),
'sections' => $this->getSectionInfo( 'from' )
],
'to' => [
'id' => $toRev->getId(),
'slot_role' => $this->getRole()
'slot_role' => $this->getRole(),
'sections' => $this->getSectionInfo( 'to' )
],
'diff' => [ 'PLACEHOLDER' => null ]
];
@ -129,29 +140,32 @@ class CompareHandler extends Handler {
}
/**
 * Get the text of the requested slot of a revision, memoised per parameter.
 *
 * The first call for a given parameter loads the revision, converts the
 * content of the slot named by getRole() to CONTENT_MODEL_TEXT and stores
 * the resulting string in $this->textCache. Subsequent calls for the same
 * parameter return the stored string without touching the revision again.
 *
 * @param string $paramName Name of the parameter identifying the revision
 *   (callers in this class pass 'from' or 'to'); used as the cache key and
 *   as an argument of the error messages.
 * @return string
 * @throws LocalizedHttpException With status 400 if the converted content
 *   is not a TextContent, 403 if a SuppressedDataException is raised while
 *   reading the slot, 404 if a RevisionAccessException is raised.
 */
private function getRevisionText( $paramName ) {
	if ( isset( $this->textCache[$paramName] ) ) {
		return $this->textCache[$paramName];
	}

	$revision = $this->getRevision( $paramName );
	try {
		$content = $revision
			->getSlot( $this->getRole(), RevisionRecord::FOR_THIS_USER, $this->user )
			->getContent()
			->convert( CONTENT_MODEL_TEXT );
		if ( !( $content instanceof TextContent ) ) {
			throw new LocalizedHttpException(
				new MessageValue(
					'rest-compare-wrong-content',
					[ $this->getRole(), $paramName ]
				),
				400 );
		}
		$this->textCache[$paramName] = $content->getText();
	} catch ( SuppressedDataException $e ) {
		throw new LocalizedHttpException(
			new MessageValue( 'rest-compare-inaccessible', [ $paramName ] ), 403 );
	} catch ( RevisionAccessException $e ) {
		throw new LocalizedHttpException(
			new MessageValue( 'rest-compare-nonexistent', [ $paramName ] ), 404 );
	}

	return $this->textCache[$paramName];
}
/**
@ -169,6 +183,27 @@ class CompareHandler extends Handler {
return wikidiff2_inline_json_diff( $fromText, $toText, 2 );
}
/**
 * Build the section list that is bundled with one side of the comparison.
 *
 * The revision text is split by Parser::getFlatSectionInfo() and every
 * section except the first one is reported with its heading level, heading
 * wikitext and offset.
 *
 * @param string $paramName Name of the parameter identifying the revision,
 *   as accepted by getRevisionText()
 * @return array[] List of arrays with the keys 'level', 'heading' and 'offset'
 */
private function getSectionInfo( $paramName ) {
	$wikitext = $this->getRevisionText( $paramName );
	$result = [];
	foreach ( $this->parser->getFlatSectionInfo( $wikitext ) as $index => $info ) {
		if ( $index === 0 ) {
			// Section zero is whatever precedes the first heading. Its offset
			// is always zero, so the client can assume its location and it
			// is left out of the response.
			continue;
		}
		$result[] = [
			'level' => $info['level'],
			'heading' => $info['heading'],
			'offset' => $info['offset'],
		];
	}
	return $result;
}
public function getParamSettings() {
return [
'from' => [

View file

@ -28,7 +28,8 @@
"class": "MediaWiki\\Rest\\Handler\\CompareHandler",
"services": [
"RevisionLookup",
"PermissionManager"
"PermissionManager",
"Parser"
]
},
{

View file

@ -5880,6 +5880,72 @@ class Parser {
return $this->extractSections( $oldText, $sectionId, 'replace', $newText );
}
/**
 * Split wikitext into a flat list of preprocessor sections.
 *
 * A preprocessor section is one introduced by wikitext-style heading
 * syntax, not by HTML-style syntax. Templates are not expanded, so sections
 * that would be produced by templates or parser functions are not reported.
 * This is the definition of a section used by section editing; it differs
 * from the one used for TOC generation.
 *
 * Unlike the sections handled by getSection() and replaceSection(), the
 * sections returned here are not nested and are therefore typically
 * smaller. Nesting can be reconstructed from the heading levels.
 *
 * Every element of the returned list is an associative array with the
 * following keys:
 *
 * - index: An integer identifying the section.
 * - level: The heading level, e.g. 1 for <h1>. This is 0 for the section
 *   before the first heading.
 * - offset: The byte offset within the wikitext at which the section starts.
 * - heading: The wikitext of the heading which introduces the section,
 *   including equals signs. This is an empty string for the section before
 *   the first heading.
 * - text: The complete text of the section.
 *
 * The section before the first heading is always present as the first
 * element, even when its text is empty.
 *
 * @param string $text
 * @return array[]
 */
public function getFlatSectionInfo( $text ) {
	// Must stay assigned until the method returns.
	$magicScopeVariable = $this->lock();
	$this->startParse( null, new ParserOptions, self::OT_PLAIN, true );
	$frame = $this->getPreprocessor()->newFrame();
	$dom = $this->preprocessToDom( $text, 0 );

	$result = [];
	// Section being accumulated; starts out as the text before any heading.
	$pending = [
		'index' => 0,
		'level' => 0,
		'offset' => 0,
		'heading' => '',
		'text' => ''
	];
	// Number of bytes of original wikitext consumed so far.
	$consumed = 0;

	for ( $child = $dom->getFirstChild(); $child; $child = $child->getNextSibling() ) {
		$original = $frame->expand( $child, PPFrame::RECOVER_ORIG );
		if ( $child->getName() !== 'h' ) {
			$pending['text'] .= $original;
		} else {
			// A heading closes the pending section and opens a new one
			// whose text begins with the heading itself.
			$headingBits = $child->splitHeading();
			$result[] = $pending;
			$pending = [
				'index' => $headingBits['i'],
				'level' => $headingBits['level'],
				'offset' => $consumed,
				'heading' => $original,
				'text' => $original
			];
		}
		$consumed += strlen( $original );
	}

	$result[] = $pending;
	return $result;
}
/**
* Get the ID of the revision we are parsing
*