false, 'pageName' => '', 'oldid' => null, 'body_only' => null, 'errorEnc' => 'plain', 'iwp' => 'exwiki', 'subst' => null, 'offsetType' => 'byte', 'pagelanguage' => 'en', 'opts' => [], 'envOptions' => [ 'prefix' => 'exwiki', 'domain' => 'wiki.example.com', 'pageName' => '', 'offsetType' => 'byte', 'cookie' => '', 'reqId' => 'test+test+test', 'userAgent' => 'UTAgent', 'htmlVariantLanguage' => null, 'outputContentVersion' => Parsoid::AVAILABLE_VERSIONS[0], ], ]; /** @var string Imperfect wikitext to be preserved if selser is applied. Corresponds to Selser.html. */ private const IMPERFECT_WIKITEXT = "
hi
'; $originalHtml = 'ho
'; $dataParsoid = [ 'ids' => [ 'mwAQ' => [ 'pi' => [ [ [ 'k' => '1' ] ] ] ] ] ]; $dataMediaWiki = [ 'ids' => [ 'mwAQ' => [ 'parts' => [ [ 'template' => [ 'target' => [ 'wt' => '1x', 'href' => './Template:1x' ], 'params' => [ '1' => [ 'wt' => 'hi' ] ], 'i' => 0 ] ] ] ] ] ]; $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'html' => [ 'headers' => $htmlHeaders, 'body' => $originalHtml ], 'data-parsoid' => [ 'body' => $dataParsoid, ], 'data-mw' => [ 'body' => $dataMediaWiki, ], ] ], ]; yield 'should apply original data-mw' => [ $attribs, $html, [ '{{1x|hi}}' ], ]; // should give precedence to inline data-mw over original //////// $html = 'hi
'; $originalHtml = 'ho
'; $dataParsoid = [ 'ids' => [ 'mwAQ' => [ 'pi' => [ [ [ 'k' => '1' ] ] ] ] ] ]; $dataMediaWiki = [ 'ids' => [ 'mwAQ' => [] ] ]; // Missing data-mw.parts! $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'html' => [ 'headers' => $htmlHeaders, 'body' => $originalHtml ], 'data-parsoid' => [ 'body' => $dataParsoid, ], 'data-mw' => [ 'body' => $dataMediaWiki, ], ] ], ]; yield 'should give precedence to inline data-mw over original' => [ $attribs, $html, [ '{{1x|hi}}' ], ]; // should not apply original data-mw if modified is supplied /////////// $html = 'hi
'; $originalHtml = 'ho
'; $dataParsoid = [ 'ids' => [ 'mwAQ' => [ 'pi' => [ [ [ 'k' => '1' ] ] ] ] ] ]; $dataMediaWiki = [ 'ids' => [ 'mwAQ' => [] ] ]; // Missing data-mw.parts! $dataMediaWikiModified = [ 'ids' => [ 'mwAQ' => [ 'parts' => [ [ 'template' => [ 'target' => [ 'wt' => '1x', 'href' => './Template:1x' ], 'params' => [ '1' => [ 'wt' => 'hi' ] ], 'i' => 0 ] ] ] ] ] ]; $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'data-mw' => [ // modified data 'body' => $dataMediaWikiModified, ], 'original' => [ 'html' => [ 'headers' => $htmlHeaders999, 'body' => $originalHtml ], 'data-parsoid' => [ 'body' => $dataParsoid, ], 'data-mw' => [ // original data 'body' => $dataMediaWiki, ], ] ], ]; yield 'should not apply original data-mw if modified is supplied' => [ $attribs, $html, [ '{{1x|hi}}' ], ]; // should apply original data-mw when modified is absent (captions 1) /////////// $html = $this->getTextFromFile( 'Image.html' ); $dataParsoid = [ 'ids' => [ 'mwAg' => [ 'optList' => [ [ 'ck' => 'caption', 'ak' => 'Testing 123' ] ] ], 'mwAw' => [ 'a' => [ 'href' => './File:Foobar.jpg' ], 'sa' => [] ], 'mwBA' => [ 'a' => [ 'resource' => './File:Foobar.jpg', 'height' => '28', 'width' => '240' ], 'sa' => [ 'resource' => 'File:Foobar.jpg' ] ] ] ]; $dataMediaWiki = [ 'ids' => [ 'mwAg' => [ 'caption' => 'Testing 123' ] ] ]; $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'data-parsoid' => [ 'body' => $dataParsoid, ], 'data-mw' => [ // original data 'body' => $dataMediaWiki, ], 'html' => [ 'headers' => $htmlHeaders999, 'body' => $html ], ] ], ]; yield 'should apply original data-mw when modified is absent (captions 1)' => [ $attribs, $html, // modified HTML [ '[[File:Foobar.jpg|Testing 123]]' ], ]; // should give precedence to inline data-mw over modified (captions 2) ///////////// $htmlModified = $this->getTextFromFile( 'Image-data-mw.html' ); $dataMediaWikiModified = [ 'ids' => [ 'mwAg' => [ 'caption' => 'Testing 123' ] ] ]; $attribs 
= [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'data-mw' => [ 'body' => $dataMediaWikiModified, ], 'original' => [ 'data-parsoid' => [ 'body' => $dataParsoid, ], 'data-mw' => [ // original data 'body' => $dataMediaWiki, ], 'html' => [ 'headers' => $htmlHeaders999, 'body' => $html ], ] ], ]; yield 'should give precedence to inline data-mw over modified (captions 2)' => [ $attribs, $htmlModified, // modified HTML [ '[[File:Foobar.jpg]]' ], ]; // should give precedence to modified data-mw over original (captions 3) ///////////// $dataMediaWikiModified = [ 'ids' => [ 'mwAg' => [] ] ]; $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'data-mw' => [ 'body' => $dataMediaWikiModified, ], 'original' => [ 'data-parsoid' => [ 'body' => $dataParsoid, ], 'data-mw' => [ // original data 'body' => $dataMediaWiki, ], 'html' => [ 'headers' => $htmlHeaders999, 'body' => $html ], ] ], ]; yield 'should give precedence to modified data-mw over original (captions 3)' => [ $attribs, $html, // modified HTML [ '[[File:Foobar.jpg]]' ], ]; // should apply extra normalizations /////////////////// $htmlModified = 'FooBar'; $attribs = [ 'opts' => [ 'original' => [] ], ]; yield 'should apply extra normalizations' => [ $attribs, $htmlModified, // modified HTML [ 'FooBar' ], // empty tag was stripped ]; // should apply version downgrade /////////// $htmlOfMinimal = $this->getTextFromFile( 'Minimal.html' ); // Uses profile version 2.4.0 $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'html' => [ 'headers' => [ // Specify newer profile version for original HTML 'content-type' => 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"' ], // The profile version given inline in the original HTML doesn't matter, it's ignored 'body' => $htmlOfMinimal, ], 'data-parsoid' => [ 'body' => [ 'ids' => [] ] ], 'data-mw' => [ 'body' => [ 'ids' => [] ] ], // required by version 999.0.0 ] ], ]; yield 'should 
apply version downgrade' => [ $attribs, $htmlOfMinimal, [ '123' ] ]; // should not apply version downgrade if versions are the same /////////// $htmlOfMinimal = $this->getTextFromFile( 'Minimal.html' ); // Uses profile version 2.4.0 $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'html' => [ 'headers' => [ // Specify the exact same version specified inline in Minimal.html 2.4.0 'content-type' => 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/2.4.0"' ], // The profile version given inline in the original HTML doesn't matter, it's ignored 'body' => $htmlOfMinimal, ], 'data-parsoid' => [ 'body' => [ 'ids' => [] ] ], ] ], ]; yield 'should not apply version downgrade if versions are the same' => [ $attribs, $htmlOfMinimal, [ '123' ] ]; // should convert html to json /////////////////////////////////// $html = $this->getTextFromFile( 'JsonConfig.html' ); $expectedText = [ '{"a":4,"b":3}', ]; $attribs = [ 'opts' => [ // even if the path says "wikitext", the contentmodel from the body should win. 
'format' => ParsoidFormatHelper::FORMAT_WIKITEXT,
				'contentmodel' => CONTENT_MODEL_JSON,
			],
		];
		yield 'should convert html to json' => [
			$attribs,
			$html,
			$expectedText,
			[ 'content-type' => 'application/json' ],
		];

		// page bundle input should work with no original data present ///////////
		$htmlOfMinimal = $this->getTextFromFile( 'Minimal.html' ); // Uses profile version 2.4.0
		$attribs = [
			'opts' => [
				'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
				'original' => [],
			],
		];
		yield 'page bundle input should work with no original data present' => [
			$attribs,
			$htmlOfMinimal,
			[ '123' ]
		];
	}

	/**
	 * Returns a revision of the given main-namespace page, creating the page
	 * with the supplied wikitext when the revision lookup finds nothing.
	 *
	 * @param string $title Page name; looked up in NS_MAIN
	 * @param string $wikitext Content to save if the page has to be created
	 *
	 * @return RevisionRecord
	 */
	private function makePage( $title, $wikitext ): RevisionRecord {
		$linkTarget = new TitleValue( NS_MAIN, $title );
		$rev = $this->getServiceContainer()->getRevisionLookup()->getRevisionByTitle( $linkTarget );

		if ( $rev ) {
			return $rev;
		}

		// Create the page that was asked for. This used to edit a hard-coded
		// 'Test_html2wt' page, which only worked because that is the sole
		// title any caller passes.
		/** @var RevisionRecord $rev */
		[ 'revision-record' => $rev ] = $this->editPage( $title, $wikitext )->getValue();

		return $rev;
	}

	/**
	 * Runs html2wt against the 'Test_html2wt' page and checks the response
	 * headers and that every expected snippet occurs in the produced text.
	 *
	 * @dataProvider provideHtml2wt
	 *
	 * @param array $attribs
	 * @param string $html
	 * @param string[] $expectedText
	 * @param string[] $expectedHeaders
	 *
	 * @covers MediaWiki\Parser\Parsoid\HtmlToContentTransform
	 * @covers MediaWiki\Rest\Handler\ParsoidHandler::html2wt
	 */
	public function testHtml2wt(
		array $attribs,
		string $html,
		array $expectedText,
		array $expectedHeaders = []
	) {
		// Cases that do not specify a content-type expect the wikitext default.
		$wikitextProfileUri = 'https://www.mediawiki.org/wiki/Specs/wikitext/1.0.0';
		$expectedHeaders += [
			'content-type' => "text/plain; charset=utf-8; profile=\"$wikitextProfileUri\"",
		];

		$rev = $this->makePage( 'Test_html2wt', self::IMPERFECT_WIKITEXT );
		$page = $rev->getPage();
		$pageConfig = $this->getPageConfig( $page );

		// Fill in everything the data set left unspecified.
		$attribs += self::DEFAULT_ATTRIBS;
		$attribs['opts'] += self::DEFAULT_ATTRIBS['opts'];
		$attribs['opts']['from'] ??= 'html';
		$attribs['envOptions'] += self::DEFAULT_ATTRIBS['envOptions'];

		if ( $attribs['oldid'] ) {
			// Set the actual ID of an existing revision
			$attribs['oldid'] = $rev->getId();
		}

		$handler = $this->newParsoidHandler();
		$response = $handler->html2wt( $pageConfig, $attribs, $html );

		$body = $response->getBody();
		$body->rewind();
		$actualWikitext = $body->getContents();

		foreach ( $expectedHeaders as $name => $value ) {
			$this->assertSame( $value, $response->getHeaderLine( $name ) );
		}

		foreach ( $expectedText as $exp ) {
			$this->assertStringContainsString( $exp, $actualWikitext );
		}
	}

	/**
	 * Checks that html2wt records the expected metric keys on the metrics
	 * object returned by the site config.
	 */
	public function testHtml2wtMetrics() {
		$page = $this->getExistingTestPage();
		$pageConfig = $this->getPageConfig( $page );

		// Start from the defaults; only the input format needs to be filled in.
		$attribs = self::DEFAULT_ATTRIBS;
		$attribs['opts']['from'] ??= 'html';

		// Records every key passed to timing() and increment() so the
		// assertions below can inspect them.
		$metrics = new class () extends MockMetrics {
			public $data = [];

			public function timing( $key, $time ) {
				$this->data[$key] = $time;
				parent::timing( $key, $time );
			}

			public function increment( $key ) {
				$v = $this->data[$key] ?? 0;
				$this->data[$key] = $v + 1;
				return parent::increment( $key );
			}
		};

		$siteConfig = $this->createNoOpMock( SiteConfig::class, [ 'metrics' ] );
		$siteConfig->method( 'metrics' )->willReturn( $metrics );

		$handler = $this->newParsoidHandler( [], [
			'ParsoidSiteConfig' => $siteConfig
		] );
		$handler->html2wt( $pageConfig, $attribs, 'test
' );

		$this->assertArrayHasKey( 'html2wt.size.input', $metrics->data );
		$this->assertArrayHasKey(
			'html2wt.original.version.' . Parsoid::defaultHTMLVersion(),
			$metrics->data
		);
		$this->assertArrayHasKey( 'html2wt.init', $metrics->data );
		$this->assertArrayHasKey( 'html2wt.total', $metrics->data );
		$this->assertArrayHasKey( 'html2wt.size.output', $metrics->data );
		$this->assertArrayHasKey( 'html2wt.timePerInputKB', $metrics->data );
	}

	public function provideHtml2wtThrows() {
		$html = '123';

		$profileVersion = '2.4.0';
		$htmlProfileUri = 'https://www.mediawiki.org/wiki/Specs/HTML/' . $profileVersion;
		$htmlContentType = "text/html;profile=\"$htmlProfileUri\"";
		$htmlHeaders = [
			'content-type' => $htmlContentType,
		];

		// XXX: what does version 999.0.0 mean?!
		$htmlContentType999 = 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"';
		$htmlHeaders999 = [
			'content-type' => $htmlContentType999,
		];

		// Content-type of original html is missing ////////////////////////////
		$attribs = [
			'opts' => [
				'original' => [
					'html' => [
						// no headers with content type
						'body' => $html,
					],
				]
			],
		];
		yield 'Content-type of original html is missing' => [
			$attribs,
			$html,
			new LocalizedHttpException(
				new MessageValue( 'rest-html-backend-error' ),
				400,
				[ 'reason' => 'Content-type of original html is missing.' ]
			)
		];

		// should fail to downgrade the original version for an unknown transition ///////////
		$htmlOfMinimal = $this->getTextFromFile( 'Minimal.html' );
		$htmlOfMinimal2222 = $this->getTextFromFile( 'Minimal-2222.html' );
		$attribs = [
			'opts' => [
				'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
				'original' => [
					'html' => [
						'headers' => [
							// Specify version 2222.0.0!
'content-type' => 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/2222.0.0"' ], 'body' => $htmlOfMinimal2222, ], 'data-parsoid' => [ 'body' => [ 'ids' => [] ] ], ] ], ]; yield 'should fail to downgrade the original version for an unknown transition' => [ $attribs, $htmlOfMinimal, new LocalizedHttpException( new MessageValue( 'rest-html-backend-error' ), 400, [ 'reason' => 'No downgrade possible from schema version 2222.0.0 to 2.4.0.' ] ) ]; // DSR offsetType mismatch: UCS2 vs byte /////////////////////////////// $attribs = [ 'offsetType' => 'byte', 'envOptions' => [ 'offsetType' => 'byte', ], 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'html' => [ 'headers' => $htmlHeaders, 'body' => $html, ], 'data-parsoid' => [ 'body' => [ 'offsetType' => 'UCS2', 'ids' => [], ] ], ] ], ]; yield 'DSR offsetType mismatch: UCS2 vs byte' => [ $attribs, $html, new LocalizedHttpException( new MessageValue( 'rest-html-backend-error' ), 400, [ 'reason' => 'DSR offsetType mismatch: UCS2 vs byte' ] ) ]; // DSR offsetType mismatch: byte vs UCS2 /////////////////////////////// $attribs = [ 'offsetType' => 'UCS2', 'envOptions' => [ 'offsetType' => 'UCS2', ], 'opts' => [ // Enable selser 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'html' => [ 'headers' => $htmlHeaders, 'body' => $html, ], 'data-parsoid' => [ 'body' => [ 'offsetType' => 'byte', 'ids' => [], ] ], ] ], ]; yield 'DSR offsetType mismatch: byte vs UCS2' => [ $attribs, $html, new LocalizedHttpException( new MessageValue( 'rest-html-backend-error' ), 400, [ 'reason' => 'DSR offsetType mismatch: byte vs UCS2' ] ) ]; // Could not find previous revision //////////////////////////// $attribs = [ 'oldid' => 1155779922, 'opts' => [ // set original HTML to enable selser 'original' => [ 'html' => [ 'headers' => $htmlHeaders, 'body' => $html, ] ] ] ]; yield 'Could not find previous revision' => [ $attribs, $html, new HttpException( 'The specified revision is deleted or 
suppressed.', 404 ) ]; // should return a 400 for missing inline data-mw (2.x) /////////////////// $html = 'hi
'; $dataParsoid = [ 'ids' => [ 'mwAQ' => [ 'pi' => [ [ [ 'k' => '1' ] ] ] ] ] ]; $htmlOrig = 'ho
'; $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'data-parsoid' => [ 'body' => $dataParsoid, ], 'html' => [ 'headers' => $htmlHeaders, // slightly modified 'body' => $htmlOrig, ] ] ], ]; yield 'should return a 400 for missing inline data-mw (2.x)' => [ $attribs, $html, new HttpException( 'Cannot serialize mw:Transclusion without data-mw.parts or data-parsoid.src', 400 ) ]; // should return a 400 for not supplying data-mw ////////////////////// $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'data-parsoid' => [ 'body' => $dataParsoid, ], 'html' => [ 'headers' => $htmlHeaders999, 'body' => $htmlOrig, ] ] ], ]; yield 'should return a 400 for not supplying data-mw' => [ $attribs, $html, new LocalizedHttpException( new MessageValue( 'rest-html-backend-error' ), 400, [ 'reason' => 'Invalid data-mw was provided.' ] ) ]; // should return a 400 for missing modified data-mw $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'data-parsoid' => [ 'body' => $dataParsoid, ], 'data-mw' => [ 'body' => [ // Missing data-mw.parts! 'ids' => [ 'mwAQ' => [] ], ] ], 'html' => [ 'headers' => $htmlHeaders999, 'body' => $htmlOrig, ] ] ], ]; yield 'should return a 400 for missing modified data-mw' => [ $attribs, $html, new HttpException( 'Cannot serialize mw:Transclusion without data-mw.parts or data-parsoid.src', 400 ) ]; // should return http 400 if supplied data-parsoid is empty //////////// $html = 'hi
'; $htmlOrig = 'ho
';
		$attribs = [
			'opts' => [
				'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
				'original' => [
					'data-parsoid' => [
						'body' => [],
					],
					'html' => [
						'headers' => $htmlHeaders,
						'body' => $htmlOrig,
					]
				]
			],
		];
		yield 'should return http 400 if supplied data-parsoid is empty' => [
			$attribs,
			$html,
			new LocalizedHttpException(
				new MessageValue( 'rest-html-backend-error' ),
				400,
				[ 'reason' => 'Invalid data-parsoid was provided.' ]
			)
		];

		// TODO: ResourceLimitExceededException from $parsoid->dom2wikitext -> 413
		// TODO: ClientError from $parsoid->dom2wikitext -> 413
		// TODO: Errors from PageBundle->validate
	}

	/**
	 * Checks that html2wt fails with an exception of the expected class,
	 * code, message and (for HttpException) error data.
	 *
	 * @dataProvider provideHtml2wtThrows
	 *
	 * @param array $attribs
	 * @param string $html
	 * @param Exception $expectedException
	 */
	public function testHtml2wtThrows(
		array $attribs,
		string $html,
		Exception $expectedException
	) {
		if ( isset( $attribs['oldid'] ) ) {
			// If a specific revision ID is requested, it's almost certain to not exist.
			// So we are testing with a non-existing page.
			$page = $this->getNonexistingTestPage();
		} else {
			$page = $this->getExistingTestPage();
		}

		$pageConfig = $this->getPageConfig( $page );

		$attribs += self::DEFAULT_ATTRIBS;
		$attribs['opts'] += self::DEFAULT_ATTRIBS['opts'];
		$attribs['opts']['from'] ??= 'html';
		$attribs['envOptions'] += self::DEFAULT_ATTRIBS['envOptions'];

		$handler = $this->newParsoidHandler();

		try {
			$handler->html2wt( $pageConfig, $attribs, $html );
		} catch ( Exception $e ) {
			$this->assertInstanceOf( get_class( $expectedException ), $e );
			$this->assertSame( $expectedException->getCode(), $e->getCode() );

			if ( $expectedException instanceof HttpException ) {
				/** @var HttpException $e */
				$this->assertSame( $expectedException->getErrorData(), $e->getErrorData() );
			}

			$this->assertSame( $expectedException->getMessage(), $e->getMessage() );
			return;
		}

		// Must stay outside the try block: the failure raised by fail() is
		// itself an Exception, so the catch above would swallow it and report
		// a confusing class mismatch instead of "nothing was thrown".
		$this->fail( 'Expected exception: ' . $expectedException );
	}

	/**
	 * Pairs of: exception thrown by Parsoid::dom2wikitext, and the exception
	 * the handler is expected to raise in response.
	 */
	public function provideDom2wikitextException() {
		yield 'ClientError' => [
			new ClientError( 'test' ),
			new HttpException( 'test', 400 )
		];

		yield 'ResourceLimitExceededException' => [
			new ResourceLimitExceededException( 'test' ),
			new HttpException( 'test', 413 )
		];
	}

	/**
	 * Checks how html2wt converts exceptions thrown by Parsoid::dom2wikitext.
	 *
	 * @dataProvider provideDom2wikitextException
	 *
	 * @param Exception $throw
	 * @param Exception $expectedException
	 */
	public function testHtml2wtHandlesDom2wikitextException(
		Exception $throw,
		Exception $expectedException
	) {
		$html = 'hi
';
		$page = $this->getExistingTestPage();
		$attribs = [
			'opts' => [
				'from' => ParsoidFormatHelper::FORMAT_HTML
			]
		] + self::DEFAULT_ATTRIBS;

		// Make a fake Parsoid that throws
		/** @var Parsoid|MockObject $parsoid */
		$parsoid = $this->createNoOpMock( Parsoid::class, [ 'dom2wikitext' ] );
		$parsoid->method( 'dom2wikitext' )->willThrowException( $throw );

		// Make a fake HtmlTransformFactory that returns an HtmlToContentTransform that uses the fake Parsoid.
		/** @var HtmlTransformFactory|MockObject $factory */
		$factory = $this->createNoOpMock( HtmlTransformFactory::class, [ 'getHtmlToContentTransform' ] );
		$factory->method( 'getHtmlToContentTransform' )->willReturn(
			new HtmlToContentTransform(
				$html,
				$page,
				$parsoid,
				[],
				$this->getPageConfigFactory( $page ),
				$this->getServiceContainer()->getContentHandlerFactory()
			)
		);

		// Use an HtmlInputTransformHelper that uses the fake HtmlTransformFactory, so it ends up
		// using the HtmlToContentTransform that has the fake Parsoid which throws an exception.
		$handler = $this->newParsoidHandler( [
			'getHtmlInputHelper' => function () use ( $factory, $page, $html ) {
				$helper = new HtmlInputTransformHelper(
					new NullStatsdDataFactory(),
					$factory,
					$this->getServiceContainer()->getParsoidOutputStash(),
					$this->getServiceContainer()->getParsoidOutputAccess()
				);

				$helper->init( $page, [ 'html' => $html ], [] );
				return $helper;
			}
		] );

		// Check that the exception thrown by Parsoid gets converted as expected.
		$this->expectException( get_class( $expectedException ) );
		$this->expectExceptionCode( $expectedException->getCode() );
		$this->expectExceptionMessage( $expectedException->getMessage() );

		$handler->html2wt( $page, $attribs, $html );
	}

	/**
	 * Data sets for testTryToCreatePageConfig(). The array keys match that
	 * method's parameter names and the entries are listed in parameter order.
	 *
	 * @return Generator
	 */
	public function provideTryToCreatPageConfigData() {
		yield 'Default attribs for tryToCreatePageConfig()' => [
			'attribs' => [ 'oldid' => 1, 'pageName' => 'Test', 'pagelanguage' => 'en' ],
			'wikitext' => null,
			'html2WtMode' => false,
			'expectedWikitext' => 'UTContent',
			'expectedLanguage' => 'en',
		];

		yield 'tryToCreatePageConfig with wikitext' => [
			'attribs' => [ 'oldid' => 1, 'pageName' => 'Test', 'pagelanguage' => 'en' ],
			'wikitext' => "=test=",
			'html2WtMode' => false,
			'expectedWikitext' => '=test=',
			'expectedLanguage' => 'en',
		];

		yield 'tryToCreatePageConfig with html2WtMode set to true' => [
			'attribs' => [ 'oldid' => 1, 'pageName' => 'Test', 'pagelanguage' => null ],
			'wikitext' => null,
			'html2WtMode' => true,
			'expectedWikitext' => 'UTContent',
			'expectedLanguage' => 'en',
		];

		yield 'tryToCreatePageConfig with both wikitext and html2WtMode' => [
			'attribs' => [ 'oldid' => 1, 'pageName' => 'Test', 'pagelanguage' => 'ar' ],
			'wikitext' => "=header=",
			'html2WtMode' => true,
			'expectedWikitext' => '=header=',
			'expectedLanguage' => 'ar',
		];

		yield 'Try to create a page config with pageName set to empty string' => [
			'attribs' => [ 'oldid' => 1, 'pageName' => '', 'pagelanguage' => 'de' ],
			'wikitext' => null,
			'html2WtMode' => false,
			'expectedWikitext' => 'UTContent',
			'expectedLanguage' => 'de',
		];

		yield 'Try to create a page config with no page language' => [
			'attribs' => [ 'oldid' => 1, 'pageName' => '', 'pagelanguage' => null ],
			'wikitext' => null,
			'html2WtMode' => false,
			'expectedWikitext' => 'UTContent',
			'expectedLanguage' => 'en',
		];
	}

	/**
	 * @covers \MediaWiki\Rest\Handler\ParsoidHandler::tryToCreatePageConfig
	 *
	 * @dataProvider provideTryToCreatPageConfigData
	 */
	public function testTryToCreatePageConfig(
		array $attribs,
		$wikitext,
		$html2WtMode,
		$expectedWikitext,
		$expectedLanguage
	) {
		$pageConfig = $this->newParsoidHandler()->tryToCreatePageConfig(
			$attribs,
			$wikitext,
			$html2WtMode
		);

		$this->assertSame(
			$expectedWikitext,
			$pageConfig->getRevisionContent()->getContent( SlotRecord::MAIN )
		);

		$this->assertSame( $expectedLanguage, $pageConfig->getPageLanguage() );
	}

	/**
	 * Data sets for testTryToCreatePageConfigThrows(), keyed by that method's
	 * parameter names.
	 *
	 * @return Generator
	 */
	public function provideTryToCreatPageConfigDataThrows() {
		yield "PageConfig with oldid that doesn't exist" => [
			'attribs' => [ 'oldid' => null, 'pageName' => 'Test', 'pagelanguage' => 'en' ],
			'wikitext' => null,
			'html2WtMode' => false,
		];

		yield 'PageConfig with a bad title' => [
			'attribs' => [ 'oldid' => null, 'pageName' => 'Special:Badtitle', 'pagelanguage' => 'en' ],
			'wikitext' => null,
			'html2WtMode' => false,
		];

		yield "PageConfig with a revision that doesn't exist" => [
			// 'oldid' is so large because we want to emulate a revision
			// that doesn't exist.
			'attribs' => [ 'oldid' => 12345678, 'pageName' => 'Test', 'pagelanguage' => 'en' ],
			'wikitext' => null,
			'html2WtMode' => false,
		];
	}

	/**
	 * @covers \MediaWiki\Rest\Handler\ParsoidHandler::tryToCreatePageConfig
	 *
	 * @dataProvider provideTryToCreatPageConfigDataThrows
	 */
	public function testTryToCreatePageConfigThrows(
		array $attribs,
		$wikitext,
		$html2WtMode
	) {
		$this->expectException( HttpException::class );
		$this->expectExceptionCode( 404 );

		$this->newParsoidHandler()->tryToCreatePageConfig(
			$attribs,
			$wikitext,
			$html2WtMode
		);
	}

	public function provideRoundTripNoSelser() {
		yield 'space in heading' => [
			"==foo==\nsomething\n"
		];
	}

	public function provideRoundTripNeedingSelser() {
		yield 'uppercase tags' => [
			"test language conversion
', ], ];
		yield [
			$attribs,
			$revision,
			'>esttay anguagelay onversioncay<',
			[
				'content-type' => $htmlContentType,
				'content-language' => 'en-x-piglatin',
			]
		];
	}

	/**
	 * Runs languageConversion() and checks the JSON it returns: the html body
	 * must contain the expected text, and each expected header must be present
	 * with the exact expected value.
	 *
	 * @dataProvider provideLanguageConversion
	 */
	public function testLanguageConversion(
		array $attribs,
		array $revision,
		string $expectedText,
		array $expectedHeaders
	) {
		$handler = $this->newParsoidHandler();
		$pageConfig = $handler->tryToCreatePageConfig( $attribs, null, true );

		$stream = $handler->languageConversion( $pageConfig, $attribs, $revision )->getBody();
		$stream->rewind();
		$bundle = json_decode( $stream->getContents(), true );

		$this->assertNotEmpty( $bundle );
		$this->assertArrayHasKey( 'html', $bundle );

		$htmlPart = $bundle['html'];
		$this->assertArrayHasKey( 'body', $htmlPart );
		$this->assertStringContainsString( $expectedText, $htmlPart['body'] );

		foreach ( $expectedHeaders as $headerName => $headerValue ) {
			$this->assertArrayHasKey( $headerName, $htmlPart['headers'] );
			$this->assertSame( $headerValue, $htmlPart['headers'][$headerName] );
		}
	}

	public function provideWt2html() {
		$profileVersion = '2.6.0';
		$htmlProfileUri = 'https://www.mediawiki.org/wiki/Specs/HTML/' . $profileVersion;
		$pbProfileUri = 'https://www.mediawiki.org/wiki/Specs/pagebundle/' . $profileVersion;
		$dpProfileUri = 'https://www.mediawiki.org/wiki/Specs/data-parsoid/' .
$profileVersion; $htmlContentType = "text/html; charset=utf-8; profile=\"$htmlProfileUri\""; $pbContentType = "application/json; charset=utf-8; profile=\"$pbProfileUri\""; $dpContentType = "application/json; charset=utf-8; profile=\"$dpProfileUri\""; $lintContentType = "application/json"; $htmlHeaders = [ 'content-type' => $htmlContentType, ]; $pbHeaders = [ 'content-type' => $pbContentType, ]; $lintHeaders = [ 'content-type' => $lintContentType, ]; // should get from a title and revision (html) /////////////////////////////////// $expectedText = [ '>First Revision Content<', ' 1, // will be replaced by a real revision id ]; yield 'should get from a title and revision (html)' => [ $attribs, null, $expectedText, $unexpectedText, $htmlHeaders ]; // should get from a title and revision (pagebundle) /////////////////////////////////// $expectedText = [ // bits of json '"body":"', 'First Revision Content', 'contentmodel' => 'wikitext', 'data-parsoid' => [ 'headers' => [ 'content-type' => $dpContentType, ], 'body' => [ 'counter' => 2, 'ids' => [ // NOTE: match "First Revision Content" 'mwAA' => [ 'dsr' => [ 0, 22, 0, 0 ] ], 'mwAQ' => [], 'mwAg' => [ 'dsr' => [ 0, 22, 0, 0 ] ], ], 'offsetType' => 'ucs2', // as provided in the input ] ], ]; $unexpectedText = []; $attribs = [ 'oldid' => 1, // will be replaced by a real revision id 'opts' => [ 'format' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE ], 'envOptions' => [ 'offsetType' => 'ucs2', // make sure this is looped through to data-parsoid attribute ] ]; yield 'should get from a title and revision (pagebundle)' => [ $attribs, null, $expectedText, $unexpectedText, $pbHeaders ]; // should parse the given wikitext /////////////////////////////////// $wikitext = 'lorem ipsum'; $expectedText = [ '>lorem ipsum<', ' [ $attribs, $wikitext, $expectedText, $unexpectedText, $htmlHeaders ]; // should parse the given wikitext (body_only) /////////////////////////////////// $wikitext = 'lorem ipsum'; $expectedText = [ '>lorem ipsum<' ]; 
$unexpectedText = [ ' true ];
		yield 'should parse the given wikitext (body_only)' => [
			$attribs,
			$wikitext,
			$expectedText,
			$unexpectedText,
			$htmlHeaders
		];

		// should lint the given wikitext ///////////////////////////////////
		$wikitext = "{|\nhi\n|ho\n|}";
		$expectedText = [ '"type":"fostered"', '"dsr"' ];
		$unexpectedText = [ ' [ 'format' => ParsoidFormatHelper::FORMAT_LINT ] ];
		yield 'should lint the given wikitext' => [
			$attribs,
			$wikitext,
			$expectedText,
			$unexpectedText,
			$lintHeaders
		];

		// should parse the given JSON ///////////////////////////////////
		$wikitext = '{ "color": "green" }';
		// should be rendered as table, not interpreted as wikitext
		$expectedText = [ '>color', '>green', '' ];
		$attribs = [
			'opts' => [
				'contentmodel' => CONTENT_MODEL_JSON,
			]
		];
		yield 'should parse the given JSON' => [
			$attribs,
			$wikitext,
			$expectedText,
			$unexpectedText,
			$htmlHeaders
		];
	}

	/**
	 * Runs wt2html for one data set and checks the response headers and body.
	 *
	 * Entries of $expectedData with an integer key are substrings that must
	 * occur in the raw response body. Entries with a string key are compared
	 * against the JSON-decoded body under that key; for 'data-parsoid' only
	 * the 'body' member is compared.
	 *
	 * @dataProvider provideWt2html
	 *
	 * @param array $attribs
	 * @param string|null $text
	 * @param string[] $expectedData
	 * @param string[] $unexpectedHtml
	 * @param string[] $expectedHeaders
	 */
	public function testWt2html(
		array $attribs,
		?string $text,
		array $expectedData,
		array $unexpectedHtml,
		array $expectedHeaders = []
	) {
		// Fallback content-type for data sets that do not supply one.
		$htmlProfileUri = 'https://www.mediawiki.org/wiki/Specs/html/2.6.0';
		$expectedHeaders += [
			'content-type' => "text/x-wiki; charset=utf-8; profile=\"$htmlProfileUri\"",
		];

		$page = $this->getNonexistingTestPage( __METHOD__ );
		$status = $this->editPage( $page, 'First Revision Content' );
		$currentRev = $status->getNewRevision();

		$attribs += self::DEFAULT_ATTRIBS;
		$attribs['opts'] += self::DEFAULT_ATTRIBS['opts'];
		$attribs['opts']['from'] ??= 'wikitext';
		$attribs['opts']['format'] ??= 'html';
		$attribs['envOptions'] += self::DEFAULT_ATTRIBS['envOptions'];

		if ( $attribs['oldid'] ) {
			// Set the actual ID of an existing revision
			$attribs['oldid'] = $currentRev->getId();

			// Make sure we are testing against a non-current revision
			$this->editPage( $page, 'this is not the content you are 
looking for' );
		}

		$handler = $this->newParsoidHandler();

		$revTextOrId = $text ?? $attribs['oldid'] ?? null;
		$pageConfig = $this->getPageConfig( $page, $revTextOrId );

		$response = $handler->wt2html( $pageConfig, $attribs, $text );
		$body = $response->getBody();
		$body->rewind();
		$data = $body->getContents();

		foreach ( $expectedHeaders as $name => $value ) {
			$responseHeaderValue = $response->getHeaderLine( $name );
			if ( $name === 'content-type' ) {
				$this->assertTrue( $this->contentTypeMatcher( $value, $responseHeaderValue ) );
			} else {
				$this->assertSame( $value, $responseHeaderValue );
			}
		}

		// HACK: try to parse as json, just in case.
		// The second argument of json_decode() is the boolean $associative
		// switch, not a flags bitmask, so pass true rather than
		// JSON_OBJECT_AS_ARRAY.
		$jsonData = json_decode( $data, true );

		foreach ( $expectedData as $index => $exp ) {
			if ( is_int( $index ) ) {
				$this->assertStringContainsString( $exp, $data );
				continue;
			}

			// Keyed expectations only make sense if the body really was JSON;
			// fail with a clear message instead of erroring on a null lookup.
			$this->assertIsArray( $jsonData, 'Response body should decode to a JSON object' );
			$this->assertArrayHasKey( $index, $jsonData );

			if ( $index === 'data-parsoid' ) {
				// FIXME: Assert headers as well
				$this->assertArrayHasKey( 'body', $jsonData[$index] );
				$this->assertSame( $exp['body'], $jsonData[$index]['body'] );
			} else {
				$this->assertSame( $exp, $jsonData[$index] );
			}
		}

		foreach ( $unexpectedHtml as $exp ) {
			$this->assertStringNotContainsString( $exp, $data );
		}
	}

	/**
	 * Calls wt2html twice, before and after setting the parser cache write
	 * ratio to 0, and expects exactly one ParserCache::save() call overall.
	 */
	public function testWt2html_ParserCache() {
		$page = $this->getExistingTestPage();
		$pageConfig = $this->getPageConfig( $page );

		$parserCache = $this->createNoOpMock( ParserCache::class, [ 'save', 'get' ] );

		// This is the critical assertion in this test case: the save() method should
		// be called exactly once!
		$parserCache->expects( $this->once() )->method( 'save' );
		$parserCache->method( 'get' )->willReturn( false );

		$parserCacheFactory = $this->createNoOpMock(
			ParserCacheFactory::class,
			[ 'getParserCache', 'getRevisionOutputCache' ]
		);
		$parserCacheFactory->method( 'getParserCache' )->willReturn( $parserCache );
		$parserCacheFactory->method( 'getRevisionOutputCache' )->willReturn(
			$this->createNoOpMock( RevisionOutputCache::class )
		);
		$this->setService( 'ParserCacheFactory', $parserCacheFactory );

		$attribs = self::DEFAULT_ATTRIBS;
		$attribs['opts']['from'] = 'wikitext';
		$attribs['opts']['format'] = 'html';

		$handler = $this->newParsoidHandler();

		// This should trigger a parser cache write, because we didn't set a write-ratio
		$handler->wt2html( $pageConfig, $attribs );

		$this->overrideConfigValue( 'TemporaryParsoidHandlerParserCacheWriteRatio', 0 );

		// This should not trigger a parser cache write, because we set the write-ratio to 0
		$handler->wt2html( $pageConfig, $attribs );
	}

	/**
	 * Requests a pagebundle for a page holding JavaScript content and expects
	 * a 200 response whose html body contains "Dummy output".
	 */
	public function testWt2html_BadContentModel() {
		$page = $this->getNonexistingTestPage( __METHOD__ );
		$this->editPage( $page, new JavaScriptContent( '"not wikitext"' ) );
		$pageConfig = $this->getPageConfig( $page );

		$attribs = self::DEFAULT_ATTRIBS;
		$attribs['opts']['from'] = 'wikitext';

		// Asking for a 'pagebundle' here because of T325137.
		$attribs['opts']['format'] = 'pagebundle';

		$handler = $this->newParsoidHandler();
		$response = $handler->wt2html( $pageConfig, $attribs );

		$this->assertSame( 200, $response->getStatusCode() );

		$body = $response->getBody();
		$body->rewind();
		$data = $body->getContents();

		// $associative is a boolean switch, so pass true rather than a flags constant.
		$jsonData = json_decode( $data, true );
		$this->assertIsArray( $jsonData );
		$this->assertStringContainsString( "Dummy output", $jsonData['html']['body'] );
	}

	// TODO: test wt2html failure modes
	// TODO: test redlinks
}