2015-12-25 20:14:02 +00:00
|
|
|
<?php
|
|
|
|
|
|
Fix XML dumps for content types with non-string getNativeData()
In fdc3e9f9524d91a492bdc212486d4518991c0fe2, the code generating XML
dumps was updated to support multi-content revisions. This refactor
included a workaround for content types that are subclasses of
TextContent to use getNativeData() rather than serialize(), apparently
to satisfy the Flow extension.
However, this assumes that getNativeData() always returns a string. As
demonstrated in T155582, this is not the case, which is one of the
reasons why the method was deprecated. Notably, if a wiki has a custom
content type defined whose getNativeData() returns a non-string value,
and has pages using that content type, this breaks XML dump generation
(dumpBackup.php) for that wiki and also makes those pages unexportable
via Special:Export.
Fix it by using getText() instead of getNativeData(), which is the
recommended migration path anyways per T155582. I am somewhat perplexed
by the reference to Flow in the original code comment, because Flow's
BoardContent does not seem to extend TextContent at all.
Bug: T155582
Bug: T328503
Change-Id: I670fb53f193ec20d3d4c258e54c89e7f64cf2d1b
2023-02-01 00:12:31 +00:00
|
|
|
use MediaWiki\Content\Renderer\ContentParseParams;
|
2022-07-15 00:07:38 +00:00
|
|
|
use MediaWiki\MainConfigNames;
|
Fix XML dumps for content types with non-string getNativeData()
In fdc3e9f9524d91a492bdc212486d4518991c0fe2, the code generating XML
dumps was updated to support multi-content revisions. This refactor
included a workaround for content types that are subclasses of
TextContent to use getNativeData() rather than serialize(), apparently
to satisfy the Flow extension.
However, this assumes that getNativeData() always returns a string. As
demonstrated in T155582, this is not the case, which is one of the
reasons why the method was deprecated. Notably, if a wiki has a custom
content type defined whose getNativeData() returns a non-string value,
and has pages using that content type, this breaks XML dump generation
(dumpBackup.php) for that wiki and also makes those pages unexportable
via Special:Export.
Fix it by using getText() instead of getNativeData(), which is the
recommended migration path anyways per T155582. I am somewhat perplexed
by the reference to Flow in the original code comment, because Flow's
BoardContent does not seem to extend TextContent at all.
Bug: T155582
Bug: T328503
Change-Id: I670fb53f193ec20d3d4c258e54c89e7f64cf2d1b
2023-02-01 00:12:31 +00:00
|
|
|
use MediaWiki\Page\PageIdentity;
|
2023-03-01 20:33:26 +00:00
|
|
|
use MediaWiki\Title\Title;
|
2022-07-15 00:07:38 +00:00
|
|
|
|
2015-12-25 20:14:02 +00:00
|
|
|
/**
|
|
|
|
|
* Test class for Export methods.
|
|
|
|
|
*
|
|
|
|
|
* @group Database
|
|
|
|
|
*
|
|
|
|
|
* @author Isaac Hutt <mhutti1@gmail.com>
|
|
|
|
|
*/
|
|
|
|
|
class ExportTest extends MediaWikiLangTestCase {
|
|
|
|
|
|
2021-07-22 03:11:47 +00:00
|
|
|
/**
 * Run the parent set-up, then override the CapitalLinks configuration
 * value to true for the tests in this class.
 */
protected function setUp(): void {
	parent::setUp();

	// Pin the CapitalLinks setting so the tests do not depend on the site default.
	$this->overrideConfigValue(
		MainConfigNames::CapitalLinks,
		true
	);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Export the page titled 'UTPage' and verify that the generated XML lists
 * the content language's namespaces, carries the expected page title and
 * contains non-empty revision text.
 *
 * @covers WikiExporter::pageByTitle
 */
public function testPageByTitle() {
	$pageTitle = 'UTPage';

	$services = $this->getServiceContainer();

	$title = Title::newFromText( $pageTitle );

	$xmlObject = $this->getXmlDumpForPage( $title );

	// Check namespaces match xml.
	// Start from an empty list so that a dump without any <namespace>
	// children fails the comparison below instead of tripping over an
	// undefined variable.
	$xmlNamespaces = [];
	foreach ( $xmlObject->siteinfo->namespaces->children() as $namespace ) {
		// Get the text content of the SimpleXMLElement
		$xmlNamespaces[] = (string)$namespace;
	}
	$xmlNamespaces = str_replace( ' ', '_', $xmlNamespaces );

	$actualNamespaces = (array)$services->getContentLanguage()->getNamespaces();
	$actualNamespaces = array_values( $actualNamespaces );
	$this->assertEquals( $actualNamespaces, $xmlNamespaces );

	// Check xml page title correct.
	// A string cast yields '' for a missing element, so a broken dump is
	// reported as an assertion failure rather than an undefined offset.
	$this->assertSame( $pageTitle, (string)$xmlObject->page->title );

	// Check xml page text is not empty
	$this->assertNotSame( '', (string)$xmlObject->page->revision->text );
}
|
|
|
|
|
|
Fix XML dumps for content types with non-string getNativeData()
In fdc3e9f9524d91a492bdc212486d4518991c0fe2, the code generating XML
dumps was updated to support multi-content revisions. This refactor
included a workaround for content types that are subclasses of
TextContent to use getNativeData() rather than serialize(), apparently
to satisfy the Flow extension.
However, this assumes that getNativeData() always returns a string. As
demonstrated in T155582, this is not the case, which is one of the
reasons why the method was deprecated. Notably, if a wiki has a custom
content type defined whose getNativeData() returns a non-string value,
and has pages using that content type, this breaks XML dump generation
(dumpBackup.php) for that wiki and also makes those pages unexportable
via Special:Export.
Fix it by using getText() instead of getNativeData(), which is the
recommended migration path anyways per T155582. I am somewhat perplexed
by the reference to Flow in the original code comment, because Flow's
BoardContent does not seem to extend TextContent at all.
Bug: T155582
Bug: T328503
Change-Id: I670fb53f193ec20d3d4c258e54c89e7f64cf2d1b
2023-02-01 00:12:31 +00:00
|
|
|
/**
 * Regression test for T328503: a custom content type whose getNativeData()
 * override returns a non-string value must still export correctly.
 *
 * @covers XmlDumpWriter::writeText
 */
public function testShouldExportContentWithNonStringNativeData(): void {
	$contentModelId = 'non-string-test-content-model';

	// Stub ContentHandler producing Content objects whose getNativeData()
	// returns the JSON-decoded value instead of a string.
	$contentHandler = new class( $contentModelId ) extends ContentHandler {

		public function __construct( $contentModelId ) {
			parent::__construct( $contentModelId, [ CONTENT_FORMAT_TEXT ] );
		}

		public function serializeContent( Content $content, $format = null ) {
			$nativeData = $content->getNativeData();

			return json_encode( $nativeData );
		}

		public function unserializeContent( $blob, $format = null ) {
			return $this->getTestContent( $blob );
		}

		public function makeEmptyContent() {
			return $this->getTestContent( '{}' );
		}

		protected function fillParserOutput(
			Content $content,
			ContentParseParams $cpoParams,
			ParserOutput &$output
		) {
			$rendered = json_encode( $content->getNativeData() );
			$output->setText( $rendered );
		}

		/**
		 * Wrap a JSON blob in a TextContent subclass that exposes the
		 * decoded value through getNativeData().
		 */
		private function getTestContent( string $blob ): Content {
			return new class( $blob, $this->getModelID() ) extends TextContent {

				/** @var array */
				private $decoded;

				public function __construct( $text, $contentModelId ) {
					parent::__construct( $text, $contentModelId );

					// Keep the decoded form alongside the raw text held by the parent.
					$this->decoded = json_decode( $text, true );
				}

				public function getNativeData() {
					return $this->decoded;
				}
			};
		}
	};

	// Hand out the stub handler whenever the test content model is requested.
	$this->setTemporaryHook(
		'ContentHandlerForModelID',
		static function (
			string $modelId,
			?ContentHandler &$handlerRef
		) use ( $contentModelId, $contentHandler ): void {
			if ( $modelId !== $contentModelId ) {
				return;
			}

			$handlerRef = $contentHandler;
		}
	);

	// Build the content to store, then save it to a fresh page.
	$testText = json_encode( [ 'test' => 'data' ] );
	$content = $contentHandler->unserializeContent( $testText );

	$wikiPage = $this->getNonexistingTestPage( 'NonStringNativeDataExportTest' );
	$this->editPage( $wikiPage, $content );

	$xmlObject = $this->getXmlDumpForPage( $wikiPage );
	$revision = $xmlObject->page->revision;

	// The dump must carry the model ID and the original serialized text.
	$this->assertSame( $contentModelId, (string)$revision->model );
	$this->assertSame( $testText, (string)$revision->text );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Convenience function to export the content of the given page in MediaWiki's XML dump format.
 *
 * The page is exported with WikiExporter::FULL into a DumpStringOutput sink,
 * and the collected output is parsed with SimpleXML. The test fails if the
 * output cannot be parsed.
 *
 * @param PageIdentity $page page to export
 * @return SimpleXMLElement root element of the generated XML
 */
private function getXmlDumpForPage( PageIdentity $page ): SimpleXMLElement {
	$exporter = $this->getServiceContainer()
		->getWikiExporterFactory()
		->getWikiExporter( $this->db, WikiExporter::FULL );

	$sink = new DumpStringOutput();
	$exporter->setOutputSink( $sink );
	$exporter->openStream();
	$exporter->pageByTitle( $page );
	$exporter->closeStream();

	// phpcs:ignore Generic.PHP.NoSilencedErrors -- suppress deprecation per T268847
	$oldDisable = @libxml_disable_entity_loader( true );

	try {
		// Returns false (and emits warnings) if the output is not valid XML.
		$xmlObject = simplexml_load_string( (string)$sink );
	} finally {
		// Restore the previous entity loader state even when parsing throws,
		// so a failure here cannot leak into later tests.
		// phpcs:ignore Generic.PHP.NoSilencedErrors
		@libxml_disable_entity_loader( $oldDisable );
	}

	// Report unparsable output as a readable failure instead of a TypeError
	// from the declared return type.
	$this->assertInstanceOf(
		SimpleXMLElement::class,
		$xmlObject,
		'Exported dump should be well-formed XML'
	);

	return $xmlObject;
}
|
|
|
|
|
|
2015-12-25 20:14:02 +00:00
|
|
|
}
|