Make the XML dump schema version configurable.

Bug: T174031
Change-Id: I979b6c8f0a72bc1f5ecce1d499d3fdfa0f671588
This commit is contained in:
daniel 2018-12-14 12:24:44 +01:00
parent 135718b904
commit 45f3912bf1
9 changed files with 76 additions and 27 deletions

View file

@ -8977,6 +8977,12 @@ $wgInterwikiPrefixDisplayTypes = [];
*/
$wgMultiContentRevisionSchemaMigrationStage = SCHEMA_COMPAT_WRITE_BOTH | SCHEMA_COMPAT_READ_NEW;
/**
* The schema version to use by default when generating XML dumps. This allows sites to
* control explicitly when to make breaking changes to their export and dump format.
*/
$wgXmlDumpSchemaVersion = XML_DUMP_SCHEMA_VERSION_10;
/**
* Actor table schema migration stage.
*

View file

@ -317,3 +317,13 @@ define( 'MIGRATION_WRITE_BOTH', 0x10000000 | SCHEMA_COMPAT_READ_BOTH | SCHEMA_CO
define( 'MIGRATION_WRITE_NEW', 0x20000000 | SCHEMA_COMPAT_READ_BOTH | SCHEMA_COMPAT_WRITE_NEW );
define( 'MIGRATION_NEW', 0x30000000 | SCHEMA_COMPAT_NEW );
/**@}*/
/**@{
* XML dump schema versions, for use with XmlDumpWriter.
* See also the corresponding export-nnnn.xsd files in the docs directory,
* which are also listed at <https://www.mediawiki.org/xml/>.
* Note that not all old schema versions are represented here, as several
* were already unsupported at the time these constants were introduced.
*/
define( 'XML_DUMP_SCHEMA_VERSION_10', '0.10' );
/**@}*/

View file

@ -63,12 +63,16 @@ class WikiExporter {
/** @var DumpOutput */
public $sink;
/** @var XmlDumpWriter */
private $writer;
/**
* Returns the export schema version.
* Returns the default export schema version, as defined by $wgXmlDumpSchemaVersion.
* @return string
*/
public static function schemaVersion() {
return "0.10";
global $wgXmlDumpSchemaVersion;
return $wgXmlDumpSchemaVersion;
}
/**
@ -83,11 +87,20 @@ class WikiExporter {
function __construct( $db, $history = self::CURRENT, $text = self::TEXT ) {
$this->db = $db;
$this->history = $history;
$this->writer = new XmlDumpWriter();
$this->writer = new XmlDumpWriter( $text, self::schemaVersion() );
$this->sink = new DumpOutput();
$this->text = $text;
}
/**
* Selects the schema version of the generated XML by replacing the current
* XmlDumpWriter with a new one created for $this->text and the given version.
* This method does not validate the value itself.
*
* @param string $schemaVersion which schema version the generated XML should comply with.
* One of the values from XmlDumpWriter::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX
* constants.
*/
public function setSchemaVersion( $schemaVersion ) {
$this->writer = new XmlDumpWriter( $this->text, $schemaVersion );
}
/**
* Set the DumpOutput or DumpFilter object which will receive
* various row objects and XML output for filtering. Filters

View file

@ -30,6 +30,13 @@ use MediaWiki\Storage\SqlBlobStore;
* @ingroup Dump
*/
class XmlDumpWriter {
/**
* Schema versions that XML output can be generated for, listed using the
* XML_DUMP_SCHEMA_VERSION_XX constants.
*
* Marked @final: not meant to be modified at runtime, although nothing in
* the declaration itself enforces that.
*
* @var string[] the schema versions supported for output
* @final
*/
public static $supportedSchemas = [
XML_DUMP_SCHEMA_VERSION_10,
];
/**
* Title of the currently processed page

View file

@ -893,6 +893,7 @@ class WikiImporter {
) . " exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
}
// FIXME: process schema version 11!
$revision = new WikiRevision( $this->config );
if ( isset( $revisionInfo['id'] ) ) {

View file

@ -30,7 +30,6 @@ require_once __DIR__ . '/../includes/export/WikiExporter.php';
use MediaWiki\MediaWikiServices;
use MediaWiki\Storage\BlobAccessException;
use MediaWiki\Storage\BlobStore;
use MediaWiki\Storage\SqlBlobStore;
use Wikimedia\Rdbms\IMaintainableDatabase;
@ -143,7 +142,7 @@ TEXT
}
/**
* @return BlobStore
* @return SqlBlobStore
*/
private function getBlobStore() {
return MediaWikiServices::getInstance()->getBlobStore();
@ -737,16 +736,16 @@ TEXT
}
/**
* @param int|string $id Content address, or text row ID.
* @param int|string $address Content address, or text row ID.
* @return bool|string
*/
private function getTextSpawned( $id ) {
private function getTextSpawned( $address ) {
Wikimedia\suppressWarnings();
if ( !$this->spawnProc ) {
// First time?
$this->openSpawn();
}
$text = $this->getTextSpawnedOnce( $id );
$text = $this->getTextSpawnedOnce( $address );
Wikimedia\restoreWarnings();
return $text;
@ -814,11 +813,15 @@ TEXT
}
/**
* @param int|string $id Content address, or text row ID.
* @param int|string $address Content address, or text row ID.
* @return bool|string
*/
private function getTextSpawnedOnce( $id ) {
$ok = fwrite( $this->spawnWrite, "$id\n" );
private function getTextSpawnedOnce( $address ) {
if ( is_int( $address ) || intval( $address ) ) {
$address = SqlBlobStore::makeAddressFromTextId( (int)$address );
}
$ok = fwrite( $this->spawnWrite, "$address\n" );
// $this->progress( ">> $address" );
if ( !$ok ) {
return false;
@ -830,26 +833,17 @@ TEXT
return false;
}
// check that the text id they are sending is the one we asked for
// check that the text address they are sending is the one we asked for
// this avoids out of sync revision text errors we have encountered in the past
$newAddress = fgets( $this->spawnRead );
if ( $newAddress === false ) {
return false;
}
if ( strpos( $newAddress, ':' ) === false ) {
$newId = intval( $newAddress );
if ( $newId === false ) {
return false;
}
} else {
try {
$newAddressFields = SqlBlobStore::splitBlobAddress( $newAddress );
$newId = $newAddressFields[ 1 ];
} catch ( InvalidArgumentException $ex ) {
return false;
}
$newAddress = SqlBlobStore::makeAddressFromTextId( intval( $newAddress ) );
}
if ( $id != intval( $newId ) ) {
if ( $newAddress !== $address ) {
return false;
}

View file

@ -51,6 +51,7 @@ abstract class BackupDumper extends Maintenance {
protected $reportingInterval = 100;
protected $pageCount = 0;
protected $revCount = 0;
protected $schemaVersion = null; // use default
protected $server = null; // use default
protected $sink = null; // Output filters
protected $lastTime = 0;
@ -101,6 +102,8 @@ abstract class BackupDumper extends Maintenance {
'<type>[:<options>]. <types>s: latest, notalk, namespace', false, true, false, true );
$this->addOption( 'report', 'Report position and speed after every n pages processed. ' .
'Default: 100.', false, true );
$this->addOption( 'schema-version', 'Schema version to use for output. ' .
'Default: ' . WikiExporter::schemaVersion(), false, true );
$this->addOption( 'server', 'Force reading from MySQL server', false, true );
$this->addOption( '7ziplevel', '7zip compression level for all 7zip outputs. Used for ' .
'-mx option to 7za command.', false, true );
@ -155,6 +158,8 @@ abstract class BackupDumper extends Maintenance {
$sink = null;
$sinks = [];
$this->schemaVersion = WikiExporter::schemaVersion();
$options = $this->orderedOptions;
foreach ( $options as $arg ) {
$opt = $arg[0];
@ -215,6 +220,15 @@ abstract class BackupDumper extends Maintenance {
unset( $sink );
$sink = $filter;
break;
case 'schema-version':
	// Validate the requested version against the versions XmlDumpWriter can emit.
	// The comparison must be strict: with loose comparison PHP compares numeric
	// strings as numbers, so e.g. "0.1" or "1e-1" would be accepted as equal to
	// "0.10" and then be used verbatim as the output schema version.
	if ( !in_array( $param, XmlDumpWriter::$supportedSchemas, true ) ) {
		$this->fatalError(
			"Unsupported schema version $param. Supported versions: " .
			implode( ', ', XmlDumpWriter::$supportedSchemas )
		);
	}
	$this->schemaVersion = $param;
	break;
}
}
@ -250,6 +264,7 @@ abstract class BackupDumper extends Maintenance {
$db = $this->backupDb();
$exporter = new WikiExporter( $db, $history, $text );
$exporter->setSchemaVersion( $this->schemaVersion );
$exporter->dumpUploads = $this->dumpUploads;
$exporter->dumpUploadFileContents = $this->dumpUploadFileContents;

View file

@ -162,9 +162,9 @@ abstract class DumpTestCase extends MediaWikiLangTestCase {
* @return string
*/
protected function getXmlSchemaPath( $schemaVersion = null ) {
global $IP;
global $IP, $wgXmlDumpSchemaVersion;
$schemaVersion = $schemaVersion ?: '0.10';
$schemaVersion = $schemaVersion ?: $wgXmlDumpSchemaVersion;
return "$IP/docs/export-$schemaVersion.xsd";
}

View file

@ -12,6 +12,7 @@ use WikiExporter;
use Wikimedia\Rdbms\IDatabase;
use Wikimedia\Rdbms\LoadBalancer;
use WikiPage;
use XmlDumpWriter;
/**
* Tests for page dumps of BackupDumper
@ -171,7 +172,9 @@ class BackupDumperPageTest extends DumpTestCase {
}
public function schemaVersionProvider() {
yield [ '0.10' ];
foreach ( XmlDumpWriter::$supportedSchemas as $schemaVersion ) {
yield [ $schemaVersion ];
}
}
/**