Introduce ContentHandler::exportTransform()

ContentHandler::exportTransform() allows content handlers to apply
transformations upon export, such as conversion of legacy formats or
filtering of internal data.

Note that the transformation is applied to serialized content, since
the exporter will generally not unserialize the content blob to an
intermediate form before writing it to the dump. Implementations
may choose to unserialize, then transform and re-serialize.

Bug: 65256
Change-Id: Ic55a8bd8bea13041000b176c7c02c7c5ced76f6d
This commit is contained in:
daniel 2014-05-20 19:21:49 +02:00
parent 4f08c6760d
commit ee0ff556d4
2 changed files with 32 additions and 19 deletions

View file

@ -670,12 +670,30 @@ class XmlDumpWriter {
$out .= " " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
}
if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
$content_model = strval( $row->rev_content_model );
} else {
// probably using $wgContentHandlerUseDB = false;
$title = Title::makeTitle( $row->page_namespace, $row->page_title );
$content_model = ContentHandler::getDefaultModelFor( $title );
}
$content_handler = ContentHandler::getForModelID( $content_model );
if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
$content_format = strval( $row->rev_content_format );
} else {
// probably using $wgContentHandlerUseDB = false;
$content_format = $content_handler->getDefaultFormat();
}
$text = '';
if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_TEXT ) ) {
$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
} elseif ( isset( $row->old_text ) ) {
// Raw text from the database may have invalid chars
$text = strval( Revision::getRevisionText( $row ) );
$text = $content_handler->exportTransform( $text, $content_format );
$out .= " " . Xml::elementClean( 'text',
array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
strval( $text ) ) . "\n";
@ -695,26 +713,7 @@ class XmlDumpWriter {
$out .= " <sha1/>\n";
}
if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
$content_model = strval( $row->rev_content_model );
} else {
// probably using $wgContentHandlerUseDB = false;
// @todo test!
$title = Title::makeTitle( $row->page_namespace, $row->page_title );
$content_model = ContentHandler::getDefaultModelFor( $title );
}
$out .= " " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";
if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
$content_format = strval( $row->rev_content_format );
} else {
// probably using $wgContentHandlerUseDB = false;
// @todo test!
$content_handler = ContentHandler::getForModelID( $content_model );
$content_format = $content_handler->getDefaultFormat();
}
$out .= " " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";
wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$this, &$out, $row, $text ) );

View file

@ -430,6 +430,20 @@ abstract class ContentHandler {
*/
abstract public function serializeContent( Content $content, $format = null );
/**
* Applies transformations to a serialized content blob on export.
*
* This default implementation returns the blob unchanged and does not
* use $format. Subclasses may override this to perform transformations
* such as conversion of legacy formats or filtering of internal meta-data.
*
* The input is the serialized blob, not a Content object; an override
* that needs structured access has to unserialize, transform and
* re-serialize the blob itself.
*
* @param string $blob The serialized blob to be exported
* @param string|null $format The blob's serialization format
*
* @return string The blob to export (here: the input, unmodified)
*/
public function exportTransform( $blob, $format = null ) {
// No-op by default: pass the blob through untouched.
return $blob;
}
/**
* Unserializes a Content object of the type supported by this ContentHandler.
*