Store original media dimensions as additional header

For storage repos that support headers (such as Swift), this will store the original
media dimensions as an extra custom header, X-Content-Dimensions.
The header is formatted to minimize its length when dealing with multipage
documents, by expressing the information as page ranges keyed by dimensions.

Example for a multipage document with some pages of different sizes:
X-Content-Dimensions: 1903x899:1-9,11/1903x873:10

Example for a single page document:
X-Content-Dimensions: 800x600:1

Bug: T150741
Change-Id: Ic4c6a86557b3705cf75d074753e9ce2ee070a6df
This commit is contained in:
Gilles Dubuc 2017-04-26 14:01:49 +02:00 committed by Aaron Schulz
parent c22d28b17a
commit cdfe08439c
22 changed files with 200 additions and 32 deletions

View file

@ -24,6 +24,9 @@ production.
ParserOptions::setWrapOutputClass().
* Added 'ChangeTagsAllowedAdd' hook, enabling extensions to allow software-
specific tags to be added by users.
* File storage backends that support headers (e.g. Swift) now store an
X-Content-Dimensions header on originals, containing the media's dimensions
as page ranges keyed by dimensions.
=== External library changes in 1.30 ===
@ -61,6 +64,8 @@ changes to languages because of Phabricator reports.
* mw.user.bucket (deprecated in 1.23) was removed.
* LoadBalancer::getServerInfo() and LoadBalancer::setServerInfo() are
deprecated. There are no known callers.
* File::getStreamHeaders() was deprecated.
* MediaHandler::getStreamHeaders() was deprecated.
== Compatibility ==
MediaWiki 1.30 requires PHP 5.5.9 or later. There is experimental support for

View file

@ -2149,15 +2149,32 @@ abstract class File implements IDBAccessObject {
}
/**
 * Deprecated alias of File::getContentHeaders().
 *
 * Reports the call through wfDeprecated() and then hands back whatever
 * getContentHeaders() returns.
 *
 * @return array HTTP header name/value map to use for HEAD/GET request responses
 * @deprecated since 1.30, use File::getContentHeaders instead
 */
function getStreamHeaders() {
	wfDeprecated( __METHOD__, '1.30' );

	$headers = $this->getContentHeaders();

	return $headers;
}
/**
 * Build the HTTP headers derived from this file's media metadata.
 *
 * The work is delegated to the media handler's getContentHeaders(). Metadata
 * that is still a serialized string is unserialized first, because the
 * handler is given an array. When there is no handler, no metadata, or the
 * metadata cannot be unserialized into an array, no headers are produced.
 *
 * @return array HTTP header name/value map to use for HEAD/GET request responses
 * @since 1.30
 */
function getContentHeaders() {
	$handler = $this->getHandler();
	if ( !$handler ) {
		return [];
	}

	$metadata = $this->getMetadata();
	if ( is_string( $metadata ) ) {
		// NOTE(review): quietCall presumably suppresses the notice unserialize()
		// raises on malformed input - confirm against its definition.
		$metadata = MediaWiki\quietCall( 'unserialize', $metadata );
	}

	// Covers missing/empty metadata as well as a failed unserialize(), which
	// yields false rather than an array.
	if ( !is_array( $metadata ) || !$metadata ) {
		return [];
	}

	// Call the new API directly instead of going through the deprecated
	// getStreamHeaders() wrapper.
	return $handler->getContentHeaders( $metadata );
}
/**

View file

@ -1200,7 +1200,9 @@ class LocalFile extends File {
$options = [];
$handler = MediaHandler::getHandler( $props['mime'] );
if ( $handler ) {
$options['headers'] = $handler->getStreamHeaders( $props['metadata'] );
$metadata = MediaWiki\quietCall( 'unserialize', $props['metadata'] );
$options['headers'] = $handler->getContentHeaders( $metadata );
} else {
$options['headers'] = [];
}

View file

@ -1250,7 +1250,7 @@ abstract class FileBackendStore extends FileBackend {
* @return array
*/
protected function sanitizeOpHeaders( array $op ) {
static $longs = [ 'content-disposition' ];
static $longs = [ 'content-disposition', 'x-content-dimensions' ];
if ( isset( $op['headers'] ) ) { // op sets HTTP headers
$newHeaders = [];

View file

@ -117,6 +117,11 @@ class Exif {
* @link http://exif.org/Exif2-2.PDF The Exif 2.2 specification
*/
$this->mExifTags = [
'COMPUTED' => [
'Width' => Exif::SHORT_OR_LONG, # Image width
'Height' => Exif::SHORT_OR_LONG, # Image height
],
# TIFF Rev. 6.0 Attribute Information (p22)
'IFD0' => [
# Tags relating to image structure

View file

@ -242,4 +242,21 @@ class ExifBitmapHandler extends BitmapHandler {
return 0;
}
/**
 * Get useful response headers for GET/HEAD requests for a file with the given metadata.
 *
 * The dimensions are read from the capitalised 'Width' and 'Height' keys and
 * passed on to the parent implementation under the lowercase 'width' and
 * 'height' keys. If either key is unset, no headers are returned.
 *
 * @param array $metadata Contains this handler's unserialized getMetadata() for a file
 * @return array
 */
public function getContentHeaders( $metadata ) {
	if ( isset( $metadata['Width'] ) && isset( $metadata['Height'] ) ) {
		return parent::getContentHeaders( [
			'width' => $metadata['Width'],
			'height' => $metadata['Height'],
		] );
	}

	return [];
}
}

View file

@ -101,6 +101,9 @@ class FormatMetadata extends ContextSource {
public function makeFormattedData( $tags ) {
$resolutionunit = !isset( $tags['ResolutionUnit'] ) || $tags['ResolutionUnit'] == 2 ? 2 : 3;
unset( $tags['ResolutionUnit'] );
// Width and height are for internal use and already available & displayed outside of metadata
unset( $tags['Width'] );
unset( $tags['Height'] );
foreach ( $tags as $tag => &$vals ) {
// This seems ugly to wrap non-array's in an array just to unwrap again,

View file

@ -54,9 +54,9 @@ class GIFMetadataExtractor {
* @return array
*/
static function getMetadata( $filename ) {
self::$gifFrameSep = pack( "C", ord( "," ) );
self::$gifExtensionSep = pack( "C", ord( "!" ) );
self::$gifTerm = pack( "C", ord( ";" ) );
self::$gifFrameSep = pack( "C", ord( "," ) ); // 2C
self::$gifExtensionSep = pack( "C", ord( "!" ) ); // 21
self::$gifTerm = pack( "C", ord( ";" ) ); // 3B
$frameCount = 0;
$duration = 0.0;
@ -82,8 +82,11 @@ class GIFMetadataExtractor {
throw new Exception( "Not a valid GIF file; header: $buf" );
}
// Skip over width and height.
fread( $fh, 4 );
// Read width and height.
$buf = fread( $fh, 2 );
$width = unpack( 'v', $buf )[1];
$buf = fread( $fh, 2 );
$height = unpack( 'v', $buf )[1];
// Read BPP
$buf = fread( $fh, 1 );
@ -251,6 +254,8 @@ class GIFMetadataExtractor {
'duration' => $duration,
'xmp' => $xmp,
'comment' => $comment,
'width' => $width,
'height' => $height,
];
}

View file

@ -305,13 +305,11 @@ abstract class MediaHandler {
}
/**
 * Get useful response headers for GET/HEAD requests for a file with the given metadata
 *
 * Deprecated wrapper: reports the call through wfDeprecated() and forwards
 * $metadata unchanged to getContentHeaders().
 *
 * @param mixed $metadata Result of the getMetadata() function of this handler for a file
 * @return array
 * @deprecated since 1.30, use MediaHandler::getContentHeaders instead
 */
public function getStreamHeaders( $metadata ) {
	// No early return here: the deprecation notice and the delegation below
	// must actually run.
	wfDeprecated( __METHOD__, '1.30' );
	return $this->getContentHeaders( $metadata );
}
/**
@ -868,4 +866,68 @@ abstract class MediaHandler {
public function getWarningConfig( $file ) {
return null;
}
/**
 * Converts a dimensions array about a potentially multipage document from an
 * exhaustive list of ordered page numbers to a list of page ranges.
 *
 * Example: [ '123x456' => [ 1, 2, 3, 5 ], '789x789' => [ 4 ] ]
 * becomes '123x456:1-3,5/789x789:4'.
 *
 * @param array $pagesByDimensions Map of dimensions string => list of page
 *  numbers in ascending order. Dimensions with an empty page list are skipped.
 * @return string Entries of the form "<dimensions>:<ranges>" joined by '/',
 *  where <ranges> is a comma-separated list of "first-last" or single pages
 * @since 1.30
 */
public static function getPageRangesByDimensions( $pagesByDimensions ) {
	// A run of one page is written as "N", a longer run as "first-last".
	$formatRange = function ( $first, $last ) {
		return $first != $last ? "$first-$last" : "$first";
	};

	$dimensionsString = [];

	foreach ( $pagesByDimensions as $dimensions => $pageList ) {
		// Without this guard an empty list would read a missing offset and
		// produce a bogus "--1" range.
		if ( !$pageList ) {
			continue;
		}

		$ranges = [];
		// reset() rather than [0]: do not assume the list is indexed from 0.
		$firstPage = reset( $pageList );
		// Start one below the first page so the first iteration extends the
		// current run instead of closing one.
		$lastPage = $firstPage - 1;

		foreach ( $pageList as $page ) {
			if ( $page > $lastPage + 1 ) {
				// Gap found: close the current run and start a new one.
				$ranges[] = $formatRange( $firstPage, $lastPage );
				$firstPage = $page;
			}

			$lastPage = $page;
		}

		// Close the run that was still open when the list ended.
		$ranges[] = $formatRange( $firstPage, $lastPage );

		$dimensionsString[] = "$dimensions:" . implode( ',', $ranges );
	}

	return implode( '/', $dimensionsString );
}
/**
 * Get useful response headers for GET/HEAD requests for a file with the given metadata.
 *
 * Produces a single X-Content-Dimensions header describing page 1 with the
 * 'width' and 'height' found in the metadata. If either key is unset, no
 * headers are returned.
 *
 * @param array $metadata Contains this handler's unserialized getMetadata() for a file
 * @return array
 * @since 1.30
 */
public function getContentHeaders( $metadata ) {
	$hasDimensions = isset( $metadata['width'] ) && isset( $metadata['height'] );
	if ( !$hasDimensions ) {
		return [];
	}

	$dimensions = "{$metadata['width']}x{$metadata['height']}";

	return [
		'X-Content-Dimensions' => MediaHandler::getPageRangesByDimensions(
			[ $dimensions => [ 1 ] ]
		),
	];
}
}

View file

@ -121,6 +121,8 @@ class PNGMetadataExtractor {
if ( !$buf || strlen( $buf ) < $chunk_size ) {
throw new Exception( __METHOD__ . ": Read error" );
}
$width = unpack( 'N', substr( $buf, 0, 4 ) )[1];
$height = unpack( 'N', substr( $buf, 4, 4 ) )[1];
$bitDepth = ord( substr( $buf, 8, 1 ) );
// Detect the color type in British English as per the spec
// https://www.w3.org/TR/PNG/#11IHDR
@ -404,6 +406,8 @@ class PNGMetadataExtractor {
'text' => $text,
'bitDepth' => $bitDepth,
'colorType' => $colorType,
'width' => $width,
'height' => $height,
];
}

View file

@ -175,6 +175,9 @@ class XCFHandler extends BitmapHandler {
$metadata['colorType'] = 'unknown';
}
$metadata['width'] = $header['width'];
$metadata['height'] = $header['height'];
} else {
// Marker to prevent repeated attempted extraction
$metadata['error'] = true;

View file

@ -307,7 +307,9 @@ class ImportImages extends Maintenance {
$publishOptions = [];
$handler = MediaHandler::getHandler( $props['mime'] );
if ( $handler ) {
$publishOptions['headers'] = $handler->getStreamHeaders( $props['metadata'] );
$metadata = MediaWiki\quietCall( 'unserialize', $props['metadata'] );
$publishOptions['headers'] = $handler->getContentHeaders( $metadata );
} else {
$publishOptions['headers'] = [];
}

View file

@ -57,13 +57,13 @@ class RefreshFileHeaders extends Maintenance {
__METHOD__, [ 'LIMIT' => $this->mBatchSize, 'ORDER BY' => 'img_name ASC' ] );
foreach ( $res as $row ) {
$file = $repo->newFileFromRow( $row );
$headers = $file->getStreamHeaders();
$headers = $file->getContentHeaders();
if ( count( $headers ) ) {
$this->updateFileHeaders( $file, $headers );
}
// Do all of the older file versions...
foreach ( $file->getHistory() as $oldFile ) {
$headers = $oldFile->getStreamHeaders();
$headers = $oldFile->getContentHeaders();
if ( count( $headers ) ) {
$this->updateFileHeaders( $oldFile, $headers );
}

View file

@ -142,6 +142,8 @@ class BitmapMetadataHandlerTest extends MediaWikiTestCase {
'SerialNumber' => '123456789',
'_MW_PNG_VERSION' => 1,
],
'width' => 50,
'height' => 50,
];
$this->assertEquals( $expected, $result );
}

View file

@ -29,6 +29,8 @@ class ExifTest extends MediaWikiTestCase {
'GPSAltitude' => -3.141592653,
'GPSDOP' => '5/1',
'GPSVersionID' => '2.2.0.0',
'Height' => 10,
'Width' => 40,
];
$this->assertEquals( $expected, $data, '', 0.0000000001 );
}
@ -40,7 +42,9 @@ class ExifTest extends MediaWikiTestCase {
$data = $exif->getFilteredData();
$expected = [
'UserComment' => 'test⁔comment'
'UserComment' => 'test⁔comment',
'Height' => 10,
'Width' => 40,
];
$this->assertEquals( $expected, $data );
}

View file

@ -83,6 +83,8 @@ EOF;
'frameCount' => 1,
'looped' => false,
'xmp' => '',
'width' => 45,
'height' => 30,
]
],
[
@ -93,6 +95,8 @@ EOF;
'frameCount' => 4,
'looped' => true,
'xmp' => '',
'width' => 45,
'height' => 30,
]
],
@ -104,6 +108,8 @@ EOF;
'frameCount' => 4,
'looped' => true,
'comment' => [ 'GIƒ·test·file' ],
'width' => 45,
'height' => 30,
]
],
];

View file

@ -79,7 +79,7 @@ class GIFHandlerTest extends MediaWikiMediaTestCase {
[ 'Something invalid!', GIFHandler::METADATA_BAD ],
// @codingStandardsIgnoreStart Ignore Generic.Files.LineLength.TooLong
[
'a:4:{s:10:"frameCount";i:1;s:6:"looped";b:0;s:8:"duration";d:0.1000000000000000055511151231257827021181583404541015625;s:8:"metadata";a:2:{s:14:"GIFFileComment";a:1:{i:0;s:35:"GIF test file ⁕ Created with GIMP";}s:15:"_MW_GIF_VERSION";i:1;}}',
'a:6:{s:10:"frameCount";i:1;s:6:"looped";b:0;s:8:"duration";d:0.1000000000000000055511151231257827021181583404541015625;s:8:"metadata";a:2:{s:14:"GIFFileComment";a:1:{i:0;s:35:"GIF test file ⁕ Created with GIMP";}s:15:"_MW_GIF_VERSION";i:1;}s:5:"width";i:45;s:6:"height";i:30;}',
GIFHandler::METADATA_GOOD
],
// @codingStandardsIgnoreEnd
@ -103,11 +103,11 @@ class GIFHandlerTest extends MediaWikiMediaTestCase {
// @codingStandardsIgnoreStart Ignore Generic.Files.LineLength.TooLong
[
'nonanimated.gif',
'a:4:{s:10:"frameCount";i:1;s:6:"looped";b:0;s:8:"duration";d:0.1000000000000000055511151231257827021181583404541015625;s:8:"metadata";a:2:{s:14:"GIFFileComment";a:1:{i:0;s:35:"GIF test file ⁕ Created with GIMP";}s:15:"_MW_GIF_VERSION";i:1;}}'
'a:6:{s:10:"frameCount";i:1;s:6:"looped";b:0;s:8:"duration";d:0.1000000000000000055511151231257827021181583404541015625;s:8:"metadata";a:2:{s:14:"GIFFileComment";a:1:{i:0;s:35:"GIF test file ⁕ Created with GIMP";}s:15:"_MW_GIF_VERSION";i:1;}s:5:"width";i:45;s:6:"height";i:30;}'
],
[
'animated-xmp.gif',
'a:4:{s:10:"frameCount";i:4;s:6:"looped";b:1;s:8:"duration";d:2.399999999999999911182158029987476766109466552734375;s:8:"metadata";a:5:{s:6:"Artist";s:7:"Bawolff";s:16:"ImageDescription";a:2:{s:9:"x-default";s:18:"A file to test GIF";s:5:"_type";s:4:"lang";}s:15:"SublocationDest";s:13:"The interwebs";s:14:"GIFFileComment";a:1:{i:0;s:16:"GIƒ·test·file";}s:15:"_MW_GIF_VERSION";i:1;}}'
'a:6:{s:10:"frameCount";i:4;s:6:"looped";b:1;s:8:"duration";d:2.399999999999999911182158029987476766109466552734375;s:8:"metadata";a:5:{s:6:"Artist";s:7:"Bawolff";s:16:"ImageDescription";a:2:{s:9:"x-default";s:18:"A file to test GIF";s:5:"_type";s:4:"lang";}s:15:"SublocationDest";s:13:"The interwebs";s:14:"GIFFileComment";a:1:{i:0;s:16:"GIƒ·test·file";}s:15:"_MW_GIF_VERSION";i:1;}s:5:"width";i:45;s:6:"height";i:30;}'
],
// @codingStandardsIgnoreEnd
];

View file

@ -25,7 +25,7 @@ class JpegTest extends MediaWikiMediaTestCase {
$file = $this->dataFile( 'test.jpg', 'image/jpeg' );
$res = $this->handler->getMetadata( $file, $this->filePath . 'test.jpg' );
// @codingStandardsIgnoreStart Ignore Generic.Files.LineLength.TooLong
$expected = 'a:7:{s:16:"ImageDescription";s:9:"Test file";s:11:"XResolution";s:4:"72/1";s:11:"YResolution";s:4:"72/1";s:14:"ResolutionUnit";i:2;s:16:"YCbCrPositioning";i:1;s:15:"JPEGFileComment";a:1:{i:0;s:17:"Created with GIMP";}s:22:"MEDIAWIKI_EXIF_VERSION";i:2;}';
$expected = 'a:9:{s:16:"ImageDescription";s:9:"Test file";s:11:"XResolution";s:4:"72/1";s:11:"YResolution";s:4:"72/1";s:14:"ResolutionUnit";i:2;s:16:"YCbCrPositioning";i:1;s:15:"JPEGFileComment";a:1:{i:0;s:17:"Created with GIMP";}s:22:"MEDIAWIKI_EXIF_VERSION";i:2;s:5:"Width";i:20;s:6:"Height";i:20;}';
// @codingStandardsIgnoreEnd
// Unserialize in case serialization format ever changes.
@ -39,6 +39,8 @@ class JpegTest extends MediaWikiMediaTestCase {
$file = $this->dataFile( 'test.jpg', 'image/jpeg' );
$res = $this->handler->getCommonMetaArray( $file );
$expected = [
'Height' => 20,
'Width' => 20,
'ImageDescription' => 'Test file',
'XResolution' => '72/1',
'YResolution' => '72/1',

View file

@ -65,4 +65,27 @@ class MediaHandlerTest extends MediaWikiTestCase {
}
return $result;
}
/**
 * Checks that a map of dimensions => page list is rendered as the expected
 * page-range string.
 *
 * @covers MediaHandler::getPageRangesByDimensions
 *
 * @dataProvider provideTestGetPageRangesByDimensions
 * @param array $pagesByDimensions Input map handed to the method under test
 * @param string $expected Expected page-range string
 */
public function testGetPageRangesByDimensions( $pagesByDimensions, $expected ) {
	$actual = MediaHandler::getPageRangesByDimensions( $pagesByDimensions );

	$this->assertEquals( $expected, $actual );
}
/**
 * Data sets for testGetPageRangesByDimensions().
 *
 * @return array[] Each entry is [ input map of dimensions => pages, expected string ]
 */
public static function provideTestGetPageRangesByDimensions() {
	$cases = [];

	// Single page
	$cases[] = [ [ '123x456' => [ 1 ] ], '123x456:1' ];

	// Contiguous pages collapse into one range
	$cases[] = [ [ '123x456' => [ 1, 2 ] ], '123x456:1-2' ];
	$cases[] = [ [ '123x456' => [ 1, 2, 3 ] ], '123x456:1-3' ];

	// Gaps split the list into several ranges or single pages
	$cases[] = [ [ '123x456' => [ 1, 2, 3, 5 ] ], '123x456:1-3,5' ];
	$cases[] = [ [ '123x456' => [ 1, 3 ] ], '123x456:1,3' ];
	$cases[] = [ [ '123x456' => [ 1, 2, 3, 5, 6, 7 ] ], '123x456:1-3,5-7' ];

	// Several dimensions are joined with '/'
	$cases[] = [
		[
			'123x456' => [ 1, 2, 3, 5, 6, 7 ],
			'789x789' => [ 4, 8, 9 ],
		],
		'123x456:1-3,5-7/789x789:4,8-9'
	];

	return $cases;
}
}

View file

@ -80,7 +80,7 @@ class PNGHandlerTest extends MediaWikiMediaTestCase {
[ 'Something invalid!', PNGHandler::METADATA_BAD ],
// @codingStandardsIgnoreStart Ignore Generic.Files.LineLength.TooLong
[
'a:6:{s:10:"frameCount";i:0;s:9:"loopCount";i:1;s:8:"duration";d:0;s:8:"bitDepth";i:8;s:9:"colorType";s:10:"truecolour";s:8:"metadata";a:1:{s:15:"_MW_PNG_VERSION";i:1;}}',
'a:8:{s:10:"frameCount";i:0;s:9:"loopCount";i:1;s:8:"duration";d:0;s:8:"bitDepth";i:8;s:9:"colorType";s:10:"truecolour";s:5:"width";i:50;s:6:"height";i:50;s:8:"metadata";a:1:{s:15:"_MW_PNG_VERSION";i:1;}}',
PNGHandler::METADATA_GOOD
],
// @codingStandardsIgnoreEnd
@ -105,11 +105,11 @@ class PNGHandlerTest extends MediaWikiMediaTestCase {
// @codingStandardsIgnoreStart Ignore Generic.Files.LineLength.TooLong
[
'rgb-na-png.png',
'a:6:{s:10:"frameCount";i:0;s:9:"loopCount";i:1;s:8:"duration";d:0;s:8:"bitDepth";i:8;s:9:"colorType";s:10:"truecolour";s:8:"metadata";a:1:{s:15:"_MW_PNG_VERSION";i:1;}}'
'a:8:{s:10:"frameCount";i:0;s:9:"loopCount";i:1;s:8:"duration";d:0;s:8:"bitDepth";i:8;s:9:"colorType";s:10:"truecolour";s:5:"width";i:50;s:6:"height";i:50;s:8:"metadata";a:1:{s:15:"_MW_PNG_VERSION";i:1;}}'
],
[
'xmp.png',
'a:6:{s:10:"frameCount";i:0;s:9:"loopCount";i:1;s:8:"duration";d:0;s:8:"bitDepth";i:1;s:9:"colorType";s:14:"index-coloured";s:8:"metadata";a:2:{s:12:"SerialNumber";s:9:"123456789";s:15:"_MW_PNG_VERSION";i:1;}}'
'a:8:{s:10:"frameCount";i:0;s:9:"loopCount";i:1;s:8:"duration";d:0;s:8:"bitDepth";i:1;s:9:"colorType";s:14:"index-coloured";s:5:"width";i:50;s:6:"height";i:50;s:8:"metadata";a:2:{s:12:"SerialNumber";s:9:"123456789";s:15:"_MW_PNG_VERSION";i:1;}}'
],
// @codingStandardsIgnoreEnd
];

View file

@ -35,7 +35,7 @@ class TiffTest extends MediaWikiTestCase {
$res = $this->handler->getMetadata( null, $this->filePath . 'test.tiff' );
// @codingStandardsIgnoreStart Ignore Generic.Files.LineLength.TooLong
$expected = 'a:16:{s:10:"ImageWidth";i:20;s:11:"ImageLength";i:20;s:13:"BitsPerSample";a:3:{i:0;i:8;i:1;i:8;i:2;i:8;}s:11:"Compression";i:5;s:25:"PhotometricInterpretation";i:2;s:16:"ImageDescription";s:17:"Created with GIMP";s:12:"StripOffsets";i:8;s:11:"Orientation";i:1;s:15:"SamplesPerPixel";i:3;s:12:"RowsPerStrip";i:64;s:15:"StripByteCounts";i:238;s:11:"XResolution";s:19:"1207959552/16777216";s:11:"YResolution";s:19:"1207959552/16777216";s:19:"PlanarConfiguration";i:1;s:14:"ResolutionUnit";i:2;s:22:"MEDIAWIKI_EXIF_VERSION";i:2;}';
$expected = 'a:18:{s:10:"ImageWidth";i:20;s:11:"ImageLength";i:20;s:13:"BitsPerSample";a:3:{i:0;i:8;i:1;i:8;i:2;i:8;}s:11:"Compression";i:5;s:25:"PhotometricInterpretation";i:2;s:16:"ImageDescription";s:17:"Created with GIMP";s:12:"StripOffsets";i:8;s:11:"Orientation";i:1;s:15:"SamplesPerPixel";i:3;s:12:"RowsPerStrip";i:64;s:15:"StripByteCounts";i:238;s:11:"XResolution";s:19:"1207959552/16777216";s:11:"YResolution";s:19:"1207959552/16777216";s:19:"PlanarConfiguration";i:1;s:14:"ResolutionUnit";i:2;s:22:"MEDIAWIKI_EXIF_VERSION";i:2;s:5:"Width";i:20;s:6:"Height";i:20;}';
// @codingStandardsIgnoreEnd
// Re-unserialize in case there are subtle differences between how versions

View file

@ -69,9 +69,15 @@ class XCFHandlerTest extends MediaWikiMediaTestCase {
public static function provideGetMetadata() {
return [
[ '80x60-2layers.xcf', 'a:1:{s:9:"colorType";s:16:"truecolour-alpha";}' ],
[ '80x60-RGB.xcf', 'a:1:{s:9:"colorType";s:16:"truecolour-alpha";}' ],
[ '80x60-Greyscale.xcf', 'a:1:{s:9:"colorType";s:15:"greyscale-alpha";}' ],
[ '80x60-2layers.xcf',
'a:3:{s:9:"colorType";s:16:"truecolour-alpha";s:5:"width";i:80;s:6:"height";i:60;}'
],
[ '80x60-RGB.xcf',
'a:3:{s:9:"colorType";s:16:"truecolour-alpha";s:5:"width";i:80;s:6:"height";i:60;}'
],
[ '80x60-Greyscale.xcf',
'a:3:{s:9:"colorType";s:15:"greyscale-alpha";s:5:"width";i:80;s:6:"height";i:60;}'
],
];
}
}