* Factor out file read errors and unexpected EOF errors.
* For errors relating to chunk content, instead of throwing an exception which is silently discarded, just log an error and continue to the next chunk. This allows the dimensions to be extracted even if other metadata is mangled.
* As an additional sanity check, verify the CRC of each chunk.

Bug: T286273
Change-Id: I11d0186496324e0bb1bb0a143f438e0368a8e902
419 lines
12 KiB
PHP
419 lines
12 KiB
PHP
<?php
|
|
/**
 * PNG frame counter and metadata extractor.
 *
 * Slightly derived from GIFMetadataExtractor.php
 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
 * redistribution.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Media
 */
|
|
|
|
/**
 * PNG frame counter and metadata extractor.
 *
 * @ingroup Media
 */
|
|
class PNGMetadataExtractor {
	public const VERSION = 1;

	/** Largest chunk payload that is loaded into memory: 3 mebibytes. */
	private const MAX_CHUNK_SIZE = 3145728;

	/** The 8-byte signature that every PNG file starts with. */
	private const PNG_SIG = "\x89PNG\r\n\x1a\n";

	/** Size in bytes of the CRC that trails every chunk. */
	private const CRC_SIZE = 4;

	/** Bytes of IHDR payload this class reads (width, height, bit depth, colour type, ...). */
	private const IHDR_SIZE = 13;

	/** Bytes of acTL payload this class reads (frame count + play count). */
	private const ACTL_SIZE = 8;

	/** Bytes of fcTL payload needed to reach the end of the delay_num/delay_den pair. */
	private const FCTL_MIN_SIZE = 24;

	/**
	 * IHDR colour type byte => name, in British English as per the spec.
	 * https://www.w3.org/TR/PNG/#11IHDR
	 */
	private const COLOR_TYPES = [
		0 => 'greyscale',
		2 => 'truecolour',
		3 => 'index-coloured',
		4 => 'greyscale-alpha',
		6 => 'truecolour-alpha',
	];

	/**
	 * Lower-cased textual chunk keyword => metadata field name.
	 *
	 * Based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
	 * and https://www.w3.org/TR/PNG/#11keywords
	 */
	private const TEXT_CHUNKS = [
		'xml:com.adobe.xmp' => 'xmp',
		# Artist is unofficial. Author is the recommended
		# keyword in the PNG spec. However some people output
		# Artist so support both.
		'artist' => 'Artist',
		'model' => 'Model',
		'make' => 'Make',
		'author' => 'Artist',
		'comment' => 'PNGFileComment',
		'description' => 'ImageDescription',
		'title' => 'ObjectName',
		'copyright' => 'Copyright',
		# Source as in original device used to make image
		# not as in who gave you the image
		'source' => 'Model',
		'software' => 'Software',
		'disclaimer' => 'Disclaimer',
		'warning' => 'ContentWarning',
		'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
		'label' => 'Label',
		'creation time' => 'DateTimeDigitized',
		/* Other potentially useful things - Document */
	];

	/**
	 * Extract dimensions, animation information and textual metadata from a PNG file.
	 *
	 * Structural problems (missing file, bad signature, read errors, truncated
	 * file) raise an exception. Problems confined to a single chunk (bad CRC,
	 * malformed or oversized payload) are logged with wfDebug() and that chunk
	 * is skipped, so the remaining metadata can still be extracted.
	 *
	 * @param string $filename Path of the file to inspect
	 * @throws Exception If no file name is given, the file does not exist or
	 *   cannot be opened, it does not start with the PNG signature, or a
	 *   read/seek error or unexpected end of file occurs
	 * @return array Keys: 'width', 'height', 'frameCount', 'loopCount',
	 *   'duration', 'text', 'bitDepth' and 'colorType'
	 */
	public static function getMetadata( $filename ) {
		$frameCount = 0;
		$loopCount = 1;
		$text = [];
		$duration = 0.0;
		$width = 0;
		$height = 0;
		$bitDepth = 0;
		$colorType = 'unknown';

		if ( !$filename ) {
			throw new Exception( __METHOD__ . ": No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( __METHOD__ . ": File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( __METHOD__ . ": Unable to open file $filename" );
		}

		// try/finally so that the handle is released even when parsing throws.
		try {
			// Check for the PNG header
			$buf = self::read( $fh, 8 );
			if ( $buf !== self::PNG_SIG ) {
				throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks
			while ( !feof( $fh ) ) {
				$buf = self::read( $fh, 4 );
				$chunk_size = unpack( "N", $buf )[1];

				if ( $chunk_size < 0 ) {
					throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
				}

				if ( $chunk_size > self::MAX_CHUNK_SIZE ) {
					// Too big to load (typically a large IDAT). Seek past the
					// type, payload and CRC instead of rejecting the whole file.
					wfDebug( __METHOD__ . ': Chunk size of ' . $chunk_size .
						' too big, skipping. Max size is: ' . self::MAX_CHUNK_SIZE );
					if ( fseek( $fh, 4 + $chunk_size + self::CRC_SIZE, SEEK_CUR ) !== 0 ) {
						throw new Exception( __METHOD__ . ': seek error' );
					}
					continue;
				}

				$chunk_type = self::read( $fh, 4 );
				$buf = self::read( $fh, $chunk_size );
				$crc = self::read( $fh, self::CRC_SIZE );
				// The CRC covers the chunk type and the payload, not the length field.
				$computed = crc32( $chunk_type . $buf );
				if ( pack( 'N', $computed ) !== $crc ) {
					wfDebug( __METHOD__ . ': chunk has invalid CRC, skipping' );
					continue;
				}

				if ( $chunk_type === "IHDR" ) {
					if ( $chunk_size < self::IHDR_SIZE ) {
						wfDebug( __METHOD__ . ": IHDR chunk too small" );
						continue;
					}

					$ihdr = unpack( 'Nwidth/Nheight/CbitDepth/CcolorType', $buf );
					$width = $ihdr['width'];
					$height = $ihdr['height'];
					$bitDepth = $ihdr['bitDepth'];
					$colorType = self::COLOR_TYPES[$ihdr['colorType']] ?? 'unknown';
				} elseif ( $chunk_type === "acTL" ) {
					// Both 32-bit fields are unpacked below, so all 8 bytes must be present.
					if ( $chunk_size < self::ACTL_SIZE ) {
						wfDebug( __METHOD__ . ": acTL chunk too small" );
						continue;
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type === "fcTL" ) {
					// The delay fraction sits at byte offset 20 of the payload.
					if ( $chunk_size < self::FCTL_MIN_SIZE ) {
						wfDebug( __METHOD__ . ": fcTL chunk too small" );
						continue;
					}

					$fctldur = unpack( "ndelay_num/ndelay_den", substr( $buf, 20, 4 ) );
					if ( $fctldur['delay_den'] == 0 ) {
						// A zero denominator is treated as 100 (hundredths of a second).
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type === "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$items = [];
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk name, $items[2] = compressed flag,
						 * $items[3] = lang code (or ""), $items[5] = content.
						 * The compression method byte is matched by the literal \x00
						 * right after the flag, so only method 0 passes the regex.
						 * $items[4] is the single byte following the lang code,
						 * which is normally its null terminator.
						 */

						// Theoretically should be case-sensitive, but in practise...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::TEXT_CHUNKS[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] === '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] === "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								Wikimedia\suppressWarnings();
								$items[5] = gzuncompress( $items[5] );
								Wikimedia\restoreWarnings();

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] );
									continue;
								}
							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. " or potentially invalid compression method" );
								continue;
							}
						}
						$finalKeyword = self::TEXT_CHUNKS[$items[1]];
						$text[$finalKeyword][$items[3]] = $items[5];
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						// Error reading iTXt chunk
						wfDebug( __METHOD__ . ": Invalid iTXt chunk" );
					}
				} elseif ( $chunk_type === 'tEXt' ) {
					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						wfDebug( __METHOD__ . ": Invalid tEXt chunk: no null byte" );
						continue;
					}

					[ $keyword, $content ] = explode( "\x00", $buf, 2 );
					if ( $keyword === '' ) {
						wfDebug( __METHOD__ . ": Empty tEXt keyword" );
						continue;
					}

					// Theoretically should be case-sensitive, but in practise...
					$keyword = strtolower( $keyword );
					if ( !isset( self::TEXT_CHUNKS[$keyword] ) ) {
						// Don't recognize chunk, so skip.
						continue;
					}
					Wikimedia\suppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					Wikimedia\restoreWarnings();

					if ( $content === false ) {
						wfDebug( __METHOD__ . ": Read error (error with iconv)" );
						continue;
					}

					$finalKeyword = self::TEXT_CHUNKS[$keyword];
					$text[$finalKeyword]['x-default'] = $content;
					$text[$finalKeyword]['_type'] = 'lang';
				} elseif ( $chunk_type === 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						// In case there is no \x00 which will make explode fail.
						if ( strpos( $buf, "\x00" ) === false ) {
							wfDebug( __METHOD__ . ": No null byte in zTXt chunk" );
							continue;
						}

						[ $keyword, $postKeyword ] = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							wfDebug( __METHOD__ . ": Empty zTXt chunk" );
							continue;
						}
						// Theoretically should be case-sensitive, but in practise...
						$keyword = strtolower( $keyword );

						if ( !isset( self::TEXT_CHUNKS[$keyword] ) ) {
							// Don't recognize chunk, so skip.
							continue;
						}
						// First byte after the keyword is the compression method;
						// the rest is the compressed text.
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." );
							continue;
						}

						Wikimedia\suppressWarnings();
						$content = gzuncompress( $content );
						Wikimedia\restoreWarnings();

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword );
							continue;
						}

						Wikimedia\suppressWarnings();
						$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
						Wikimedia\restoreWarnings();

						if ( $content === false ) {
							wfDebug( __METHOD__ . ": iconv error in zTXt chunk" );
							continue;
						}

						$finalKeyword = self::TEXT_CHUNKS[$keyword];
						$text[$finalKeyword]['x-default'] = $content;
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
					}
				} elseif ( $chunk_type === 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						wfDebug( __METHOD__ . ": tIME wrong size" );
						continue;
					}

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS_EXIF, $strTime );

					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}
				} elseif ( $chunk_type === 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						wfDebug( __METHOD__ . ": pHYs wrong size" );
						continue;
					}

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] == 1 ) {
						// Need to check for negative because php
						// doesn't deal with super-large unsigned 32-bit ints well
						if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
							// unit is meters
							// (as opposed to 0 = undefined )
							$text['XResolution'] = $dim['width']
								. '/100';
							$text['YResolution'] = $dim['height']
								. '/100';
							$text['ResolutionUnit'] = 3;
							// 3 = dots per cm (from Exif).
						}
					}
				} elseif ( $chunk_type === "IEND" ) {
					break;
				}
			}
		} finally {
			fclose( $fh );
		}

		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			// Entries are written back by key rather than by reference, so no
			// dangling reference survives the loop.
			foreach ( $text['DateTimeDigitized'] as $name => $value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// @todo FIXME: Currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.
					$text['DateTimeDigitized'][$name] = $formatted;
				}
			}
		}

		return [
			'width' => $width,
			'height' => $height,
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		];
	}

	/**
	 * Read exactly $size bytes from the file, refusing oversized requests.
	 *
	 * @param resource $fh The file handle
	 * @param int $size Size in bytes.
	 * @throws Exception If $size exceeds MAX_CHUNK_SIZE, the read fails, or
	 *   fewer than $size bytes are available
	 * @return string The bytes read; an empty string when $size is 0.
	 */
	private static function read( $fh, int $size ): string {
		if ( $size > self::MAX_CHUNK_SIZE ) {
			throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
				' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
		}
		if ( $size === 0 ) {
			// Nothing to read for an empty request, so skip fread() entirely.
			return '';
		}

		$result = fread( $fh, $size );
		if ( $result === false ) {
			throw new Exception( __METHOD__ . ': read error' );
		}
		if ( strlen( $result ) < $size ) {
			throw new Exception( __METHOD__ . ': unexpected end of file' );
		}

		return $result;
	}
}
|