'xmp', # Artist is unofficial. Author is the recommended # keyword in the PNG spec. However some people output # Artist so support both. 'artist' => 'Artist', 'model' => 'Model', 'make' => 'Make', 'author' => 'Artist', 'comment' => 'PNGFileComment', 'description' => 'ImageDescription', 'title' => 'ObjectName', 'copyright' => 'Copyright', # Source as in original device used to make image # not as in who gave you the image 'source' => 'Model', 'software' => 'Software', 'disclaimer' => 'Disclaimer', 'warning' => 'ContentWarning', 'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement. 'label' => 'Label', 'creation time' => 'DateTimeDigitized', /* Other potentially useful things - Document */ ]; $frameCount = 0; $loopCount = 1; $text = []; $duration = 0.0; $width = 0; $height = 0; $bitDepth = 0; $colorType = 'unknown'; if ( !$filename ) { throw new Exception( __METHOD__ . ": No file name specified" ); } elseif ( !file_exists( $filename ) || is_dir( $filename ) ) { throw new Exception( __METHOD__ . ": File $filename does not exist" ); } $fh = fopen( $filename, 'rb' ); if ( !$fh ) { throw new Exception( __METHOD__ . ": Unable to open file $filename" ); } // Check for the PNG header $buf = self::read( $fh, 8 ); if ( $buf != self::$pngSig ) { throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" ); } // Read chunks while ( !feof( $fh ) ) { $buf = self::read( $fh, 4 ); $chunk_size = unpack( "N", $buf )[1]; if ( $chunk_size < 0 || $chunk_size > self::MAX_CHUNK_SIZE ) { wfDebug( __METHOD__ . ': Chunk size of ' . $chunk_size . ' too big, skipping. Max size is: ' . self::MAX_CHUNK_SIZE ); if ( fseek( $fh, 4 + $chunk_size + self::$crcSize, SEEK_CUR ) !== 0 ) { throw new Exception( __METHOD__ . ': seek error' ); } continue; } $chunk_type = self::read( $fh, 4 ); $buf = self::read( $fh, $chunk_size ); $crc = self::read( $fh, self::$crcSize ); $computed = crc32( $chunk_type . 
$buf ); if ( pack( 'N', $computed ) !== $crc ) { wfDebug( __METHOD__ . ': chunk has invalid CRC, skipping' ); continue; } if ( $chunk_type == "IHDR" ) { $width = unpack( 'N', substr( $buf, 0, 4 ) )[1]; $height = unpack( 'N', substr( $buf, 4, 4 ) )[1]; $bitDepth = ord( substr( $buf, 8, 1 ) ); // Detect the color type in British English as per the spec // https://www.w3.org/TR/PNG/#11IHDR switch ( ord( substr( $buf, 9, 1 ) ) ) { case 0: $colorType = 'greyscale'; break; case 2: $colorType = 'truecolour'; break; case 3: $colorType = 'index-coloured'; break; case 4: $colorType = 'greyscale-alpha'; break; case 6: $colorType = 'truecolour-alpha'; break; default: $colorType = 'unknown'; break; } } elseif ( $chunk_type == "acTL" ) { if ( $chunk_size < 4 ) { wfDebug( __METHOD__ . ": acTL chunk too small" ); continue; } $actl = unpack( "Nframes/Nplays", $buf ); $frameCount = $actl['frames']; $loopCount = $actl['plays']; } elseif ( $chunk_type == "fcTL" ) { $buf = substr( $buf, 20 ); if ( strlen( $buf ) < 4 ) { wfDebug( __METHOD__ . ": fcTL chunk too small" ); continue; } $fctldur = unpack( "ndelay_num/ndelay_den", $buf ); if ( $fctldur['delay_den'] == 0 ) { $fctldur['delay_den'] = 100; } if ( $fctldur['delay_num'] ) { $duration += $fctldur['delay_num'] / $fctldur['delay_den']; } } elseif ( $chunk_type == "iTXt" ) { // Extracts iTXt chunks, uncompressing if necessary. $items = []; if ( preg_match( '/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds', $buf, $items ) ) { /* $items[1] = text chunk name, $items[2] = compressed flag, * $items[3] = lang code (or ""), $items[4]= compression type. * $items[5] = content */ // Theoretically should be case-sensitive, but in practise... $items[1] = strtolower( $items[1] ); if ( !isset( self::$textChunks[$items[1]] ) ) { // Only extract textual chunks on our list. continue; } $items[3] = strtolower( $items[3] ); if ( $items[3] == '' ) { // if no lang specified use x-default like in xmp. 
$items[3] = 'x-default'; } // if compressed if ( $items[2] == "\x01" ) { if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) { AtEase::suppressWarnings(); $items[5] = gzuncompress( $items[5] ); AtEase::restoreWarnings(); if ( $items[5] === false ) { // decompression failed wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] ); continue; } } else { wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,' . " or potentially invalid compression method" ); continue; } } $finalKeyword = self::$textChunks[$items[1]]; $text[$finalKeyword][$items[3]] = $items[5]; $text[$finalKeyword]['_type'] = 'lang'; } else { // Error reading iTXt chunk wfDebug( __METHOD__ . ": Invalid iTXt chunk" ); } } elseif ( $chunk_type == 'tEXt' ) { // In case there is no \x00 which will make explode fail. if ( strpos( $buf, "\x00" ) === false ) { wfDebug( __METHOD__ . ": Invalid tEXt chunk: no null byte" ); continue; } list( $keyword, $content ) = explode( "\x00", $buf, 2 ); if ( $keyword === '' ) { wfDebug( __METHOD__ . ": Empty tEXt keyword" ); continue; } // Theoretically should be case-sensitive, but in practise... $keyword = strtolower( $keyword ); if ( !isset( self::$textChunks[$keyword] ) ) { // Don't recognize chunk, so skip. continue; } AtEase::suppressWarnings(); $content = iconv( 'ISO-8859-1', 'UTF-8', $content ); AtEase::restoreWarnings(); if ( $content === false ) { wfDebug( __METHOD__ . ": Read error (error with iconv)" ); continue; } $finalKeyword = self::$textChunks[$keyword]; $text[$finalKeyword]['x-default'] = $content; $text[$finalKeyword]['_type'] = 'lang'; } elseif ( $chunk_type == 'zTXt' ) { if ( function_exists( 'gzuncompress' ) ) { // In case there is no \x00 which will make explode fail. if ( strpos( $buf, "\x00" ) === false ) { wfDebug( __METHOD__ . 
": No null byte in zTXt chunk" ); continue; } list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 ); if ( $keyword === '' || $postKeyword === '' ) { wfDebug( __METHOD__ . ": Empty zTXt chunk" ); continue; } // Theoretically should be case-sensitive, but in practise... $keyword = strtolower( $keyword ); if ( !isset( self::$textChunks[$keyword] ) ) { // Don't recognize chunk, so skip. continue; } $compression = substr( $postKeyword, 0, 1 ); $content = substr( $postKeyword, 1 ); if ( $compression !== "\x00" ) { wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." ); continue; } AtEase::suppressWarnings(); $content = gzuncompress( $content ); AtEase::restoreWarnings(); if ( $content === false ) { // decompression failed wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword ); continue; } AtEase::suppressWarnings(); $content = iconv( 'ISO-8859-1', 'UTF-8', $content ); AtEase::restoreWarnings(); if ( $content === false ) { wfDebug( __METHOD__ . ": iconv error in zTXt chunk" ); continue; } $finalKeyword = self::$textChunks[$keyword]; $text[$finalKeyword]['x-default'] = $content; $text[$finalKeyword]['_type'] = 'lang'; } else { wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." ); } } elseif ( $chunk_type == 'tIME' ) { // last mod timestamp. if ( $chunk_size !== 7 ) { wfDebug( __METHOD__ . ": tIME wrong size" ); continue; } // Note: spec says this should be UTC. $t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf ); $strTime = sprintf( "%04d%02d%02d%02d%02d%02d", $t['y'], $t['m'], $t['d'], $t['h'], $t['min'], $t['s'] ); $exifTime = wfTimestamp( TS_EXIF, $strTime ); if ( $exifTime ) { $text['DateTime'] = $exifTime; } } elseif ( $chunk_type == 'pHYs' ) { // how big pixels are (dots per meter). if ( $chunk_size !== 9 ) { wfDebug( __METHOD__ . 
": pHYs wrong size" );
					continue;
				}

				$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
				if ( $dim['unit'] == 1 ) {
					// Need to check for negative because php
					// doesn't deal with super-large unsigned 32-bit ints well
					if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
						// unit is meters
						// (as opposed to 0 = undefined )
						$text['XResolution'] = $dim['width'] . '/100';
						$text['YResolution'] = $dim['height'] . '/100';
						$text['ResolutionUnit'] = 3; // 3 = dots per cm (from Exif).
					}
				}
			} elseif ( $chunk_type == "IEND" ) {
				break;
			}
		}
		fclose( $fh );

		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// @todo FIXME: Currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.
					$value = $formatted;
				}
			}
		}

		return [
			'width' => $width,
			'height' => $height,
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		];
	}

	/**
	 * Read exactly $size bytes from the file handle.
	 *
	 * A single fread() call may return fewer bytes than requested when the
	 * handle is not a plain local file (for example a buffered stream
	 * wrapper), so reading continues until the requested amount has been
	 * collected or the stream stops producing data.
	 *
	 * @param resource $fh The file handle
	 * @param int $size Size in bytes.
	 * @throws Exception If $size is negative, if the read fails, or if the
	 *  stream ends before $size bytes could be read
	 * @return string The bytes read; the empty string when $size is 0.
	 */
	private static function read( $fh, $size ) {
		if ( $size === 0 ) {
			return '';
		}
		if ( $size < 0 ) {
			// Never hand a negative length to fread(); fail with the same
			// exception type callers already expect from this helper.
			throw new Exception( __METHOD__ . ': invalid read size' );
		}

		$result = '';
		$remaining = $size;
		while ( $remaining > 0 ) {
			$piece = fread( $fh, $remaining );
			if ( $piece === false ) {
				throw new Exception( __METHOD__ . ': read error' );
			}
			if ( $piece === '' ) {
				// No more data is available although bytes are still
				// owed: the file is truncated.
				throw new Exception( __METHOD__ . ': unexpected end of file' );
			}

			$result .= $piece;
			$remaining -= strlen( $piece );
		}

		return $result;
	}
}