2005-05-21 07:46:17 +00:00
|
|
|
<?php
|
2010-08-08 14:23:14 +00:00
|
|
|
/**
|
2014-07-24 14:04:48 +00:00
|
|
|
* Module defining helper functions for detecting and dealing with MIME types.
|
2005-05-21 07:46:17 +00:00
|
|
|
*
|
2012-05-12 20:33:02 +00:00
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
|
|
|
*
|
2010-08-08 14:23:14 +00:00
|
|
|
* @file
|
2005-05-21 07:46:17 +00:00
|
|
|
*/
|
|
|
|
|
|
2010-08-08 14:23:14 +00:00
|
|
|
/**
|
2014-07-24 14:04:48 +00:00
|
|
|
* Defines a set of well known MIME types
|
2005-05-21 07:46:17 +00:00
|
|
|
* This is used as a fallback to mime.types files.
|
2014-07-24 14:04:48 +00:00
|
|
|
* An extensive list of well known MIME types is provided by
|
2005-05-21 07:46:17 +00:00
|
|
|
* the file mime.types in the includes directory.
|
2012-10-19 20:03:05 +00:00
|
|
|
*
|
2014-07-24 14:04:48 +00:00
|
|
|
* This list concatenated with mime.types is used to create a MIME <-> ext
|
|
|
|
|
* map. Each line contains a MIME type followed by a space separated list of
|
|
|
|
|
* extensions. If multiple extensions for a single MIME type exist or if
|
|
|
|
|
* multiple MIME types exist for a single extension then in most cases
|
|
|
|
|
* MediaWiki assumes that the first extension following the MIME type is the
|
|
|
|
|
* canonical extension, and the first time a MIME type appears for a certain
|
|
|
|
|
* extension is considered the canonical MIME type.
|
2012-10-19 20:03:05 +00:00
|
|
|
*
|
2011-01-07 19:41:38 +00:00
|
|
|
* (Note that appending $wgMimeTypeFile to the end of MM_WELL_KNOWN_MIME_TYPES
|
2012-10-19 20:03:05 +00:00
|
|
|
* sucks because you can't redefine canonical types. This could be fixed by
|
2011-01-07 19:41:38 +00:00
|
|
|
* appending MM_WELL_KNOWN_MIME_TYPES behind $wgMimeTypeFile, but who knows
|
|
|
|
|
* what will break? In practice this probably isn't a problem anyway -- Bryan)
|
2005-05-21 07:46:17 +00:00
|
|
|
*/
|
2013-04-20 22:49:30 +00:00
|
|
|
// Built-in "MIME type -> file extensions" table. Each line holds one MIME type
// followed by a space separated list of extensions. MimeMagic's constructor
// starts from this list and appends the configured mime.types file to it.
// Nowdoc syntax is used because the table needs no interpolation or escapes.
define( 'MM_WELL_KNOWN_MIME_TYPES', <<<'END_STRING'
application/ogg ogx ogg ogm ogv oga spx
application/pdf pdf
application/vnd.oasis.opendocument.chart odc
application/vnd.oasis.opendocument.chart-template otc
application/vnd.oasis.opendocument.database odb
application/vnd.oasis.opendocument.formula odf
application/vnd.oasis.opendocument.formula-template otf
application/vnd.oasis.opendocument.graphics odg
application/vnd.oasis.opendocument.graphics-template otg
application/vnd.oasis.opendocument.image odi
application/vnd.oasis.opendocument.image-template oti
application/vnd.oasis.opendocument.presentation odp
application/vnd.oasis.opendocument.presentation-template otp
application/vnd.oasis.opendocument.spreadsheet ods
application/vnd.oasis.opendocument.spreadsheet-template ots
application/vnd.oasis.opendocument.text odt
application/vnd.oasis.opendocument.text-master otm
application/vnd.oasis.opendocument.text-template ott
application/vnd.oasis.opendocument.text-web oth
application/javascript js
application/x-shockwave-flash swf
audio/midi mid midi kar
audio/mpeg mpga mpa mp2 mp3
audio/x-aiff aif aiff aifc
audio/x-wav wav
audio/ogg oga spx ogg
image/x-bmp bmp
image/gif gif
image/jpeg jpeg jpg jpe
image/png png
image/svg+xml svg
image/svg svg
image/tiff tiff tif
image/vnd.djvu djvu
image/x.djvu djvu
image/x-djvu djvu
image/x-portable-pixmap ppm
image/x-xcf xcf
text/plain txt
text/html html htm
video/ogg ogv ogm ogg
video/mpeg mpg mpeg
END_STRING
);
|
|
|
|
|
|
2010-08-08 14:23:14 +00:00
|
|
|
/**
|
2014-07-24 14:04:48 +00:00
|
|
|
* Defines a set of well known MIME info entries
|
2005-05-21 07:46:17 +00:00
|
|
|
* This is used as a fallback to mime.info files.
|
2014-07-24 14:04:48 +00:00
|
|
|
* An extensive list of well known MIME types is provided by
|
2005-05-21 07:46:17 +00:00
|
|
|
* the file mime.info in the includes directory.
|
|
|
|
|
*/
|
2013-04-20 22:49:30 +00:00
|
|
|
// Built-in MIME info table. Each line lists a main MIME type, optionally
// followed by further MIME types treated as its aliases, and a [MEDIATYPE]
// tag. MimeMagic's constructor starts from this list and appends the
// configured mime.info file to it.
// Nowdoc syntax is used because the table needs no interpolation or escapes.
define( 'MM_WELL_KNOWN_MIME_INFO', <<<'END_STRING'
application/pdf [OFFICE]
application/vnd.oasis.opendocument.chart [OFFICE]
application/vnd.oasis.opendocument.chart-template [OFFICE]
application/vnd.oasis.opendocument.database [OFFICE]
application/vnd.oasis.opendocument.formula [OFFICE]
application/vnd.oasis.opendocument.formula-template [OFFICE]
application/vnd.oasis.opendocument.graphics [OFFICE]
application/vnd.oasis.opendocument.graphics-template [OFFICE]
application/vnd.oasis.opendocument.image [OFFICE]
application/vnd.oasis.opendocument.image-template [OFFICE]
application/vnd.oasis.opendocument.presentation [OFFICE]
application/vnd.oasis.opendocument.presentation-template [OFFICE]
application/vnd.oasis.opendocument.spreadsheet [OFFICE]
application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
application/vnd.oasis.opendocument.text [OFFICE]
application/vnd.oasis.opendocument.text-template [OFFICE]
application/vnd.oasis.opendocument.text-master [OFFICE]
application/vnd.oasis.opendocument.text-web [OFFICE]
application/javascript text/javascript application/x-javascript [EXECUTABLE]
application/x-shockwave-flash [MULTIMEDIA]
audio/midi [AUDIO]
audio/x-aiff [AUDIO]
audio/x-wav [AUDIO]
audio/mp3 audio/mpeg [AUDIO]
application/ogg audio/ogg video/ogg [MULTIMEDIA]
image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
image/gif [BITMAP]
image/jpeg [BITMAP]
image/png [BITMAP]
image/svg+xml [DRAWING]
image/tiff [BITMAP]
image/vnd.djvu [BITMAP]
image/x-xcf [BITMAP]
image/x-portable-pixmap [BITMAP]
text/plain [TEXT]
text/html [TEXT]
video/ogg [VIDEO]
video/mpeg [VIDEO]
unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
END_STRING
);
|
|
|
|
|
|
2008-04-14 07:45:50 +00:00
|
|
|
/**
|
2014-07-24 14:04:48 +00:00
|
|
|
* Implements functions related to MIME types such as detection and mapping to
|
2007-04-24 06:53:31 +00:00
|
|
|
* file extension.
|
|
|
|
|
*
|
2012-05-10 17:01:50 +00:00
|
|
|
* Instances of this class are stateless, there only needs to be one global instance
|
2007-04-24 06:53:31 +00:00
|
|
|
* of MimeMagic. Please use MimeMagic::singleton() to get that instance.
|
|
|
|
|
*/
|
2005-05-21 07:46:17 +00:00
|
|
|
class MimeMagic {
|
|
|
|
|
/**
 * @var array Media type => list of MIME types, filled by the constructor
 * from the mime.info data. Used by findMediaType and getMediaType.
 */
protected $mMediaTypes;

/**
 * @var array Alias MIME type => main MIME type, filled by the constructor
 * from the mime.info data.
 */
protected $mMimeTypeAliases;

/**
 * @var array MIME type => file extensions (as a space separated list)
 */
protected $mMimeToExt;

/**
 * @var array File extension => MIME types (as a space separated list)
 */
public $mExtToMime;

/**
 * @var IEContentAnalyzer Not assigned by the constructor.
 */
protected $mIEAnalyzer;

/**
 * @var string Extra MIME types, set for example by media handling
 * extensions through addExtraTypes()
 */
private $mExtraTypes = '';

/**
 * @var string Extra MIME info, set for example by media handling
 * extensions through addExtraInfo()
 */
private $mExtraInfo = '';

/**
 * @var Config
 */
private $mConfig;

/**
 * @var MimeMagic The singleton instance, created on demand by singleton()
 */
private static $instance;
|
2006-10-03 13:00:52 +00:00
|
|
|
|
|
|
|
|
/** Initializes the MimeMagic object. This is called by MimeMagic::singleton().
|
2011-05-21 19:35:16 +00:00
|
|
|
*
|
|
|
|
|
* This constructor parses the mime.types and mime.info files and builds internal mappings.
|
2014-08-23 08:13:19 +00:00
|
|
|
*
|
|
|
|
|
* @todo Make this constructor private once everything uses the singleton instance
|
|
|
|
|
* @param Config $config
|
2011-05-21 19:35:16 +00:00
|
|
|
*/
|
2014-08-23 08:13:19 +00:00
|
|
|
function __construct( Config $config = null ) {
	// Fall back to the main config when the caller did not pass one.
	if ( !$config ) {
		wfDebug( __METHOD__ . " called with no Config instance passed to it\n" );
		$config = ConfigFactory::getDefaultInstance()->makeConfig( 'main' );
	}
	$this->mConfig = $config;

	/**
	 *   --- load mime.types ---
	 */

	global $IP;

	# Allow media handling extensions adding MIME-types and MIME-info.
	# This has to run before mExtraTypes / mExtraInfo are read below.
	Hooks::run( 'MimeMagicInit', [ $this ] );

	// Built-in list first, then the configured file, then extension extras.
	$types = MM_WELL_KNOWN_MIME_TYPES;

	$mimeTypeFile = $this->mConfig->get( 'MimeTypeFile' );
	if ( $mimeTypeFile == 'includes/mime.types' ) {
		$mimeTypeFile = "$IP/$mimeTypeFile";
	}

	if ( $mimeTypeFile ) {
		if ( is_file( $mimeTypeFile ) && is_readable( $mimeTypeFile ) ) {
			wfDebug( __METHOD__ . ": loading mime types from $mimeTypeFile\n" );
			$types .= "\n";
			$types .= file_get_contents( $mimeTypeFile );
		} else {
			wfDebug( __METHOD__ . ": can't load mime types from $mimeTypeFile\n" );
		}
	} else {
		wfDebug( __METHOD__ . ": no mime types file defined, using built-ins only.\n" );
	}

	$types .= "\n" . $this->mExtraTypes;

	// Normalise line endings and tabs so lines split on "\n" and fields on " ".
	$types = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $types );
	$types = str_replace( "\t", " ", $types );

	$this->mMimeToExt = [];
	$this->mExtToMime = [];

	$lines = explode( "\n", $types );
	foreach ( $lines as $s ) {
		$s = trim( $s );
		if ( empty( $s ) ) {
			continue;
		}
		// Lines starting with '#' are comments
		if ( strpos( $s, '#' ) === 0 ) {
			continue;
		}

		$s = strtolower( $s );
		$i = strpos( $s, ' ' );

		// A MIME type without any extension carries no mapping
		if ( $i === false ) {
			continue;
		}

		$mime = substr( $s, 0, $i );
		$ext = trim( substr( $s, $i + 1 ) );

		if ( empty( $ext ) ) {
			continue;
		}

		// Extensions of repeated MIME types are appended, so the first
		// occurrence stays at the front of the list.
		if ( !empty( $this->mMimeToExt[$mime] ) ) {
			$this->mMimeToExt[$mime] .= ' ' . $ext;
		} else {
			$this->mMimeToExt[$mime] = $ext;
		}

		$extensions = explode( ' ', $ext );

		foreach ( $extensions as $e ) {
			$e = trim( $e );
			if ( empty( $e ) ) {
				continue;
			}

			// Same rule for the reverse map: first MIME type seen comes first
			if ( !empty( $this->mExtToMime[$e] ) ) {
				$this->mExtToMime[$e] .= ' ' . $mime;
			} else {
				$this->mExtToMime[$e] = $mime;
			}
		}
	}

	/**
	 *   --- load mime.info ---
	 */

	$mimeInfoFile = $this->mConfig->get( 'MimeInfoFile' );
	if ( $mimeInfoFile == 'includes/mime.info' ) {
		$mimeInfoFile = "$IP/$mimeInfoFile";
	}

	$info = MM_WELL_KNOWN_MIME_INFO;

	if ( $mimeInfoFile ) {
		if ( is_file( $mimeInfoFile ) && is_readable( $mimeInfoFile ) ) {
			wfDebug( __METHOD__ . ": loading mime info from $mimeInfoFile\n" );
			$info .= "\n";
			$info .= file_get_contents( $mimeInfoFile );
		} else {
			wfDebug( __METHOD__ . ": can't load mime info from $mimeInfoFile\n" );
		}
	} else {
		wfDebug( __METHOD__ . ": no mime info file defined, using built-ins only.\n" );
	}

	$info .= "\n" . $this->mExtraInfo;

	$info = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $info );
	$info = str_replace( "\t", " ", $info );

	$this->mMimeTypeAliases = [];
	$this->mMediaTypes = [];

	$lines = explode( "\n", $info );
	foreach ( $lines as $s ) {
		$s = trim( $s );
		if ( empty( $s ) ) {
			continue;
		}
		if ( strpos( $s, '#' ) === 0 ) {
			continue;
		}

		$s = strtolower( $s );
		$i = strpos( $s, ' ' );

		if ( $i === false ) {
			continue;
		}

		# print "processing MIME INFO line $s<br>";

		// Pull the [MEDIATYPE] tag out of the line; what remains is the
		// list of MIME types.
		$match = [];
		if ( preg_match( '!\[\s*(\w+)\s*\]!', $s, $match ) ) {
			$s = preg_replace( '!\[\s*(\w+)\s*\]!', '', $s );
			$mtype = trim( strtoupper( $match[1] ) );
		} else {
			$mtype = MEDIATYPE_UNKNOWN;
		}

		// Tokenise once and drop empty fragments. Removing the tag leaves
		// a stray space behind, and repeated spaces do the same; without
		// this filtering an empty string would be registered as an alias
		// (or even as the main type) below.
		$m = [];
		foreach ( explode( ' ', $s ) as $mime ) {
			$mime = trim( $mime );
			if ( $mime !== '' ) {
				$m[] = $mime;
			}
		}

		if ( !isset( $this->mMediaTypes[$mtype] ) ) {
			$this->mMediaTypes[$mtype] = [];
		}

		foreach ( $m as $mime ) {
			$this->mMediaTypes[$mtype][] = $mime;
		}

		// The first MIME type on the line is the main type; every further
		// one is an alias for it.
		$main = array_shift( $m );
		foreach ( $m as $alias ) {
			$this->mMimeTypeAliases[$alias] = $main;
		}
	}
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2006-10-03 13:00:52 +00:00
|
|
|
/**
|
|
|
|
|
* Get an instance of this class
|
2011-02-02 11:38:50 +00:00
|
|
|
* @return MimeMagic
|
2006-10-03 13:00:52 +00:00
|
|
|
*/
|
2013-11-23 00:49:32 +00:00
|
|
|
public static function singleton() {
	// Reuse the shared instance once it exists.
	if ( self::$instance !== null ) {
		return self::$instance;
	}

	// First call: build the instance from the main config.
	self::$instance = new MimeMagic(
		ConfigFactory::getDefaultInstance()->makeConfig( 'main' )
	);

	return self::$instance;
}
|
|
|
|
|
|
2014-06-21 12:00:55 +00:00
|
|
|
/**
|
|
|
|
|
* Adds to the list mapping MIME to file extensions.
|
|
|
|
|
* As an extension author, you are encouraged to submit patches to
|
|
|
|
|
* MediaWiki's core to add new MIME types to mime.types.
|
|
|
|
|
* @param string $types
|
|
|
|
|
*/
|
|
|
|
|
public function addExtraTypes( $types ) {
	// The leading newline keeps each addition on its own line of the
	// accumulated string, which the constructor later splits on "\n".
	$this->mExtraTypes = $this->mExtraTypes . "\n" . $types;
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Adds to the list mapping MIME to media type.
|
|
|
|
|
* As an extension author, you are encouraged to submit patches to
|
|
|
|
|
* MediaWiki's core to add new MIME info to mime.info.
|
|
|
|
|
* @param string $info
|
|
|
|
|
*/
|
|
|
|
|
public function addExtraInfo( $info ) {
	// The leading newline keeps each addition on its own line of the
	// accumulated string, which the constructor later splits on "\n".
	$this->mExtraInfo = $this->mExtraInfo . "\n" . $info;
}
|
|
|
|
|
|
2012-10-19 20:03:05 +00:00
|
|
|
/**
|
2014-07-24 14:04:48 +00:00
|
|
|
* Returns a list of file extensions for a given MIME type as a space
|
|
|
|
|
* separated string or null if the MIME type was unrecognized. Resolves
|
|
|
|
|
* MIME type aliases.
|
2012-10-19 20:03:05 +00:00
|
|
|
*
|
2014-04-20 19:16:57 +00:00
|
|
|
* @param string $mime
|
2011-02-06 22:47:10 +00:00
|
|
|
* @return string|null
|
|
|
|
|
*/
|
|
|
|
|
public function getExtensionsForType( $mime ) {
	$type = strtolower( $mime );

	// A type without its own entry may be an alias; if so, continue with
	// the canonical type it points to.
	if ( !isset( $this->mMimeToExt[$type] ) && isset( $this->mMimeTypeAliases[$type] ) ) {
		$type = $this->mMimeTypeAliases[$type];
	}

	// Space separated extension list, or null for an unrecognized type
	return isset( $this->mMimeToExt[$type] ) ? $this->mMimeToExt[$type] : null;
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-10-19 20:03:05 +00:00
|
|
|
/**
|
2014-07-24 14:04:48 +00:00
|
|
|
* Returns a list of MIME types for a given file extension as a space
|
2011-02-06 22:47:10 +00:00
|
|
|
* separated string or null if the extension was unrecognized.
|
2012-10-19 20:03:05 +00:00
|
|
|
*
|
2014-04-20 19:16:57 +00:00
|
|
|
* @param string $ext
|
2011-02-06 22:47:10 +00:00
|
|
|
* @return string|null
|
|
|
|
|
*/
|
|
|
|
|
public function getTypesForExtension( $ext ) {
	// The map is keyed by lower-case extension
	$key = strtolower( $ext );

	if ( isset( $this->mExtToMime[$key] ) ) {
		return $this->mExtToMime[$key];
	}

	return null;
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-10-19 20:03:05 +00:00
|
|
|
/**
|
2014-07-24 14:04:48 +00:00
|
|
|
* Returns a single MIME type for a given file extension or null if unknown.
|
2011-02-06 22:47:10 +00:00
|
|
|
* This is always the first type from the list returned by getTypesForExtension($ext).
|
2012-10-19 20:03:05 +00:00
|
|
|
*
|
2014-04-20 19:16:57 +00:00
|
|
|
* @param string $ext
|
2011-02-06 22:47:10 +00:00
|
|
|
* @return string|null
|
|
|
|
|
*/
|
|
|
|
|
public function guessTypesForExtension( $ext ) {
	$types = $this->getTypesForExtension( $ext );
	if ( $types === null ) {
		return null;
	}

	// Keep only the first entry of the space separated list.
	// TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
	return preg_replace( '/\s.*$/', '', trim( $types ) );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-10-19 20:03:05 +00:00
|
|
|
/**
|
2014-07-24 14:04:48 +00:00
|
|
|
* Tests if the extension matches the given MIME type. Returns true if a
|
|
|
|
|
* match was found, null if the MIME type is unknown, and false if the
|
|
|
|
|
* MIME type is known but no matches were found.
|
2012-10-19 20:03:05 +00:00
|
|
|
*
|
2014-04-20 19:16:57 +00:00
|
|
|
* @param string $extension
|
|
|
|
|
* @param string $mime
|
2011-02-06 22:47:10 +00:00
|
|
|
* @return bool|null
|
|
|
|
|
*/
|
|
|
|
|
public function isMatchingExtension( $extension, $mime ) {
	// Space separated extension list for the (alias-resolved) MIME type
	$ext = $this->getExtensionsForType( $mime );

	if ( !$ext ) {
		return null; // Unknown MIME type
	}

	$ext = explode( ' ', $ext );

	$extension = strtolower( $extension );

	// Strict comparison: with loose comparison numeric strings are compared
	// as numbers, so e.g. "1" would wrongly match a registered "001".
	return in_array( $extension, $ext, true );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-10-19 20:03:05 +00:00
|
|
|
/**
|
2014-07-24 14:04:48 +00:00
|
|
|
* Returns true if the MIME type is known to represent an image format
|
2011-02-06 22:47:10 +00:00
|
|
|
* supported by the PHP GD library.
|
2011-05-29 14:01:47 +00:00
|
|
|
*
|
2014-04-20 19:16:57 +00:00
|
|
|
* @param string $mime
|
2012-10-19 20:03:05 +00:00
|
|
|
*
|
2011-05-29 14:01:47 +00:00
|
|
|
* @return bool
|
2011-02-06 22:47:10 +00:00
|
|
|
*/
|
|
|
|
|
public function isPHPImageType( $mime ) {
	// As defined by getimagesize() and image_type_to_mime_type()
	static $types = [
		'image/gif', 'image/jpeg', 'image/png',
		'image/x-bmp', 'image/xbm', 'image/tiff',
		'image/jp2', 'image/jpeg2000', 'image/iff',
		'image/x-xbitmap',
		'image/vnd.wap.wbmp', 'image/vnd.xiff',
		'image/x-photoshop',
		'application/x-shockwave-flash',
	];

	// Strict comparison so that non-string input such as true (or 0 on
	// PHP < 8) is not reported as a supported image type.
	return in_array( $mime, $types, true );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2005-10-08 05:48:45 +00:00
|
|
|
/**
|
|
|
|
|
* Returns true if the extension represents a type which can
|
|
|
|
|
* be reliably detected from its content. Use this to determine
|
|
|
|
|
* whether strict content checks should be applied to reject
|
|
|
|
|
* invalid uploads; if we can't identify the type we won't
|
|
|
|
|
* be able to say if it's invalid.
|
|
|
|
|
*
|
2014-07-24 14:04:48 +00:00
|
|
|
* @todo Be more accurate when using fancy MIME detector plugins;
|
2005-10-08 05:48:45 +00:00
|
|
|
* right now this is the bare minimum getimagesize() list.
|
2014-04-20 19:16:57 +00:00
|
|
|
* @param string $extension
|
2005-10-08 05:48:45 +00:00
|
|
|
* @return bool
|
|
|
|
|
*/
|
|
|
|
|
function isRecognizableExtension( $extension ) {
	// Lookup table keyed by lower-case extension
	static $recognizable = [
		// Types recognized by getimagesize()
		'gif' => true, 'jpeg' => true, 'jpg' => true, 'png' => true,
		'swf' => true, 'psd' => true, 'bmp' => true, 'tiff' => true,
		'tif' => true, 'jpc' => true, 'jp2' => true, 'jpx' => true,
		'jb2' => true, 'swc' => true, 'iff' => true, 'wbmp' => true,
		'xbm' => true,

		// Formats we recognize magic numbers for
		'djvu' => true, 'ogx' => true, 'ogg' => true, 'ogv' => true,
		'oga' => true, 'spx' => true, 'mid' => true, 'pdf' => true,
		'wmf' => true, 'xcf' => true, 'webm' => true, 'mkv' => true,
		'mka' => true, 'webp' => true,

		// XML formats we sure hope we recognize reliably
		'svg' => true,
	];

	return isset( $recognizable[strtolower( $extension )] );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-10-19 20:03:05 +00:00
|
|
|
/**
|
2014-07-24 14:04:48 +00:00
|
|
|
* Improves a MIME type using the file extension. Some file formats are very generic,
|
|
|
|
|
* so their MIME type is not very meaningful. A more useful MIME type can be derived
|
2012-10-19 20:03:05 +00:00
|
|
|
* by looking at the file extension. Typically, this method would be called on the
|
2011-02-06 22:47:10 +00:00
|
|
|
* result of guessMimeType().
|
2012-10-19 20:03:05 +00:00
|
|
|
*
|
2014-07-24 14:04:48 +00:00
|
|
|
* @param string $mime The MIME type, typically guessed from a file's content.
|
2014-04-20 19:16:57 +00:00
|
|
|
* @param string $ext The file extension, as taken from the file name
|
2011-02-06 22:47:10 +00:00
|
|
|
*
|
2014-07-24 14:04:48 +00:00
|
|
|
* @return string The MIME type
|
2011-02-06 22:47:10 +00:00
|
|
|
*/
|
|
|
|
|
public function improveTypeFromExtension( $mime, $ext ) {
	if ( $mime === 'unknown/unknown' ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			// Not something we can detect, so simply
			// trust the file extension
			$mime = $this->guessTypesForExtension( $ext );
		} else {
			// Content detection should have handled this extension, so
			// the extension is not trusted here.
			wfDebug( __METHOD__ . ': refusing to guess mime type for .' .
				"$ext file, we should have recognized it\n" );
		}
	} elseif ( $mime === 'application/x-opc+zip' ) {
		if ( !$this->isMatchingExtension( $ext, $mime ) ) {
			wfDebug( __METHOD__ . ": refusing to guess better type for $mime file, " .
				".$ext is not a known OPC extension.\n" );
			$mime = 'application/zip';
		} else {
			// A known file extension for an OPC file,
			// find the proper MIME type for that file extension
			$mime = $this->guessTypesForExtension( $ext );
		}
	} elseif ( $mime === 'text/plain' ) {
		// Textual types are sometimes not recognized properly.
		// If detected as text/plain, and has an extension which is textual
		// improve to the extension's type. For example, csv and json are often
		// misdetected as text/plain.
		if ( $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
			$mime = $this->guessTypesForExtension( $ext );
		}
	}

	# Media handling extensions can improve the MIME detected
	Hooks::run( 'MimeMagicImproveFromExtension', [ $this, $ext, &$mime ] );

	// Report the canonical type when the result is a known alias
	if ( isset( $this->mMimeTypeAliases[$mime] ) ) {
		$mime = $this->mMimeTypeAliases[$mime];
	}

	wfDebug( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
	return $mime;
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-10-19 20:03:05 +00:00
|
|
|
/**
 * MIME type detection. The content-based guess from doGuessMimeType() is
 * tried first; when it yields nothing, detectMimeType() is used as the
 * fallback. The result is finally normalized through the MIME type alias
 * table ($this->mMimeTypeAliases).
 *
 * @param string $file The file to check
 * @param string|bool $ext The file extension, or true (default) to extract it from the filename.
 *   Set it to false to ignore the extension. DEPRECATED! Set to false, use
 *   improveTypeFromExtension($mime, $ext) later to improve MIME type.
 *
 * @return string The MIME type of $file
 */
public function guessMimeType( $file, $ext = true ) {
	// TODO: make $ext default to false. Or better, remove it.
	if ( $ext ) {
		$deprecation = ": WARNING: use of the \$ext parameter is deprecated. "
			. "Use improveTypeFromExtension(\$mime, \$ext) instead.\n";
		wfDebug( __METHOD__ . $deprecation );
	}

	// First choice: the built-in content sniffing.
	$guessed = $this->doGuessMimeType( $file, $ext );

	// Second choice: external command / fileinfo / extension fallback.
	if ( !$guessed ) {
		wfDebug( __METHOD__ . ": internal type detection failed for $file (.$ext)...\n" );
		$guessed = $this->detectMimeType( $file, $ext );
	}

	// Map aliases onto their canonical MIME type.
	$canonical = isset( $this->mMimeTypeAliases[$guessed] )
		? $this->mMimeTypeAliases[$guessed]
		: $guessed;

	wfDebug( __METHOD__ . ": guessed mime type of $file: $canonical\n" );
	return $canonical;
}
|
2008-04-14 07:45:50 +00:00
|
|
|
|
2011-02-06 22:47:10 +00:00
|
|
|
/**
 * Guess the MIME type from the file contents.
 *
 * Reads the first 1024 bytes ("head") and up to the last 65558 bytes
 * ("tail") of the file and runs a series of checks in this order: hardcoded
 * magic numbers, EBML (WebM/Matroska), WebP, PHP, XML, shell scripts
 * (shebang), ZIP variants, getimagesize(), DjVu, and finally the
 * 'MimeMagicGuessFromContent' hook.
 *
 * @todo Remove $ext param
 *
 * @param string $file Path of the file to inspect
 * @param mixed $ext Only forwarded to detectZipType() when a ZIP archive is found
 * @return bool|string The detected MIME type; 'unknown/unknown' if the file
 *   cannot be opened, its size cannot be determined, or it is an unrecognized
 *   EBML file; false if no check (including the hook) recognized the content
 * @throws MWException If seeking to the tail of the file fails
 */
private function doGuessMimeType( $file, $ext ) {
	// Read a chunk of the file
	MediaWiki\suppressWarnings();
	$f = fopen( $file, 'rb' );
	MediaWiki\restoreWarnings();

	if ( !$f ) {
		return 'unknown/unknown';
	}

	$fsize = filesize( $file );
	if ( $fsize === false ) {
		// Do not leak the handle on the early exit.
		fclose( $f );
		return 'unknown/unknown';
	}

	$head = fread( $f, 1024 );
	$tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
	if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
		// Release the handle before bailing out.
		fclose( $f );
		throw new MWException(
			"Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
	}
	// fread() rejects a length of 0 (warning, or ValueError on PHP 8), so an
	// empty file simply has an empty tail.
	$tail = $tailLength > 0 ? fread( $f, $tailLength ) : '';
	fclose( $f );

	wfDebug( __METHOD__ . ": analyzing head and tail of $file for magic numbers.\n" );

	// Hardcode a few magic number checks...
	$headers = [
		// Multimedia...
		'MThd' => 'audio/midi',
		'OggS' => 'application/ogg',

		// Image formats...
		// Note that WMF may have a bare header, no magic number.
		"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
		"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
		'%PDF' => 'application/pdf',
		'gimp xcf' => 'image/x-xcf',

		// Some forbidden fruit...
		'MZ' => 'application/octet-stream', // DOS/Windows executable
		"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
		"\x7fELF" => 'application/octet-stream', // ELF binary
	];

	foreach ( $headers as $magic => $candidate ) {
		if ( strncmp( $head, $magic, strlen( $magic ) ) === 0 ) {
			wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" );
			return $candidate;
		}
	}

	/* Look for WebM and Matroska files */
	if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) === 0 ) {
		// The marker cannot sit at offset 0 here (the head starts with the
		// EBML magic), so a plain truthiness check is sufficient.
		$doctype = strpos( $head, "\x42\x82" );
		if ( $doctype ) {
			// Next byte is datasize, then data (sizes larger than 1 byte are very stupid muxers)
			$data = substr( $head, $doctype + 3, 8 );
			if ( strncmp( $data, "matroska", 8 ) === 0 ) {
				wfDebug( __METHOD__ . ": recognized file as video/x-matroska\n" );
				return "video/x-matroska";
			} elseif ( strncmp( $data, "webm", 4 ) === 0 ) {
				wfDebug( __METHOD__ . ": recognized file as video/webm\n" );
				return "video/webm";
			}
		}
		wfDebug( __METHOD__ . ": unknown EBML file\n" );
		return "unknown/unknown";
	}

	/* Look for WebP */
	if ( strncmp( $head, "RIFF", 4 ) === 0 && strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) === 0 ) {
		wfDebug( __METHOD__ . ": recognized file as image/webp\n" );
		return "image/webp";
	}

	/**
	 * Look for PHP. Check for this before HTML/XML... Warning: this is a
	 * heuristic, and won't match a file with a lot of non-PHP before. It
	 * will also match text files which could be PHP. :)
	 *
	 * @todo FIXME: For this reason, the check is probably useless -- an attacker
	 * could almost certainly just pad the file with a lot of nonsense to
	 * circumvent the check in any case where it would be a security
	 * problem. On the other hand, it causes harmful false positives (bug
	 * 16583). The heuristic has been cut down to exclude three-character
	 * strings like "<? ", but should it be axed completely?
	 */
	if ( ( strpos( $head, '<?php' ) !== false ) ||
		( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
		( strpos( $head, "<\x00?\x00 " ) !== false ) ||
		( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
		( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
		( strpos( $head, "<\x00?\x00=" ) !== false ) ) {

		wfDebug( __METHOD__ . ": recognized $file as application/x-php\n" );
		return 'application/x-php';
	}

	/**
	 * look for XML formats (XHTML and SVG)
	 */
	$xml = new XmlTypeCheck( $file );
	if ( $xml->wellFormed ) {
		$xmlMimeTypes = $this->mConfig->get( 'XMLMimeTypes' );
		if ( isset( $xmlMimeTypes[$xml->getRootElement()] ) ) {
			return $xmlMimeTypes[$xml->getRootElement()];
		} else {
			return 'application/xml';
		}
	}

	/**
	 * look for shell scripts
	 */
	$script_type = null;

	# detect by shebang, optionally preceded by a byte order mark.
	# Each comparison length equals the byte length of the literal; the
	# UTF-16 variants are BOM (2 bytes) + "#!" encoded as two 16-bit units.
	if ( substr( $head, 0, 2 ) === "#!" ) {
		$script_type = "ASCII";
	} elseif ( substr( $head, 0, 5 ) === "\xef\xbb\xbf#!" ) {
		$script_type = "UTF-8";
	} elseif ( substr( $head, 0, 6 ) === "\xfe\xff\x00#\x00!" ) {
		$script_type = "UTF-16BE";
	} elseif ( substr( $head, 0, 6 ) === "\xff\xfe#\x00!\x00" ) {
		$script_type = "UTF-16LE";
	}

	if ( $script_type ) {
		if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
			// Quick and dirty fold down to ASCII!
			$pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
			// Skip the 2-byte BOM, then decode 16-bit code units.
			$chars = unpack( $pack[$script_type], substr( $head, 2 ) );
			$head = '';
			foreach ( $chars as $codepoint ) {
				if ( $codepoint < 128 ) {
					$head .= chr( $codepoint );
				} else {
					$head .= '?';
				}
			}
		}

		$match = [];

		// The second capture group (the word after the last path separator
		// matched) names the interpreter.
		if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
			$mime = "application/x-{$match[2]}";
			wfDebug( __METHOD__ . ": shell script recognized as $mime\n" );
			return $mime;
		}
	}

	// Check for ZIP variants (before getimagesize)
	if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
		wfDebug( __METHOD__ . ": ZIP header present in $file\n" );
		return $this->detectZipType( $head, $tail, $ext );
	}

	MediaWiki\suppressWarnings();
	$gis = getimagesize( $file );
	MediaWiki\restoreWarnings();

	if ( $gis && isset( $gis['mime'] ) ) {
		$mime = $gis['mime'];
		wfDebug( __METHOD__ . ": getimagesize detected $file as $mime\n" );
		return $mime;
	}

	// Also test DjVu
	$deja = new DjVuImage( $file );
	if ( $deja->isValid() ) {
		wfDebug( __METHOD__ . ": detected $file as image/vnd.djvu\n" );
		return 'image/vnd.djvu';
	}

	# Media handling extensions can guess the MIME by content
	# It's intentionally here so that if core is wrong about a type (false positive),
	# people will hopefully nag and submit patches :)
	$mime = false;
	# Some strings by reference for performance - assuming well-behaved hooks
	Hooks::run(
		'MimeMagicGuessFromContent',
		[ $this, &$head, &$tail, $file, &$mime ]
	);

	return $mime;
}
|
2012-10-19 20:03:05 +00:00
|
|
|
|
2008-09-25 18:43:33 +00:00
|
|
|
/**
 * Detect application-specific file type of a given ZIP file from its
 * header data. Currently works for OpenDocument and OpenXML types...
 * If can't tell, returns 'application/zip'.
 *
 * @param string $header Some reasonably-sized chunk of file header
 * @param string|null $tail The tail of the file
 * @param string|bool $ext The file extension, or true to extract it from the filename.
 *   Set it to false (default) to ignore the extension. DEPRECATED! Set to false,
 *   use improveTypeFromExtension($mime, $ext) later to improve MIME type.
 *
 * @return string
 */
public function detectZipType( $header, $tail = null, $ext = false ) {
	if ( $ext ) { # TODO: remove $ext param
		wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	// A missing tail is treated as empty so the string functions below
	// never receive null.
	if ( $tail === null ) {
		$tail = '';
	}

	$mime = 'application/zip';
	// Order matters: the "-template"/"-master"/"-web" variants must come
	// before their base name so the regex alternation prefers them.
	$opendocTypes = [
		'chart-template',
		'chart',
		'formula-template',
		'formula',
		'graphics-template',
		'graphics',
		'image-template',
		'image',
		'presentation-template',
		'presentation',
		'spreadsheet-template',
		'spreadsheet',
		'text-template',
		'text-master',
		'text-web',
		'text' ];

	// http://lists.oasis-open.org/archives/office/200505/msg00006.html
	$types = '(?:' . implode( '|', $opendocTypes ) . ')';
	$opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";

	// The dot is escaped so only the literal name "[Content_Types].xml" matches.
	$openxmlRegex = '/^\[Content_Types\]\.xml/';

	// Both regexes are applied at byte offset 30 of a local file header,
	// i.e. where the code expects the first entry's file name to start.
	if ( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
		$mime = $matches[1];
		wfDebug( __METHOD__ . ": detected $mime from ZIP archive\n" );
	} elseif ( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) {
		$mime = "application/x-opc+zip";
		# TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
		if ( $ext !== true && $ext !== false ) {
			/** This is the mode used by getPropsFromPath
			 * These MIME's are stored in the database, where we don't really want
			 * x-opc+zip, because we use it only for internal purposes
			 */
			if ( $this->isMatchingExtension( $ext, $mime ) ) {
				/* A known file extension for an OPC file,
				 * find the proper mime type for that file extension
				 */
				$mime = $this->guessTypesForExtension( $ext );
			} else {
				$mime = "application/zip";
			}
		}
		wfDebug( __METHOD__ . ": detected an Open Packaging Conventions archive: $mime\n" );
	} elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
		( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
		preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
		// The file starts with the 8-byte signature above and carries a
		// ZIP local file header for [Content_Types].xml in its tail.
		if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
			$mime = "application/msword";
		}
		// A more specific 6-byte match at offset 512 overrides the result above.
		switch ( substr( $header, 512, 6 ) ) {
			case "\xEC\xA5\xC1\x00\x0E\x00":
			case "\xEC\xA5\xC1\x00\x1C\x00":
			case "\xEC\xA5\xC1\x00\x43\x00":
				$mime = "application/vnd.ms-powerpoint";
				break;
			case "\xFD\xFF\xFF\xFF\x10\x00":
			case "\xFD\xFF\xFF\xFF\x1F\x00":
			case "\xFD\xFF\xFF\xFF\x22\x00":
			case "\xFD\xFF\xFF\xFF\x23\x00":
			case "\xFD\xFF\xFF\xFF\x28\x00":
			case "\xFD\xFF\xFF\xFF\x29\x00":
			case "\xFD\xFF\xFF\xFF\x10\x02":
			case "\xFD\xFF\xFF\xFF\x1F\x02":
			case "\xFD\xFF\xFF\xFF\x22\x02":
			case "\xFD\xFF\xFF\xFF\x23\x02":
			case "\xFD\xFF\xFF\xFF\x28\x02":
			case "\xFD\xFF\xFF\xFF\x29\x02":
				$mime = "application/vnd.msexcel";
				break;
		}

		wfDebug( __METHOD__ . ": detected a MS Office document with OPC trailer\n" );
	} else {
		wfDebug( __METHOD__ . ": unable to identify type of ZIP archive\n" );
	}
	return $mime;
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-10-19 20:03:05 +00:00
|
|
|
/**
 * Internal MIME type detection. The external program configured as
 * 'MimeDetectorCommand' is used when set; otherwise the fileinfo extension
 * is tried if its functions exist. When neither produces a usable answer
 * and $ext is not false, the type is guessed from the file extension via
 * guessTypesForExtension(), unless the extension is one that content
 * detection should have recognized.
 *
 * If no MIME type can be determined, this function returns
 * 'unknown/unknown'.
 *
 * @param string $file The file to check
 * @param string|bool $ext The file extension, or true (default) to extract it from the filename.
 *   Set it to false to ignore the extension. DEPRECATED! Set to false, use
 *   improveTypeFromExtension($mime, $ext) later to improve MIME type.
 *
 * @return string The MIME type of $file
 */
private function detectMimeType( $file, $ext = true ) {
	/** @todo Make $ext default to false. Or better, remove it. */
	if ( $ext ) {
		wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. "
			. "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$detected = null;
	$command = $this->mConfig->get( 'MimeDetectorCommand' );

	if ( $command ) {
		// External detector: the file path is the single, escaped argument.
		$detected = wfShellExec( $command . ' ' . wfEscapeShellArg( $file ) );
	} elseif ( function_exists( "finfo_open" ) && function_exists( "finfo_file" ) ) {
		$finfo = finfo_open( FILEINFO_MIME );

		if ( !$finfo ) {
			wfDebug( __METHOD__ . ": finfo_open failed on " . FILEINFO_MIME . "!\n" );
		} else {
			$detected = finfo_file( $finfo, $file );
			finfo_close( $finfo );
		}
	} else {
		wfDebug( __METHOD__ . ": no magic mime detector found!\n" );
	}

	if ( $detected ) {
		# normalize: strip charset etc., surrounding whitespace, and case
		$detected = strtolower( trim( preg_replace( '![;, ].*$!', '', $detected ) ) );

		if ( strpos( $detected, 'unknown' ) === false ) {
			wfDebug( __METHOD__ . ": magic mime type of $file: $detected\n" );
			return $detected;
		}
		// The detector explicitly reported an unknown type; fall through.
	}

	// If desired, look at extension as a fallback.
	if ( $ext === true ) {
		$dot = strrpos( $file, '.' );
		if ( $dot ) {
			$ext = strtolower( substr( $file, $dot + 1 ) );
		} else {
			$ext = '';
		}
	}

	if ( $ext ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			$byExtension = $this->guessTypesForExtension( $ext );
			if ( $byExtension ) {
				wfDebug( __METHOD__ . ": extension mime type of $file: $byExtension\n" );
				return $byExtension;
			}
		} else {
			wfDebug( __METHOD__ . ": refusing to guess mime type for .$ext file, "
				. "we should have recognized it\n" );
		}
	}

	// Unknown type
	wfDebug( __METHOD__ . ": failed to guess mime type for $file!\n" );
	return 'unknown/unknown';
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2005-05-21 07:46:17 +00:00
|
|
|
/**
 * Determine the media type code for a file, using its MIME type, name and
 * possibly its contents.
 *
 * This function relies on the findMediaType(), mapping extensions and MIME
 * types to media types. Lookups are tried in this order: the full MIME
 * type, the file extension taken from $path, then the major MIME type
 * (the part before the slash).
 *
 * @todo analyse file if need be
 * @todo look at multiple extension, separately and together.
 *
 * @param string $path Full path to the image file, in case we have to look at the contents
 *   (if null, only the MIME type is used to determine the media type code).
 * @param string $mime MIME type. If null it will be guessed using guessMimeType.
 *
 * @return string A value to be used with the MEDIATYPE_xxx constants.
 */
public function getMediaType( $path = null, $mime = null ) {
	if ( !$mime && !$path ) {
		return MEDIATYPE_UNKNOWN;
	}

	// If MIME type is unknown, guess it
	if ( !$mime ) {
		$mime = $this->guessMimeType( $path, false );
	}

	// Special code for ogg - detect if it's video (theora),
	// else label it as sound.
	if ( $mime === 'application/ogg' && $path !== null && file_exists( $path ) ) {

		// Read a chunk of the file. Ogg is a binary format, so open it in
		// binary mode to avoid any text-mode translation of the bytes.
		$f = fopen( $path, "rb" );
		if ( !$f ) {
			return MEDIATYPE_UNKNOWN;
		}
		$head = fread( $f, 256 );
		fclose( $f );

		// Drop the encoder name so that its embedded "theora" does not
		// trigger the video check below.
		$head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );

		// This is an UGLY HACK, file should be parsed correctly
		if ( strpos( $head, 'theora' ) !== false ) {
			return MEDIATYPE_VIDEO;
		}
		foreach ( [ 'vorbis', 'flac', 'speex' ] as $audioCodec ) {
			if ( strpos( $head, $audioCodec ) !== false ) {
				return MEDIATYPE_AUDIO;
			}
		}
		return MEDIATYPE_MULTIMEDIA;
	}

	// Check for entry for full MIME type
	if ( $mime ) {
		$type = $this->findMediaType( $mime );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check for entry for file extension
	if ( $path ) {
		$i = strrpos( $path, '.' );
		$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

		// TODO: look at multi-extension if this fails, parse from full path
		$type = $this->findMediaType( '.' . $e );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check major MIME type
	if ( $mime ) {
		$i = strpos( $mime, '/' );
		if ( $i !== false ) {
			$major = substr( $mime, 0, $i );
			$type = $this->findMediaType( $major );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}
	}

	// Every successful lookup has returned above, so nothing matched.
	return MEDIATYPE_UNKNOWN;
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-10-19 20:03:05 +00:00
|
|
|
/**
 * Returns a media code matching the given MIME type or file extension.
 * File extensions are represented by a string starting with a dot (.) to
 * distinguish them from MIME types.
 *
 * An extension is expanded to its space separated list of MIME types via
 * getTypesForExtension(); a MIME type is first normalized through the
 * alias table. The first candidate found in $this->mMediaTypes wins.
 *
 * @access private
 * @param string $extMime
 * @return int|string The matching key of $this->mMediaTypes, or
 *   MEDIATYPE_UNKNOWN if there is none
 */
function findMediaType( $extMime ) {
	$isExtension = ( strpos( $extMime, '.' ) === 0 );

	if ( $isExtension ) {
		// If it's an extension, look up the MIME types
		$typeList = $this->getTypesForExtension( substr( $extMime, 1 ) );
		if ( !$typeList ) {
			return MEDIATYPE_UNKNOWN;
		}
		$candidates = explode( ' ', $typeList );
	} else {
		// Normalize MIME type
		$normalized = isset( $this->mMimeTypeAliases[$extMime] )
			? $this->mMimeTypeAliases[$extMime]
			: $extMime;
		$candidates = [ $normalized ];
	}

	foreach ( $candidates as $candidate ) {
		foreach ( $this->mMediaTypes as $mediaType => $members ) {
			if ( in_array( $candidate, $members, true ) ) {
				return $mediaType;
			}
		}
	}

	return MEDIATYPE_UNKNOWN;
}
|
2008-12-05 05:35:22 +00:00
|
|
|
|
|
|
|
|
/**
 * Get the MIME types that various versions of Internet Explorer would
 * detect from a chunk of the content. This is a thin wrapper that
 * delegates to the cached IEContentAnalyzer instance.
 *
 * @param string $fileName The file name; forwarded to the analyzer
 *   (previously documented as unused at present)
 * @param string $chunk The first 256 bytes of the file
 * @param string $proposed The MIME type proposed by the server
 * @return array
 */
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
	return $this->getIEContentAnalyzer()
		->getRealMimesFromData( $fileName, $chunk, $proposed );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Get a cached instance of IEContentAnalyzer. The instance is created on
 * first use and stored in $this->mIEAnalyzer.
 *
 * @return IEContentAnalyzer
 */
protected function getIEContentAnalyzer() {
	$analyzer = $this->mIEAnalyzer;
	if ( $analyzer === null ) {
		$analyzer = new IEContentAnalyzer();
		$this->mIEAnalyzer = $analyzer;
	}
	return $analyzer;
}
|
2005-05-21 07:46:17 +00:00
|
|
|
}
|