Merge "Fix mime detection of easily-confused-with text/plain formats"

This commit is contained in:
jenkins-bot 2014-07-04 07:43:42 +00:00 committed by Gerrit Code Review
commit 79d908ef74
5 changed files with 51 additions and 9 deletions

View file

@ -485,14 +485,6 @@ class MimeMagic {
* by looking at the file extension. Typically, this method would be called on the
* result of guessMimeType().
*
* Currently, this method does the following:
*
* If $mime is "unknown/unknown" and isRecognizableExtension( $ext ) returns false,
* return the result of guessTypesForExtension($ext).
*
* If $mime is "application/x-opc+zip" and isMatchingExtension( $ext, $mime )
* gives true, return the result of guessTypesForExtension($ext).
*
* @param string $mime The mime type, typically guessed from a file's content.
* @param string $ext The file extension, as taken from the file name
*
@ -518,6 +510,12 @@ class MimeMagic {
".$ext is not a known OPC extension.\n" );
$mime = 'application/zip';
}
} elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
// Textual types are sometimes not recognized properly.
// If the content was detected as text/plain and the file extension maps to
// a textual media type, improve the result to the extension's type. For
// example, csv and json are often misdetected as text/plain.
$mime = $this->guessTypesForExtension( $ext );
}
if ( isset( $this->mMimeTypeAliases[$mime] ) ) {

View file

@ -65,6 +65,9 @@ text/plain [TEXT]
text/html application/xhtml+xml [TEXT]
application/xml text/xml [TEXT]
text [TEXT]
application/json [TEXT]
text/csv [TEXT]
text/tab-separated-values [TEXT]
application/zip application/x-zip [ARCHIVE]
application/x-gzip [ARCHIVE]

View file

@ -35,6 +35,7 @@ application/x-gzip gz
application/x-hdf hdf
application/x-jar jar
application/x-javascript js
application/json json
application/x-koan skp skd skt skm
application/x-latex latex
application/x-netcdf nc cdf
@ -109,6 +110,7 @@ model/mesh msh mesh silo
model/vrml wrl vrml
text/calendar ics ifb
text/css css
text/csv csv
text/html html htm
text/plain txt
text/richtext rtx

View file

@ -435,7 +435,7 @@ abstract class UploadBase {
}
$this->mFileProps = FSFile::getPropsFromPath( $this->mTempPath, $this->mFinalExtension );
$mime = $this->mFileProps['file-mime'];
$mime = $this->mFileProps['mime'];
if ( $wgVerifyMimeType ) {
# XXX: Missing extension will be caught by validateName() via getTitle()

View file

@ -0,0 +1,39 @@
<?php
/**
 * Unit tests for MimeMagic::improveTypeFromExtension().
 */
class MimeMagicTest extends MediaWikiTestCase {
	/** @var MimeMagic Instance under test, obtained from MimeMagic::singleton() */
	private $mimeMagic;

	/**
	 * Prepare the fixture used by every test in this class.
	 */
	protected function setUp() {
		// Run the base-class set-up first (the usual ordering), so the fixture
		// is created only after the parent has prepared its own state.
		parent::setUp();
		$this->mimeMagic = MimeMagic::singleton();
	}

	/**
	 * Check that a content-detected mime type is refined (or left alone) as
	 * expected once the file extension is taken into account.
	 *
	 * @dataProvider providerImproveTypeFromExtension
	 * @param string $ext File extension (no leading dot)
	 * @param string $oldMime Initially detected mime
	 * @param string $expectedMime Mime type after taking extension into account
	 */
	public function testImproveTypeFromExtension( $ext, $oldMime, $expectedMime ) {
		$actualMime = $this->mimeMagic->improveTypeFromExtension( $oldMime, $ext );
		// Mime types are plain strings, so compare strictly (type and value).
		$this->assertSame( $expectedMime, $actualMime );
	}

	/**
	 * Data sets for testImproveTypeFromExtension().
	 *
	 * @return array[] Rows of ( extension, detected mime, expected mime )
	 */
	public static function providerImproveTypeFromExtension() {
		return array(
			// An already specific detection is kept as-is.
			array( 'gif', 'image/gif', 'image/gif' ),
			// Undetected content: expected to stay unknown for gif, but to be
			// resolved from the extension for wrl.
			array( 'gif', 'unknown/unknown', 'unknown/unknown' ),
			array( 'wrl', 'unknown/unknown', 'model/vrml' ),
			// text/plain detections: txt stays text/plain, while textual
			// extensions are expected to be refined to their own type.
			array( 'txt', 'text/plain', 'text/plain' ),
			array( 'csv', 'text/plain', 'text/csv' ),
			array( 'tsv', 'text/plain', 'text/tab-separated-values' ),
			array( 'json', 'text/plain', 'application/json' ),
			// OPC containers: an extension that is not a known OPC extension
			// is expected to fall back to plain zip, a known one to its type.
			array( 'foo', 'application/x-opc+zip', 'application/zip' ),
			array(
				'docx',
				'application/x-opc+zip',
				'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
			),
			// image/x-djvu is expected to come back as image/vnd.djvu.
			array( 'djvu', 'image/x-djvu', 'image/vnd.djvu' ),
			array( 'wav', 'audio/wav', 'audio/wav' ),
		);
	}
}