CSSMin: Do not base64-encode non-binary files when embedding

Do not base64-encode non-binary files (containing only whitespace and
printable ASCII characters, which matches sane SVG files).

For SVG files the percent-encoded URIs are actually slightly longer
than the base64-encoded ones (~10%), but compress a lot better
resulting in 15-20% less data to transfer after gzip compression.

(The effect is best seen on the 'oojs-ui' module, which consists
mostly of SVG icons – especially after commenting out everything other
than 'oojs-ui.svg.css'.)

I tried this for binary files too, just in case; but as expected, they
suffer from a noticeable size increase even with compression (~15%).

Bug: 67341
Change-Id: Iddaf863b6be98570a2bb8e606f13946a96345f65
This commit is contained in:
Bartosz Dziewoński 2014-09-17 23:14:46 +02:00
parent 18a9e7c409
commit fa223d65d6
4 changed files with 41 additions and 8 deletions

View file

@ -15,6 +15,9 @@ production.
for 'languageScripts'.
* Added a new hook, "ContentAlterParserOutput", to allow extensions to modify the
parser output for a content object before links update.
* (bug 67341) SVG images will no longer be base64-encoded when being embedded
in CSS. This results in a slight size increase before gzip compression (due to
percent-encoding), but up to a 20% decrease after it.
=== Bug fixes in 1.25 ===

View file

@ -38,6 +38,7 @@ class CSSMin {
* which when base64 encoded will result in a 1/3 increase in size.
*/
const EMBED_SIZE_LIMIT = 24576;
const DATA_URI_SIZE_LIMIT = 32768;
const URL_REGEX = 'url\(\s*[\'"]?(?P<file>[^\?\)\'"]*?)(?P<query>\?[^\)\'"]*?|)[\'"]?\s*\)';
const EMBED_REGEX = '\/\*\s*\@embed\s*\*\/';
const COMMENT_REGEX = '\/\*.*?\*\/';
@ -100,10 +101,11 @@ class CSSMin {
}
/**
* Encode an image file as a base64 data URI.
* If the image file has a suitable MIME type and size, encode it as a
* base64 data URI. Return false if the image type is unfamiliar or exceeds
* the size limit.
* Encode an image file as a data URI.
*
* If the image file has a suitable MIME type and size, encode it as a data URI, base64-encoded
* for binary files or just percent-encoded otherwise. Return false if the image type is
* unfamiliar or file exceeds the size limit.
*
* @param string $file Image file to encode.
* @param string|null $type File's MIME type or null. If null, CSSMin will
@ -111,7 +113,7 @@ class CSSMin {
* @param int|bool $sizeLimit If the size of the target file is greater than
* this value, decline to encode the image file and return false
* instead. If $sizeLimit is false, no limit is enforced.
* @return string|bool: Image contents encoded as a data URI or false.
* @return string|bool Image contents encoded as a data URI or false.
*/
public static function encodeImageAsDataURI( $file, $type = null,
$sizeLimit = self::EMBED_SIZE_LIMIT
@ -125,8 +127,23 @@ class CSSMin {
if ( !$type ) {
return false;
}
$data = base64_encode( file_get_contents( $file ) );
return 'data:' . $type . ';base64,' . $data;
$contents = file_get_contents( $file );
// Only whitespace and printable ASCII characters
$isText = (bool)preg_match( '/^[\r\n\t\x20-\x7e]+$/', $contents );
if ( $isText ) {
// Do not base64-encode non-binary files (sane SVGs), unless that'd exceed URI length limit.
// (This often produces longer URLs, but they compress better, yielding a net smaller size.)
$uri = 'data:' . $type . ',' . rawurlencode( $contents );
if ( strlen( $uri ) >= self::DATA_URI_SIZE_LIMIT ) {
$uri = 'data:' . $type . ';base64,' . base64_encode( $contents );
}
} else {
$uri = 'data:' . $type . ';base64,' . base64_encode( $contents );
}
return $uri;
}
/**

View file

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" width="8" height="8">
<circle cx="4" cy="4" r="2"/>
</svg>

After

Width:  |  Height:  |  Size: 138 B

View file

@ -147,9 +147,12 @@ class CSSMinTest extends MediaWikiTestCase {
// Full paths start with http://localhost/w/.
// Timestamps in output are replaced with 'timestamp'.
// data: URIs for red.gif and green.gif
// data: URIs for red.gif, green.gif, circle.svg
$red = 'data:image/gif;base64,R0lGODlhAQABAIAAAP8AADAAACwAAAAAAQABAAACAkQBADs=';
$green = 'data:image/gif;base64,R0lGODlhAQABAIAAAACAADAAACwAAAAAAQABAAACAkQBADs=';
$svg = 'data:image/svg+xml,%3C%3Fxml%20version%3D%221.0%22%20encoding%3D%22UTF-8%22%3F%3E%0A'
. '%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20width%3D%228%22%20height%3D'
. '%228%22%3E%0A%3Ccircle%20cx%3D%224%22%20cy%3D%224%22%20r%3D%222%22%2F%3E%0A%3C%2Fsvg%3E%0A';
return array(
array(
@ -233,6 +236,12 @@ class CSSMinTest extends MediaWikiTestCase {
'foo { /* @embed */ background: url(large.png); }',
"foo { background: url(http://localhost/w/large.png?timestamp); }",
),
array(
'SVG files are embedded without base64 encoding',
'foo { /* @embed */ background: url(circle.svg); }',
"foo { background: url($svg); "
. "background: url(http://localhost/w/circle.svg?timestamp)!ie; }",
),
array(
'Two regular files in one rule',
'foo { background: url(red.gif), url(green.gif); }',