Improve CSS checking in SVG filter

Use more in depth CSS parsing to better check for CSS rules
that cause external resources to be loaded.

Backport of 07f3d831def13b718b2155732b3452bec9516231

Bug: T85085
Change-Id: I526a815f8ec8e357abae3dcf5ef4e6c3081ab7c0
This commit is contained in:
Brian Wolff 2025-10-16 11:44:38 -07:00 committed by Reedy
parent 498190ea4b
commit 908c2f9c46
6 changed files with 371 additions and 53 deletions

View file

@ -10,6 +10,7 @@ PHP 8.4 workboard: https://phabricator.wikimedia.org/tag/php_8.4_support/
THIS IS NOT A RELEASE YET
=== Changes since 1.43.5 ===
* (T85085) Improve CSS checking in SVG filter.
* Localisation updates.
== MediaWiki 1.43.5 ==

View file

@ -2661,6 +2661,7 @@ $wgAutoloadLocalClasses = [
'MediaWiki\\Title\\TitleFormatter' => __DIR__ . '/includes/title/TitleFormatter.php',
'MediaWiki\\Title\\TitleParser' => __DIR__ . '/includes/title/TitleParser.php',
'MediaWiki\\Title\\TitleValue' => __DIR__ . '/includes/title/TitleValue.php',
'MediaWiki\\Upload\\SVGCSSChecker' => __DIR__ . '/includes/upload/SVGCSSChecker.php',
'MediaWiki\\User\\ActorCache' => __DIR__ . '/includes/user/ActorCache.php',
'MediaWiki\\User\\ActorMigration' => __DIR__ . '/includes/user/ActorMigration.php',
'MediaWiki\\User\\ActorMigrationBase' => __DIR__ . '/includes/user/ActorMigrationBase.php',

View file

@ -59,6 +59,7 @@
"wikimedia/cldr-plural-rule-parser": "2.0.0",
"wikimedia/common-passwords": "0.5.0",
"wikimedia/composer-merge-plugin": "2.1.0",
"wikimedia/css-sanitizer": "^5.1.0 || ^5.2.0 || ^5.3.0 || ^5.4.0",
"wikimedia/cssjanus": "2.3.0",
"wikimedia/html-formatter": "4.1.0",
"wikimedia/ip-utils": "5.0.0",

View file

@ -0,0 +1,223 @@
<?php
namespace MediaWiki\Upload;
use MediaWiki\Parser\Sanitizer;
use Wikimedia\CSS\Objects\AtRule;
use Wikimedia\CSS\Objects\Token;
use Wikimedia\CSS\Parser\Parser as CSSParser;
/**
* Ensure SVG files cannot load external resources via URLs in CSS.
*
* Beyond that restriction, it aims to be relaxed in the CSS it allows.
*
* Data: urls are also banned except in @font-face. The rationale behind
* this is unclear. The restriction was copied over from the predecessor to
* this class.
*
* @since 1.43.6
*/
class SVGCSSChecker {
/**
* List of \@rules banned.
* \@import for obvious reasons.
* \@charset just in case although i expect it does nothing inside an svg.
*/
private const BANNED_AT_RULE = [
'charset',
'import'
];
/**
* image() and image-set() can use bare strings as url
* src() is not supported by real browsers but is an alias for url() in spec
*/
private const BANNED_FUNCS = [
'src',
'image',
'image-set'
];
/**
* entrypoint to check style="..." attributes
*
* @param string $value
* @return array|bool True if good or array containing error details
*/
public function checkStyleAttribute( string $value ) {
if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) {
return [ 'invalid-control-character', 0, 0 ];
}
$cssParser = CSSParser::newFromString( $value );
$decList = $cssParser->parseDeclarationList();
$errors = $cssParser->getParseErrors();
if ( $errors ) {
// For style attributes with syntax errors, as a fallback
// we see if MW's wikitext sanitizer would alter the
// style attribute in any way. If no, then we assume it
// is safe. There are enough files with errors in style
// attributes that don't use any risky features like
// css comments or url(), that this is worth it.
$alteredStyle = Sanitizer::checkCss( $value );
if ( $alteredStyle === $value ) {
// No sketchy CSS features used, its ok despite errors
return true;
}
return [ $errors[0][0], $errors[0][1], $errors[0][2] ];
}
$res = $this->validateTokens( $decList->toTokenArray() );
if ( $res !== true ) {
return $res;
}
return true;
}
/**
* entrypoint to check presentational attributes like fill
*
* Presentational attributes can contain CSS like values such as url()
*
* @param string $value
* @return array|bool True if good or array containing error details
*/
public function checkPresentationalAttribute( $value ) {
if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) {
return [ 'invalid-control-character', 0, 0 ];
}
$cssParser = CSSParser::newFromString( $value );
$cvList = $cssParser->parseComponentValueList();
$errors = $cssParser->getParseErrors();
if ( $errors ) {
return [ $errors[0][0], $errors[0][1], $errors[0][2] ];
}
$res = $this->validateTokens( $cvList->toTokenArray() );
if ( $res !== true ) {
return $res;
}
return true;
}
/**
* Entrypoint to check <style> tags
*
* Note that data urls are allowed in @font-face
*
* @param string $value
* @return array|bool True if good or array containing error details
*/
public function checkStyleTag( $value ) {
if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) {
return [ 'invalid-control-character', 0, 0 ];
}
$cssParser = CSSParser::newFromString( $value );
$stylesheet = $cssParser->parseStylesheet();
$errors = $cssParser->getParseErrors();
if ( $errors ) {
return [ $errors[0][0], $errors[0][1], $errors[0][2] ];
}
$topLevelRules = $stylesheet->getRuleList();
foreach ( $topLevelRules as $rule ) {
if ( $rule instanceof AtRule ) {
$res = $this->validateAtRule( $rule );
if ( $res !== true ) {
return $res;
}
if ( $rule->getName() === 'font-face' ) {
// @font-face has laxer rules
$res = $this->validateTokens( $rule->toTokenArray(), true );
if ( $res !== true ) {
return $res;
}
continue;
}
}
// Note, this incidentally @namespace foo url( 'https://example.com' );
// We don't care about that but its super obscure so doesn't matter.
$res = $this->validateTokens( $rule->toTokenArray() );
if ( $res !== true ) {
return $res;
}
}
return true;
}
/**
* Validate that an at-rule is not on the ban list
*
* @param AtRule $rule
* @return array|bool
*/
private function validateAtRule( AtRule $rule ) {
$name = strtolower( $rule->getName() );
if (
in_array( $name, self::BANNED_AT_RULE ) ||
preg_match( '/[^-a-z]/', $name )
) {
return [ "banned-at-rule-$name", $rule->getPosition()[0], $rule->getPosition()[1] ];
}
return true;
}
/**
* Validate a list of tokens making sure none of them are url-like
*
* @param Token[] $tokens List of tokens
* @param bool $allowDataFonts Allow data: urls with a font type
* @return array|bool
*/
private function validateTokens( array $tokens, $allowDataFonts = false ) {
// Go through all the tokens, and make sure none of them
// are url(). Except we allow urls that reference the current
// document. data: urls are not allowed because the predecessor
// to this class banned them. It is unclear why, perhaps the worry
// is embedding an SVG inside the data url to bypass sanitizer.
// We also ban the image and image-set() functions because they
// allow setting a url without the url function inside.
// We also ban src() for forwards-compatibility.
for ( $i = 0; $i < count( $tokens ); $i++ ) {
$token = $tokens[$i];
// unquoted urls are a T_URL where quoted urls are T_FUNCTION.
if ( $token->type() === Token::T_URL ) {
if (
!str_starts_with( $token->value(), '#' ) &&
!( $allowDataFonts &&
( str_starts_with( $token->value(), 'data:font/' )
|| str_starts_with( $token->value(), 'data:;base64,' ) ) /* T71008#717580 */
)
) {
return [ 'banned-url', $token->getPosition()[0], $token->getPosition()[1] ];
}
} elseif ( $token->type() === Token::T_BAD_URL ) {
// In theory browsers should ignore this, but
// better to err on the side of failing when something
// weird is going on.
return [ 'banned-url', $token->getPosition()[0], $token->getPosition()[1] ];
} elseif ( $token->type() === Token::T_FUNCTION && strtolower( $token->value() ) === 'url' ) {
for ( $j = $i + 1; $j < count( $tokens ) && $tokens[$j]->type() === Token::T_WHITESPACE; $j++ );
if ( $j < count( $tokens ) && $tokens[$j]->type() === Token::T_STRING ) {
if (
str_starts_with( $tokens[$j]->value(), '#' ) ||
( $allowDataFonts &&
( str_starts_with( $tokens[$j]->value(), 'data:font/' )
|| str_starts_with( $tokens[$j]->value(), 'data:;base64,' ) ) /* T71008#717580 */
)
) {
continue;
}
}
return [ 'banned-url', $token->getPosition()[0], $token->getPosition()[1] ];
} elseif (
$token->type() === Token::T_FUNCTION &&
in_array( strtolower( $token->value() ), self::BANNED_FUNCS )
) {
return [ 'banned-function-' . $token->value(), $token->getPosition()[0], $token->getPosition()[1] ];
}
}
return true;
}
}

View file

@ -37,6 +37,7 @@ use MediaWiki\Request\WebRequest;
use MediaWiki\Shell\Shell;
use MediaWiki\Status\Status;
use MediaWiki\Title\Title;
use MediaWiki\Upload\SVGCSSChecker;
use MediaWiki\User\User;
use MediaWiki\User\UserIdentity;
use Wikimedia\AtEase\AtEase;
@ -1714,15 +1715,17 @@ abstract class UploadBase {
// Check <style> css
if ( $strippedElement === 'style'
&& self::checkCssFragment( Sanitizer::normalizeCss( $data ) )
) {
wfDebug( __METHOD__ . ": hostile css in style element." );
$cssCheck = ( new SVGCSSChecker )->checkStyleTag( $data );
if ( $cssCheck !== true ) {
wfDebug( __METHOD__ . ": hostile css in style element." );
return [ 'uploaded-hostile-svg' ];
return [ 'uploaded-hostile-svg', $cssCheck[0], $cssCheck[1], $cssCheck[2] ];
}
}
static $cssAttrs = [ 'font', 'clip-path', 'fill', 'filter', 'marker',
'marker-end', 'marker-mid', 'marker-start', 'mask', 'stroke' ];
'marker-end', 'marker-mid', 'marker-start', 'mask', 'stroke', 'cursor' ];
foreach ( $attribs as $attrib => $value ) {
// If attributeNamespace is '', it is relative to its element's namespace
@ -1825,7 +1828,7 @@ abstract class UploadBase {
// use CSS styles to bring in remote code.
if ( $stripped === 'style'
&& self::checkCssFragment( Sanitizer::normalizeCss( $value ) )
&& ( new SVGCSSChecker )->checkStyleAttribute( $value ) !== true
) {
wfDebug( __METHOD__ . ": Found svg setting a style with "
. "remote url '$attrib'='$value' in uploaded file." );
@ -1834,7 +1837,7 @@ abstract class UploadBase {
// Several attributes can include css, css character escaping isn't allowed.
if ( in_array( $stripped, $cssAttrs, true )
&& self::checkCssFragment( $value )
&& ( new SVGCSSChecker )->checkPresentationalAttribute( $value ) !== true
) {
wfDebug( __METHOD__ . ": Found svg setting a style with "
. "remote url '$attrib'='$value' in uploaded file." );
@ -1858,52 +1861,6 @@ abstract class UploadBase {
return false; // No scripts detected
}
/**
* Check a block of CSS or CSS fragment for anything that looks like
* it is bringing in remote code.
* @param string $value a string of CSS
* @return bool true if the CSS contains an illegal string, false if otherwise
*/
private static function checkCssFragment( $value ) {
# Forbid external stylesheets, for both reliability and to protect viewer's privacy
if ( stripos( $value, '@import' ) !== false ) {
return true;
}
# We allow @font-face to embed fonts with data: urls, so we snip the string
# 'url' out so that this case won't match when we check for urls below
$pattern = '!(@font-face\s*{[^}]*src:)url(\("data:;base64,)!im';
$value = preg_replace( $pattern, '$1$2', $value );
# Check for remote and executable CSS. Unlike in Sanitizer::checkCss, the CSS
# properties filter and accelerator don't seem to be useful for xss in SVG files.
# Expression and -o-link don't seem to work either, but filtering them here in case.
# Additionally, we catch remote urls like url("http:..., url('http:..., url(http:...,
# but not local ones such as url("#..., url('#..., url(#....
if ( preg_match( '!expression
| -o-link\s*:
| -o-link-source\s*:
| -o-replace\s*:!imx', $value ) ) {
return true;
}
if ( preg_match_all(
"!(\s*(url|image|image-set)\s*\(\s*[\"']?\s*[^#]+.*?\))!sim",
$value,
$matches
) !== 0
) {
# TODO: redo this in one regex. Until then, url("#whatever") matches the first
foreach ( $matches[1] as $match ) {
if ( !preg_match( "!\s*(url|image|image-set)\s*\(\s*(#|'#|\"#)!im", $match ) ) {
return true;
}
}
}
return (bool)preg_match( '/[\000-\010\013\016-\037\177]/', $value );
}
/**
* Divide the element name passed by the XML parser to the callback into URI and prefix.
* @param string $element

View file

@ -533,7 +533,142 @@ class UploadBaseTest extends MediaWikiIntegrationTestCase {
true,
'SVG with non-local filter (T69044)'
],
[
'<svg xmlns="http://www.w3.org/2000/svg"><text x="0" y="20" cursor="url(\'http://example.com/foo.png\'), pointer">here</text></svg>',
true,
true,
'External url via cursor attribute'
],
[
'<svg xmlns="http://www.w3.org/2000/svg"><text x="0" y="20" style="fill: url( bad">here</text></svg>',
true,
true,
'bad url'
],
[
'<svg xmlns="http://www.w3.org/2000/svg"><text x="0" y="20" cursor="url(http://example.com/foo.png), pointer">here</text></svg>',
true,
true,
'External url via cursor attribute non quoted'
],
[
'<svg xmlns="http://www.w3.org/2000/svg"><style>@charset "ISO-2022-JP";</style></svg>',
true,
true,
'@charset in stylesheet'
],
[
'<svg xmlns="http://www.w3.org/2000/svg"><style>svg { fill: url( "https://example.com/baz#bar" ); }</style></svg>',
true,
true,
'url in stylesheet'
],
[
'<svg xmlns="http://www.w3.org/2000/svg"> <rect width="100" height="100" style="&amp;#47;*;background-image:url(https://www.google.com/images/srpr/logo11w.png)"/> </svg>',
true,
true,
'Double escaping comments (T85085)'
],
[
'<svg xmlns="http://www.w3.org/2000/svg"> <rect width="100" height="100" style="\/*;background-image:url(https://www.google.com/images/srpr/logo11w.png)"/> </svg>',
true,
true,
'escaping comment (T85085)'
],
[
'<svg xmlns="http://www.w3.org/2000/svg"> <rect width="100" height="100" style="*;background-image:url(https://www.google.com/images/srpr/logo11w.png)"/> </svg>',
true,
true,
'escaping comment full-width (T85085)'
],
[
'<?xml version="1.0" encoding="UTF-8"?>
<svg viewbox="0 0 220 120" xmlns="http://www.w3.org/2000/svg">
<style>
text {
mask-image: url( "#foo
;
mask-image: url( "https://example.com/evilmask.png" )
}
</style>
<text x="0" y="15">Evil Test file</text>
</svg>',
true,
true,
'Unclosed url (T85085)'
],
[
'<svg xmlns="http://www.w3.org/2000/svg"> <rect width="100" height="100" style="content: \'/*\'; fill: url( https://example.com ); bar: \'*/\'"/> </svg>',
true,
true,
'comment in string'
],
[
'<svg xmlns="http://www.w3.org/2000/svg"> <rect width="100" height="100" style="invalid;content: \'/*\'; fill: url( https://example.com ); bar: \'*/\'"/> </svg>',
true,
true,
'comment in string with invalid'
],
[
'<svg xmlns="http://www.w3.org/2000/svg"><style> @font-face { font-family:"ArialMT"; src:url("data:;base64,T1RUTwACACAAAQA") }</style></svg>',
true,
false,
'Allow embedded fonts with no mime in data url T71008#717580'
],
[
'<svg xmlns="http://www.w3.org/2000/svg"><style> @media print { color: red }</style></svg>',
true,
false,
'verify most at rules allowed'
],
[
'<!DOCTYPE svg [<!ENTITY foo "url" >] >
<svg xmlns="http://www.w3.org/2000/svg">
<style>
* { cursor: &foo;( \'https://example.com\' ), pointer}
</style>
<text x="0" y="20">foo</text>
</svg>',
true,
true,
'entity in style'
],
[
'<!DOCTYPE svg [<!ENTITY foo "url" >] >
<svg xmlns="http://www.w3.org/2000/svg">
<text x="0" y="20" cursor="&foo;(http://example.com)">foo</text>
</svg>',
true,
true,
'entity in attribute'
],
[
'<!DOCTYPE svg [<!ENTITY foo "\') url(\'" >] >
<svg xmlns="http://www.w3.org/2000/svg">
<style>
<![CDATA[
* { fill: url( \'#foo&foo;http://example.com\' ); }
* { fill: url( \'#foo&apos;)url(http://example.com\' ); }
]]>
</style>
<text x="0" y="20">foo</text>
</svg>',
true,
false,
'entities are not substituted in cdata'
],
[
'<svg xmlns="http://www.w3.org/2000/svg"><g style="fill:#000000;font-family=\'arial-boldmt\';font-size:20px"></g></svg>',
true,
false,
'Allow limited invalid CSS in style attributes'
],
[
'<svg xmlns="http://www.w3.org/2000/svg"><g style="font-size:200px;stroke;stroke-width:0;"></g></svg>',
true,
false,
'Allow limited invalid CSS in style attributes 2'
],
];
// phpcs:enable
}