From 66e58d1ea783e7b02ef64fce1acce03297cbdfd0 Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Fri, 8 Oct 2021 11:38:33 -0700 Subject: [PATCH] wfParseUrl: rely on parse_url for proto-relative urls On July 12 2011 in df2306b97 Roan added this workaround for parse_url lacking support for protocol-relative URLs. In 2012 PHP 5.4.7 came out, which added support for protocol-relative URLs to parse_url, see [1]. It's 2021, and after a decade of honorably serving our community, this workaround can now be retired. It's not a simple deletion since we have a custom 'delimiter' bit that wfParseUrl returns, and we need to keep supporting that. [1] https://bugs.php.net/bug.php?id=62844 Change-Id: If9b3bb2a51afa7af1ba8e674eadaa8db4f6f33f9 --- includes/GlobalFunctions.php | 24 +++++++------------ .../GlobalFunctions/wfParseUrlTest.php | 8 +++++++ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index 3760c563595..61bf30318f8 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -776,22 +776,13 @@ function wfUrlProtocolsWithoutProtRel() { function wfParseUrl( $url ) { global $wgUrlProtocols; // Allow all protocols defined in DefaultSettings/LocalSettings.php - // Protocol-relative URLs are handled really badly by parse_url(). It's so - // bad that the easiest way to handle them is to just prepend 'http:' and - // strip the protocol out later. - $wasRelative = substr( $url, 0, 2 ) == '//'; - if ( $wasRelative ) { - $url = "http:$url"; - } $bits = parse_url( $url ); - // parse_url() returns an array without scheme for some invalid URLs, e.g. - // parse_url("%0Ahttp://example.com") == [ 'host' => '%0Ahttp', 'path' => 'example.com' ] - if ( !$bits || !isset( $bits['scheme'] ) ) { + if ( !$bits ) { return false; } // parse_url() incorrectly handles schemes case-sensitively. Convert it to lowercase. 
- $bits['scheme'] = strtolower( $bits['scheme'] ); + $bits['scheme'] = strtolower( $bits['scheme'] ?? '' ); // most of the protocols are followed by ://, but mailto: and sometimes news: not, check for it if ( in_array( $bits['scheme'] . '://', $wgUrlProtocols ) ) { @@ -804,7 +795,13 @@ function wfParseUrl( $url ) { $bits['host'] = $bits['path']; $bits['path'] = ''; } + } elseif ( !strlen( $bits['scheme'] ) && isset( $bits['host'] ) ) { + // This means $url was protocol-relative, e.g. //example.com + $bits['delimiter'] = '//'; } else { + // Option 1: scheme is not in $wgUrlProtocols + // Option 2: parse_url() returns an array without scheme or host for some invalid URLs, + // e.g. parse_url("%0Ahttp://example.com") == [ 'path' => '%0Ahttp://example.com' ] return false; } @@ -823,11 +820,6 @@ function wfParseUrl( $url ) { } } - // If the URL was protocol-relative, fix scheme and delimiter - if ( $wasRelative ) { - $bits['scheme'] = ''; - $bits['delimiter'] = '//'; - } return $bits; } diff --git a/tests/phpunit/includes/GlobalFunctions/wfParseUrlTest.php b/tests/phpunit/includes/GlobalFunctions/wfParseUrlTest.php index 8774bb20218..c8f6275f9ea 100644 --- a/tests/phpunit/includes/GlobalFunctions/wfParseUrlTest.php +++ b/tests/phpunit/includes/GlobalFunctions/wfParseUrlTest.php @@ -192,6 +192,14 @@ class WfParseUrlTest extends MediaWikiIntegrationTestCase { 'fragment' => 'quux', ] ], + [ + '%0Ahttp://example.com', + false, + ], + [ + 'http:///test.com', + false, + ] ]; } }