wfParseUrl: rely on parse_url for proto-relative urls

On July 12 2011 in df2306b97 Roan added this workaround
for parse_url lacking support for protocol-relative URLs.

In 2012, PHP 5.4.7 came out, which added support for
protocol-relative URLs to parse_url; see [1].

It's 2021, and after a decade of honorably serving our
community, this workaround can now be retired.

It's not a simple deletion, since wfParseUrl returns a custom
'delimiter' bit in its result, and we need to keep supporting
that.

[1] https://bugs.php.net/bug.php?id=62844
Change-Id: If9b3bb2a51afa7af1ba8e674eadaa8db4f6f33f9
This commit is contained in:
Petr Pchelko 2021-10-08 11:38:33 -07:00
parent 6210feb3db
commit 66e58d1ea7
2 changed files with 16 additions and 16 deletions

View file

@ -776,22 +776,13 @@ function wfUrlProtocolsWithoutProtRel() {
function wfParseUrl( $url ) {
global $wgUrlProtocols; // Allow all protocols defined in DefaultSettings/LocalSettings.php
// Protocol-relative URLs are handled really badly by parse_url(). It's so
// bad that the easiest way to handle them is to just prepend 'http:' and
// strip the protocol out later.
$wasRelative = substr( $url, 0, 2 ) == '//';
if ( $wasRelative ) {
$url = "http:$url";
}
$bits = parse_url( $url );
// parse_url() returns an array without scheme for some invalid URLs, e.g.
// parse_url("%0Ahttp://example.com") == [ 'host' => '%0Ahttp', 'path' => 'example.com' ]
if ( !$bits || !isset( $bits['scheme'] ) ) {
if ( !$bits ) {
return false;
}
// parse_url() incorrectly handles schemes case-sensitively. Convert it to lowercase.
$bits['scheme'] = strtolower( $bits['scheme'] );
$bits['scheme'] = strtolower( $bits['scheme'] ?? '' );
// most of the protocols are followed by ://, but mailto: and sometimes news: not, check for it
if ( in_array( $bits['scheme'] . '://', $wgUrlProtocols ) ) {
@ -804,7 +795,13 @@ function wfParseUrl( $url ) {
$bits['host'] = $bits['path'];
$bits['path'] = '';
}
} elseif ( !strlen( $bits['scheme'] ) && isset( $bits['host'] ) ) {
// This means $url was protocol-relative, e.g. //example.com
$bits['delimiter'] = '//';
} else {
// Option 1: scheme is not in $wgUrlProtocols
// Option 2: parse_url() returns an array without scheme or host for some invalid URLs,
// e.g. parse_url("%0Ahttp://example.com") == [ 'path' => '%0Ahttp://example.com' ]
return false;
}
@ -823,11 +820,6 @@ function wfParseUrl( $url ) {
}
}
// If the URL was protocol-relative, fix scheme and delimiter
if ( $wasRelative ) {
$bits['scheme'] = '';
$bits['delimiter'] = '//';
}
return $bits;
}

View file

@ -192,6 +192,14 @@ class WfParseUrlTest extends MediaWikiIntegrationTestCase {
'fragment' => 'quux',
]
],
[
'%0Ahttp://example.com',
false,
],
[
'http:///test.com',
false,
]
];
}
}