Refactor URL-parsing global functions to class
The new class, UrlUtils, is usable standalone or as a service. Using it as a service will just automatically load a few settings from site configuration. In addition to just making our code cleaner, this will enable making some of Setup.php's dynamic configuration more sane. Test coverage is all lines except invalid URLs -- I couldn't find any. Bug: T305093 Change-Id: I706ef8a50aafb518e13222719575d274c3583b90
This commit is contained in:
parent
f21b52bec3
commit
472a914c63
16 changed files with 1432 additions and 789 deletions
|
|
@ -111,6 +111,23 @@ because of Phabricator reports.
|
|||
|
||||
=== Deprecations in 1.39 ===
|
||||
* PageProps::getInstance(), deprecated since 1.38, emits deprecation warnings.
|
||||
* The following global functions are deprecated in favor of the listed UrlUtils
|
||||
methods.
|
||||
- wfExpandUrl -> UrlUtils::expand
|
||||
- wfGetServerUrl -> UrlUtils::getServer
|
||||
- wfAssembleUrl -> UrlUtils::assemble
|
||||
- wfRemoveDotSegments -> UrlUtils::removeDotSegments
|
||||
- wfUrlProtocols -> UrlUtils::validProtocols
|
||||
- wfUrlProtocolsWithoutProtRel -> UrlUtils::validAbsoluteProtocols
|
||||
- wfParseUrl -> UrlUtils::parse
|
||||
- wfExpandIRI -> UrlUtils::expandIRI
|
||||
- wfMatchesDomainList -> UrlUtils::matchesDomainList
|
||||
These methods are exact replacements except that
|
||||
1) they return null instead of false or empty string on error (where
|
||||
applicable);
|
||||
2) UrlUtils::validProtocols does not take a parameter (documentation said not
|
||||
to pass one to wfUrlProtocols anyway);
|
||||
3) they use type hints (don't try passing null instead of string, etc.).
|
||||
* …
|
||||
|
||||
=== Other changes in 1.39 ===
|
||||
|
|
|
|||
|
|
@ -175,6 +175,7 @@ class AutoLoader {
|
|||
'MediaWiki\\SpecialPage\\' => __DIR__ . '/specialpage/',
|
||||
'MediaWiki\\Tidy\\' => __DIR__ . '/tidy/',
|
||||
'MediaWiki\\User\\' => __DIR__ . '/user/',
|
||||
'MediaWiki\\Utils\\' => __DIR__ . '/utils/',
|
||||
'MediaWiki\\Widget\\' => __DIR__ . '/widget/',
|
||||
'Wikimedia\\' => __DIR__ . '/libs/',
|
||||
'Wikimedia\\Http\\' => __DIR__ . '/libs/http/',
|
||||
|
|
|
|||
|
|
@ -187,11 +187,14 @@ define( 'APCOND_ISBOT', 9 );
|
|||
|
||||
/** @{
|
||||
* Protocol constants for wfExpandUrl()
|
||||
* PROTO_FALLBACK is @since 1.39
|
||||
*/
|
||||
define( 'PROTO_HTTP', 'http://' );
|
||||
define( 'PROTO_HTTPS', 'https://' );
|
||||
define( 'PROTO_RELATIVE', '//' );
|
||||
define( 'PROTO_CURRENT', null );
|
||||
define( 'PROTO_FALLBACK', null );
|
||||
// Legacy alias for PROTO_FALLBACK from when the current request's protocol was always the fallback
|
||||
define( 'PROTO_CURRENT', PROTO_FALLBACK );
|
||||
define( 'PROTO_CANONICAL', 1 );
|
||||
define( 'PROTO_INTERNAL', 2 );
|
||||
/** @} */
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ use MediaWiki\Logger\LoggerFactory;
|
|||
use MediaWiki\MediaWikiServices;
|
||||
use MediaWiki\ProcOpenError;
|
||||
use MediaWiki\Shell\Shell;
|
||||
use MediaWiki\Utils\UrlUtils;
|
||||
use Wikimedia\AtEase\AtEase;
|
||||
use Wikimedia\ParamValidator\TypeDef\ExpiryDef;
|
||||
use Wikimedia\RequestTimeout\RequestTimeout;
|
||||
|
|
@ -456,6 +457,36 @@ function wfAppendQuery( $url, $query ) {
|
|||
return $url;
|
||||
}
|
||||
|
||||
/**
 * Obtain the UrlUtils instance that the deprecated URL global functions delegate to.
 *
 * If a service container exists and it knows the 'UrlUtils' service, that service is
 * returned. Otherwise a standalone UrlUtils is built from the relevant $wg* globals.
 *
 * @deprecated Get a UrlUtils from services, or construct your own
 * @internal
 * @return UrlUtils from services if initialized, otherwise make one from globals
 */
function wfGetUrlUtils(): UrlUtils {
	global $wgServer, $wgCanonicalServer, $wgInternalServer, $wgRequest, $wgHttpsPort,
		$wgUrlProtocols;

	$container = MediaWikiServices::hasInstance() ? MediaWikiServices::getInstance() : null;
	if ( $container !== null && $container->hasService( 'UrlUtils' ) ) {
		return $container->getUrlUtils();
	}

	// No usable service: fall back to the request's protocol if there is a request object,
	// otherwise detect it directly.
	if ( $wgRequest ) {
		$fallbackProtocol = $wgRequest->getProtocol();
	} else {
		$fallbackProtocol = WebRequest::detectProtocol();
	}

	// UrlUtils throws if the relevant $wg(|Canonical|Internal) variable is null, but the old
	// implementations implicitly converted it to an empty string (presumably by mistake).
	// Preserve the old behavior for compatibility.
	$options = [
		UrlUtils::SERVER => $wgServer ?? '',
		UrlUtils::CANONICAL_SERVER => $wgCanonicalServer ?? '',
		UrlUtils::INTERNAL_SERVER => $wgInternalServer ?? '',
		UrlUtils::FALLBACK_PROTOCOL => $fallbackProtocol,
		UrlUtils::HTTPS_PORT => $wgHttpsPort,
		UrlUtils::VALID_PROTOCOLS => $wgUrlProtocols,
	];

	return new UrlUtils( $options );
}
|
||||
|
||||
/**
|
||||
* Expand a potentially local URL to a fully-qualified URL. Assumes $wgServer
|
||||
* is correct.
|
||||
|
|
@ -470,9 +501,7 @@ function wfAppendQuery( $url, $query ) {
|
|||
* For protocol-relative URLs, use the protocol of $wgCanonicalServer
|
||||
* PROTO_INTERNAL: Like PROTO_CANONICAL, but uses $wgInternalServer instead of $wgCanonicalServer
|
||||
*
|
||||
* @todo this won't work with current-path-relative URLs
|
||||
* like "subdir/foo.html", etc.
|
||||
*
|
||||
* @deprecated since 1.39, use UrlUtils::expand()
|
||||
* @param string $url Either fully-qualified or a local path + query
|
||||
* @param string|int|null $defaultProto One of the PROTO_* constants. Determines the
|
||||
* protocol to use if $url or $wgServer is protocol-relative
|
||||
|
|
@ -480,86 +509,20 @@ function wfAppendQuery( $url, $query ) {
|
|||
* no valid URL can be constructed
|
||||
*/
|
||||
function wfExpandUrl( $url, $defaultProto = PROTO_CURRENT ) {
|
||||
global $wgServer, $wgCanonicalServer, $wgInternalServer, $wgRequest,
|
||||
$wgHttpsPort;
|
||||
if ( $defaultProto === PROTO_CANONICAL ) {
|
||||
$serverUrl = $wgCanonicalServer;
|
||||
} elseif ( $defaultProto === PROTO_INTERNAL && $wgInternalServer !== false ) {
|
||||
// Make $wgInternalServer fall back to $wgServer if not set
|
||||
$serverUrl = $wgInternalServer;
|
||||
} else {
|
||||
$serverUrl = $wgServer;
|
||||
if ( $defaultProto === PROTO_CURRENT ) {
|
||||
$defaultProto = $wgRequest->getProtocol() . '://';
|
||||
}
|
||||
}
|
||||
|
||||
// Analyze $serverUrl to obtain its protocol
|
||||
$bits = wfParseUrl( $serverUrl );
|
||||
$serverHasProto = $bits && $bits['scheme'] != '';
|
||||
|
||||
if ( $defaultProto === PROTO_CANONICAL || $defaultProto === PROTO_INTERNAL ) {
|
||||
if ( $serverHasProto ) {
|
||||
$defaultProto = $bits['scheme'] . '://';
|
||||
} else {
|
||||
// $wgCanonicalServer or $wgInternalServer doesn't have a protocol.
|
||||
// This really isn't supposed to happen. Fall back to HTTP in this
|
||||
// ridiculous case.
|
||||
$defaultProto = PROTO_HTTP;
|
||||
}
|
||||
}
|
||||
|
||||
$defaultProtoWithoutSlashes = $defaultProto !== null ? substr( $defaultProto, 0, -2 ) : '';
|
||||
|
||||
if ( substr( $url, 0, 2 ) == '//' ) {
|
||||
$url = $defaultProtoWithoutSlashes . $url;
|
||||
} elseif ( substr( $url, 0, 1 ) == '/' ) {
|
||||
// If $serverUrl is protocol-relative, prepend $defaultProtoWithoutSlashes,
|
||||
// otherwise leave it alone.
|
||||
if ( $serverHasProto ) {
|
||||
$url = $serverUrl . $url;
|
||||
} else {
|
||||
// If an HTTPS URL is synthesized from a protocol-relative $wgServer, allow the
|
||||
// user to override the port number (T67184)
|
||||
if ( $defaultProto === PROTO_HTTPS && $wgHttpsPort != 443 ) {
|
||||
if ( isset( $bits['port'] ) ) {
|
||||
throw new Exception( 'A protocol-relative $wgServer may not contain a port number' );
|
||||
}
|
||||
$url = $defaultProtoWithoutSlashes . $serverUrl . ':' . $wgHttpsPort . $url;
|
||||
} else {
|
||||
$url = $defaultProtoWithoutSlashes . $serverUrl . $url;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$bits = wfParseUrl( $url );
|
||||
|
||||
if ( $bits && isset( $bits['path'] ) ) {
|
||||
$bits['path'] = wfRemoveDotSegments( $bits['path'] );
|
||||
return wfAssembleUrl( $bits );
|
||||
} elseif ( $bits ) {
|
||||
# No path to expand
|
||||
return $url;
|
||||
} elseif ( substr( $url, 0, 1 ) != '/' ) {
|
||||
# URL is a relative path
|
||||
return wfRemoveDotSegments( $url );
|
||||
}
|
||||
|
||||
# Expanded URL is not valid.
|
||||
return false;
|
||||
return wfGetUrlUtils()->expand( (string)$url, $defaultProto ) ?? false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the wiki's "server", i.e. the protocol and host part of the URL, with a
|
||||
* protocol specified using a PROTO_* constant as in wfExpandUrl()
|
||||
*
|
||||
* @deprecated since 1.39, use UrlUtils::getServer()
|
||||
* @since 1.32
|
||||
* @param string|int|null $proto One of the PROTO_* constants.
|
||||
* @return string The URL
|
||||
*/
|
||||
function wfGetServerUrl( $proto ) {
|
||||
$url = wfExpandUrl( '/', $proto );
|
||||
return substr( $url, 0, -1 );
|
||||
return wfGetUrlUtils()->getServer( $proto ) ?? '';
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -569,52 +532,13 @@ function wfGetServerUrl( $proto ) {
|
|||
* This is the basic structure used (brackets contain keys for $urlParts):
|
||||
* [scheme][delimiter][user]:[pass]@[host]:[port][path]?[query]#[fragment]
|
||||
*
|
||||
* @todo Need to integrate this into wfExpandUrl (see T34168)
|
||||
*
|
||||
* @deprecated since 1.39, use UrlUtils::assemble()
|
||||
* @since 1.19
|
||||
* @param array $urlParts URL parts, as output from wfParseUrl
|
||||
* @return string URL assembled from its component parts
|
||||
*/
|
||||
function wfAssembleUrl( $urlParts ) {
|
||||
$result = '';
|
||||
|
||||
if ( isset( $urlParts['delimiter'] ) ) {
|
||||
if ( isset( $urlParts['scheme'] ) ) {
|
||||
$result .= $urlParts['scheme'];
|
||||
}
|
||||
|
||||
$result .= $urlParts['delimiter'];
|
||||
}
|
||||
|
||||
if ( isset( $urlParts['host'] ) ) {
|
||||
if ( isset( $urlParts['user'] ) ) {
|
||||
$result .= $urlParts['user'];
|
||||
if ( isset( $urlParts['pass'] ) ) {
|
||||
$result .= ':' . $urlParts['pass'];
|
||||
}
|
||||
$result .= '@';
|
||||
}
|
||||
|
||||
$result .= $urlParts['host'];
|
||||
|
||||
if ( isset( $urlParts['port'] ) ) {
|
||||
$result .= ':' . $urlParts['port'];
|
||||
}
|
||||
}
|
||||
|
||||
if ( isset( $urlParts['path'] ) ) {
|
||||
$result .= $urlParts['path'];
|
||||
}
|
||||
|
||||
if ( isset( $urlParts['query'] ) && $urlParts['query'] !== '' ) {
|
||||
$result .= '?' . $urlParts['query'];
|
||||
}
|
||||
|
||||
if ( isset( $urlParts['fragment'] ) ) {
|
||||
$result .= '#' . $urlParts['fragment'];
|
||||
}
|
||||
|
||||
return $result;
|
||||
return wfGetUrlUtils()->assemble( (array)$urlParts );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -622,138 +546,38 @@ function wfAssembleUrl( $urlParts ) {
|
|||
* '/a/./b/../c/' becomes '/a/c/'. For details on the algorithm, please see
|
||||
* RFC3986 section 5.2.4.
|
||||
*
|
||||
* @todo Need to integrate this into wfExpandUrl (see T34168)
|
||||
*
|
||||
* @since 1.19
|
||||
*
|
||||
* @deprecated since 1.39, use UrlUtils::removeDotSegments()
|
||||
* @param string $urlPath URL path, potentially containing dot-segments
|
||||
* @return string URL path with all dot-segments removed
|
||||
*/
|
||||
function wfRemoveDotSegments( $urlPath ) {
|
||||
$output = '';
|
||||
$inputOffset = 0;
|
||||
$inputLength = strlen( $urlPath );
|
||||
|
||||
while ( $inputOffset < $inputLength ) {
|
||||
$prefixLengthOne = substr( $urlPath, $inputOffset, 1 );
|
||||
$prefixLengthTwo = substr( $urlPath, $inputOffset, 2 );
|
||||
$prefixLengthThree = substr( $urlPath, $inputOffset, 3 );
|
||||
$prefixLengthFour = substr( $urlPath, $inputOffset, 4 );
|
||||
$trimOutput = false;
|
||||
|
||||
if ( $prefixLengthTwo == './' ) {
|
||||
# Step A, remove leading "./"
|
||||
$inputOffset += 2;
|
||||
} elseif ( $prefixLengthThree == '../' ) {
|
||||
# Step A, remove leading "../"
|
||||
$inputOffset += 3;
|
||||
} elseif ( ( $prefixLengthTwo == '/.' ) && ( $inputOffset + 2 == $inputLength ) ) {
|
||||
# Step B, replace leading "/.$" with "/"
|
||||
$inputOffset += 1;
|
||||
$urlPath[$inputOffset] = '/';
|
||||
} elseif ( $prefixLengthThree == '/./' ) {
|
||||
# Step B, replace leading "/./" with "/"
|
||||
$inputOffset += 2;
|
||||
} elseif ( $prefixLengthThree == '/..' && ( $inputOffset + 3 == $inputLength ) ) {
|
||||
# Step C, replace leading "/..$" with "/" and
|
||||
# remove last path component in output
|
||||
$inputOffset += 2;
|
||||
$urlPath[$inputOffset] = '/';
|
||||
$trimOutput = true;
|
||||
} elseif ( $prefixLengthFour == '/../' ) {
|
||||
# Step C, replace leading "/../" with "/" and
|
||||
# remove last path component in output
|
||||
$inputOffset += 3;
|
||||
$trimOutput = true;
|
||||
} elseif ( ( $prefixLengthOne == '.' ) && ( $inputOffset + 1 == $inputLength ) ) {
|
||||
# Step D, remove "^.$"
|
||||
$inputOffset += 1;
|
||||
} elseif ( ( $prefixLengthTwo == '..' ) && ( $inputOffset + 2 == $inputLength ) ) {
|
||||
# Step D, remove "^..$"
|
||||
$inputOffset += 2;
|
||||
} else {
|
||||
# Step E, move leading path segment to output
|
||||
if ( $prefixLengthOne == '/' ) {
|
||||
$slashPos = strpos( $urlPath, '/', $inputOffset + 1 );
|
||||
} else {
|
||||
$slashPos = strpos( $urlPath, '/', $inputOffset );
|
||||
}
|
||||
if ( $slashPos === false ) {
|
||||
$output .= substr( $urlPath, $inputOffset );
|
||||
$inputOffset = $inputLength;
|
||||
} else {
|
||||
$output .= substr( $urlPath, $inputOffset, $slashPos - $inputOffset );
|
||||
$inputOffset += $slashPos - $inputOffset;
|
||||
}
|
||||
}
|
||||
|
||||
if ( $trimOutput ) {
|
||||
$slashPos = strrpos( $output, '/' );
|
||||
if ( $slashPos === false ) {
|
||||
$output = '';
|
||||
} else {
|
||||
$output = substr( $output, 0, $slashPos );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $output;
|
||||
return wfGetUrlUtils()->removeDotSegments( (string)$urlPath );
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a regular expression of url protocols
|
||||
*
|
||||
* @deprecated since 1.39, use UrlUtils::validProtocols()
|
||||
* @param bool $includeProtocolRelative If false, remove '//' from the returned protocol list.
|
||||
* DO NOT USE this directly, use wfUrlProtocolsWithoutProtRel() instead
|
||||
* @return string
|
||||
*/
|
||||
function wfUrlProtocols( $includeProtocolRelative = true ) {
|
||||
global $wgUrlProtocols;
|
||||
|
||||
// Cache return values separately based on $includeProtocolRelative
|
||||
static $withProtRel = null, $withoutProtRel = null;
|
||||
$cachedValue = $includeProtocolRelative ? $withProtRel : $withoutProtRel;
|
||||
if ( $cachedValue !== null ) {
|
||||
return $cachedValue;
|
||||
}
|
||||
|
||||
// Support old-style $wgUrlProtocols strings, for backwards compatibility
|
||||
// with LocalSettings files from 1.5
|
||||
if ( is_array( $wgUrlProtocols ) ) {
|
||||
$protocols = [];
|
||||
foreach ( $wgUrlProtocols as $protocol ) {
|
||||
// Filter out '//' if !$includeProtocolRelative
|
||||
if ( $includeProtocolRelative || $protocol !== '//' ) {
|
||||
$protocols[] = preg_quote( $protocol, '/' );
|
||||
}
|
||||
}
|
||||
|
||||
$retval = implode( '|', $protocols );
|
||||
} else {
|
||||
// Ignore $includeProtocolRelative in this case
|
||||
// This case exists for pre-1.6 compatibility, and we can safely assume
|
||||
// that '//' won't appear in a pre-1.6 config because protocol-relative
|
||||
// URLs weren't supported until 1.18
|
||||
$retval = $wgUrlProtocols;
|
||||
}
|
||||
|
||||
// Cache return value
|
||||
if ( $includeProtocolRelative ) {
|
||||
$withProtRel = $retval;
|
||||
} else {
|
||||
$withoutProtRel = $retval;
|
||||
}
|
||||
return $retval;
|
||||
$method = $includeProtocolRelative ? 'validProtocols' : 'validAbsoluteProtocols';
|
||||
return wfGetUrlUtils()->$method();
|
||||
}
|
||||
|
||||
/**
|
||||
* Like wfUrlProtocols(), but excludes '//' from the protocol list. Use this if
|
||||
* you need a regex that matches all URL protocols but does not match protocol-
|
||||
* relative URLs
|
||||
* @deprecated since 1.39, use UrlUtils::validAbsoluteProtocols()
|
||||
* @return string
|
||||
*/
|
||||
function wfUrlProtocolsWithoutProtRel() {
|
||||
return wfUrlProtocols( false );
|
||||
return wfGetUrlUtils()->validAbsoluteProtocols();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -766,6 +590,7 @@ function wfUrlProtocolsWithoutProtRel() {
|
|||
* 4) Rejects some invalid URLs that parse_url doesn't, e.g. the empty string or URLs starting with
|
||||
* a line feed character.
|
||||
*
|
||||
* @deprecated since 1.39, use UrlUtils::parse()
|
||||
* @param string $url A URL to parse
|
||||
* @return string[]|bool Bits of the URL in an associative array, or false on failure.
|
||||
* Possible fields:
|
||||
|
|
@ -782,61 +607,7 @@ function wfUrlProtocolsWithoutProtRel() {
|
|||
* - fragment: the part after #, can be missing.
|
||||
*/
|
||||
function wfParseUrl( $url ) {
|
||||
global $wgUrlProtocols; // Allow all protocols defined by the UrlProtocols setting.
|
||||
|
||||
// Protocol-relative URLs are handled really badly by parse_url(). It's so
|
||||
// bad that the easiest way to handle them is to just prepend 'http:' and
|
||||
// strip the protocol out later.
|
||||
$wasRelative = substr( $url, 0, 2 ) == '//';
|
||||
if ( $wasRelative ) {
|
||||
$url = "http:$url";
|
||||
}
|
||||
$bits = parse_url( $url );
|
||||
// parse_url() returns an array without scheme for some invalid URLs, e.g.
|
||||
// parse_url("%0Ahttp://example.com") == [ 'host' => '%0Ahttp', 'path' => 'example.com' ]
|
||||
if ( !$bits || !isset( $bits['scheme'] ) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// parse_url() incorrectly handles schemes case-sensitively. Convert it to lowercase.
|
||||
$bits['scheme'] = strtolower( $bits['scheme'] );
|
||||
|
||||
// most of the protocols are followed by ://, but mailto: and sometimes news: not, check for it
|
||||
if ( in_array( $bits['scheme'] . '://', $wgUrlProtocols ) ) {
|
||||
$bits['delimiter'] = '://';
|
||||
} elseif ( in_array( $bits['scheme'] . ':', $wgUrlProtocols ) ) {
|
||||
$bits['delimiter'] = ':';
|
||||
// parse_url detects for news: and mailto: the host part of an url as path
|
||||
// We have to correct this wrong detection
|
||||
if ( isset( $bits['path'] ) ) {
|
||||
$bits['host'] = $bits['path'];
|
||||
$bits['path'] = '';
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Provide an empty host for eg. file:/// urls (see T30627) */
|
||||
if ( !isset( $bits['host'] ) ) {
|
||||
$bits['host'] = '';
|
||||
|
||||
// See T47069
|
||||
if ( isset( $bits['path'] ) ) {
|
||||
/* parse_url loses the third / for file:///c:/ urls (but not on variants) */
|
||||
if ( substr( $bits['path'], 0, 1 ) !== '/' ) {
|
||||
$bits['path'] = '/' . $bits['path'];
|
||||
}
|
||||
} else {
|
||||
$bits['path'] = '';
|
||||
}
|
||||
}
|
||||
|
||||
// If the URL was protocol-relative, fix scheme and delimiter
|
||||
if ( $wasRelative ) {
|
||||
$bits['scheme'] = '';
|
||||
$bits['delimiter'] = '//';
|
||||
}
|
||||
return $bits;
|
||||
return wfGetUrlUtils()->parse( (string)$url ) ?? false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -844,39 +615,24 @@ function wfParseUrl( $url ) {
|
|||
* encoded non-ASCII Unicode characters with their UTF-8 original forms
|
||||
* for more compact display and legibility for local audiences.
|
||||
*
|
||||
* @todo handle punycode domains too
|
||||
*
|
||||
* @deprecated since 1.39, use UrlUtils::expandIRI()
|
||||
* @param string $url
|
||||
* @return string
|
||||
*/
|
||||
function wfExpandIRI( $url ) {
|
||||
return preg_replace_callback(
|
||||
'/((?:%[89A-F][0-9A-F])+)/i',
|
||||
static function ( array $matches ) {
|
||||
return urldecode( $matches[1] );
|
||||
},
|
||||
wfExpandUrl( $url )
|
||||
);
|
||||
return wfGetUrlUtils()->expandIRI( (string)$url ) ?? '';
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether a given URL has a domain that occurs in a given set of domains
|
||||
*
|
||||
* @deprecated since 1.39, use UrlUtils::matchesDomainList()
|
||||
* @param string $url
|
||||
* @param array $domains Array of domains (strings)
|
||||
* @return bool True if the host part of $url ends in one of the strings in $domains
|
||||
*/
|
||||
function wfMatchesDomainList( $url, $domains ) {
|
||||
$bits = wfParseUrl( $url );
|
||||
if ( is_array( $bits ) && isset( $bits['host'] ) ) {
|
||||
$host = '.' . $bits['host'];
|
||||
foreach ( (array)$domains as $domain ) {
|
||||
$domain = '.' . $domain;
|
||||
if ( substr( $host, -strlen( $domain ) ) === $domain ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
return wfGetUrlUtils()->matchesDomainList( (string)$url, (array)$domains );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -121,6 +121,7 @@ use MediaWiki\User\UserNamePrefixSearch;
|
|||
use MediaWiki\User\UserNameUtils;
|
||||
use MediaWiki\User\UserOptionsLookup;
|
||||
use MediaWiki\User\UserOptionsManager;
|
||||
use MediaWiki\Utils\UrlUtils;
|
||||
use MediaWiki\Watchlist\WatchlistManager;
|
||||
use MessageCache;
|
||||
use MimeAnalyzer;
|
||||
|
|
@ -1778,6 +1779,14 @@ class MediaWikiServices extends ServiceContainer {
|
|||
return $this->getService( 'UploadRevisionImporter' );
|
||||
}
|
||||
|
||||
/**
 * Fetch the 'UrlUtils' service from this container.
 *
 * @since 1.39
 * @return UrlUtils
 */
public function getUrlUtils(): UrlUtils {
	$urlUtils = $this->getService( 'UrlUtils' );

	return $urlUtils;
}
|
||||
|
||||
/**
|
||||
* @since 1.36
|
||||
* @return UserCache
|
||||
|
|
|
|||
|
|
@ -166,6 +166,7 @@ use MediaWiki\User\UserNamePrefixSearch;
|
|||
use MediaWiki\User\UserNameUtils;
|
||||
use MediaWiki\User\UserOptionsLookup;
|
||||
use MediaWiki\User\UserOptionsManager;
|
||||
use MediaWiki\Utils\UrlUtils;
|
||||
use MediaWiki\Watchlist\WatchlistManager;
|
||||
use Wikimedia\DependencyStore\KeyValueDependencyStore;
|
||||
use Wikimedia\DependencyStore\SqlModuleDependencyStore;
|
||||
|
|
@ -1858,6 +1859,18 @@ return [
|
|||
);
|
||||
},
|
||||
|
||||
'UrlUtils' => static function ( MediaWikiServices $services ): UrlUtils {
	// Wire the UrlUtils service from site configuration; the fallback protocol comes from
	// the main request context's request.
	$mainConfig = $services->getMainConfig();

	$options = [
		UrlUtils::SERVER => $mainConfig->get( 'Server' ),
		UrlUtils::CANONICAL_SERVER => $mainConfig->get( 'CanonicalServer' ),
		UrlUtils::INTERNAL_SERVER => $mainConfig->get( 'InternalServer' ),
		UrlUtils::FALLBACK_PROTOCOL => RequestContext::getMain()->getRequest()->getProtocol(),
		UrlUtils::HTTPS_PORT => $mainConfig->get( 'HttpsPort' ),
		UrlUtils::VALID_PROTOCOLS => $mainConfig->get( 'UrlProtocols' ),
	];

	return new UrlUtils( $options );
},
|
||||
|
||||
'UserCache' => static function ( MediaWikiServices $services ): UserCache {
|
||||
return new UserCache(
|
||||
LoggerFactory::getInstance( 'UserCache' ),
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ class MWDebug {
|
|||
protected static $deprecationWarnings = [];
|
||||
|
||||
/**
|
||||
* @var string[] Deprecation filter regexes
|
||||
* @var array Keys are regexes, values are optional callbacks to call if the filter is hit
|
||||
*/
|
||||
protected static $deprecationFilters = [];
|
||||
|
||||
|
|
@ -367,8 +367,11 @@ class MWDebug {
|
|||
* caller column.
|
||||
*/
|
||||
public static function sendRawDeprecated( $msg, $sendToLog = true, $callerFunc = '' ) {
|
||||
foreach ( self::$deprecationFilters as $filter ) {
|
||||
foreach ( self::$deprecationFilters as $filter => $callback ) {
|
||||
if ( preg_match( $filter, $msg ) ) {
|
||||
if ( is_callable( $callback ) ) {
|
||||
$callback();
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -396,12 +399,15 @@ class MWDebug {
|
|||
* Use this to filter deprecation warnings when testing deprecated code.
|
||||
*
|
||||
* @param string $regex
|
||||
* @param ?callable $callback To call if $regex is hit
|
||||
*/
|
||||
public static function filterDeprecationForTest( $regex ) {
|
||||
public static function filterDeprecationForTest(
|
||||
string $regex, ?callable $callback = null
|
||||
): void {
|
||||
if ( !defined( 'MW_PHPUNIT_TEST' ) && !defined( 'MW_PARSER_TEST' ) ) {
|
||||
throw new RuntimeException( __METHOD__ . ' can only be used in tests' );
|
||||
}
|
||||
self::$deprecationFilters[] = $regex;
|
||||
self::$deprecationFilters[$regex] = $callback;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
515
includes/utils/UrlUtils.php
Normal file
515
includes/utils/UrlUtils.php
Normal file
|
|
@ -0,0 +1,515 @@
|
|||
<?php
|
||||
|
||||
namespace MediaWiki\Utils;
|
||||
|
||||
use BadMethodCallException;
|
||||
use Exception;
|
||||
use InvalidArgumentException;
|
||||
use MediaWiki\MainConfigSchema;
|
||||
use MWDebug;
|
||||
|
||||
/**
|
||||
* A service to expand, parse, and otherwise manipulate URLs.
|
||||
*
|
||||
* @since 1.39
|
||||
* @newable
|
||||
*/
|
||||
class UrlUtils {
|
||||
public const SERVER = 'server';
|
||||
public const CANONICAL_SERVER = 'canonicalServer';
|
||||
public const INTERNAL_SERVER = 'internalServer';
|
||||
public const FALLBACK_PROTOCOL = 'fallbackProtocol';
|
||||
public const HTTPS_PORT = 'httpsPort';
|
||||
public const VALID_PROTOCOLS = 'validProtocols';
|
||||
|
||||
/** @var ?string */
|
||||
private $server = null;
|
||||
|
||||
/** @var ?string */
|
||||
private $canonicalServer = null;
|
||||
|
||||
/** @var ?string */
|
||||
private $internalServer = null;
|
||||
/** @var string */
|
||||
private $fallbackProtocol = 'http';
|
||||
|
||||
/** @var int */
|
||||
private $httpsPort = 443;
|
||||
|
||||
/** @var array */
|
||||
private $validProtocols = MainConfigSchema::UrlProtocols['default'];
|
||||
|
||||
/** @var ?string */
|
||||
private $validProtocolsCache = null;
|
||||
|
||||
/** @var ?string */
|
||||
private $validAbsoluteProtocolsCache = null;
|
||||
|
||||
/**
 * @stable to call
 * @param array $options All keys are optional, but if you omit SERVER then calling expand()
 *   (and getServer(), expandIRI(), and matchesDomainList()) will throw. Recognized keys:
 *   * self::SERVER: The protocol and server portion of the URLs to expand, with no other parts
 *     (port, path, etc.). Example: 'https://example.com'. Protocol-relative URLs are
 *     allowed.
 *   * self::CANONICAL_SERVER: If SERVER is protocol-relative, this can be set to a
 *     fully-qualified version for use when PROTO_CANONICAL is passed to expand(). Defaults
 *     to SERVER, with 'http:' prepended if SERVER is protocol-relative.
 *   * self::INTERNAL_SERVER: An alternative to SERVER that's used when PROTO_INTERNAL is
 *     passed to expand(). It's intended for sites that have a different server name exposed
 *     to CDNs. Defaults to SERVER.
 *   * self::FALLBACK_PROTOCOL: Used by expand() when no $defaultProto parameter is provided.
 *     Defaults to 'http'. The instance created by ServiceWiring sets this to 'https' if the
 *     current request is detected to be via HTTPS, and 'http' otherwise.
 *   * self::HTTPS_PORT: Defaults to 443. Used when a protocol-relative URL is expanded to
 *     https.
 *   * self::VALID_PROTOCOLS: An array of recognized URL protocols. The default can be found
 *     in MainConfigSchema::UrlProtocols['default'].
 * @throws InvalidArgumentException if $options contains a key that is not one of the
 *   option constants listed above
 */
public function __construct( array $options = [] ) {
	// Each option constant's value doubles as the name of the property that stores it.
	$recognizedOptions = [
		self::SERVER,
		self::CANONICAL_SERVER,
		self::INTERNAL_SERVER,
		self::FALLBACK_PROTOCOL,
		self::HTTPS_PORT,
		self::VALID_PROTOCOLS,
	];

	foreach ( $options as $key => $value ) {
		// Strict membership test on purpose: with a loose comparison (as done by switch),
		// PHP 7 treats an integer key such as 0 as equal to any non-numeric string, so a
		// list-style array would be accepted and stored in a bogus property named "0".
		if ( !in_array( $key, $recognizedOptions, true ) ) {
			throw new InvalidArgumentException( "Unrecognized option \"$key\"" );
		}
		$this->$key = $value;
	}

	// The canonical and internal servers are derived from SERVER when not given explicitly.
	if ( $this->server !== null ) {
		if ( $this->canonicalServer === null ) {
			// A protocol-relative SERVER is made fully qualified using http.
			$this->canonicalServer = $this->expand( $this->server, PROTO_HTTP );
		}
		if ( $this->internalServer === null ) {
			$this->internalServer = $this->server;
		}
	}
}
|
||||
|
||||
/**
|
||||
* Expand a potentially local URL to a fully-qualified URL.
|
||||
*
|
||||
* The meaning of the PROTO_* constants is as follows:
|
||||
* PROTO_HTTP: Output a URL starting with http://
|
||||
* PROTO_HTTPS: Output a URL starting with https://
|
||||
* PROTO_RELATIVE: Output a URL starting with // (protocol-relative URL)
|
||||
* PROTO_FALLBACK: Output a URL starting with the FALLBACK_PROTOCOL option
|
||||
* PROTO_CURRENT: Legacy alias for PROTO_FALLBACK
|
||||
* PROTO_CANONICAL: For URLs without a domain, like /w/index.php, use CANONICAL_SERVER. For
|
||||
* protocol-relative URLs, use the protocol of CANONICAL_SERVER
|
||||
* PROTO_INTERNAL: Like PROTO_CANONICAL, but uses INTERNAL_SERVER instead of CANONICAL_SERVER
|
||||
*
|
||||
* @todo this won't work with current-path-relative URLs like "subdir/foo.html", etc.
|
||||
*
|
||||
* @throws BadMethodCallException if no server was passed to the constructor
|
||||
* @param string $url Either fully-qualified or a local path + query
|
||||
* @param string|int|null $defaultProto One of the PROTO_* constants. Determines the
|
||||
* protocol to use if $url or SERVER is protocol-relative
|
||||
* @return ?string Fully-qualified URL, current-path-relative URL or null if
|
||||
* no valid URL can be constructed
|
||||
*/
|
||||
public function expand( string $url, $defaultProto = PROTO_FALLBACK ): ?string {
	// Remember which server-selecting mode was requested, because $defaultProto is
	// overwritten with an actual protocol string further down.
	$useCanonical = $defaultProto === PROTO_CANONICAL;
	$useInternal = $defaultProto === PROTO_INTERNAL;

	if ( $useCanonical ) {
		$base = $this->canonicalServer;
	} elseif ( $useInternal ) {
		$base = $this->internalServer;
	} else {
		$base = $this->server;
		if ( $defaultProto === PROTO_FALLBACK ) {
			$defaultProto = $this->fallbackProtocol . '://';
		}
	}

	// Only URLs beginning with a slash (local paths and protocol-relative URLs) need the
	// configured server; everything else goes straight to parsing below.
	if ( substr( $url, 0, 1 ) === '/' ) {
		if ( $base === null ) {
			throw new BadMethodCallException(
				'Cannot call expand() if the appropriate ' .
				'SERVER/CANONICAL_SERVER/INTERNAL_SERVER option was not passed to the ' .
				'constructor'
			);
		}

		// Work out which protocol, if any, the chosen server URL carries
		$baseBits = $this->parse( $base );
		$baseProto = $baseBits && $baseBits['scheme'] != '' ? $baseBits['scheme'] . '://' : null;

		if ( $useCanonical || $useInternal ) {
			// Fall back to HTTP in the ridiculous case that CanonicalServer or InternalServer
			// doesn't have a protocol
			$defaultProto = $baseProto ?? PROTO_HTTP;
		}

		// E.g. 'https://' becomes 'https:', ready to be glued onto a '//host' string
		$protoPrefix = $defaultProto === null ? '' : substr( $defaultProto, 0, -2 );

		if ( substr( $url, 0, 2 ) == '//' ) {
			// Protocol-relative URL: it only needs a protocol
			$url = $protoPrefix . $url;
		} elseif ( $baseProto ) {
			// The server already has a protocol, so use it unchanged
			$url = $base . $url;
		} elseif ( $defaultProto === PROTO_HTTPS && $this->httpsPort != 443 ) {
			// If an HTTPS URL is synthesized from a protocol-relative Server, allow the
			// user to override the port number (T67184)
			if ( isset( $baseBits['port'] ) ) {
				throw new Exception( 'A protocol-relative server may not contain a port number' );
			}
			$url = $protoPrefix . $base . ':' . $this->httpsPort . $url;
		} else {
			$url = $protoPrefix . $base . $url;
		}
	}

	$parsed = $this->parse( $url );

	if ( !$parsed ) {
		// An unparseable URL not starting with a slash is treated as a relative path; one
		// starting with a slash is reported as invalid.
		return substr( $url, 0, 1 ) != '/' ? $this->removeDotSegments( $url ) : null;
	}

	if ( !isset( $parsed['path'] ) ) {
		# No path to expand
		return $url;
	}

	$parsed['path'] = $this->removeDotSegments( $parsed['path'] );
	return $this->assemble( $parsed );
}
|
||||
|
||||
/**
 * Return the wiki's "server" (the protocol and host portion of its URL), with the protocol
 * selected through a PROTO_* constant exactly as for expand().
 *
 * @throws BadMethodCallException if no server was passed to the constructor
 * @param string|int|null $proto One of the PROTO_* constants.
 * @return ?string The server URL without a trailing slash, or null on failure
 */
public function getServer( $proto ): ?string {
	// Expand the bare root path; on success the result ends with the '/' we passed in.
	$rootUrl = $this->expand( '/', $proto );

	// Strip that final character again, unless expansion failed.
	return $rootUrl === null ? null : substr( $rootUrl, 0, -1 );
}
|
||||
|
||||
/**
 * Rebuild a URL string from the component array produced by parse(). Handy when a single
 * piece of a URL has to be changed and the whole thing put back together afterwards.
 *
 * Layout of the result (bracketed names are keys of $urlParts):
 * [scheme][delimiter][user]:[pass]@[host]:[port][path]?[query]#[fragment]
 *
 * Details:
 * - the scheme is only emitted when a delimiter is present;
 * - user, pass and port are only emitted when a host is present;
 * - an empty query string is dropped together with its '?'.
 *
 * @todo Need to integrate this into expand() (see T34168)
 *
 * @param array $urlParts URL parts, as output from parse()
 * @return string URL assembled from its component parts
 */
public function assemble( array $urlParts ): string {
	// Scheme and delimiter, e.g. "http://", "mailto:" or just "//".
	$prefix = '';
	if ( isset( $urlParts['delimiter'] ) ) {
		$prefix = ( $urlParts['scheme'] ?? '' ) . $urlParts['delimiter'];
	}

	// Authority: optional credentials, the host, and an optional port.
	$authority = '';
	if ( isset( $urlParts['host'] ) ) {
		if ( isset( $urlParts['user'] ) ) {
			$credentials = $urlParts['user'];
			if ( isset( $urlParts['pass'] ) ) {
				$credentials .= ':' . $urlParts['pass'];
			}
			$authority = $credentials . '@';
		}

		$authority .= $urlParts['host'];

		if ( isset( $urlParts['port'] ) ) {
			$authority .= ':' . $urlParts['port'];
		}
	}

	$path = $urlParts['path'] ?? '';

	// An empty query is skipped entirely so that no dangling '?' is produced.
	$hasQuery = isset( $urlParts['query'] ) && $urlParts['query'] !== '';
	$query = $hasQuery ? '?' . $urlParts['query'] : '';

	$fragment = isset( $urlParts['fragment'] ) ? '#' . $urlParts['fragment'] : '';

	return $prefix . $authority . $path . $query . $fragment;
}
|
||||
|
||||
/**
 * Strip every dot-segment ("." and "..") from a URL path, e.g. '/a/./b/../c/' becomes
 * '/a/c/'. The steps (A to E) follow the algorithm of RFC3986 section 5.2.4.
 *
 * @todo Need to integrate this into expand() (see T34168)
 *
 * @param string $urlPath URL path, potentially containing dot-segments
 * @return string URL path with all dot-segments removed
 */
public function removeDotSegments( string $urlPath ): string {
	$result = '';
	$pos = 0;
	$length = strlen( $urlPath );

	while ( $pos < $length ) {
		// Number of not-yet-consumed bytes; used for the "at end of input" tests.
		$left = $length - $pos;
		$head1 = substr( $urlPath, $pos, 1 );
		$head2 = substr( $urlPath, $pos, 2 );
		$head3 = substr( $urlPath, $pos, 3 );
		$head4 = substr( $urlPath, $pos, 4 );
		$dropLastSegment = false;

		if ( $head2 === './' ) {
			# Step A, remove leading "./"
			$pos += 2;
		} elseif ( $head3 === '../' ) {
			# Step A, remove leading "../"
			$pos += 3;
		} elseif ( $head2 === '/.' && $left === 2 ) {
			# Step B, replace leading "/.$" with "/"
			# (the '.' is overwritten in place so the next pass sees a plain "/")
			$pos++;
			$urlPath[$pos] = '/';
		} elseif ( $head3 === '/./' ) {
			# Step B, replace leading "/./" with "/"
			$pos += 2;
		} elseif ( $head3 === '/..' && $left === 3 ) {
			# Step C, replace leading "/..$" with "/" and
			# remove last path component in output
			$pos += 2;
			$urlPath[$pos] = '/';
			$dropLastSegment = true;
		} elseif ( $head4 === '/../' ) {
			# Step C, replace leading "/../" with "/" and
			# remove last path component in output
			$pos += 3;
			$dropLastSegment = true;
		} elseif ( $head1 === '.' && $left === 1 ) {
			# Step D, remove "^.$"
			$pos++;
		} elseif ( $head2 === '..' && $left === 2 ) {
			# Step D, remove "^..$"
			$pos += 2;
		} else {
			# Step E, move leading path segment to output.
			# A leading slash belongs to the segment, so look for the slash after it.
			$searchFrom = $head1 === '/' ? $pos + 1 : $pos;
			$nextSlash = strpos( $urlPath, '/', $searchFrom );
			if ( $nextSlash === false ) {
				$result .= substr( $urlPath, $pos );
				$pos = $length;
			} else {
				$result .= substr( $urlPath, $pos, $nextSlash - $pos );
				$pos = $nextSlash;
			}
		}

		if ( $dropLastSegment ) {
			// Cut the output back to just before its last slash (or empty it).
			$lastSlash = strrpos( $result, '/' );
			$result = $lastSlash === false ? '' : substr( $result, 0, $lastSlash );
		}
	}

	return $result;
}
|
||||
|
||||
/**
 * Get a regular expression matching the recognized URL protocols. The expression is built
 * on first use (with protocol-relative '//' allowed through) and then reused from the
 * instance cache.
 *
 * @return string
 */
public function validProtocols(): string {
	if ( $this->validProtocolsCache === null ) {
		// First call on this instance: build the expression and remember it.
		$this->validProtocolsCache = $this->validProtocolsInternal( true );
	}

	return $this->validProtocolsCache;
}
|
||||
|
||||
/**
 * Same as validProtocols(), except that '//' is left out of the protocol list. Use this when
 * a regex is needed that matches every URL protocol but not protocol-relative URLs. The
 * expression is built on first use and then reused from the instance cache.
 *
 * @return string
 */
public function validAbsoluteProtocols(): string {
	if ( $this->validAbsoluteProtocolsCache === null ) {
		// First call on this instance: build the expression and remember it.
		$this->validAbsoluteProtocolsCache = $this->validProtocolsInternal( false );
	}

	return $this->validAbsoluteProtocolsCache;
}
|
||||
|
||||
/**
 * Build the regular expression of URL protocols: every configured protocol is escaped for
 * use inside a '/'-delimited pattern and the results are joined with '|'.
 *
 * A configured value that is not an array is deprecated; it is returned as-is (cast to
 * string) and $includeProtocolRelative has no effect on it.
 *
 * @param bool $includeProtocolRelative If false, remove '//' from the returned protocol list.
 * @return string
 */
private function validProtocolsInternal( bool $includeProtocolRelative ): string {
	$configured = $this->validProtocols;

	if ( !is_array( $configured ) ) {
		MWDebug::deprecated( '$wgUrlProtocols that is not an array', '1.39' );
		return (string)$configured;
	}

	if ( !$includeProtocolRelative ) {
		// Drop the protocol-relative marker, keep everything else.
		$configured = array_filter(
			$configured,
			static function ( $protocol ) {
				return $protocol !== '//';
			}
		);
	}

	$escaped = array_map(
		static function ( $protocol ) {
			return preg_quote( $protocol, '/' );
		},
		$configured
	);

	return implode( '|', $escaped );
}
|
||||
|
||||
/**
 * parse_url() work-alike, but non-broken. Differences:
 *
 * 1) Handles protocols that don't use :// (e.g., mailto: and news:, as well as
 *    protocol-relative URLs) correctly.
 * 2) Adds a "delimiter" element to the array, holding the separator that follows the scheme
 *    for the cases in (1): '://', ':' or '//'.
 * 3) Verifies that the protocol is on the UrlProtocols allowed list.
 * 4) Rejects some invalid URLs that parse_url doesn't, e.g. the empty string or URLs starting
 *    with a line feed character.
 *
 * @param string $url A URL to parse
 * @return ?array Bits of the URL in an associative array, or null on failure. All values are
 *   strings except 'port', which is the integer reported by parse_url().
 *   Possible fields:
 *   - scheme: URI scheme (protocol), e.g. 'http', 'mailto'. Lowercase, always present, but can
 *       be an empty string for protocol-relative URLs.
 *   - delimiter: either '://', ':' or '//'. Always present.
 *   - host: domain name / IP. Always present, but could be an empty string, e.g. for file: URLs.
 *   - port: port number. Will be missing when port is not explicitly specified.
 *   - user: user name, e.g. for HTTP Basic auth URLs such as http://user:pass@example.com/
 *       Missing when there is no username.
 *   - pass: password, same as above.
 *   - path: path including the leading /. Will be missing when empty (e.g. 'http://example.com')
 *   - query: query string (as a string; see wfCgiToArray() for parsing it), can be missing.
 *   - fragment: the part after #, can be missing.
 */
public function parse( string $url ): ?array {
	// Protocol-relative URLs are handled really badly by parse_url(). It's so bad that the
	// easiest way to handle them is to just prepend 'http:' and strip the protocol out later.
	$wasRelative = substr( $url, 0, 2 ) === '//';
	if ( $wasRelative ) {
		$url = "http:$url";
	}

	$bits = parse_url( $url );
	// parse_url() returns an array without scheme for some invalid URLs, e.g.
	// parse_url("%0Ahttp://example.com") == [ 'host' => '%0Ahttp', 'path' => 'example.com' ]
	if ( !$bits || !isset( $bits['scheme'] ) ) {
		return null;
	}

	// parse_url() incorrectly handles schemes case-sensitively. Convert it to lowercase.
	$bits['scheme'] = strtolower( $bits['scheme'] );

	// Most of the protocols are followed by ://, but mailto: and sometimes news: are not, so
	// check both forms. The lookups are strict: this is an allow-list, and a loose comparison
	// would let a stray non-string entry (e.g. true) match every scheme.
	if ( in_array( $bits['scheme'] . '://', $this->validProtocols, true ) ) {
		$bits['delimiter'] = '://';
	} elseif ( in_array( $bits['scheme'] . ':', $this->validProtocols, true ) ) {
		$bits['delimiter'] = ':';
		// parse_url detects for news: and mailto: the host part of an url as path
		// We have to correct this wrong detection
		if ( isset( $bits['path'] ) ) {
			$bits['host'] = $bits['path'];
			$bits['path'] = '';
		}
	} else {
		// Scheme is not on the allowed list.
		return null;
	}

	// Provide an empty host for, e.g., file:/// urls (see T30627)
	if ( !isset( $bits['host'] ) ) {
		$bits['host'] = '';

		// See T47069
		if ( isset( $bits['path'] ) ) {
			/* parse_url loses the third / for file:///c:/ urls (but not on variants) */
			if ( substr( $bits['path'], 0, 1 ) !== '/' ) {
				$bits['path'] = '/' . $bits['path'];
			}
		} else {
			$bits['path'] = '';
		}
	}

	// If the URL was protocol-relative, undo the temporary 'http:' prefix: fix scheme and
	// delimiter
	if ( $wasRelative ) {
		$bits['scheme'] = '';
		$bits['delimiter'] = '//';
	}

	return $bits;
}
|
||||
|
||||
/**
 * Expand a URL to its fully qualified form via expand(), then turn every run of
 * percent-encoded bytes in the range %80-%FF (i.e. encoded non-ASCII characters) back into
 * the raw bytes, for more compact display and better legibility for local audiences.
 *
 * @todo handle punycode domains too
 *
 * @throws BadMethodCallException if no server was passed to the constructor
 * @param string $url
 * @return ?string
 */
public function expandIRI( string $url ): ?string {
	$absolute = $this->expand( $url );

	if ( $absolute !== null ) {
		// Decode each maximal run of %8x..%Fx escapes in one go.
		$decodeRun = static function ( array $matches ) {
			return urldecode( $matches[1] );
		};

		return preg_replace_callback( '/((?:%[89A-F][0-9A-F])+)/i', $decodeRun, $absolute );
	}

	return null;
}
|
||||
|
||||
/**
 * Check whether the host of a given URL is one of the given domains or a subdomain of one.
 *
 * Both the host and each candidate get a '.' prepended before the suffix comparison, so a
 * candidate only matches on a label boundary: 'nl.wikipedia.org' does not match the host
 * 'nds-nl.wikipedia.org'. The comparison is case-sensitive.
 *
 * @param string $url
 * @param array $domains Array of domains (strings)
 * @return bool True if the host part of $url ends in one of the strings in $domains;
 *   false otherwise, including when $url cannot be parsed
 */
public function matchesDomainList( string $url, array $domains ): bool {
	$parsed = $this->parse( $url );
	if ( !is_array( $parsed ) || !isset( $parsed['host'] ) ) {
		return false;
	}

	$dottedHost = '.' . $parsed['host'];
	foreach ( $domains as $candidate ) {
		$suffix = '.' . $candidate;
		if ( substr( $dottedHost, -strlen( $suffix ) ) === $suffix ) {
			return true;
		}
	}

	return false;
}
|
||||
}
|
||||
|
|
@ -248,6 +248,9 @@ $wgAutoloadClasses += [
|
|||
# tests/phpunit/unit/includes/libs/filebackend/fsfile
|
||||
'TempFSFileTestTrait' => "$testDir/phpunit/unit/includes/libs/filebackend/fsfile/TempFSFileTestTrait.php",
|
||||
|
||||
# tests/phpunit/unit/includes/utils
|
||||
'UrlUtilsTest' => "$testDir/phpunit/unit/includes/utils/UrlUtilsTest.php",
|
||||
|
||||
# tests/phpunit/includes/unit/password
|
||||
'PasswordTestCase' => "$testDir/phpunit/unit/includes/password/PasswordTestCase.php",
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,12 @@ trait MediaWikiTestCaseTrait {
|
|||
/** @var int|null */
|
||||
private $originalPhpErrorFilter;
|
||||
|
||||
/** @var array */
|
||||
private $expectedDeprecations = [];
|
||||
|
||||
/** @var array */
|
||||
private $actualDeprecations = [];
|
||||
|
||||
/**
|
||||
* Returns a PHPUnit constraint that matches (with `===`) anything other than a fixed set of values.
|
||||
* This can be used to list accepted values, e.g.
|
||||
|
|
@ -143,6 +149,32 @@ trait MediaWikiTestCaseTrait {
|
|||
MWDebug::filterDeprecationForTest( $regex );
|
||||
}
|
||||
|
||||
/**
 * Declare that a deprecation notice is expected, while suppressing it so that execution
 * continues and the deprecated functionality itself can still be tested for compatibility.
 *
 * The pattern is added to the list of expected deprecations, and a callback handed to
 * MWDebug::filterDeprecationForTest() adds it to the list of actual deprecations.
 *
 * @since 1.39
 *
 * @param string $regex Deprecation message that must be triggered.
 */
public function expectDeprecationAndContinue( string $regex ): void {
	$this->expectedDeprecations[] = $regex;

	// Notes down the pattern as "seen" whenever MWDebug invokes the callback.
	$markAsEmitted = function () use ( $regex ): void {
		$this->actualDeprecations[] = $regex;
	};

	MWDebug::filterDeprecationForTest( $regex, $markAsEmitted );
}
|
||||
|
||||
/**
 * Assert that every deprecation registered as expected was also recorded as emitted.
 * Does nothing when no deprecations were expected.
 *
 * @after
 */
public function checkExpectedDeprecationsOnTearDown(): void {
	if ( !$this->expectedDeprecations ) {
		return;
	}

	// Patterns that were expected but never recorded as emitted.
	$missing = array_diff( $this->expectedDeprecations, $this->actualDeprecations );

	$this->assertSame( [], $missing, 'Expected deprecation warning(s) were not emitted' );
}
|
||||
|
||||
/**
|
||||
* Check whether file contains given data.
|
||||
* @param string $fileName
|
||||
|
|
|
|||
|
|
@ -498,62 +498,14 @@ class GlobalTest extends MediaWikiIntegrationTestCase {
|
|||
* @dataProvider provideWfMatchesDomainList
|
||||
* @covers ::wfMatchesDomainList
|
||||
*/
|
||||
public function testWfMatchesDomainList( $url, $domains, $expected, $description ) {
|
||||
public function testWfMatchesDomainList( $url, $domains, $expected ) {
|
||||
$actual = wfMatchesDomainList( $url, $domains );
|
||||
$this->assertEquals( $expected, $actual, $description );
|
||||
$this->assertEquals( $expected, $actual );
|
||||
}
|
||||
|
||||
public static function provideWfMatchesDomainList() {
|
||||
$a = [];
|
||||
$protocols = [ 'HTTP' => 'http:', 'HTTPS' => 'https:', 'protocol-relative' => '' ];
|
||||
foreach ( $protocols as $pDesc => $p ) {
|
||||
$a = array_merge( $a, [
|
||||
[
|
||||
"$p//www.example.com",
|
||||
[],
|
||||
false,
|
||||
"No matches for empty domains array, $pDesc URL"
|
||||
],
|
||||
[
|
||||
"$p//www.example.com",
|
||||
[ 'www.example.com' ],
|
||||
true,
|
||||
"Exact match in domains array, $pDesc URL"
|
||||
],
|
||||
[
|
||||
"$p//www.example.com",
|
||||
[ 'example.com' ],
|
||||
true,
|
||||
"Match without subdomain in domains array, $pDesc URL"
|
||||
],
|
||||
[
|
||||
"$p//www.example2.com",
|
||||
[ 'www.example.com', 'www.example2.com', 'www.example3.com' ],
|
||||
true,
|
||||
"Exact match with other domains in array, $pDesc URL"
|
||||
],
|
||||
[
|
||||
"$p//www.example2.com",
|
||||
[ 'example.com', 'example2.com', 'example3,com' ],
|
||||
true,
|
||||
"Match without subdomain with other domains in array, $pDesc URL"
|
||||
],
|
||||
[
|
||||
"$p//www.example4.com",
|
||||
[ 'example.com', 'example2.com', 'example3,com' ],
|
||||
false,
|
||||
"Domain not in array, $pDesc URL"
|
||||
],
|
||||
[
|
||||
"$p//nds-nl.wikipedia.org",
|
||||
[ 'nl.wikipedia.org' ],
|
||||
false,
|
||||
"Non-matching substring of domain, $pDesc URL"
|
||||
],
|
||||
] );
|
||||
}
|
||||
|
||||
return $a;
|
||||
// Same tests as the UrlUtils method to ensure they don't fall out of sync
|
||||
return UrlUtilsTest::provideMatchesDomainList();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
<?php
|
||||
use MediaWiki\Utils\UrlUtils;
|
||||
|
||||
/**
|
||||
* @group GlobalFunctions
|
||||
* @covers ::wfExpandUrl
|
||||
|
|
@ -16,100 +18,18 @@ class WfExpandUrlTest extends MediaWikiIntegrationTestCase {
|
|||
}
|
||||
|
||||
public static function provideExpandableUrls() {
|
||||
$modes = [ 'http', 'https' ];
|
||||
$servers = [
|
||||
'http://example.com',
|
||||
'https://example.com',
|
||||
'//example.com',
|
||||
];
|
||||
$defaultProtos = [
|
||||
'http' => PROTO_HTTP,
|
||||
'https' => PROTO_HTTPS,
|
||||
'protocol-relative' => PROTO_RELATIVE,
|
||||
'current' => PROTO_CURRENT,
|
||||
'canonical' => PROTO_CANONICAL,
|
||||
];
|
||||
|
||||
foreach ( $modes as $currentProto ) {
|
||||
foreach ( $servers as $server ) {
|
||||
foreach ( $modes as $canServerMode ) {
|
||||
$canServer = "$canServerMode://example2.com";
|
||||
$conf = [
|
||||
'wgServer' => $server,
|
||||
'wgCanonicalServer' => $canServer,
|
||||
'wgHttpsPort' => 443,
|
||||
];
|
||||
foreach ( $defaultProtos as $protoDesc => $defaultProto ) {
|
||||
$case = "current: $currentProto, default: $protoDesc, server: $server, canonical: $canServer";
|
||||
yield "No-op fully-qualified http URL ($case)" => [
|
||||
'http://example.com',
|
||||
$conf, $currentProto, $defaultProto,
|
||||
'http://example.com',
|
||||
];
|
||||
yield "No-op fully-qualified https URL ($case)" => [
|
||||
'https://example.com',
|
||||
$conf, $currentProto, $defaultProto,
|
||||
'https://example.com',
|
||||
];
|
||||
yield "No-op rootless path-only URL ($case)" => [
|
||||
"wiki/FooBar",
|
||||
$conf, $currentProto, $defaultProto,
|
||||
'wiki/FooBar',
|
||||
];
|
||||
|
||||
// Determine expected protocol
|
||||
if ( $protoDesc === 'protocol-relative' ) {
|
||||
$p = '';
|
||||
} elseif ( $protoDesc === 'current' ) {
|
||||
$p = "$currentProto:";
|
||||
} elseif ( $protoDesc === 'canonical' ) {
|
||||
$p = "$canServerMode:";
|
||||
} else {
|
||||
$p = $protoDesc . ':';
|
||||
}
|
||||
yield "Expand protocol-relative URL ($case)" => [
|
||||
'//wikipedia.org',
|
||||
$conf, $currentProto, $defaultProto,
|
||||
"$p//wikipedia.org",
|
||||
];
|
||||
|
||||
// Determine expected server name
|
||||
if ( $protoDesc === 'canonical' ) {
|
||||
$srv = $canServer;
|
||||
} elseif ( $server === '//example.com' ) {
|
||||
$srv = $p . $server;
|
||||
} else {
|
||||
$srv = $server;
|
||||
}
|
||||
yield "Expand path that starts with slash ($case)" => [
|
||||
'/wiki/FooBar',
|
||||
$conf, $currentProto, $defaultProto,
|
||||
"$srv/wiki/FooBar",
|
||||
];
|
||||
}
|
||||
}
|
||||
}
|
||||
// Same tests as the UrlUtils method to ensure they don't fall out of sync
|
||||
foreach (
|
||||
UrlUtilsTest::provideExpand() as $key => [ $input, $options, $defaultProto, $expected ]
|
||||
) {
|
||||
$conf = [
|
||||
'wgServer' => $options[UrlUtils::SERVER] ?? null,
|
||||
'wgCanonicalServer' => $options[UrlUtils::CANONICAL_SERVER] ?? null,
|
||||
'wgInternalServer' => $options[UrlUtils::INTERNAL_SERVER] ?? null,
|
||||
'wgHttpsPort' => $options[UrlUtils::HTTPS_PORT] ?? 443,
|
||||
];
|
||||
yield $key =>
|
||||
[ $input, $conf, $options[UrlUtils::FALLBACK_PROTOCOL], $defaultProto, $expected ];
|
||||
}
|
||||
|
||||
$confRel111 = [
|
||||
'wgServer' => '//wiki.example.com',
|
||||
'wgCanonicalServer' => 'http://wiki.example.com',
|
||||
'wgHttpsPort' => 111,
|
||||
];
|
||||
yield "No-op foreign URL, ignore custom port config" => [
|
||||
'https://foreign.example.com/foo',
|
||||
$confRel111, 'https', PROTO_HTTPS,
|
||||
'https://foreign.example.com/foo',
|
||||
];
|
||||
yield "No-op foreign URL, preserve existing port" => [
|
||||
'https://foreign.example.com:222/foo',
|
||||
$confRel111, 'https', PROTO_HTTPS,
|
||||
'https://foreign.example.com:222/foo',
|
||||
];
|
||||
yield "Expand path with custom HTTPS port" => [
|
||||
'/foo',
|
||||
$confRel111, 'https', PROTO_HTTPS,
|
||||
'https://wiki.example.com:111/foo',
|
||||
];
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,177 +49,9 @@ class WfParseUrlTest extends MediaWikiIntegrationTestCase {
|
|||
|
||||
/**
|
||||
* Provider of URLs for testing wfParseUrl()
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public static function provideURLs() {
|
||||
return [
|
||||
[
|
||||
'//example.org',
|
||||
[
|
||||
'scheme' => '',
|
||||
'delimiter' => '//',
|
||||
'host' => 'example.org',
|
||||
]
|
||||
],
|
||||
[
|
||||
'http://example.org',
|
||||
[
|
||||
'scheme' => 'http',
|
||||
'delimiter' => '://',
|
||||
'host' => 'example.org',
|
||||
]
|
||||
],
|
||||
[
|
||||
'https://example.org',
|
||||
[
|
||||
'scheme' => 'https',
|
||||
'delimiter' => '://',
|
||||
'host' => 'example.org',
|
||||
]
|
||||
],
|
||||
[
|
||||
'http://id:key@example.org:123/path?foo=bar#baz',
|
||||
[
|
||||
'scheme' => 'http',
|
||||
'delimiter' => '://',
|
||||
'user' => 'id',
|
||||
'pass' => 'key',
|
||||
'host' => 'example.org',
|
||||
'port' => 123,
|
||||
'path' => '/path',
|
||||
'query' => 'foo=bar',
|
||||
'fragment' => 'baz',
|
||||
]
|
||||
],
|
||||
[
|
||||
'file://example.org/etc/php.ini',
|
||||
[
|
||||
'scheme' => 'file',
|
||||
'delimiter' => '://',
|
||||
'host' => 'example.org',
|
||||
'path' => '/etc/php.ini',
|
||||
]
|
||||
],
|
||||
[
|
||||
'file:///etc/php.ini',
|
||||
[
|
||||
'scheme' => 'file',
|
||||
'delimiter' => '://',
|
||||
'host' => '',
|
||||
'path' => '/etc/php.ini',
|
||||
]
|
||||
],
|
||||
[
|
||||
'file:///c:/',
|
||||
[
|
||||
'scheme' => 'file',
|
||||
'delimiter' => '://',
|
||||
'host' => '',
|
||||
'path' => '/c:/',
|
||||
]
|
||||
],
|
||||
[
|
||||
'mailto:id@example.org',
|
||||
[
|
||||
'scheme' => 'mailto',
|
||||
'delimiter' => ':',
|
||||
'host' => 'id@example.org',
|
||||
'path' => '',
|
||||
]
|
||||
],
|
||||
[
|
||||
'mailto:id@example.org?subject=Foo',
|
||||
[
|
||||
'scheme' => 'mailto',
|
||||
'delimiter' => ':',
|
||||
'host' => 'id@example.org',
|
||||
'path' => '',
|
||||
'query' => 'subject=Foo',
|
||||
]
|
||||
],
|
||||
[
|
||||
'mailto:?subject=Foo',
|
||||
[
|
||||
'scheme' => 'mailto',
|
||||
'delimiter' => ':',
|
||||
'host' => '',
|
||||
'path' => '',
|
||||
'query' => 'subject=Foo',
|
||||
]
|
||||
],
|
||||
[
|
||||
'invalid://test/',
|
||||
false
|
||||
],
|
||||
// T212067
|
||||
[
|
||||
'//evil.com?example.org/foo/bar',
|
||||
[
|
||||
'scheme' => '',
|
||||
'delimiter' => '//',
|
||||
'host' => 'evil.com',
|
||||
'query' => 'example.org/foo/bar',
|
||||
]
|
||||
],
|
||||
[
|
||||
'//evil.com?example.org/foo/bar?baz#quux',
|
||||
[
|
||||
'scheme' => '',
|
||||
'delimiter' => '//',
|
||||
'host' => 'evil.com',
|
||||
'query' => 'example.org/foo/bar?baz',
|
||||
'fragment' => 'quux',
|
||||
]
|
||||
],
|
||||
[
|
||||
'//evil.com?example.org?baz#quux',
|
||||
[
|
||||
'scheme' => '',
|
||||
'delimiter' => '//',
|
||||
'host' => 'evil.com',
|
||||
'query' => 'example.org?baz',
|
||||
'fragment' => 'quux',
|
||||
]
|
||||
],
|
||||
[
|
||||
'//evil.com?example.org#quux',
|
||||
[
|
||||
'scheme' => '',
|
||||
'delimiter' => '//',
|
||||
'host' => 'evil.com',
|
||||
'query' => 'example.org',
|
||||
'fragment' => 'quux',
|
||||
]
|
||||
],
|
||||
[
|
||||
'%0Ahttp://example.com',
|
||||
false,
|
||||
],
|
||||
[
|
||||
'http:///test.com',
|
||||
false,
|
||||
],
|
||||
// T294559
|
||||
[
|
||||
'//xy.wikimedia.org/wiki/Foo:1234',
|
||||
[
|
||||
'scheme' => '',
|
||||
'delimiter' => '//',
|
||||
'host' => 'xy.wikimedia.org',
|
||||
'path' => '/wiki/Foo:1234'
|
||||
]
|
||||
],
|
||||
[
|
||||
'//xy.wikimedia.org:8888/wiki/Foo:1234',
|
||||
[
|
||||
'scheme' => '',
|
||||
'delimiter' => '//',
|
||||
'host' => 'xy.wikimedia.org',
|
||||
'path' => '/wiki/Foo:1234',
|
||||
'port' => 8888,
|
||||
]
|
||||
],
|
||||
];
|
||||
// Same tests as the UrlUtils method to ensure they don't fall out of sync
|
||||
return UrlUtilsTest::provideParse();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,111 +18,9 @@ class WfAssembleUrlTest extends MediaWikiUnitTestCase {
|
|||
|
||||
/**
|
||||
* Provider of URL parts for testing wfAssembleUrl()
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public static function provideURLParts() {
|
||||
$schemes = [
|
||||
'' => [],
|
||||
'//' => [
|
||||
'delimiter' => '//',
|
||||
],
|
||||
'http://' => [
|
||||
'scheme' => 'http',
|
||||
'delimiter' => '://',
|
||||
],
|
||||
];
|
||||
|
||||
$hosts = [
|
||||
'' => [],
|
||||
'example.com' => [
|
||||
'host' => 'example.com',
|
||||
],
|
||||
'example.com:123' => [
|
||||
'host' => 'example.com',
|
||||
'port' => 123,
|
||||
],
|
||||
'id@example.com' => [
|
||||
'user' => 'id',
|
||||
'host' => 'example.com',
|
||||
],
|
||||
'id@example.com:123' => [
|
||||
'user' => 'id',
|
||||
'host' => 'example.com',
|
||||
'port' => 123,
|
||||
],
|
||||
'id:key@example.com' => [
|
||||
'user' => 'id',
|
||||
'pass' => 'key',
|
||||
'host' => 'example.com',
|
||||
],
|
||||
'id:key@example.com:123' => [
|
||||
'user' => 'id',
|
||||
'pass' => 'key',
|
||||
'host' => 'example.com',
|
||||
'port' => 123,
|
||||
],
|
||||
];
|
||||
|
||||
$cases = [];
|
||||
foreach ( $schemes as $scheme => $schemeParts ) {
|
||||
foreach ( $hosts as $host => $hostParts ) {
|
||||
foreach ( [ '', '/', '/0', '/path' ] as $path ) {
|
||||
foreach ( [ '', '0', 'query' ] as $query ) {
|
||||
foreach ( [ '', '0', 'fragment' ] as $fragment ) {
|
||||
$parts = array_merge(
|
||||
$schemeParts,
|
||||
$hostParts
|
||||
);
|
||||
$url = $scheme .
|
||||
$host .
|
||||
$path;
|
||||
|
||||
if ( $path !== '' ) {
|
||||
$parts['path'] = $path;
|
||||
}
|
||||
if ( $query !== '' ) {
|
||||
$parts['query'] = $query;
|
||||
$url .= '?' . $query;
|
||||
}
|
||||
if ( $fragment !== '' ) {
|
||||
$parts['fragment'] = $fragment;
|
||||
$url .= '#' . $fragment;
|
||||
}
|
||||
|
||||
$cases[] = [
|
||||
$parts,
|
||||
$url,
|
||||
];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$complexURL = 'http://id:key@example.org:321' .
|
||||
'/over/there?name=ferret&foo=bar#nose';
|
||||
$cases[] = [
|
||||
wfParseUrl( $complexURL ),
|
||||
$complexURL,
|
||||
];
|
||||
|
||||
// Account for parse_url() on PHP >= 8 returning an empty query field
|
||||
// for URLs ending with '?' such as "http://url.with.empty.query/foo?"
|
||||
// (T268852)
|
||||
$urlWithEmptyQuery = [
|
||||
'scheme' => 'http',
|
||||
'delimiter' => '://',
|
||||
'host' => 'url.with.empty.query',
|
||||
'path' => '/foo',
|
||||
'query' => '',
|
||||
];
|
||||
|
||||
$cases[] = [
|
||||
$urlWithEmptyQuery,
|
||||
'http://url.with.empty.query/foo'
|
||||
];
|
||||
|
||||
return $cases;
|
||||
// Same tests as the UrlUtils method to ensure they don't fall out of sync
|
||||
return UrlUtilsTest::provideAssemble();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,76 +18,9 @@ class WfRemoveDotSegmentsTest extends MediaWikiUnitTestCase {
|
|||
|
||||
/**
|
||||
* Provider of URL paths for testing wfRemoveDotSegments()
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public static function providePaths() {
|
||||
return [
|
||||
[ '/a/b/c/./../../g', '/a/g' ],
|
||||
[ 'mid/content=5/../6', 'mid/6' ],
|
||||
[ '/a//../b', '/a/b' ],
|
||||
[ '/.../a', '/.../a' ],
|
||||
[ '.../a', '.../a' ],
|
||||
[ '', '' ],
|
||||
[ '/', '/' ],
|
||||
[ '//', '//' ],
|
||||
[ '.', '' ],
|
||||
[ '..', '' ],
|
||||
[ '...', '...' ],
|
||||
[ '/.', '/' ],
|
||||
[ '/..', '/' ],
|
||||
[ './', '' ],
|
||||
[ '../', '' ],
|
||||
[ './a', 'a' ],
|
||||
[ '../a', 'a' ],
|
||||
[ '../../a', 'a' ],
|
||||
[ '.././a', 'a' ],
|
||||
[ './../a', 'a' ],
|
||||
[ '././a', 'a' ],
|
||||
[ '../../', '' ],
|
||||
[ '.././', '' ],
|
||||
[ './../', '' ],
|
||||
[ '././', '' ],
|
||||
[ '../..', '' ],
|
||||
[ '../.', '' ],
|
||||
[ './..', '' ],
|
||||
[ './.', '' ],
|
||||
[ '/../../a', '/a' ],
|
||||
[ '/.././a', '/a' ],
|
||||
[ '/./../a', '/a' ],
|
||||
[ '/././a', '/a' ],
|
||||
[ '/../../', '/' ],
|
||||
[ '/.././', '/' ],
|
||||
[ '/./../', '/' ],
|
||||
[ '/././', '/' ],
|
||||
[ '/../..', '/' ],
|
||||
[ '/../.', '/' ],
|
||||
[ '/./..', '/' ],
|
||||
[ '/./.', '/' ],
|
||||
[ 'b/../../a', '/a' ],
|
||||
[ 'b/.././a', '/a' ],
|
||||
[ 'b/./../a', '/a' ],
|
||||
[ 'b/././a', 'b/a' ],
|
||||
[ 'b/../../', '/' ],
|
||||
[ 'b/.././', '/' ],
|
||||
[ 'b/./../', '/' ],
|
||||
[ 'b/././', 'b/' ],
|
||||
[ 'b/../..', '/' ],
|
||||
[ 'b/../.', '/' ],
|
||||
[ 'b/./..', '/' ],
|
||||
[ 'b/./.', 'b/' ],
|
||||
[ '/b/../../a', '/a' ],
|
||||
[ '/b/.././a', '/a' ],
|
||||
[ '/b/./../a', '/a' ],
|
||||
[ '/b/././a', '/b/a' ],
|
||||
[ '/b/../../', '/' ],
|
||||
[ '/b/.././', '/' ],
|
||||
[ '/b/./../', '/' ],
|
||||
[ '/b/././', '/b/' ],
|
||||
[ '/b/../..', '/' ],
|
||||
[ '/b/../.', '/' ],
|
||||
[ '/b/./..', '/' ],
|
||||
[ '/b/./.', '/b/' ],
|
||||
];
|
||||
// Same tests as the UrlUtils method to ensure they don't fall out of sync
|
||||
return UrlUtilsTest::provideRemoveDotSegments();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
753
tests/phpunit/unit/includes/utils/UrlUtilsTest.php
Normal file
753
tests/phpunit/unit/includes/utils/UrlUtilsTest.php
Normal file
|
|
@ -0,0 +1,753 @@
|
|||
<?php
|
||||
use MediaWiki\Utils\UrlUtils;
|
||||
|
||||
/**
|
||||
* @coversDefaultClass \MediaWiki\Utils\UrlUtils
|
||||
* @covers ::__construct
|
||||
*/
|
||||
class UrlUtilsTest extends MediaWikiUnitTestCase {
|
||||
private const DEFAULT_PROTOS = [
|
||||
'http' => PROTO_HTTP,
|
||||
'https' => PROTO_HTTPS,
|
||||
'protocol-relative' => PROTO_RELATIVE,
|
||||
'current' => PROTO_CURRENT,
|
||||
'fallback' => PROTO_FALLBACK,
|
||||
'canonical' => PROTO_CANONICAL,
|
||||
'internal' => PROTO_INTERNAL,
|
||||
];
|
||||
|
||||
public function testConstructError(): void {
	// An option key the constructor does not know must be rejected.
	$badOptions = [ 'unrecognized' => true ];

	$this->expectException( InvalidArgumentException::class );
	$this->expectExceptionMessage( 'Unrecognized option "unrecognized"' );

	new UrlUtils( $badOptions );
}
|
||||
|
||||
/**
 * expand() must throw the expected exception for each option set from the provider.
 *
 * @covers ::expand
 * @dataProvider provideExpandException
 * @param array $options Constructor options for UrlUtils
 * @param string|int|null $defaultProto
 * @param string $expectedClass Expected class of exception
 * @param string $expectedMsg Expected exception message
 */
public function testExpandException(
	array $options, $defaultProto, string $expectedClass, string $expectedMsg
): void {
	$this->expectException( $expectedClass );
	$this->expectExceptionMessage( $expectedMsg );

	( new UrlUtils( $options ) )->expand( '/', $defaultProto );
}
|
||||
|
||||
/**
 * Cases for testExpandException(): for every PROTO_* value, an option set in which the
 * server option unset for that case is SERVER, plus CANONICAL_SERVER for PROTO_CANONICAL or
 * INTERNAL_SERVER for PROTO_INTERNAL; then one case with a protocol-relative server that
 * carries a port together with a custom HTTPS port.
 *
 * @return Generator
 */
public static function provideExpandException(): Generator {
	$missingServerMsg =
		'Cannot call expand() if the appropriate SERVER/CANONICAL_SERVER/INTERNAL_SERVER' .
		' option was not passed to the constructor';

	foreach ( self::DEFAULT_PROTOS as $label => $proto ) {
		yield "defaultProto $label with appropriate options unset" => [
			[
				UrlUtils::SERVER => null,
				UrlUtils::CANONICAL_SERVER =>
					$proto === PROTO_CANONICAL ? null : 'http://example.com',
				UrlUtils::INTERNAL_SERVER =>
					$proto === PROTO_INTERNAL ? null : 'http://example.com',
			],
			$proto,
			BadMethodCallException::class,
			$missingServerMsg,
		];
	}

	yield 'protocol-relative server with port and custom HTTPS port' => [
		[ UrlUtils::SERVER => '//example.com:123', UrlUtils::HTTPS_PORT => 456 ],
		PROTO_HTTPS,
		Exception::class,
		'A protocol-relative server may not contain a port number',
	];
}
|
||||
|
||||
/**
 * expand() must return exactly the expected string (or null) for each provided case.
 *
 * @covers ::expand
 * @dataProvider provideExpand
 * @param string $input URL to expand
 * @param array $options Constructor options for UrlUtils
 * @param string|int|null $defaultProto
 * @param ?string $expected
 */
public function testExpand(
	string $input, array $options, $defaultProto, ?string $expected
): void {
	$actual = ( new UrlUtils( $options ) )->expand( $input, $defaultProto );

	$this->assertSame( $expected, $actual );
}
|
||||
|
||||
/**
 * Build every combination of UrlUtils options and default protocol used by
 * provideExpand(), so that method does not have to nest all of these loops itself.
 *
 * @return Generator Yields [ options array, key from DEFAULT_PROTOS, value from DEFAULT_PROTOS ]
 */
private static function provideExpandCases(): Generator {
	$serverValues = [ 'http://example.com', 'https://example.com', '//example.com' ];
	$canonicalValues = [ 'http://example2.com', 'https://example2.com' ];
	$internalValues = [ 'http://internal.com', 'https://internal.com' ];
	$fallbackValues = [ 'http', 'https' ];
	$httpsPortValues = [ 111, 443 ];

	foreach ( $serverValues as $server ) {
		foreach ( $canonicalValues as $canonical ) {
			foreach ( $internalValues as $internal ) {
				foreach ( $fallbackValues as $fallback ) {
					foreach ( $httpsPortValues as $port ) {
						$options = [
							UrlUtils::SERVER => $server,
							UrlUtils::CANONICAL_SERVER => $canonical,
							UrlUtils::INTERNAL_SERVER => $internal,
							UrlUtils::FALLBACK_PROTOCOL => $fallback,
							UrlUtils::HTTPS_PORT => $port,
						];

						// The same option set is paired with each default protocol in turn.
						foreach ( self::DEFAULT_PROTOS as $protoDesc => $defaultProto ) {
							yield [ $options, $protoDesc, $defaultProto ];
						}
					}
				}
			}
		}
	}
}
|
||||
|
||||
/**
 * Data provider for testExpand().
 *
 * For every option/default-protocol combination from provideExpandCases() this yields a
 * set of URLs expected to come back unchanged, one protocol-relative URL and one
 * root-relative path, followed by two hand-written corner cases.
 *
 * provideGetServer() filters these cases by the '/wiki/FooBar' input and by the
 * "Expand path that starts with slash (" name prefix, so both must stay as they are.
 *
 * @return Generator Yields name => [ input URL, options, default protocol, expected URL ]
 */
public static function provideExpand(): Generator {
	// Case name prefix => URL that is expected to be returned unchanged
	$noOpUrls = [
		'No-op fully-qualified http URL' => 'http://example.com',
		'No-op fully-qualified https URL' => 'https://example.com',
		'No-op fully-qualified https URL with port' => 'https://example.com:222',
		'No-op fully-qualified https URL with standard port' => 'https://example.com:443',
		'No-op rootless path-only URL' => 'wiki/FooBar',
	];

	foreach ( self::provideExpandCases() as [ $options, $protoDesc, $defaultProto ] ) {
		// Human-readable description of this combination, used in every case name
		$case = "default: $protoDesc";
		foreach ( $options as $key => $val ) {
			$case .= ", $key: $val";
		}

		foreach ( $noOpUrls as $name => $url ) {
			yield "$name ($case)" => [ $url, $options, $defaultProto, $url ];
		}

		// Determine expected protocol
		switch ( $protoDesc ) {
			case 'protocol-relative':
				$p = '';
				break;

			case 'fallback':
			case 'current':
				$p = $options[UrlUtils::FALLBACK_PROTOCOL] . ':';
				break;

			case 'canonical':
				// Everything before the first ':' of the configured server is its protocol
				$p = strtok( $options[UrlUtils::CANONICAL_SERVER], ':' ) . ':';
				break;

			case 'internal':
				$p = strtok( $options[UrlUtils::INTERNAL_SERVER], ':' ) . ':';
				break;

			case 'http':
			case 'https':
				$p = "$protoDesc:";
				break;
		}
		yield "Expand protocol-relative URL ($case)" => [
			'//wikipedia.org',
			$options, $defaultProto,
			"$p//wikipedia.org",
		];

		$serverIsRelative = substr( $options[UrlUtils::SERVER], 0, 2 ) === '//';

		// Determine expected server name
		if ( $protoDesc === 'canonical' ) {
			$srv = $options[UrlUtils::CANONICAL_SERVER];
		} elseif ( $protoDesc === 'internal' ) {
			$srv = $options[UrlUtils::INTERNAL_SERVER];
		} elseif ( $serverIsRelative ) {
			$srv = $p . $options[UrlUtils::SERVER];
		} else {
			$srv = $options[UrlUtils::SERVER];
		}

		// Add a port only if it's not the default port, the server
		// protocol is relative, and we're actually trying to produce an
		// HTTPS URL
		if ( $options[UrlUtils::HTTPS_PORT] !== 443 &&
			$serverIsRelative &&
			( $defaultProto === PROTO_HTTPS || ( $defaultProto === PROTO_FALLBACK &&
				$options[UrlUtils::FALLBACK_PROTOCOL] === 'https' ) )
		) {
			$srv .= ':' . $options[UrlUtils::HTTPS_PORT];
		}

		yield "Expand path that starts with slash ($case)" => [
			'/wiki/FooBar',
			$options, $defaultProto,
			"$srv/wiki/FooBar",
		];
	}

	// Silly corner cases
	yield 'CanonicalServer with no protocol' => [
		'/',
		[
			UrlUtils::CANONICAL_SERVER => '//dont.do.this',
			UrlUtils::FALLBACK_PROTOCOL => 'https',
		],
		PROTO_CANONICAL,
		'http://dont.do.this/',
	];
	yield 'InternalServer with no protocol' => [
		'/',
		[
			UrlUtils::INTERNAL_SERVER => '//dont.do.this',
			UrlUtils::FALLBACK_PROTOCOL => 'https',
		],
		PROTO_INTERNAL,
		'http://dont.do.this/',
	];

	// XXX Find something that this will actually return null for

	// XXX Add dot-removing tests
}
|
||||
|
||||
/**
 * Check that getServer() returns $expected for the given options.
 *
 * @covers ::getServer
 * @dataProvider provideGetServer
 * @param array $options Options handed to the UrlUtils constructor
 * @param string|int|null $defaultProto Argument handed to getServer()
 * @param string $expected Value getServer() must return
 */
public function testGetServer( array $options, $defaultProto, string $expected ): void {
	$actual = ( new UrlUtils( $options ) )->getServer( $defaultProto );

	$this->assertSame( $expected, $actual );
}
|
||||
|
||||
/**
 * Data provider for testGetServer(), derived from provideExpand().
 *
 * Only the provideExpand() cases whose input is '/wiki/FooBar' are reused: the path is
 * stripped from the expected URL, and the case name is reduced to the option description.
 *
 * @return Generator Yields name => [ options, default protocol, expected server ]
 */
public static function provideGetServer(): Generator {
	// str_replace() applies array searches in order, so the prefix goes first, then ')'.
	$nameNoise = [ 'Expand path that starts with slash (', ')' ];

	foreach (
		self::provideExpand() as $desc => [ $input, $options, $defaultProto, $expected ]
	) {
		if ( $input === '/wiki/FooBar' ) {
			$name = str_replace( $nameNoise, '', $desc );
			$server = str_replace( '/wiki/FooBar', '', $expected );

			yield $name => [ $options, $defaultProto, $server ];
		}
	}
}
|
||||
|
||||
/**
 * Check that assemble() builds $expected out of the URL parts in $bits.
 *
 * @covers ::assemble
 * @dataProvider provideAssemble
 * @param array $bits URL parts handed to assemble()
 * @param string $expected Value assemble() must return
 */
public function testAssemble( array $bits, string $expected ): void {
	$protocols = [ '//', 'http://', 'https://', 'file://', 'mailto:' ];
	$urlUtils = new UrlUtils( [ UrlUtils::VALID_PROTOCOLS => $protocols ] );

	$actual = $urlUtils->assemble( $bits );

	$this->assertSame( $expected, $actual );
}
|
||||
|
||||
/**
 * Data provider for testAssemble().
 *
 * Yields every combination of the scheme, authority, path, query and fragment variants
 * listed below, followed by two hand-written cases.
 *
 * @return Generator Yields [ URL parts array, expected assembled URL ]
 */
public static function provideAssemble(): Generator {
	// Expected URL prefix => parts that stand for it
	$schemeVariants = [
		'' => [],
		'//' => [ 'delimiter' => '//' ],
		'http://' => [ 'scheme' => 'http', 'delimiter' => '://' ],
	];

	// Expected authority string => parts that stand for it
	$hostVariants = [
		'' => [],
		'example.com' => [ 'host' => 'example.com' ],
		'example.com:123' => [ 'host' => 'example.com', 'port' => 123 ],
		'id@example.com' => [ 'user' => 'id', 'host' => 'example.com' ],
		'id@example.com:123' => [ 'user' => 'id', 'host' => 'example.com', 'port' => 123 ],
		'id:key@example.com' => [ 'user' => 'id', 'pass' => 'key', 'host' => 'example.com' ],
		'id:key@example.com:123' => [
			'user' => 'id', 'pass' => 'key', 'host' => 'example.com', 'port' => 123,
		],
	];

	$paths = [ '', '/', '/0', '/path' ];
	$queries = [ '', '0', 'query' ];
	$fragments = [ '', '0', 'fragment' ];

	foreach ( $schemeVariants as $prefix => $schemeBits ) {
		foreach ( $hostVariants as $authority => $hostBits ) {
			foreach ( $paths as $path ) {
				foreach ( $queries as $query ) {
					foreach ( $fragments as $fragment ) {
						$bits = array_merge( $schemeBits, $hostBits );
						$expected = $prefix . $authority . $path;

						// Empty components are left out of the parts array entirely;
						// '0' is deliberately among the values to catch loose emptiness checks.
						if ( $path !== '' ) {
							$bits['path'] = $path;
						}
						if ( $query !== '' ) {
							$bits['query'] = $query;
							$expected .= '?' . $query;
						}
						if ( $fragment !== '' ) {
							$bits['fragment'] = $fragment;
							$expected .= '#' . $fragment;
						}

						yield [ $bits, $expected ];
					}
				}
			}
		}
	}

	$everything = [
		'scheme' => 'http',
		'delimiter' => '://',
		'user' => 'id',
		'pass' => 'key',
		'host' => 'example.org',
		'port' => 321,
		'path' => '/over/there',
		'query' => 'name=ferret&foo=bar',
		'fragment' => 'nose',
	];
	yield [ $everything, 'http://id:key@example.org:321/over/there?name=ferret&foo=bar#nose' ];

	// Account for parse_url() on PHP >= 8 returning an empty query field for URLs ending with
	// '?' such as "http://url.with.empty.query/foo?" (T268852)
	$emptyQuery = [
		'scheme' => 'http',
		'delimiter' => '://',
		'host' => 'url.with.empty.query',
		'path' => '/foo',
		'query' => '',
	];
	yield [ $emptyQuery, 'http://url.with.empty.query/foo' ];
}
|
||||
|
||||
/**
 * Check that removeDotSegments() turns $input into $expected.
 *
 * @covers ::removeDotSegments
 * @dataProvider provideRemoveDotSegments
 * @param string $input Path handed to removeDotSegments()
 * @param string $expected Value removeDotSegments() must return
 */
public function testRemoveDotSegments( string $input, string $expected ): void {
	$urlUtils = new UrlUtils;

	$actual = $urlUtils->removeDotSegments( $input );

	$this->assertSame( $expected, $actual );
}
|
||||
|
||||
/**
 * Data provider for testRemoveDotSegments().
 *
 * @return Generator Yields [ input path, expected path ]
 */
public static function provideRemoveDotSegments(): Generator {
	// Input path => expected result. All inputs are distinct, non-numeric strings, so they
	// can safely serve as array keys.
	$expectations = [
		'/a/b/c/./../../g' => '/a/g',
		'mid/content=5/../6' => 'mid/6',
		'/a//../b' => '/a/b',
		'/.../a' => '/.../a',
		'.../a' => '.../a',
		'' => '',
		'/' => '/',
		'//' => '//',
		'.' => '',
		'..' => '',
		'...' => '...',
		'/.' => '/',
		'/..' => '/',
		'./' => '',
		'../' => '',
		'./a' => 'a',
		'../a' => 'a',

		// Two dot segments, no prefix
		'../../a' => 'a',
		'.././a' => 'a',
		'./../a' => 'a',
		'././a' => 'a',
		'../../' => '',
		'.././' => '',
		'./../' => '',
		'././' => '',
		'../..' => '',
		'../.' => '',
		'./..' => '',
		'./.' => '',

		// Two dot segments after a leading slash
		'/../../a' => '/a',
		'/.././a' => '/a',
		'/./../a' => '/a',
		'/././a' => '/a',
		'/../../' => '/',
		'/.././' => '/',
		'/./../' => '/',
		'/././' => '/',
		'/../..' => '/',
		'/../.' => '/',
		'/./..' => '/',
		'/./.' => '/',

		// Two dot segments after a relative directory
		'b/../../a' => '/a',
		'b/.././a' => '/a',
		'b/./../a' => '/a',
		'b/././a' => 'b/a',
		'b/../../' => '/',
		'b/.././' => '/',
		'b/./../' => '/',
		'b/././' => 'b/',
		'b/../..' => '/',
		'b/../.' => '/',
		'b/./..' => '/',
		'b/./.' => 'b/',

		// Two dot segments after an absolute directory
		'/b/../../a' => '/a',
		'/b/.././a' => '/a',
		'/b/./../a' => '/a',
		'/b/././a' => '/b/a',
		'/b/../../' => '/',
		'/b/.././' => '/',
		'/b/./../' => '/',
		'/b/././' => '/b/',
		'/b/../..' => '/',
		'/b/../.' => '/',
		'/b/./..' => '/',
		'/b/./.' => '/b/',
	];

	foreach ( $expectations as $input => $expected ) {
		yield [ $input, $expected ];
	}
}
|
||||
|
||||
/**
 * Check the string returned by validProtocols() / validAbsoluteProtocols() for a given
 * VALID_PROTOCOLS option. A non-array option is additionally expected to trigger the
 * deprecation warning matched below.
 *
 * @covers ::validProtocols
 * @covers ::validAbsoluteProtocols
 * @covers ::validProtocolsInternal
 * @dataProvider provideValidProtocols
 * @param string $method 'validProtocols' or 'validAbsoluteProtocols'
 * @param array|string $validProtocols Value of option passed to UrlUtils
 * @param string $expected Value the method must return
 */
public function testValidProtocols( string $method, $validProtocols, string $expected ): void {
	$optionIsArray = is_array( $validProtocols );
	if ( !$optionIsArray ) {
		$this->expectDeprecationAndContinue(
			'/Use of \$wgUrlProtocols that is not an array was deprecated in MediaWiki 1\.39/' );
	}

	$options = [ UrlUtils::VALID_PROTOCOLS => $validProtocols ];
	$actual = ( new UrlUtils( $options ) )->$method();

	$this->assertSame( $expected, $actual );
}
|
||||
|
||||
/**
 * Data provider for testValidProtocols().
 *
 * The first three cases are emitted once per method; the two '//' cases differ between
 * the methods and are listed individually.
 *
 * @return Generator Yields name => [ method name, VALID_PROTOCOLS option, expected string ]
 */
public static function provideValidProtocols(): Generator {
	// Case description => [ option value, expected result ], identical for both methods
	$sharedCases = [
		'with string for UrlProtocols' => [ 'some|string', 'some|string' ],
		'simple case' => [ [ 'foo', 'bar' ], 'foo|bar' ],
		'reserved characters' => [
			[ '^si|lly', 'in/valid', 'p[ro*t?o.c+o(ls$' ],
			'\^si\|lly|in\/valid|p\[ro\*t\?o\.c\+o\(ls\$',
		],
	];

	foreach ( [ 'validProtocols', 'validAbsoluteProtocols' ] as $method ) {
		foreach ( $sharedCases as $desc => [ $option, $expected ] ) {
			yield "$method $desc" => [ $method, $option, $expected ];
		}
	}

	$withRelative = [ 'a', '//', 'b' ];
	yield 'validProtocols with relative' =>
		[ 'validProtocols', $withRelative, 'a|\/\/|b' ];
	yield 'validAbsoluteProtocols with relative' =>
		[ 'validAbsoluteProtocols', $withRelative, 'a|b' ];
}
|
||||
|
||||
/**
 * Check that parse() splits $url into the parts listed in $expected.
 *
 * @covers ::parse
 * @dataProvider provideParse
 * @param string $url URL handed to parse()
 * @param ?array $expected Value parse() must return
 */
public function testParse( string $url, ?array $expected ): void {
	$protocols = [ '//', 'http://', 'https://', 'file://', 'mailto:' ];
	$urlUtils = new UrlUtils( [ UrlUtils::VALID_PROTOCOLS => $protocols ] );

	$actual = $urlUtils->parse( $url );

	// assertSame() is sensitive to key order, so both sides are sorted by key first.
	// Falsy values are left untouched.
	if ( $actual ) {
		ksort( $actual );
	}
	if ( $expected ) {
		ksort( $expected );
	}

	$this->assertSame( $expected, $actual );
}
|
||||
|
||||
/**
 * Data provider for testParse().
 *
 * @return Generator Yields [ URL, expected parts array or null ]
 */
public static function provideParse(): Generator {
	// URL => expected parts. All URLs are distinct, non-numeric strings, so they can
	// safely serve as array keys.
	$expectations = [
		'//example.org' => [
			'scheme' => '',
			'delimiter' => '//',
			'host' => 'example.org',
		],
		'http://example.org' => [
			'scheme' => 'http',
			'delimiter' => '://',
			'host' => 'example.org',
		],
		'https://example.org' => [
			'scheme' => 'https',
			'delimiter' => '://',
			'host' => 'example.org',
		],
		'http://id:key@example.org:123/path?foo=bar#baz' => [
			'scheme' => 'http',
			'delimiter' => '://',
			'user' => 'id',
			'pass' => 'key',
			'host' => 'example.org',
			'port' => 123,
			'path' => '/path',
			'query' => 'foo=bar',
			'fragment' => 'baz',
		],
		'file://example.org/etc/php.ini' => [
			'scheme' => 'file',
			'delimiter' => '://',
			'host' => 'example.org',
			'path' => '/etc/php.ini',
		],
		'file:///etc/php.ini' => [
			'scheme' => 'file',
			'delimiter' => '://',
			'host' => '',
			'path' => '/etc/php.ini',
		],
		'file:///c:/' => [
			'scheme' => 'file',
			'delimiter' => '://',
			'host' => '',
			'path' => '/c:/',
		],
		'mailto:id@example.org' => [
			'scheme' => 'mailto',
			'delimiter' => ':',
			'host' => 'id@example.org',
			'path' => '',
		],
		'mailto:id@example.org?subject=Foo' => [
			'scheme' => 'mailto',
			'delimiter' => ':',
			'host' => 'id@example.org',
			'path' => '',
			'query' => 'subject=Foo',
		],
		'mailto:?subject=Foo' => [
			'scheme' => 'mailto',
			'delimiter' => ':',
			'host' => '',
			'path' => '',
			'query' => 'subject=Foo',
		],
		'invalid://test/' => null,
		// T212067
		'//evil.com?example.org/foo/bar' => [
			'scheme' => '',
			'delimiter' => '//',
			'host' => 'evil.com',
			'query' => 'example.org/foo/bar',
		],
		'//evil.com?example.org/foo/bar?baz#quux' => [
			'scheme' => '',
			'delimiter' => '//',
			'host' => 'evil.com',
			'query' => 'example.org/foo/bar?baz',
			'fragment' => 'quux',
		],
		'//evil.com?example.org?baz#quux' => [
			'scheme' => '',
			'delimiter' => '//',
			'host' => 'evil.com',
			'query' => 'example.org?baz',
			'fragment' => 'quux',
		],
		'//evil.com?example.org#quux' => [
			'scheme' => '',
			'delimiter' => '//',
			'host' => 'evil.com',
			'query' => 'example.org',
			'fragment' => 'quux',
		],
		'%0Ahttp://example.com' => null,
		'http:///test.com' => null,
		// T294559
		'//xy.wikimedia.org/wiki/Foo:1234' => [
			'scheme' => '',
			'delimiter' => '//',
			'host' => 'xy.wikimedia.org',
			'path' => '/wiki/Foo:1234',
		],
		'//xy.wikimedia.org:8888/wiki/Foo:1234' => [
			'scheme' => '',
			'delimiter' => '//',
			'host' => 'xy.wikimedia.org',
			'path' => '/wiki/Foo:1234',
			'port' => 8888,
		],
	];

	foreach ( $expectations as $url => $parts ) {
		yield [ $url, $parts ];
	}
}
|
||||
|
||||
/**
 * Check that expandIRI() turns a percent-encoded URL into its readable Unicode form.
 *
 * @covers ::expandIRI
 */
public function testExpandIRI(): void {
	$encoded = "https://te.wikibooks.org/wiki/"
		. "%E0%B0%89%E0%B0%AC%E0%B1%81%E0%B0%82%E0%B0%9F%E0%B1%81_"
		. "%E0%B0%B5%E0%B0%BE%E0%B0%A1%E0%B1%81%E0%B0%95%E0%B0%B0%E0%B0%BF_"
		. "%E0%B0%AE%E0%B0%BE%E0%B0%B0%E0%B1%8D%E0%B0%97%E0%B0%A6%E0%B0%B0"
		. "%E0%B1%8D%E0%B0%B6%E0%B0%A8%E0%B0%BF";
	$decoded = "https://te.wikibooks.org/wiki/ఉబుంటు_వాడుకరి_మార్గదర్శని";

	$actual = ( new UrlUtils )->expandIRI( $encoded );

	$this->assertSame( $decoded, $actual );
}
|
||||
|
||||
/**
 * Check whether matchesDomainList() reports $url as matching one of $domains.
 *
 * @covers ::matchesDomainList
 * @dataProvider provideMatchesDomainList
 * @param string $url URL handed to matchesDomainList()
 * @param array $domains Domain list handed to matchesDomainList()
 * @param bool $expected Value matchesDomainList() must return
 */
public function testMatchesDomainList( string $url, array $domains, bool $expected ): void {
	$urlUtils = new UrlUtils;

	$actual = $urlUtils->matchesDomainList( $url, $domains );

	$this->assertSame( $expected, $actual );
}
|
||||
|
||||
/**
 * Data provider for testMatchesDomainList().
 *
 * Every case is emitted three times: with an http: URL, an https: URL and a
 * protocol-relative URL.
 *
 * @return Generator Yields name => [ URL, domain list, expected result ]
 */
public static function provideMatchesDomainList(): Generator {
	$protocols = [ 'HTTP' => 'http:', 'HTTPS' => 'https:', 'protocol-relative' => '' ];

	// Fixed: the last entry used to read 'example3,com' (comma instead of dot), so the
	// list contained a string that is not a domain name at all.
	$bareDomains = [ 'example.com', 'example2.com', 'example3.com' ];

	foreach ( $protocols as $pDesc => $p ) {
		yield "No matches for empty domains array, $pDesc URL" => [
			"$p//www.example.com",
			[],
			false,
		];
		yield "Exact match in domains array, $pDesc URL" => [
			"$p//www.example.com",
			[ 'www.example.com' ],
			true,
		];
		yield "Match without subdomain in domains array, $pDesc URL" => [
			"$p//www.example.com",
			[ 'example.com' ],
			true,
		];
		yield "Exact match with other domains in array, $pDesc URL" => [
			"$p//www.example2.com",
			[ 'www.example.com', 'www.example2.com', 'www.example3.com' ],
			true,
		];
		yield "Match without subdomain with other domains in array, $pDesc URL" => [
			"$p//www.example2.com",
			$bareDomains,
			true,
		];
		yield "Domain not in array, $pDesc URL" => [
			"$p//www.example4.com",
			$bareDomains,
			false,
		];
		// 'nl.wikipedia.org' is a suffix of the host string but not a parent domain of it
		yield "Non-matching substring of domain, $pDesc URL" => [
			"$p//nds-nl.wikipedia.org",
			[ 'nl.wikipedia.org' ],
			false,
		];
	}
}
|
||||
|
||||
}
|
||||
Loading…
Reference in a new issue