425 lines
12 KiB
PHP
425 lines
12 KiB
PHP
<?php
|
|
/**
|
|
* Backend for uploading files from a HTTP resource.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*
|
|
* @file
|
|
* @ingroup Upload
|
|
*/
|
|
|
|
use MediaWiki\Context\RequestContext;
|
|
use MediaWiki\HookContainer\HookRunner;
|
|
use MediaWiki\MainConfigNames;
|
|
use MediaWiki\MediaWikiServices;
|
|
use MediaWiki\Permissions\Authority;
|
|
use MediaWiki\Request\WebRequest;
|
|
use MediaWiki\Status\Status;
|
|
|
|
/**
|
|
* Implements uploading from a HTTP resource.
|
|
*
|
|
* @ingroup Upload
|
|
* @author Bryan Tong Minh
|
|
* @author Michael Dale
|
|
*/
|
|
class UploadFromUrl extends UploadBase {
|
|
/** @var string URL of the remote resource this upload is fetched from */
protected $mUrl;

/**
 * @var resource|null|false Handle of the temporary file while a download
 *  is running; set to false after a failed write and to null once the
 *  download has finished and the handle was closed.
 */
protected $mTmpHandle;

/** @var array<string,bool> Per-URL cache of the results computed by isAllowedUrl() */
protected static $allowedUrls = [];
|
|
|
|
/**
 * Permission check for the upload-by-URL feature.
 *
 * When the performer lacks the 'upload_by_url' right, the name of that
 * right is returned as a string. Otherwise the decision is delegated to
 * the parent class, which performs the remaining permission checks.
 *
 * @param Authority $performer
 *
 * @return bool|string
 */
public static function isAllowed( Authority $performer ) {
	$right = 'upload_by_url';

	return $performer->isAllowed( $right )
		? parent::isAllowed( $performer )
		: $right;
}
|
|
|
|
/**
 * Tells whether uploading from a URL is enabled.
 *
 * The feature is on only when the AllowCopyUploads setting is truthy and
 * the parent class reports uploads as enabled.
 *
 * @return bool
 */
public static function isEnabled() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	if ( !$config->get( MainConfigNames::AllowCopyUploads ) ) {
		return false;
	}

	return (bool)parent::isEnabled();
}
|
|
|
|
/**
 * Checks whether the URL is for an allowed host.
 *
 * The domains in the allowlist can include wildcard characters (*) in place
 * of any of the domain levels, e.g. '*.flickr.com' or 'upload.*.gov.uk'.
 * A wildcard stands for exactly one level, so an entry only matches hosts
 * with the same number of dot-separated pieces. Host names are compared
 * case-insensitively. An empty allowlist permits every host.
 *
 * @param string $url
 * @return bool True if the host is allowed (or the allowlist is empty),
 *  false if the URL cannot be parsed, has no host, or matches no entry.
 */
public static function isAllowedHost( $url ) {
	$domains = self::getAllowedHosts();
	if ( !count( $domains ) ) {
		return true;
	}
	$parsedUrl = wfGetUrlUtils()->parse( $url );
	if ( !$parsedUrl ) {
		return false;
	}

	// Host names are case-insensitive (RFC 3986, section 3.2.2), so both
	// sides are lower-cased before being compared piece by piece.
	$host = strtolower( $parsedUrl['host'] ?? '' );
	if ( $host === '' ) {
		// Without a host there is nothing to match; in particular a lone
		// '*' entry must not accept a host-less URL.
		return false;
	}

	// The upload host does not change between iterations: split it once.
	$uploadDomainPieces = explode( '.', $host );
	$uploadPieceCount = count( $uploadDomainPieces );

	foreach ( $domains as $domain ) {
		$domainPieces = explode( '.', strtolower( $domain ) );
		if ( count( $domainPieces ) !== $uploadPieceCount ) {
			continue;
		}

		// See if all the pieces match or not (excluding wildcards)
		$matches = true;
		foreach ( $domainPieces as $index => $piece ) {
			if ( $piece !== '*' && $piece !== $uploadDomainPieces[$index] ) {
				$matches = false;
				break;
			}
		}

		if ( $matches ) {
			// We found a match, so quit comparing against the list
			return true;
		}
	}

	return false;
}
|
|
|
|
/**
 * Builds the caching key for a set of upload-from-URL parameters.
 *
 * Used to set the status of an async job in UploadFromUrlJob
 * and retrieve it in frontend clients like ApiUpload. The key is the SHA-1
 * hex digest of the 'filename' and 'url' entries joined by '|||'. The empty
 * string is returned when either entry is missing.
 *
 * @param array $params
 * @return string
 */
public static function getCacheKey( $params ) {
	if ( isset( $params['filename'] ) && isset( $params['url'] ) ) {
		// sha1 is used purely to obtain a fixed-length string of printable
		// characters; nothing cryptographic depends on it, so a relatively
		// fast function is all that is needed.
		$material = sprintf( "%s|||%s", $params['filename'], $params['url'] );

		return sha1( $material );
	}

	return "";
}
|
|
|
|
/**
 * Get the caching key from a web request.
 *
 * An explicit key supplied as 'wpCacheKey' (or, failing that, 'key') is
 * returned as-is. Otherwise the key is derived with getCacheKey() from the
 * destination name ('wpDestFile', falling back to 'wpUploadFileURL') and
 * the trimmed 'wpUploadFileURL' value.
 *
 * @param WebRequest &$request
 *
 * @return string
 */
public static function getCacheKeyFromRequest( &$request ) {
	$uploadCacheKey = $request->getText( 'wpCacheKey', $request->getText( 'key', '' ) );
	if ( $uploadCacheKey !== '' ) {
		return $uploadCacheKey;
	}

	$desiredDestName = $request->getText( 'wpDestFile' );
	if ( !$desiredDestName ) {
		$desiredDestName = $request->getText( 'wpUploadFileURL' );
	}

	// getVal() yields null when the parameter is absent; passing null to
	// trim() is deprecated since PHP 8.1, so coalesce to '' first. The
	// resulting value is unchanged ( trim( null ) was '' as well ).
	$url = trim( $request->getVal( 'wpUploadFileURL' ) ?? '' );

	return self::getCacheKey(
		[
			'filename' => $desiredDestName,
			'url' => $url
		]
	);
}
|
|
|
|
/**
 * Collects the list of domains uploads may be copied from.
 *
 * Starts with the CopyUploadsDomains setting. When
 * CopyUploadAllowOnWikiDomainConfig is enabled, each non-empty line of the
 * 'copyupload-allowed-domains' message (in the content language) is
 * appended, after removing anything from the first '#' onwards and
 * surrounding whitespace.
 *
 * @return string[]
 */
private static function getAllowedHosts(): array {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$allowed = $config->get( MainConfigNames::CopyUploadsDomains );

	if ( !$config->get( MainConfigNames::CopyUploadAllowOnWikiDomainConfig ) ) {
		return $allowed;
	}

	$onWikiList = wfMessage( 'copyupload-allowed-domains' )->inContentLanguage()->plain();
	foreach ( explode( "\n", $onWikiList ) as $entry ) {
		// Keep only the part before a '#' comment, then drop outer whitespace
		$entry = trim( preg_replace( "/^\\s*([^#]*)\\s*((.*)?)$/", "\\1", $entry ) );
		if ( $entry === '' ) {
			continue;
		}
		$allowed[] = $entry;
	}

	return $allowed;
}
|
|
|
|
/**
 * Checks whether uploading from the given URL is allowed.
 *
 * The URL is allowed by default; handlers of the IsUploadAllowedFromUrl
 * hook may change that verdict. The outcome is remembered per URL in
 * self::$allowedUrls so the hook runs at most once for each URL that
 * produced a non-null verdict.
 *
 * @param string $url
 * @return bool
 */
public static function isAllowedUrl( $url ) {
	if ( isset( self::$allowedUrls[$url] ) ) {
		return self::$allowedUrls[$url];
	}

	$verdict = true;
	$hookRunner = new HookRunner( MediaWikiServices::getInstance()->getHookContainer() );
	// The hook receives $verdict by reference and may overwrite it
	$hookRunner->onIsUploadAllowedFromUrl( $url, $verdict );
	self::$allowedUrls[$url] = $verdict;

	return $verdict;
}
|
|
|
|
/**
 * Returns the source URL that was stored by initialize().
 *
 * @return string
 */
public function getUrl() {
	return $this->mUrl;
}
|
|
|
|
/**
 * Entry point for API upload.
 *
 * Remembers the source URL, creates the temporary file the download will
 * be written to and registers the path information for the upload.
 *
 * @param string $name
 * @param string $url
 */
public function initialize( $name, $url ) {
	$this->mUrl = $url;

	// The size (0) and the remove-temp-file flag (false) are placeholders;
	// both are filled in later, once the file has been fetched.
	$this->initializePathInfo( $name, $this->makeTemporaryFile(), 0, false );
}
|
|
|
|
/**
 * Entry point for SpecialUpload.
 *
 * The destination name is taken from 'wpDestFile', falling back to the
 * 'wpUploadFileURL' value when that is empty; the source URL is the
 * trimmed 'wpUploadFileURL' value.
 *
 * @param WebRequest &$request
 */
public function initializeFromRequest( &$request ) {
	$desiredDestName = $request->getText( 'wpDestFile' );
	if ( !$desiredDestName ) {
		$desiredDestName = $request->getText( 'wpUploadFileURL' );
	}

	// getVal() yields null when the parameter is absent; passing null to
	// trim() is deprecated since PHP 8.1, so coalesce to '' first. The
	// resulting value is unchanged ( trim( null ) was '' as well ).
	$url = trim( $request->getVal( 'wpUploadFileURL' ) ?? '' );

	$this->initialize( $desiredDestName, $url );
}
|
|
|
|
/**
 * Tells whether the request can be handled as an upload from URL.
 *
 * That is the case when 'wpUploadFileURL' carries a non-empty value and
 * the user of the main request context has the 'upload_by_url' right.
 *
 * @param WebRequest $request
 * @return bool
 */
public static function isValidRequest( $request ) {
	$user = RequestContext::getMain()->getUser();

	if ( !$request->getVal( 'wpUploadFileURL' ) ) {
		return false;
	}

	$permissionManager = MediaWikiServices::getInstance()->getPermissionManager();

	return (bool)$permissionManager->userHasRight( $user, 'upload_by_url' );
}
|
|
|
|
/**
 * Identifies the kind of source this upload backend reads from.
 *
 * @return string Always 'url'
 */
public function getSourceType() {
	return 'url';
}
|
|
|
|
/**
 * Download the file, provided the pre-flight checks pass.
 *
 * Runs canFetchFile() first and hands its Status back unchanged when it is
 * not good; only otherwise is the actual download attempted.
 *
 * @param array $httpOptions Array of options for MWHttpRequest.
 *   This could be used to override the timeout on the http request.
 * @return Status
 */
public function fetchFile( $httpOptions = [] ) {
	$preflight = $this->canFetchFile();

	return $preflight->isGood()
		? $this->reallyFetchFile( $httpOptions )
		: $preflight;
}
|
|
|
|
/**
 * Verify we can actually download the file.
 *
 * The checks run in this order and the first failure wins: the URL must be
 * a valid URI, its host must pass isAllowedHost(), and the URL must pass
 * isAllowedUrl().
 *
 * @return Status Good when every check passes, fatal otherwise
 */
public function canFetchFile() {
	$url = $this->mUrl;

	if ( !MWHttpRequest::isValidURI( $url ) ) {
		$result = Status::newFatal( 'http-invalid-url', $url );
	} elseif ( !self::isAllowedHost( $url ) ) {
		$result = Status::newFatal( 'upload-copy-upload-invalid-domain' );
	} elseif ( !self::isAllowedUrl( $url ) ) {
		$result = Status::newFatal( 'upload-copy-upload-invalid-url' );
	} else {
		$result = Status::newGood();
	}

	return $result;
}
|
|
|
|
/**
 * Create a new temporary file in the URL subdirectory of wfTempDir().
 *
 * The temporary file object is bound to this upload instance before its
 * path is handed back.
 *
 * @return string Path to the file
 */
protected function makeTemporaryFile() {
	$factory = MediaWikiServices::getInstance()->getTempFSFileFactory();

	$tempFile = $factory->newTempFSFile( 'URL', 'urlupload_' );
	$tempFile->bind( $this );

	return $tempFile->getPath();
}
|
|
|
|
/**
 * Callback: save a chunk of the result of a HTTP request to the temporary file.
 *
 * On a complete write the chunk length is added to the running file size.
 * On a short or failed write the temporary file handle is closed and set
 * to false, which reallyFetchFile() later reports as a write error.
 *
 * @param mixed $req
 * @param string $buffer
 * @return int Number of bytes handled; a value different from the length
 *  of $buffer signals that the chunk could not be stored completely.
 */
public function saveTempFileChunk( $req, $buffer ) {
	$chunkSize = strlen( $buffer );
	wfDebugLog( 'fileupload', 'Received chunk of ' . $chunkSize . ' bytes' );

	if ( !is_resource( $this->mTmpHandle ) ) {
		// The handle was already closed by an earlier failed write (or was
		// never opened). Calling fwrite() on a non-resource throws a
		// TypeError on PHP 8, so report "nothing handled" instead.
		wfDebugLog(
			'fileupload',
			'Temporary file is not open, aborting with ' . $this->mFileSize . ' uploaded so far'
		);

		return 0;
	}

	$nbytes = fwrite( $this->mTmpHandle, $buffer );

	// Strict comparison: fwrite() may return false, which must never be
	// mistaken for a successful zero-length write.
	if ( $nbytes === $chunkSize ) {
		$this->mFileSize += $nbytes;

		return $nbytes;
	}

	// Well... that's not good!
	$written = $nbytes === false ? 0 : $nbytes;
	wfDebugLog(
		'fileupload',
		'Short write ' . $written . '/' . $chunkSize .
			' bytes, aborting with ' . $this->mFileSize . ' uploaded so far'
	);
	fclose( $this->mTmpHandle );
	$this->mTmpHandle = false;

	return $written;
}
|
|
|
|
/**
 * Download the file, save it to the temporary file and update the file
 * size and set $mRemoveTempFile to true.
 *
 * Redirects are followed manually, up to $httpOptions['maxRedirects']
 * attempts (default 5). Before each follow-up request the temporary file
 * and the recorded file size are reset, so that only the body of the final
 * response ends up in the file and in the size.
 *
 * @param array $httpOptions Array of options for MWHttpRequest
 * @return Status Fatal 'tmp-create-error' if the temporary file cannot be
 *  opened, 'upload-too-many-redirects' if the redirect limit is exhausted,
 *  'tmp-write-error' if writing a chunk failed; otherwise the Status of
 *  the last HTTP request.
 */
protected function reallyFetchFile( $httpOptions = [] ) {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$copyUploadProxy = $config->get( MainConfigNames::CopyUploadProxy );
	$copyUploadTimeout = $config->get( MainConfigNames::CopyUploadTimeout );

	// Note the temporary file should already be created by makeTemporaryFile()
	$this->mTmpHandle = fopen( $this->mTempPath, 'wb' );
	if ( !$this->mTmpHandle ) {
		return Status::newFatal( 'tmp-create-error' );
	}
	wfDebugLog( 'fileupload', 'Temporary file created "' . $this->mTempPath . '"' );

	$this->mRemoveTempFile = true;
	$this->mFileSize = 0;

	// Redirects are handled by the loop below, never by the HTTP client
	// (caller-supplied options win over this default, as before).
	$options = $httpOptions + [ 'followRedirects' => false ];

	if ( $copyUploadProxy !== false ) {
		$options['proxy'] = $copyUploadProxy;
	}

	if ( $copyUploadTimeout && !isset( $options['timeout'] ) ) {
		$options['timeout'] = $copyUploadTimeout;
	}
	wfDebugLog(
		'fileupload',
		'Starting download from "' . $this->mUrl . '" ' .
			'<' . implode( ',', array_keys( array_filter( $options ) ) ) . '>'
	);

	// Manually follow any redirects up to the limit and reset the output file before each new request to prevent
	// capturing the redirect response as part of the file.
	// NOTE(review): redirect targets are not re-checked with isAllowedHost()/isAllowedUrl() here - confirm
	// whether that is intended.
	$attemptsLeft = $options['maxRedirects'] ?? 5;
	$targetUrl = $this->mUrl;
	$requestFactory = MediaWikiServices::getInstance()->getHttpRequestFactory();
	while ( $attemptsLeft > 0 ) {
		$req = $requestFactory->create( $targetUrl, $options, __METHOD__ );
		$req->setCallback( [ $this, 'saveTempFileChunk' ] );
		$status = $req->execute();
		if ( !$req->isRedirect() ) {
			break;
		}
		$targetUrl = $req->getFinalUrl();
		if ( !$this->mTmpHandle ) {
			// saveTempFileChunk() closed the handle after a failed write; there is nothing left to reset and
			// the write error is reported below.
			break;
		}
		// Remove redirect response content from file, and forget the bytes
		// saveTempFileChunk() counted for it.
		ftruncate( $this->mTmpHandle, 0 );
		rewind( $this->mTmpHandle );
		$this->mFileSize = 0;
		$attemptsLeft--;
	}

	// "<=" also covers a non-positive maxRedirects, for which the loop never ran and no request was made.
	if ( $attemptsLeft <= 0 ) {
		// Do not leak the open temporary file handle on this error path.
		if ( $this->mTmpHandle ) {
			fclose( $this->mTmpHandle );
		}
		$this->mTmpHandle = null;

		return Status::newFatal( 'upload-too-many-redirects' );
	}

	if ( $this->mTmpHandle ) {
		// File got written ok...
		fclose( $this->mTmpHandle );
		$this->mTmpHandle = null;
	} else {
		// We encountered a write error during the download...
		return Status::newFatal( 'tmp-write-error' );
	}

	// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Always set after loop
	if ( $status->isOK() ) {
		wfDebugLog( 'fileupload', 'Download by URL completed successfully.' );
	} else {
		// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Always set after loop
		wfDebugLog( 'fileupload', $status->getWikiText( false, false, 'en' ) );
		wfDebugLog(
			'fileupload',
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Always set after loop
			'Download by URL completed with HTTP status ' . $req->getStatus()
		);
	}

	// @phan-suppress-next-line PhanTypeMismatchReturnNullable,PhanPossiblyUndeclaredVariable Always set after loop
	return $status;
}
|
|
}
|