Avoid HTTP protocol errors when fastcgi_finish_request() is unavailable

This fixes problems that arise with apache2/mod_php due to deferred updates

* Do not send unnecessary and invalid "Content-Encoding: identity" header
* Do not send "Connection: close" if HTTP2 is detected (per the HTTP spec)
* Only set the Content-Length/Connection headers when deferred updates are
  pending, which reduces the use of output buffer and HTTP header tricks
* Make sure that no output is emitted in doPostOutputShutdown() from any
  deferred updates since the response will have already been flushed to
  the client by that point
* Make the Content-Length header logic in outputResponsePayload() account
  for cases where there is a non-empty output buffer, cases where there
  are several output buffers (bail out), and limit the use of the header
  to HTTP 200/404 responses (avoids violation of the HTTP spec)
* Make sure OutputHandler::handle() does not send payloads for responses
  that must not have one (e.g. "204 No Content")
* If an output buffer using OutputHandler::handle is active, then let it
  handle the setting of Content-Length rather than outputResponsePayload()
* Do not bother trying to disable zlib.output_compression, since that did
  not actually stop the client from getting blocked
* Set "no-gzip" via apache_setenv() unconditionally

Bug: T235554
Change-Id: I26f16457698c2c45e561b0c79c78a74e7f47126c
This commit is contained in:
Aaron Schulz 2021-03-22 17:25:17 -07:00
parent 6ce397faf5
commit f4f0ad970e
2 changed files with 161 additions and 68 deletions

View file

@ -26,7 +26,6 @@ use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MediaWikiServices;
use MediaWiki\Permissions\PermissionStatus;
use Psr\Log\LoggerInterface;
use Wikimedia\AtEase;
use Wikimedia\Rdbms\ChronologyProtector;
use Wikimedia\Rdbms\DBConnectionError;
@ -46,10 +45,12 @@ class MediaWiki {
/** @var int Class DEFER_* constant; how non-blocking post-response tasks should run */
private $postSendStrategy;
/** @var int Use fastcgi_finish_request() */
/** Call fastcgi_finish_request() to make post-send updates async */
private const DEFER_FASTCGI_FINISH_REQUEST = 1;
/** @var int Use ob_end_flush() after explicitly setting the Content-Length */
/** Set Content-Length and call ob_end_flush()/flush() to make post-send updates async */
private const DEFER_SET_LENGTH_AND_FLUSH = 2;
/** Do not try to make post-send updates async (e.g. for CLI mode) */
private const DEFER_CLI_MODE = 3;
/**
* @param IContextSource|null $context
@ -57,7 +58,10 @@ class MediaWiki {
public function __construct( IContextSource $context = null ) {
$this->context = $context ?: RequestContext::getMain();
$this->config = $this->context->getConfig();
if ( function_exists( 'fastcgi_finish_request' ) ) {
if ( $this->config->get( 'CommandLineMode' ) ) {
$this->postSendStrategy = self::DEFER_CLI_MODE;
} elseif ( function_exists( 'fastcgi_finish_request' ) ) {
$this->postSendStrategy = self::DEFER_FASTCGI_FINISH_REQUEST;
} else {
$this->postSendStrategy = self::DEFER_SET_LENGTH_AND_FLUSH;
@ -624,7 +628,11 @@ class MediaWiki {
$n = intval( $jobRunRate );
}
// Note that DeferredUpdates will catch and log an errors (T88312)
if ( wfReadOnly() ) {
return;
}
// Note that DeferredUpdates will catch and log any errors (T88312)
DeferredUpdates::addUpdate( new TransactionRoundDefiningUpdate( function () use ( $n ) {
$logger = LoggerFactory::getInstance( 'runJobs' );
if ( $this->config->get( 'RunJobsAsync' ) ) {
@ -825,37 +833,41 @@ class MediaWiki {
MWExceptionHandler::logException( $e, MWExceptionHandler::CAUGHT_BY_ENTRYPOINT );
}
// Disable WebResponse setters for post-send processing (T191537).
WebResponse::disableForPostSend();
// Defer everything else if possible...
$callback = function () {
try {
$this->restInPeace();
} catch ( Throwable $e ) {
// If this is post-send, then displaying errors can cause broken HTML
MWExceptionHandler::rollbackMasterChangesAndLog(
$e,
MWExceptionHandler::CAUGHT_BY_ENTRYPOINT
);
}
};
if ( $this->postSendStrategy === self::DEFER_FASTCGI_FINISH_REQUEST ) {
// Flush the output to the client, continue processing, and avoid further output
fastcgi_finish_request();
$callback();
} else {
// Flush PHP and web server output buffers
if ( !$this->config->get( 'CommandLineMode' ) ) {
AtEase\AtEase::suppressWarnings();
if ( ob_get_status() ) {
ob_end_flush();
}
flush();
AtEase\AtEase::restoreWarnings();
} elseif ( $this->postSendStrategy === self::DEFER_SET_LENGTH_AND_FLUSH ) {
// Flush the output to the client, continue processing, and avoid further output
if ( ob_get_level() ) {
// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
@ob_end_flush();
}
$callback();
// Flush the web server output buffer to the client/proxy if possible
// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
@flush();
}
// Since the headers and output were already flushed, disable WebResponse setters
// during post-send processing to avoid warnings and unexpected behavior (T191537)
WebResponse::disableForPostSend();
// Run post-send updates while preventing further output for sanity...
ob_start( function () {
return ''; // do not output uncaught exceptions
} );
try {
$this->restInPeace();
} catch ( Throwable $e ) {
MWExceptionHandler::rollbackMasterChangesAndLog(
$e,
MWExceptionHandler::CAUGHT_BY_ENTRYPOINT
);
}
$length = ob_get_length();
if ( $length > 0 ) {
trigger_error( __METHOD__ . ": suppressed $length byte(s)", E_USER_NOTICE );
}
ob_end_clean();
}
/**
@ -1032,44 +1044,72 @@ class MediaWiki {
}
/**
* Set the actual output and attempt to flush it to the client if necessary
* Print a response body to the current buffer (if there is one) or the server (otherwise)
*
* No PHP buffers should be active at this point
* This method should be called after doPreOutputCommit() and before doPostOutputShutdown()
*
* @param string $content
* Any accompanying Content-Type header is assumed to have already been set
*
* @param string $content Response content, usually from OutputPage::output()
*/
private function outputResponsePayload( $content ) {
// By default, usually one output buffer is active now, either the internal PHP buffer
// started by "output_buffering" in php.ini or the buffer started by MW_SETUP_CALLBACK.
// The MW_SETUP_CALLBACK buffer has an unlimited chunk size, while the internal PHP
// buffer only has an unlimited chunk size if output_buffering="On". If the buffer was
// filled up to the chunk size with printed data, then HTTP headers will have already
// been sent. Also, if the entry point had to stream content to the client, then HTTP
// headers will have already been sent as well, regardless of chunk size.
// Disable mod_deflate compression since it interferes with the output buffer set
// by MW_SETUP_CALLBACK and can also cause the client to wait on deferred updates
if ( function_exists( 'apache_setenv' ) ) {
// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
@apache_setenv( 'no-gzip', 1 );
}
if (
// "Content-Length" is used to prevent clients from waiting on deferred updates
$this->postSendStrategy === self::DEFER_SET_LENGTH_AND_FLUSH &&
// The HTTP response code clearly allows for a meaningful body
in_array( http_response_code(), [ 200, 404 ], true ) &&
// The queue of (post-send) deferred updates is non-empty
DeferredUpdates::pendingUpdatesCount() &&
// Any buffered output is not spread out across multiple output buffers
ob_get_level() <= 1 &&
// It is not too late to set additional HTTP headers
!headers_sent()
) {
$response = $this->context->getRequest()->response();
// Make the browser indicate the page as "loaded" as soon as it gets all the content
$response->header( 'Connection: close' );
// The client should not be blocked on "post-send" updates. If apache or ob_* decide
// that a response should be gzipped, the entire script will have to finish before
// any data can be sent. Disable compression if there are any post-send updates.
$response->header( 'Content-Encoding: identity' );
AtEase\AtEase::suppressWarnings();
ini_set( 'zlib.output_compression', 0 );
if ( function_exists( 'apache_setenv' ) ) {
apache_setenv( 'no-gzip', '1' );
$obStatus = ob_get_status();
if ( !isset( $obStatus['name'] ) ) {
// No output buffer is active
$response->header( 'Content-Length: ' . strlen( $content ) );
} elseif ( $obStatus['name'] === 'default output handler' ) {
// Internal PHP "output_buffering" output buffer (note that the internal PHP
// "zlib.output_compression" output buffer is named "zlib output compression")
$response->header( 'Content-Length: ' . ( ob_get_length() + strlen( $content ) ) );
}
AtEase\AtEase::restoreWarnings();
// Also set the Content-Length so that apache does not block waiting on PHP to finish.
// If OutputPage is disabled, then either there is no body (e.g. HTTP 304) and thus no
// Content-Length, or it was taken care of already.
if ( !$this->context->getOutput()->isDisabled() ) {
ob_start();
print $content;
$response->header( 'Content-Length: ' . ob_get_length() );
ob_end_flush();
// The MW_SETUP_CALLBACK output buffer ("MediaWiki\OutputHandler::handle") sets
// "Content-Length" where applicable. Other output buffer types might not set this
// header, and since they might mangle or compress the payload, it is not possible
// to determine the final payload size here.
// Tell the client to immediately end the connection as soon as the response payload
// has been read (informed by any "Content-Length" header). This prevents the client
// from waiting on deferred updates.
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
if ( ( $_SERVER['SERVER_PROTOCOL'] ?? '' ) === 'HTTP/1.1' ) {
$response->header( 'Connection: close' );
}
// @TODO: this still blocks on HEAD responses and 304 responses to GETs
} else {
print $content;
}
// Print the content *after* adjusting HTTP headers and disabling mod_deflate since
// calling "print" will send the output to the client if there is no output buffer or
// if the output buffer chunk size is reached
print $content;
}
/**

View file

@ -29,22 +29,51 @@ class OutputHandler {
/**
* Standard output handler for use with ob_start.
*
* Output buffers using this method should only be started from MW_SETUP_CALLBACK,
* and only if there are no parent output buffers.
*
* @param string $s Web response output
* @return string
*/
public static function handle( $s ) {
global $wgDisableOutputCompression, $wgMangleFlashPolicy;
if ( $wgMangleFlashPolicy ) {
$s = self::mangleFlashPolicy( $s );
}
if ( !$wgDisableOutputCompression && !ini_get( 'zlib.output_compression' ) ) {
if ( !defined( 'MW_NO_OUTPUT_COMPRESSION' ) ) {
$s = self::handleGzip( $s );
}
if ( !ini_get( 'output_handler' ) ) {
self::emitContentLength( strlen( $s ) );
}
// Sanity check if a compression output buffer is already enabled via php.ini. Such
// buffers exist at the start of the request and are reflected by ob_get_level().
$phpHandlesCompression = (
	ini_get( 'output_handler' ) === 'ob_gzhandler' ||
	ini_get( 'zlib.output_handler' ) === 'ob_gzhandler' ||
	!in_array(
		strtolower( ini_get( 'zlib.output_compression' ) ),
		[ '', 'off', '0' ]
	)
);
if (
	// Compression is not already handled by an internal PHP buffer
	!$phpHandlesCompression &&
	// Compression is not disabled by the application entry point
	!defined( 'MW_NO_OUTPUT_COMPRESSION' ) &&
	// Compression is not disabled by site configuration
	!$wgDisableOutputCompression
) {
	$s = self::handleGzip( $s );
}
if (
	// Response body length does not depend on internal PHP compression buffer
	!$phpHandlesCompression &&
	// Response body length does not depend on mangling by a custom buffer
	!ini_get( 'output_handler' ) &&
	!ini_get( 'zlib.output_handler' )
) {
	self::emitContentLength( strlen( $s ) );
}
return $s;
}
@ -81,7 +110,9 @@ class OutputHandler {
/**
* Handler that compresses data with gzip if allowed by the Accept header.
*
* Unlike ob_gzhandler, it works for HEAD requests too.
* Unlike ob_gzhandler, it works for HEAD requests too. This assumes that the application
* processes them as normal GET requests and that the webserver is tasked with stripping out
* the response body before sending the response to the client.
*
* @param string $s Web response output
* @return string
@ -105,6 +136,13 @@ class OutputHandler {
return $s;
}
if ( $s === '' ) {
// Do not gzip empty HTTP responses since that would not only bloat the body
// length, but it would result in invalid HTTP responses when the HTTP status code
// is one that must not be accompanied by a body (e.g. "204 No Content").
return $s;
}
if ( wfClientAcceptsGzip() ) {
wfDebug( __METHOD__ . "() is compressing output" );
header( 'Content-Encoding: gzip' );
@ -143,14 +181,29 @@ class OutputHandler {
}
/**
* Add a Content-Length header if possible. This makes it cooperate with CDN better.
* Set the Content-Length header if possible
*
* This sets Content-Length for the following cases:
* - When the response body is meaningful (HTTP 200/404).
* - On any response to an HTTP/1.0 request. This improves cooperation with certain CDNs.
*
* This assumes that HEAD requests are processed as GET requests by MediaWiki and that
* the webserver is tasked with stripping out the body.
*
* Setting Content-Length can prevent clients from getting stuck waiting on PHP to finish
* while deferred updates are running.
*
* @param int $length
*/
private static function emitContentLength( $length ) {
if ( !headers_sent()
&& isset( $_SERVER['SERVER_PROTOCOL'] )
&& $_SERVER['SERVER_PROTOCOL'] == 'HTTP/1.0'
if ( headers_sent() ) {
wfDebug( __METHOD__ . "() headers already sent" );
return;
}
if (
in_array( http_response_code(), [ 200, 404 ], true ) ||
( $_SERVER['SERVER_PROTOCOL'] ?? null ) === 'HTTP/1.0'
) {
header( "Content-Length: $length" );
}