wiki.techinc.nl/includes/ResourceLoader/ResourceLoader.php

2107 lines
70 KiB
PHP
Raw Normal View History

<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
2010-09-05 13:31:34 +00:00
* @file
* @author Roan Kattouw
* @author Trevor Parscal
*/
namespace MediaWiki\ResourceLoader;
use BagOStuff;
use Config;
use DeferredUpdates;
use Exception;
use ExtensionRegistry;
use HashBagOStuff;
use HttpStatus;
use InvalidArgumentException;
use Less_Environment;
use Less_Parser;
use MediaWiki\CommentStore\CommentStore;
Hooks::run() call site migration Migrate all callers of Hooks::run() to use the new HookContainer/HookRunner system. General principles: * Use DI if it is already used. We're not changing the way state is managed in this patch. * HookContainer is always injected, not HookRunner. HookContainer is a service, it's a more generic interface, it is the only thing that provides isRegistered() which is needed in some cases, and a HookRunner can be efficiently constructed from it (confirmed by benchmark). Because HookContainer is needed for object construction, it is also needed by all factories. * "Ask your friendly local base class". Big hierarchies like SpecialPage and ApiBase have getHookContainer() and getHookRunner() methods in the base class, and classes that extend that base class are not expected to know or care where the base class gets its HookContainer from. * ProtectedHookAccessorTrait provides protected getHookContainer() and getHookRunner() methods, getting them from the global service container. The point of this is to ease migration to DI by ensuring that call sites ask their local friendly base class rather than getting a HookRunner from the service container directly. * Private $this->hookRunner. In some smaller classes where accessor methods did not seem warranted, there is a private HookRunner property which is accessed directly. Very rarely (two cases), there is a protected property, for consistency with code that conventionally assumes protected=private, but in cases where the class might actually be overridden, a protected accessor is preferred over a protected property. * The last resort: Hooks::runner(). Mostly for static, file-scope and global code. In a few cases it was used for objects with broken construction schemes, out of horror or laziness. 
Constructors with new required arguments: * AuthManager * BadFileLookup * BlockManager * ClassicInterwikiLookup * ContentHandlerFactory * ContentSecurityPolicy * DefaultOptionsManager * DerivedPageDataUpdater * FullSearchResultWidget * HtmlCacheUpdater * LanguageFactory * LanguageNameUtils * LinkRenderer * LinkRendererFactory * LocalisationCache * MagicWordFactory * MessageCache * NamespaceInfo * PageEditStash * PageHandlerFactory * PageUpdater * ParserFactory * PermissionManager * RevisionStore * RevisionStoreFactory * SearchEngineConfig * SearchEngineFactory * SearchFormWidget * SearchNearMatcher * SessionBackend * SpecialPageFactory * UserNameUtils * UserOptionsManager * WatchedItemQueryService * WatchedItemStore Constructors with new optional arguments: * DefaultPreferencesFactory * Language * LinkHolderArray * MovePage * Parser * ParserCache * PasswordReset * Router setHookContainer() now required after construction: * AuthenticationProvider * ResourceLoaderModule * SearchEngine Change-Id: Id442b0dbe43aba84bd5cf801d86dedc768b082c7
2020-03-19 02:42:09 +00:00
use MediaWiki\HookContainer\HookContainer;
use MediaWiki\Html\Html;
use MediaWiki\Html\HtmlJsCode;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Profiler\ProfilingContext;
use MediaWiki\Request\HeaderCallback;
use MediaWiki\Title\Title;
use MediaWiki\User\UserOptionsLookup;
use MediaWiki\WikiMap\WikiMap;
use MWExceptionHandler;
use MWExceptionRenderer;
use Net_URL2;
use ObjectCache;
use OutputPage;
use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
use ResourceFileCache;
use RuntimeException;
use stdClass;
use Throwable;
use UnexpectedValueException;
use WebRequest;
use Wikimedia\DependencyStore\DependencyStore;
use Wikimedia\DependencyStore\KeyValueDependencyStore;
use Wikimedia\Minify\CSSMin;
use Wikimedia\Minify\JavaScriptMinifier;
use Wikimedia\Rdbms\DBConnectionError;
use Wikimedia\RequestTimeout\TimeoutException;
use Wikimedia\ScopedCallback;
use Wikimedia\Timestamp\ConvertibleTimestamp;
use Wikimedia\WrappedString;
use Xml;
2010-09-05 13:31:34 +00:00
/**
* @defgroup ResourceLoader ResourceLoader
*
* For higher level documentation, see <https://www.mediawiki.org/wiki/ResourceLoader/Architecture>.
*/
/**
* @defgroup ResourceLoaderHooks ResourceLoader Hooks
* @ingroup ResourceLoader
* @ingroup Hooks
*/
/**
* ResourceLoader is a loading system for JavaScript and CSS resources.
*
* For higher level documentation, see <https://www.mediawiki.org/wiki/ResourceLoader/Architecture>.
*
* @ingroup ResourceLoader
* @since 1.17
*/
class ResourceLoader implements LoggerAwareInterface {
/** @var int */
public const CACHE_VERSION = 9;
/** @var string JavaScript / CSS pragma to disable minification. */
public const FILTER_NOMIN = '/*@nomin*/';
/** @var string */
private const RL_DEP_STORE_PREFIX = 'ResourceLoaderModule';
/** @var int How long to preserve indirect dependency metadata in our backend store. */
ResourceLoader: Remove DependencyStore::renew == Background When file dependency information is lost, the startup module computes a hash that is based on an incomplete summary of bundled resources. This means it arrives at a "wrong" hash. Once a browser actually asks for that version of the module, though, we rediscover the dependency information, and subsequent startup responses will once again arrive at the same correct hash. These 5-minute windows of time where the browser cache of anyone visiting is churned over are not great, and so we try to avoid them. The status quo is the dedicated module_deps table in core with no expiry. This means a potential concern is building up garbage over time for modules and extensions that no longer exist or are no longer deployed on that wiki. In practice this has not been much of an issue, we haven't run the cleanupRemovedModules.php or purgeModuleDeps.php scripts in years. Once in 2017 to fix corrupt rows (T158105), and once in 2020 to estimate needed space if we had expiries <https://phabricator.wikimedia.org/T113916#6142457>. Hence we're moving to mainstash via KeyValueDepStore, and not to memcached. But for that we might as well start using expiries. To not compromise on losing dep info regularly and causing avoidable browser cache churn for modules that are hot and very much still existing, we adopted `renew()` in 5282a0296 when drafting KeyValueDepStore, so that we keep moving the TTL of active rows forward and let the rest naturally expire. == Problem The changeTTL writes are so heavy and undebounced, that it fully saturates the hardware disk, unable to keep up simply with the amount of streaming append-only writes to disk. https://phabricator.wikimedia.org/T312902 == Future Perhaps we can make this work if SqlBagOStuff in "MainStash" mode was more efficient and lenient around changeTTL. E.g. 
rather than simultaneously ensuring presence of the row itself for perfect eventual consistency, maybe it could just be a light "touch" to ensure the TTL of any such row has a given minimum TTL. Alternatively, if we don't make it part of the generalised SqlBag/MainStash interface but something specific to KeyValueDepStore, we could also do something several orders of magnitude more efficient, such as only touching it once a day or once a week, instead of several hundred times a second after every read performing a write that amplifies the read back into a full row write, with thus a very large and repetitive binlog. == This change As an interim measure, I propose we remove renew() and instead increase the TTL from 1 week to 1 year. This is still shorter than "indefinite" which is what the module_deps table does in the status quo, and that was never an issue in practice in terms of space. This is because the list of modules is quite stable. It's limited to modules that are both file-backed (so no gadgets) and also have non-trivial file dependencies (such as styles.less -> foo.css -> bar.svg). == Impact The installer and update.php (DatabaseUpdater) already clear `module_deps` and `objectcache` so this is a non-issue for third parties. For WMF, it means that the maintenance script we never ran, can be removed as it will now automatically clean up this stuff after a year of inactivity, with a small cache churn cost to pay at that time. Bug: T113916 Bug: T312902 Change-Id: Ie11bdfdcf5e6724bc19ac24e4353aaea316029fd
2022-07-11 21:20:22 +00:00
private const RL_MODULE_DEP_TTL = BagOStuff::TTL_YEAR;
/** @var int */
private const MAXAGE_RECOVER = 60;
/** @var int|null */
protected static $debugMode = null;
/** @var Config */
private $config;
/** @var MessageBlobStore */
private $blobStore;
/** @var DependencyStore */
private $depStore;
/** @var LoggerInterface */
private $logger;
Hooks::run() call site migration Migrate all callers of Hooks::run() to use the new HookContainer/HookRunner system. General principles: * Use DI if it is already used. We're not changing the way state is managed in this patch. * HookContainer is always injected, not HookRunner. HookContainer is a service, it's a more generic interface, it is the only thing that provides isRegistered() which is needed in some cases, and a HookRunner can be efficiently constructed from it (confirmed by benchmark). Because HookContainer is needed for object construction, it is also needed by all factories. * "Ask your friendly local base class". Big hierarchies like SpecialPage and ApiBase have getHookContainer() and getHookRunner() methods in the base class, and classes that extend that base class are not expected to know or care where the base class gets its HookContainer from. * ProtectedHookAccessorTrait provides protected getHookContainer() and getHookRunner() methods, getting them from the global service container. The point of this is to ease migration to DI by ensuring that call sites ask their local friendly base class rather than getting a HookRunner from the service container directly. * Private $this->hookRunner. In some smaller classes where accessor methods did not seem warranted, there is a private HookRunner property which is accessed directly. Very rarely (two cases), there is a protected property, for consistency with code that conventionally assumes protected=private, but in cases where the class might actually be overridden, a protected accessor is preferred over a protected property. * The last resort: Hooks::runner(). Mostly for static, file-scope and global code. In a few cases it was used for objects with broken construction schemes, out of horror or laziness. 
Constructors with new required arguments: * AuthManager * BadFileLookup * BlockManager * ClassicInterwikiLookup * ContentHandlerFactory * ContentSecurityPolicy * DefaultOptionsManager * DerivedPageDataUpdater * FullSearchResultWidget * HtmlCacheUpdater * LanguageFactory * LanguageNameUtils * LinkRenderer * LinkRendererFactory * LocalisationCache * MagicWordFactory * MessageCache * NamespaceInfo * PageEditStash * PageHandlerFactory * PageUpdater * ParserFactory * PermissionManager * RevisionStore * RevisionStoreFactory * SearchEngineConfig * SearchEngineFactory * SearchFormWidget * SearchNearMatcher * SessionBackend * SpecialPageFactory * UserNameUtils * UserOptionsManager * WatchedItemQueryService * WatchedItemStore Constructors with new optional arguments: * DefaultPreferencesFactory * Language * LinkHolderArray * MovePage * Parser * ParserCache * PasswordReset * Router setHookContainer() now required after construction: * AuthenticationProvider * ResourceLoaderModule * SearchEngine Change-Id: Id442b0dbe43aba84bd5cf801d86dedc768b082c7
2020-03-19 02:42:09 +00:00
/** @var HookContainer */
private $hookContainer;
/** @var HookRunner */
private $hookRunner;
/** @var string */
private $loadScript;
/** @var int */
private $maxageVersioned;
/** @var int */
private $maxageUnversioned;
/** @var bool */
private $useFileCache;
Hooks::run() call site migration Migrate all callers of Hooks::run() to use the new HookContainer/HookRunner system. General principles: * Use DI if it is already used. We're not changing the way state is managed in this patch. * HookContainer is always injected, not HookRunner. HookContainer is a service, it's a more generic interface, it is the only thing that provides isRegistered() which is needed in some cases, and a HookRunner can be efficiently constructed from it (confirmed by benchmark). Because HookContainer is needed for object construction, it is also needed by all factories. * "Ask your friendly local base class". Big hierarchies like SpecialPage and ApiBase have getHookContainer() and getHookRunner() methods in the base class, and classes that extend that base class are not expected to know or care where the base class gets its HookContainer from. * ProtectedHookAccessorTrait provides protected getHookContainer() and getHookRunner() methods, getting them from the global service container. The point of this is to ease migration to DI by ensuring that call sites ask their local friendly base class rather than getting a HookRunner from the service container directly. * Private $this->hookRunner. In some smaller classes where accessor methods did not seem warranted, there is a private HookRunner property which is accessed directly. Very rarely (two cases), there is a protected property, for consistency with code that conventionally assumes protected=private, but in cases where the class might actually be overridden, a protected accessor is preferred over a protected property. * The last resort: Hooks::runner(). Mostly for static, file-scope and global code. In a few cases it was used for objects with broken construction schemes, out of horror or laziness. 
Constructors with new required arguments: * AuthManager * BadFileLookup * BlockManager * ClassicInterwikiLookup * ContentHandlerFactory * ContentSecurityPolicy * DefaultOptionsManager * DerivedPageDataUpdater * FullSearchResultWidget * HtmlCacheUpdater * LanguageFactory * LanguageNameUtils * LinkRenderer * LinkRendererFactory * LocalisationCache * MagicWordFactory * MessageCache * NamespaceInfo * PageEditStash * PageHandlerFactory * PageUpdater * ParserFactory * PermissionManager * RevisionStore * RevisionStoreFactory * SearchEngineConfig * SearchEngineFactory * SearchFormWidget * SearchNearMatcher * SessionBackend * SpecialPageFactory * UserNameUtils * UserOptionsManager * WatchedItemQueryService * WatchedItemStore Constructors with new optional arguments: * DefaultPreferencesFactory * Language * LinkHolderArray * MovePage * Parser * ParserCache * PasswordReset * Router setHookContainer() now required after construction: * AuthenticationProvider * ResourceLoaderModule * SearchEngine Change-Id: Id442b0dbe43aba84bd5cf801d86dedc768b082c7
2020-03-19 02:42:09 +00:00
/** @var Module[] Map of (module name => Module) */
private $modules = [];
/** @var array[] Map of (module name => associative info array) */
private $moduleInfos = [];
/** @var string[] List of module names that contain QUnit tests */
private $testModuleNames = [];
/** @var string[] Map of (source => path); E.g. [ 'source-id' => 'http://.../load.php' ] */
private $sources = [];
/** @var array Errors accumulated during a respond() call. Exposed for testing. */
protected $errors = [];
/**
* @var string[] Buffer for extra response headers during a makeModuleResponse() call.
* Exposed for testing.
*/
protected $extraHeaders = [];
/** @var array Map of (module-variant => buffered DependencyStore updates) */
private $depStoreUpdateBuffer = [];
/**
* @var array Styles that are skin-specific and supplement or replace the
* default skinStyles of a FileModule. See $wgResourceModuleSkinStyles.
*/
private $moduleSkinStyles = [];
/**
 * Set up a ResourceLoader instance with its default collaborators.
 *
 * Besides storing the given configuration, this registers the 'local' source
 * (pointing at the load script) and the always-present 'startup' module, and
 * installs a MessageBlobStore and a DependencyStore.
 *
 * @internal For ServiceWiring only (TODO: Make stable as part of T32956).
 * @param Config $config Generic pass-through for use by extension callbacks
 *  and other MediaWiki-specific module classes.
 * @param LoggerInterface|null $logger [optional] A NullLogger is used when omitted.
 * @param DependencyStore|null $tracker [optional] A KeyValueDependencyStore wrapping
 *  a new HashBagOStuff is used when omitted.
 * @param array $params [optional]
 *  - loadScript: URL path to the load.php entrypoint.
 *    Default: `'/load.php'`.
 *  - maxageVersioned: HTTP cache max-age in seconds for URLs with a "version" parameter.
 *    This applies to most load.php responses, and may have a long duration (e.g. weeks or
 *    months), because a change in the module bundle will naturally produce a different URL
 *    and thus automatically bust the CDN and web browser caches.
 *    Default: 30 days.
 *  - maxageUnversioned: HTTP cache max-age in seconds for URLs without a "version" parameter.
 *    This should have a short duration (e.g. minutes), and affects the startup manifest which
 *    controls how quickly changes (in the module registry, dependency tree, or module content)
 *    will propagate to clients.
 *    Default: 5 minutes.
 *  - useFileCache: Enable use of MediaWiki's FileCache feature.
 *    See also $wgUseFileCache and ResourceFileCache.
 *    Default: `false`.
 */
public function __construct(
	Config $config,
	?LoggerInterface $logger = null,
	?DependencyStore $tracker = null,
	array $params = []
) {
	$this->loadScript = $params['loadScript'] ?? '/load.php';
	$this->maxageVersioned = $params['maxageVersioned'] ?? 30 * 24 * 60 * 60;
	$this->maxageUnversioned = $params['maxageUnversioned'] ?? 5 * 60;
	$this->useFileCache = $params['useFileCache'] ?? false;

	$this->config = $config;
	$this->logger = $logger ?? new NullLogger();

	// Hook services are not injected; they come from the global service container.
	$services = MediaWikiServices::getInstance();
	$this->hookContainer = $services->getHookContainer();
	$this->hookRunner = new HookRunner( $this->hookContainer );

	// Add 'local' source first
	$this->addSource( 'local', $this->loadScript );

	// Special module that always exists
	$this->register( 'startup', [ 'class' => StartUpModule::class ] );

	$this->setMessageBlobStore(
		new MessageBlobStore( $this, $this->logger, $services->getMainWANObjectCache() )
	);

	$this->setDependencyStore(
		$tracker ?? new KeyValueDependencyStore( new HashBagOStuff() )
	);
}
2010-09-04 12:53:01 +00:00
/**
* Get the Config object that was passed to the constructor.
*
* @return Config
*/
public function getConfig() {
return $this->config;
}
/**
* Replace the logger used by this instance.
*
* This is the setter declared by LoggerAwareInterface, which this class implements.
*
* @since 1.26
* @param LoggerInterface $logger
*/
public function setLogger( LoggerInterface $logger ) {
$this->logger = $logger;
}
/**
* Get the logger currently used by this instance.
*
* @since 1.27
* @return LoggerInterface
*/
public function getLogger() {
return $this->logger;
}
/**
* Get the MessageBlobStore currently attached to this instance.
*
* @since 1.26
* @return MessageBlobStore
*/
public function getMessageBlobStore() {
return $this->blobStore;
}
/**
* Replace the MessageBlobStore attached to this instance.
*
* @since 1.25
* @param MessageBlobStore $blobStore
*/
public function setMessageBlobStore( MessageBlobStore $blobStore ) {
$this->blobStore = $blobStore;
}
/**
* Replace the DependencyStore used by this instance.
*
* @since 1.35
* @param DependencyStore $tracker
*/
public function setDependencyStore( DependencyStore $tracker ) {
$this->depStore = $tracker;
}
/**
* Replace the map of skin-specific module styles held by this instance.
*
* @internal For use by ServiceWiring.php
* @param array $moduleSkinStyles Skin-specific styles that supplement or replace the
* default skinStyles of a FileModule. See $wgResourceModuleSkinStyles.
*/
public function setModuleSkinStyles( array $moduleSkinStyles ) {
$this->moduleSkinStyles = $moduleSkinStyles;
}
/**
 * Register a module with the ResourceLoader system.
 *
 * Only the module info array is stored here. If a module is already
 * registered under the same name, a warning is logged and the new info
 * replaces the earlier registration.
 *
 * @see $wgResourceModules for the available options.
 * @param string|array[] $name Module name as a string or, array of module info arrays
 *  keyed by name.
 * @param array|null $info Module info array. When using the first parameter to register
 *  multiple modules at once, this parameter is optional.
 * @throws InvalidArgumentException If a module name is rejected by
 *  ResourceLoader::isValidModuleName() (e.g. it contains pipes or commas)
 * @throws InvalidArgumentException If the module info is not an array
 */
public function register( $name, ?array $info = null ) {
	// Allow multiple modules to be registered in one call
	$registrations = is_array( $name ) ? $name : [ $name => $info ];

	// Distinct loop variable names, so the method parameters are not overwritten.
	foreach ( $registrations as $moduleName => $moduleInfo ) {
		// Warn on duplicate registrations
		if ( isset( $this->moduleInfos[$moduleName] ) ) {
			// A module has already been registered by this name
			$this->logger->warning(
				'ResourceLoader duplicate registration warning. ' .
				'Another module has already been registered as ' . $moduleName
			);
		}

		// Check validity
		if ( !self::isValidModuleName( $moduleName ) ) {
			throw new InvalidArgumentException( "ResourceLoader module name '$moduleName' is invalid, "
				. "see ResourceLoader::isValidModuleName()" );
		}
		if ( !is_array( $moduleInfo ) ) {
			throw new InvalidArgumentException(
				'Invalid module info for "' . $moduleName . '": expected array, got ' . gettype( $moduleInfo )
			);
		}

		// Attach module
		$this->moduleInfos[$moduleName] = $moduleInfo;
	}
}
2010-09-04 12:53:01 +00:00
/**
 * Register the QUnit test modules provided by extensions and by core.
 *
 * Extension-provided definitions are read from the 'QUnitTestModules'
 * attribute of the extension registry. Each one is normalised so that its
 * 'dependencies' entry is an array, and 'mediawiki.qunit-testrunner' is
 * appended to it. The core definitions from tests/qunit/QUnitTestResources.php
 * are then combined with them; on a name clash the core definition wins.
 *
 * @internal For use by ServiceWiring only
 * @codeCoverageIgnore
 */
public function registerTestModules(): void {
	$extRegistry = ExtensionRegistry::getInstance();
	$testModules = $extRegistry->getAttribute( 'QUnitTestModules' );

	$testModuleNames = [];
	foreach ( $testModules as $name => &$module ) {
		// Turn any single-module dependency into an array
		if ( isset( $module['dependencies'] ) && is_string( $module['dependencies'] ) ) {
			$module['dependencies'] = [ $module['dependencies'] ];
		}

		// Ensure the testrunner loads before any tests
		$module['dependencies'][] = 'mediawiki.qunit-testrunner';

		// Keep track of the modules to load on SpecialJavaScriptTest
		$testModuleNames[] = $name;
	}
	// Break the reference left behind by the by-reference loop. The include
	// below executes in this method's variable scope, so without this any
	// assignment to $module in the included file would overwrite the last
	// extension test module.
	unset( $module );

	// Core test modules (their names have further precedence).
	$testModules = ( include MW_INSTALL_PATH . '/tests/qunit/QUnitTestResources.php' ) + $testModules;
	$testModuleNames[] = 'test.MediaWiki';

	$this->register( $testModules );
	$this->testModuleNames = $testModuleNames;
}
[ResourceLoader 2]: Add support for multiple loadScript sources Front-end: * New mw.loader method: addSource(). Call with two arguments or an object as first argument for multiple registrations * New property in module registry: "source". Optional for local modules (falls back to 'local'). When loading/using one or more modules, the worker will group the request by source and make separate requests to the sources as needed. * Re-arranging object properties in mw.loader.register to match the same order all other code parts use. * Adding documentation for 'source' and where missing updating it to include 'group' as well. * Refactor of mw.loader.work() by Roan Kattouw and Timo Tijhof:' -- Additional splitting layer by source (in addition to splitting by group), renamed 'groups' to 'splits' -- Clean up of the loop, and removing a no longer needed loop after the for-in-loop -- Much more function documentation in mw.loader.work() -- Moved caching of wgResourceLoaderMaxQueryLength out of the loop and renamed 'limit' to 'maxQueryLength Back-end changed provided through patch by Roan Kattouw (to avoid broken code between commits): * New method in ResourceLoader: addSource(). During construction of ResourceLoader this will be called by default for 'local' with loadScript property set to $wgLoadScript. Additional sources can be registered through $wgResourceLoaderSources (empty array by default) * Calling mw.loader.addSource from the startup module * Passing source to mw.loader.register from startup module * Some new static helper methods Use: * By default nothing should change in core, all modules simply default to 'local'. This info originates from the getSource()-method of the ResourceLoaderModule class, which is inherited to all core ResourceLoaderModule-implementations (none override it) * Third-party users and/or extensions can create new classes extending ResourceLoaderModule, re-implementing the getSource-method to return something else. 
Basic example: $wgResourceLoaderSources['mywiki'] = array( 'loadScript' => 'http://example.org/w/load.php' ); class MyCentralWikiModule extends ResourceLoaderModule { function getSource(){ return 'mywiki'; } } $wgResourceModules['cool.stuff'] => array( 'class' => 'MyCentralWikiModule' ); More complicated example // imagine some stuff with a ForeignGadgetRepo class, putting stuff in $wgResourceLoaderSources in the __construct() method class ForeignGadgetRepoGadget extends ResourceLoaderModule { function getSource(){ return $this->source; } } Loading: Loading is completely transparent, stuff like $wgOut->addModules() or mw.loader.loader/using both take it as any other module and load from the right source accordingly. -- This commit is part of the ResourceLoader 2 project.
2011-07-26 21:10:34 +00:00
/**
 * Add a foreign source of modules.
 *
 * Source IDs are typically the same as the Wiki ID or database name (e.g. lowercase a-z).
 *
 * @param array|string $sources Source ID (string), or [ id1 => loadUrl, id2 => loadUrl, ... ].
 *  In the array form, each value may itself be an array with a 'loadScript' key
 *  (backwards-compatibility).
 * @param string|array|null $loadUrl load.php url (string), or array with a 'loadScript' key
 *  for backwards-compatibility. Only used when $sources is not an array.
 * @throws RuntimeException If a source ID has already been registered.
 * @throws InvalidArgumentException If an array-form load URL lacks a 'loadScript' key.
 */
public function addSource( $sources, $loadUrl = null ) {
	// Normalise the single-source call form to the map form
	if ( !is_array( $sources ) ) {
		$sources = [ $sources => $loadUrl ];
	}

	foreach ( $sources as $id => $source ) {
		// Disallow duplicates
		if ( isset( $this->sources[$id] ) ) {
			throw new RuntimeException( 'Cannot register source ' . $id . ' twice' );
		}

		// Support: MediaWiki 1.24 and earlier
		if ( is_array( $source ) ) {
			if ( !isset( $source['loadScript'] ) ) {
				throw new InvalidArgumentException( 'Each source must have a "loadScript" key' );
			}
			$source = $source['loadScript'];
		}

		$this->sources[$id] = $source;
	}
}
/**
 * List the names of all modules for which registration info is held.
 *
 * @return string[]
 */
public function getModuleNames() {
	$names = array_keys( $this->moduleInfos );

	return $names;
}
/**
 * Return the stored list of names of modules that carry QUnit tests.
 *
 * @internal For use by SpecialJavaScriptTest only
 * @return string[]
 * @codeCoverageIgnore
 */
public function getTestSuiteModuleNames() {
	$names = $this->testModuleNames;

	return $names;
}
2010-09-04 12:53:01 +00:00
/**
 * Tell whether registration info exists for the given module name.
 *
 * @since 1.25
 * @param string $name Module name
 * @return bool
 */
public function isModuleRegistered( $name ) {
	$isKnown = isset( $this->moduleInfos[$name] );

	return $isKnown;
}
/**
 * Look up the Module object that belongs to a module name.
 *
 * Module objects are created lazily: the first call for a registered name
 * builds the object from its registration info and caches it, and every
 * later call returns that same cached object.
 *
 * @param string $name Module name
 * @return Module|null The Module instance, or null when no module is
 *  registered under this name.
 */
public function getModule( $name ) {
	// Already constructed earlier: hand back the cached instance
	if ( isset( $this->modules[$name] ) ) {
		return $this->modules[$name];
	}

	// No such module
	if ( !isset( $this->moduleInfos[$name] ) ) {
		return null;
	}

	// Build the object, either with the plain class constructor or via the
	// factory callable named in the registration info
	$definition = $this->moduleInfos[$name];
	if ( !isset( $definition['factory'] ) ) {
		$moduleClass = $definition['class'] ?? FileModule::class;
		/** @var Module $instance */
		$instance = new $moduleClass( $definition );
	} else {
		/** @var Module $instance */
		$instance = call_user_func( $definition['factory'], $definition );
	}

	// Hand the new module the services and settings held by this loader
	$instance->setConfig( $this->getConfig() );
	$instance->setLogger( $this->logger );
	$instance->setHookContainer( $this->hookContainer );
	$instance->setName( $name );
	$instance->setDependencyAccessCallbacks(
		[ $this, 'loadModuleDependenciesInternal' ],
		[ $this, 'saveModuleDependenciesInternal' ]
	);
	$instance->setSkinStylesOverride( $this->moduleSkinStyles );

	$this->modules[$name] = $instance;

	return $instance;
}
2010-09-04 12:53:01 +00:00
/**
 * Batch-load the stored information for a set of modules: tracked file
 * dependencies, wiki title info, and message blobs.
 *
 * @param string[] $moduleNames
 * @param Context $context ResourceLoader-specific context of the request
 */
public function preloadModuleInfo( array $moduleNames, Context $context ) {
	// Fetch the tracked indirect file dependencies of all modules in one call
	$vary = Module::getVary( $context );
	$entityKeys = [];
	foreach ( $moduleNames as $name ) {
		$entityKeys[$name] = "$name|$vary";
	}
	$storedDeps = $this->depStore->retrieveMulti( self::RL_DEP_STORE_PREFIX, $entityKeys );

	// Hand each known module its own dependency list
	foreach ( $moduleNames as $name ) {
		$module = $this->getModule( $name );
		if ( !$module ) {
			continue;
		}
		$deps = $storedDeps[$entityKeys[$name]];
		$module->setFileDependencies( $context, Module::expandRelativePaths( $deps['paths'] ) );
	}

	// Batched version of WikiModule::getTitleInfo
	WikiModule::preloadTitleInfo( $context, wfGetDB( DB_REPLICA ), $moduleNames );

	// Collect the modules that have messages
	$withMessages = [];
	foreach ( $moduleNames as $name ) {
		$module = $this->getModule( $name );
		if ( !$module || !$module->getMessages() ) {
			continue;
		}
		$withMessages[$name] = $module;
	}

	// Prime in-object cache for message blobs for modules with messages
	$lang = $context->getLanguage();
	$blobs = $this->getMessageBlobStore()->getBlobs( $withMessages, $lang );
	foreach ( $blobs as $name => $blob ) {
		$withMessages[$name]->setMessageBlob( $blob, $lang );
	}
}
/**
 * Read the tracked file dependencies of one module variant from the
 * dependency store.
 *
 * @internal Exposed for letting getModule() pass the callable to DependencyStore
 * @param string $moduleName
 * @param string $variant Language/skin variant
 * @return string[] List of absolute file paths
 */
public function loadModuleDependenciesInternal( $moduleName, $variant ) {
	$entity = "$moduleName|$variant";
	$tracked = $this->depStore->retrieve( self::RL_DEP_STORE_PREFIX, $entity );

	return Module::expandRelativePaths( $tracked['paths'] );
}
/**
 * Buffer a change to the tracked file dependencies of one module variant,
 * and schedule a deferred update that flushes the buffer to the dependency store.
 *
 * Nothing is buffered or scheduled when the path list is unchanged.
 *
 * @internal Exposed for letting getModule() pass the callable to DependencyStore
 * @param string $moduleName
 * @param string $variant Language/skin variant
 * @param string[] $paths List of relative paths referenced during computation
 * @param string[] $priorPaths List of relative paths tracked in the dependency store
 */
public function saveModuleDependenciesInternal( $moduleName, $variant, $paths, $priorPaths ) {
	// If paths were unchanged, leave the dependency store unchanged also.
	// The entry will eventually expire, after which we will briefly issue an incomplete
	// version hash for a 5-min startup window, the module then recomputes and rediscovers
	// the paths and arrives at the same module version hash once again. It will churn
	// part of the browser cache once, for clients connecting during that window.
	//
	// Return before scheduling anything, so that unchanged modules do not each
	// queue a deferred update that has nothing to write.
	if ( !array_diff( $paths, $priorPaths ) && !array_diff( $priorPaths, $paths ) ) {
		return;
	}

	// A non-empty buffer means an earlier call already scheduled the flush below
	$hasPendingUpdate = (bool)$this->depStoreUpdateBuffer;
	$entity = "$moduleName|$variant";

	// Dependency store needs to be updated with the new path list.
	// A null entry marks the entity for removal from the store.
	if ( $paths ) {
		$deps = $this->depStore->newEntityDependencies( $paths, time() );
		$this->depStoreUpdateBuffer[$entity] = $deps;
	} else {
		$this->depStoreUpdateBuffer[$entity] = null;
	}

	if ( $hasPendingUpdate ) {
		return;
	}

	DeferredUpdates::addCallableUpdate( function () {
		// Take over the buffer, so that later changes schedule a new flush
		$updatesByEntity = $this->depStoreUpdateBuffer;
		$this->depStoreUpdateBuffer = [];
		$cache = ObjectCache::getLocalClusterInstance();

		// Locks are kept in this array until the callback returns
		$scopeLocks = [];
		$depsByEntity = [];
		$entitiesUnreg = [];
		foreach ( $updatesByEntity as $entity => $update ) {
			$lockKey = $cache->makeKey( 'rl-deps', $entity );
			$scopeLocks[$entity] = $cache->getScopedLock( $lockKey, 0 );
			if ( !$scopeLocks[$entity] ) {
				// avoid duplicate write request slams (T124649)
				// the lock must be specific to the current wiki (T247028)
				continue;
			}
			if ( $update === null ) {
				$entitiesUnreg[] = $entity;
			} else {
				$depsByEntity[$entity] = $update;
			}
		}

		$ttl = self::RL_MODULE_DEP_TTL;
		$this->depStore->storeMulti( self::RL_DEP_STORE_PREFIX, $depsByEntity, $ttl );
		$this->depStore->remove( self::RL_DEP_STORE_PREFIX, $entitiesUnreg );
	} );
}
[ResourceLoader 2]: Add support for multiple loadScript sources Front-end: * New mw.loader method: addSource(). Call with two arguments or an object as first argument for multiple registrations * New property in module registry: "source". Optional for local modules (falls back to 'local'). When loading/using one or more modules, the worker will group the request by source and make separate requests to the sources as needed. * Re-arranging object properties in mw.loader.register to match the same order all other code parts use. * Adding documentation for 'source' and where missing updating it to include 'group' as well. * Refactor of mw.loader.work() by Roan Kattouw and Timo Tijhof:' -- Additional splitting layer by source (in addition to splitting by group), renamed 'groups' to 'splits' -- Clean up of the loop, and removing a no longer needed loop after the for-in-loop -- Much more function documentation in mw.loader.work() -- Moved caching of wgResourceLoaderMaxQueryLength out of the loop and renamed 'limit' to 'maxQueryLength Back-end changed provided through patch by Roan Kattouw (to avoid broken code between commits): * New method in ResourceLoader: addSource(). During construction of ResourceLoader this will be called by default for 'local' with loadScript property set to $wgLoadScript. Additional sources can be registered through $wgResourceLoaderSources (empty array by default) * Calling mw.loader.addSource from the startup module * Passing source to mw.loader.register from startup module * Some new static helper methods Use: * By default nothing should change in core, all modules simply default to 'local'. This info originates from the getSource()-method of the ResourceLoaderModule class, which is inherited to all core ResourceLoaderModule-implementations (none override it) * Third-party users and/or extensions can create new classes extending ResourceLoaderModule, re-implementing the getSource-method to return something else. 
Basic example: $wgResourceLoaderSources['mywiki'] = array( 'loadScript' => 'http://example.org/w/load.php' ); class MyCentralWikiModule extends ResourceLoaderModule { function getSource(){ return 'mywiki'; } } $wgResourceModules['cool.stuff'] => array( 'class' => 'MyCentralWikiModule' ); More complicated example // imagine some stuff with a ForeignGadgetRepo class, putting stuff in $wgResourceLoaderSources in the __construct() method class ForeignGadgetRepoGadget extends ResourceLoaderModule { function getSource(){ return $this->source; } } Loading: Loading is completely transparent, stuff like $wgOut->addModules() or mw.loader.loader/using both take it as any other module and load from the right source accordingly. -- This commit is part of the ResourceLoader 2 project.
2011-07-26 21:10:34 +00:00
/**
 * Get the list of sources.
 *
 * Returns the registered sources as held by this instance, without copying
 * or filtering.
 *
 * @return array Like [ id => load.php url, ... ]
 */
public function getSources() {
return $this->sources;
}
/**
 * Look up the load.php endpoint URL registered for a ResourceLoader source.
 *
 * @since 1.24
 * @param string $source Source ID
 * @return string
 * @throws UnexpectedValueException If the source ID was not registered
 */
public function getLoadScript( $source ) {
	$loadScript = $this->sources[$source] ?? null;
	if ( $loadScript === null ) {
		throw new UnexpectedValueException( "Unknown source '$source'" );
	}

	return $loadScript;
}
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
/**
 * Number of characters in a hash returned by makeHash(), which pads and
 * trims its base 36 output to this length.
 *
 * @internal For use by StartUpModule only.
 */
public const HASH_LENGTH = 5;
/**
* Create a hash for module versioning purposes.
*
* This hash is used in three ways:
*
* - To differentiate between the current version and a past version
* of a module by the same name.
*
* In the cache key of localStorage in the browser (mw.loader.store).
* This store keeps only one version of any given module. As long as the
* next version the client encounters has a different hash from the last
* version it saw, it will correctly discard it in favour of a network fetch.
*
* A browser may evict a site's storage container for any reason (e.g. when
* the user hasn't visited a site for some time, and/or when the device is
* low on storage space). Anecdotally it seems devices rarely keep unused
* storage beyond 2 weeks on mobile devices and 4 weeks on desktop.
* But, there is no hard limit or expiration on localStorage.
* ResourceLoader's Client also clears localStorage when the user changes
* their language preference or when they (temporarily) use Debug Mode.
*
* The only hard factors that reduce the range of possible versions are
* 1) the name and existence of a given module, and
* 2) the TTL for mw.loader.store, and
* 3) the `$wgResourceLoaderStorageVersion` configuration variable.
*
* - To identify a batch response of modules from load.php in an HTTP cache.
*
* When fetching modules in a batch from load.php, a combined hash
* is created by the JS code, and appended as query parameter.
*
* In cache proxies (e.g. Varnish, Nginx) and in the browser's HTTP cache,
* these urls are used to identify other previously cached responses.
* The range of possible versions a given version has to be unique amongst
* is determined by the maximum duration each response is stored for, which
* is controlled by `$wgResourceLoaderMaxage['versioned']`.
*
* - To detect race conditions between multiple web servers in a MediaWiki
* deployment of which some have the newer version and some still the older
* version.
*
* An HTTP request from a browser for the Startup manifest may be responded
* to by a server with the newer version. The browser may then use that to
* request a given module, which may then be responded to by a server with
* the older version. To avoid caching this for too long (which would pollute
* all other users without repairing itself), the combined hash that the JS
* client adds to the url is verified by the server (in ::sendResponseHeaders).
* If they don't match, we instruct cache proxies and clients to not cache
* this response as long as they normally would. This is also the reason
* that the algorithm used here in PHP must match the one used in JS.
*
* The fnv132 digest creates a 32-bit integer, which goes up to about 4 billion
* (4 Giga) and needs up to 7 chars in base 36.
* Within 7 characters, base 36 can count up to 78,364,164,096 (78 Giga),
* (but with fnv132 we'd use very little of this range, mostly padding).
* Within 6 characters, base 36 can count up to 2,176,782,336 (2 Giga).
* Within 5 characters, base 36 can count up to 60,466,176 (60 Mega).
*
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
* @since 1.26
* @param string $value
* @return string Hash
*/
public static function makeHash( $value ) {
	// 32-bit FNV-1 digest of the input, as a hexadecimal string.
	$hex = hash( 'fnv132', $value );
	// Re-encode from base 16 to base 36. The fourth argument asks
	// base_convert to pad a result that is shorter than HASH_LENGTH.
	$base36 = \Wikimedia\base_convert( $hex, 16, 36, self::HASH_LENGTH );
	// Cut off anything beyond HASH_LENGTH characters.
	return substr( $base36, 0, self::HASH_LENGTH );
}
/**
* Add an error to the 'errors' array and log it.
*
* @internal For use by StartUpModule.
* @since 1.29
* @param Exception $e
* @param string $msg
* @param array $context
*/
resourceloader: Use 'enableModuleContentVersion' for startup module This significantly simplifies the getVersionHash implementation for StartupModule, and fixes a couple of bugs. Previously, the startup module's E-Tag was determined by the 'getDefinitionSummary' method, which combined the E-Tag values from all registered modules, plus what we thought is all information used by 'getScript' (config vars, embedded script files, list of base modules, ...) However, this were various things part of the manifest that it forgot about, including: * Changes to the list of dependencies of a module. * Changes to the name of module. * Changes to the cache group of module. * Adding or removing a foreign module source (mw.loader.addSource). These are all quite rare, and when they do change, they usually also involve a change that *was* tracked already. But, sometimes they don't and that's when bugs happened. Instead of the tracking array of getDefinitionSummary, we now use the 'enableModuleContentVersion' option for StartupModule, which simply calls the actual getScript() method and hashes that. Of note: When an exception happens with the version computation of any individual module, we catch it, log it, and continue with the rest. Previously, the first time such error was discovered at run-time would be in the getCombinedVersion() call from StartupModule::getAllModuleHashes(). That public getCombinedVersion() method of ResourceLoader had the benefit of also outputting details of that exception in the HTTP response output. In order to keep that behaviour, I made outputErrorAndLog() public so that StartupModule can call it directly now. This is covered by ResourceLoaderTest::testMakeModuleResponseStartupError. Bug: T201686 Change-Id: I8e8d3a2cd2ccd68d2d78e988bcdd0d77fbcbf1d4
2018-08-30 02:52:39 +00:00
public function outputErrorAndLog( Exception $e, $msg, array $context = [] ) {
	// Hand the exception to MWExceptionHandler before anything else.
	MWExceptionHandler::logException( $e );

	// Array union keeps caller-supplied keys: 'exception' is only added
	// when $context does not already define it.
	$logContext = $context + [ 'exception' => $e ];
	$this->logger->warning( $msg, $logContext );

	// Record the formatted exception in the 'errors' array.
	$formatted = self::formatExceptionNoComment( $e );
	$this->errors[] = $formatted;
}
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
/**
* Helper method to get and combine versions of multiple modules.
*
* @since 1.26
* @param Context $context
* @param string[] $moduleNames List of known module names
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
* @return string Hash
*/
public function getCombinedVersion( Context $context, array $moduleNames ) {
if ( !$moduleNames ) {
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
return '';
}
$hashes = array_map( function ( $module ) use ( $context ) {
resourceloader: Don't let module exception break startup When getScript (or some other method used in a module response) throws an error, only that module fails (by outputting mw.loader.state instead of mw.loader.implement). Other modules will work. This has always been the case and is working fine. For example, "load.php?modules=foo|bar", where 'foo' throws, will return: ```js /* exception message: .. */ mw.loader.implement('bar', ..) mw.loader.state('foo', 'error') ``` The problem, however, is that during the generation of the startup module, we iterate over all other modules. In 2011, the getVersionHash method (then: getModifiedTime) was fairly simple and unlikely to throw errors. Nowadays, some modules use enableModuleContentVersion which will involve the same code path as for regular module responses. The try/catch in ResourceLoader::makeModuleResponse() suffices for the case of loading modules other than startup. But when loading the startup module, and an exception happens in getVersionHash, then the entire startup response is replaced with an exception comment. Example case: * A file not existing for a FileModule subclass that uses enableModuleContentVersion. * A database error from a data module, like CiteDataModule or CNChoiceData. Changes: * Ensure E-Tag is still useful while an error happens in production because we respond with 200 OK and one error isn't the same as another. Fixed by try/catch in getCombinedVersion. * Ensure start manifest isn't disrupted by one broken module. Fixed by try/catch in StartupModule::getModuleRegistrations(). Tests: * testMakeModuleResponseError: The case that already worked fined. * testMakeModuleResponseStartupError: The case fixed in this commit. * testGetCombinedVersion: The case fixed in this commit for E-Tag. Bug: T152266 Change-Id: Ice4ede5ea594bf3fa591134bc9382bd9c24e2f39
2016-12-03 00:48:14 +00:00
try {
return $this->getModule( $module )->getVersionHash( $context );
} catch ( TimeoutException $e ) {
throw $e;
resourceloader: Don't let module exception break startup When getScript (or some other method used in a module response) throws an error, only that module fails (by outputting mw.loader.state instead of mw.loader.implement). Other modules will work. This has always been the case and is working fine. For example, "load.php?modules=foo|bar", where 'foo' throws, will return: ```js /* exception message: .. */ mw.loader.implement('bar', ..) mw.loader.state('foo', 'error') ``` The problem, however, is that during the generation of the startup module, we iterate over all other modules. In 2011, the getVersionHash method (then: getModifiedTime) was fairly simple and unlikely to throw errors. Nowadays, some modules use enableModuleContentVersion which will involve the same code path as for regular module responses. The try/catch in ResourceLoader::makeModuleResponse() suffices for the case of loading modules other than startup. But when loading the startup module, and an exception happens in getVersionHash, then the entire startup response is replaced with an exception comment. Example case: * A file not existing for a FileModule subclass that uses enableModuleContentVersion. * A database error from a data module, like CiteDataModule or CNChoiceData. Changes: * Ensure E-Tag is still useful while an error happens in production because we respond with 200 OK and one error isn't the same as another. Fixed by try/catch in getCombinedVersion. * Ensure start manifest isn't disrupted by one broken module. Fixed by try/catch in StartupModule::getModuleRegistrations(). Tests: * testMakeModuleResponseError: The case that already worked fined. * testMakeModuleResponseStartupError: The case fixed in this commit. * testGetCombinedVersion: The case fixed in this commit for E-Tag. Bug: T152266 Change-Id: Ice4ede5ea594bf3fa591134bc9382bd9c24e2f39
2016-12-03 00:48:14 +00:00
} catch ( Exception $e ) {
resourceloader: Use 'enableModuleContentVersion' for startup module This significantly simplifies the getVersionHash implementation for StartupModule, and fixes a couple of bugs. Previously, the startup module's E-Tag was determined by the 'getDefinitionSummary' method, which combined the E-Tag values from all registered modules, plus what we thought is all information used by 'getScript' (config vars, embedded script files, list of base modules, ...) However, this were various things part of the manifest that it forgot about, including: * Changes to the list of dependencies of a module. * Changes to the name of module. * Changes to the cache group of module. * Adding or removing a foreign module source (mw.loader.addSource). These are all quite rare, and when they do change, they usually also involve a change that *was* tracked already. But, sometimes they don't and that's when bugs happened. Instead of the tracking array of getDefinitionSummary, we now use the 'enableModuleContentVersion' option for StartupModule, which simply calls the actual getScript() method and hashes that. Of note: When an exception happens with the version computation of any individual module, we catch it, log it, and continue with the rest. Previously, the first time such error was discovered at run-time would be in the getCombinedVersion() call from StartupModule::getAllModuleHashes(). That public getCombinedVersion() method of ResourceLoader had the benefit of also outputting details of that exception in the HTTP response output. In order to keep that behaviour, I made outputErrorAndLog() public so that StartupModule can call it directly now. This is covered by ResourceLoaderTest::testMakeModuleResponseStartupError. Bug: T201686 Change-Id: I8e8d3a2cd2ccd68d2d78e988bcdd0d77fbcbf1d4
2018-08-30 02:52:39 +00:00
// If modules fail to compute a version, don't fail the request (T152266)
// and still compute versions of other modules.
$this->outputErrorAndLog( $e,
resourceloader: Don't let module exception break startup When getScript (or some other method used in a module response) throws an error, only that module fails (by outputting mw.loader.state instead of mw.loader.implement). Other modules will work. This has always been the case and is working fine. For example, "load.php?modules=foo|bar", where 'foo' throws, will return: ```js /* exception message: .. */ mw.loader.implement('bar', ..) mw.loader.state('foo', 'error') ``` The problem, however, is that during the generation of the startup module, we iterate over all other modules. In 2011, the getVersionHash method (then: getModifiedTime) was fairly simple and unlikely to throw errors. Nowadays, some modules use enableModuleContentVersion which will involve the same code path as for regular module responses. The try/catch in ResourceLoader::makeModuleResponse() suffices for the case of loading modules other than startup. But when loading the startup module, and an exception happens in getVersionHash, then the entire startup response is replaced with an exception comment. Example case: * A file not existing for a FileModule subclass that uses enableModuleContentVersion. * A database error from a data module, like CiteDataModule or CNChoiceData. Changes: * Ensure E-Tag is still useful while an error happens in production because we respond with 200 OK and one error isn't the same as another. Fixed by try/catch in getCombinedVersion. * Ensure start manifest isn't disrupted by one broken module. Fixed by try/catch in StartupModule::getModuleRegistrations(). Tests: * testMakeModuleResponseError: The case that already worked fine. * testMakeModuleResponseStartupError: The case fixed in this commit. * testGetCombinedVersion: The case fixed in this commit for E-Tag. Bug: T152266 Change-Id: Ice4ede5ea594bf3fa591134bc9382bd9c24e2f39
2016-12-03 00:48:14 +00:00
'Calculating version for "{module}" failed: {exception}',
[
'module' => $module,
]
);
return '';
}
}, $moduleNames );
return self::makeHash( implode( '', $hashes ) );
}
/**
 * Compute the value that the 'version' query parameter is expected to have.
 *
 * respond() uses this to set a short Cache-Control header for requests that carry
 * information newer than the current server has, which avoids polluting edge caches.
 * This typically happens during a deployment. (T117587)
 *
 * The result MUST match the return value of `mw.loader#getCombinedVersion()` client-side.
 *
 * @since 1.28
 * @param Context $context
 * @param string[] $modules
 * @return string Hash, or an empty string when any of the given modules is unknown
 */
public function makeVersionQuery( Context $context, array $modules ) {
	// Since MediaWiki 1.28 the server and the client combine version hashes with the same
	// algorithm. Nothing technically requires that, and for years the two implementations
	// differed. Should getCombinedVersion in PHP (used for StartupModule and E-Tag headers)
	// ever diverge from getCombinedVersion in JS (used for the 'version' query parameter),
	// this method has to keep following the JS one.
	foreach ( $modules as $moduleName ) {
		if ( !$this->getModule( $moduleName ) ) {
			// A versioned request that names a module we don't have can never match:
			// the client factored in a module (and version) unknown to this server.
			return '';
		}
	}

	// Every name resolved; hand them on as a plain, re-indexed list in request order.
	return $this->getCombinedVersion( $context, array_values( $modules ) );
}
2010-09-05 13:31:34 +00:00
/**
 * Output a response to a load request, including the content-type header.
 *
 * @param Context $context Context in which a response should be formed
 */
public function respond( Context $context ) {
	// Capture everything in an output buffer of our own so that warnings can be caught.
	// Calling ob_clean() on the top-level buffer is not an option: when ob_gzhandler
	// is in use it also wipes the GZIP header, which is never re-emitted, and the
	// result is invalid GZIP. Wrapping the whole request in a private buffer guarantees
	// that the active buffer is not the ob_gzhandler one.
	// See https://bugs.php.net/bug.php?id=36514
	ob_start();

	$this->errors = [];
	// Never read below; the returned object is simply kept referenced until this method ends.
	$responseTime = $this->measureResponseTime();
	ProfilingContext::singleton()->init( MW_ENTRY_POINT, 'respond' );

	// Sort the requested names into instantiated modules and missing ones
	$modules = [];
	$missing = [];
	foreach ( $context->getModules() as $name ) {
		$module = $this->getModule( $name );
		if ( !$module ) {
			$missing[] = $name;
			continue;
		}
		// Private modules must never be served from the web.
		// This is a security issue, see T36907.
		if ( $module->getGroup() === Module::GROUP_PRIVATE ) {
			// Not a serious error, it only means something tried to access it (T101806)
			$this->logger->debug( "Request for private module '$name' denied" );
			$this->errors[] = "Cannot build private module \"$name\"";
			continue;
		}
		$modules[$name] = $module;
	}
	$moduleNames = array_keys( $modules );

	try {
		// Preload for getCombinedVersion() and for batch makeModuleResponse()
		$this->preloadModuleInfo( $moduleNames, $context );
	} catch ( TimeoutException $e ) {
		// Timeouts are deliberately not swallowed
		throw $e;
	} catch ( Exception $e ) {
		$this->outputErrorAndLog( $e, 'Preloading module info failed: {exception}' );
	}

	// Combine versions to propagate cache invalidation
	$versionHash = $this->getCombinedVersion( $context, $moduleNames );

	// Weak entity tag, see RFC 2616 § 3.11 Entity Tags
	// https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.11
	$etag = 'W/"' . $versionHash . '"';

	// First chance: the client may already hold this exact version
	if ( $this->tryRespondNotModified( $context, $etag ) ) {
		// output handled (buffers cleared)
		return;
	}

	// Second chance: the file cache, if enabled and available
	$fileCache = null;
	if ( $this->useFileCache ) {
		$fileCache = ResourceFileCache::newFromContext( $context );
		if ( $this->tryRespondFromFileCache( $fileCache, $context, $etag ) ) {
			// output handled
			return;
		}
	}

	// Generate a response
	$response = $this->makeModuleResponse( $context, $modules, $missing );

	// In debug mode, anything PHP printed into the buffer so far (warnings)
	// is added to the error list.
	if ( $context->getDebug() ) {
		$warnings = ob_get_contents();
		if ( strlen( $warnings ) ) {
			$this->errors[] = $warnings;
		}
	}

	// Consider saving the response to file cache (unless there are errors).
	$mayUseFileCache = $fileCache
		&& !$this->errors
		&& $missing === []
		&& ResourceFileCache::useFileCache( $context );
	if ( $mayUseFileCache ) {
		if ( $fileCache->isCacheWorthy() ) {
			// There were enough hits, save the response to the cache
			$fileCache->saveText( $response );
		} else {
			$fileCache->incrMissesRecent( $context->getRequest() );
		}
	}

	$this->sendResponseHeaders( $context, $etag, (bool)$this->errors, $this->extraHeaders );

	// Discard the output buffer, then emit the response
	ob_end_clean();

	$isImageRequest = (bool)$context->getImageObj();
	if ( $this->errors ) {
		$errorText = implode( "\n\n", $this->errors );
		if ( $isImageRequest ) {
			// An image response cannot carry both the error messages and the image,
			// so the errors replace it.
			$response = $errorText;
		} else {
			$errorResponse = self::makeComment( $errorText );
			if ( $context->shouldIncludeScripts() ) {
				$errorResponse .= 'if (window.console && console.error) { console.error('
					. $context->encodeJson( $errorText )
					. "); }\n";
			}
			// Prepend error info to the response
			$response = $errorResponse . $response;
		}
	}

	// @phan-suppress-next-line SecurityCheck-XSS
	echo $response;
}
/**
* Send stats about the time used to build the response
* @return ScopedCallback
*/
protected function measureResponseTime() {
	// Anchor the measurement at the request start time recorded by PHP.
	$requestStart = $_SERVER['REQUEST_TIME_FLOAT'];

	// Runs when the returned ScopedCallback fires: reports the elapsed
	// wall-clock time, converted from seconds to milliseconds.
	$reportTiming = static function () use ( $requestStart ) {
		$elapsedSeconds = microtime( true ) - $requestStart;
		MediaWikiServices::getInstance()
			->getStatsdDataFactory()
			->timing( 'resourceloader.responseTime', $elapsedSeconds * 1000 );
	};

	return new ScopedCallback( $reportTiming );
}
/**
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
* Send main response headers to the client.
*
* Deals with Content-Type, CORS (for stylesheets), and caching.
*
* @param Context $context
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
* @param string $etag ETag header value
* @param bool $errors Whether there are errors in the response
* @param string[] $extra Array of extra HTTP response headers
*/
protected function sendResponseHeaders(
	Context $context, $etag, $errors, array $extra = []
): void {
	HeaderCallback::warnIfHeadersSent();

	// Step 1: choose the cache lifetime.
	// The requested version is read once and reused by every branch below.
	$version = $context->getVersion();
	if ( $errors ) {
		$maxage = self::MAXAGE_RECOVER;
	} elseif (
		$version !== null
		&& $version !== $this->makeVersionQuery( $context, $context->getModules() )
	) {
		// If we need to self-correct, set a very short cache expiry
		// to basically just debounce CDN traffic. This applies to:
		// - Internal errors, e.g. due to misconfiguration.
		// - Version mismatch, e.g. due to deployment race (T117587, T47877).
		$this->logger->info( 'Client and server registry version out of sync' );
		$maxage = self::MAXAGE_RECOVER;
	} elseif ( $version === null ) {
		// Resources that can't set a version, should have their updates propagate to
		// clients quickly. This applies to shared resources linked from HTML, such as
		// the startup module and stylesheets.
		$maxage = $this->maxageUnversioned;
	} else {
		// When a version is set, use a long expiry because changes
		// will naturally miss the cache by using a different URL.
		$maxage = $this->maxageVersioned;
	}

	// Step 2: Content-Type, plus CORS for stylesheets.
	$image = $context->getImageObj();
	if ( $image ) {
		// Output different headers if we're outputting textual errors.
		if ( $errors ) {
			header( 'Content-Type: text/plain; charset=utf-8' );
		} else {
			$image->sendResponseHeaders( $context );
		}
	} elseif ( $context->getOnly() === 'styles' ) {
		header( 'Content-Type: text/css; charset=utf-8' );
		header( 'Access-Control-Allow-Origin: *' );
	} else {
		header( 'Content-Type: text/javascript; charset=utf-8' );
	}

	// Step 3: validator for conditional requests.
	// See RFC 2616 § 14.19 ETag
	// https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19
	header( 'ETag: ' . $etag );

	// Step 4: caching policy.
	if ( $context->getDebug() ) {
		// Do not cache debug responses
		header( 'Cache-Control: private, no-cache, must-revalidate' );
		header( 'Pragma: no-cache' );
	} else {
		// T132418: When a resource expires mid-way a browsing session, prefer to renew it in
		// the background instead of blocking the next page load (eg. startup module, or CSS).
		// Only offered when the lifetime exceeds the short "recover" expiry, and capped at
		// the smaller of 60 seconds and half of max-age.
		$staleDirective = ( $maxage > self::MAXAGE_RECOVER
			? ", stale-while-revalidate=" . min( 60, intval( $maxage / 2 ) )
			: ''
		);
		header( "Cache-Control: public, max-age=$maxage, s-maxage=$maxage" . $staleDirective );
		header( 'Expires: ' . ConvertibleTimestamp::convert( TS_RFC2822, time() + $maxage ) );
	}

	// Step 5: caller-supplied headers are sent last, verbatim.
	foreach ( $extra as $header ) {
		header( $header );
	}
}
2010-09-04 12:53:01 +00:00
/**
* Respond with HTTP 304 Not Modified if appropriate.
*
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
* If there's an If-None-Match header, respond with a 304 appropriately
* and clear out the output buffer. If the client cache is too old then do nothing.
*
* @param Context $context
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
* @param string $etag ETag header value
* @return bool True if HTTP 304 was sent and output handled
*/
protected function tryRespondNotModified( Context $context, $etag ) {
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
// See RFC 2616 § 14.26 If-None-Match
// https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.26
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
$clientKeys = $context->getRequest()->getHeader( 'If-None-Match', WebRequest::GETHEADER_LIST );
// Never send 304s in debug mode
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
if ( $clientKeys !== false && !$context->getDebug() && in_array( $etag, $clientKeys ) ) {
// There's another bug in ob_gzhandler (see also the comment at
// the top of this function) that causes it to gzip even empty
// responses, meaning it's impossible to produce a truly empty
// response (because the gzip header is always there). This is
// a problem because 304 responses have to be completely empty
// per the HTTP spec, and Firefox behaves buggily when they're not.
// See also https://bugs.php.net/bug.php?id=51579
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
// To work around this, we tear down all output buffering before
// sending the 304.
wfResetOutputBuffers( /* $resetGzipEncoding = */ true );
HttpStatus::header( 304 );
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
$this->sendResponseHeaders( $context, $etag, false );
return true;
}
return false;
}
/**
* Send out code for a response from file cache if possible.
*
* @param ResourceFileCache $fileCache Cache object for this request URL
* @param Context $context Context in which to generate a response
resourceloader: Replace timestamp system with version hashing Modules now track their version via getVersionHash() instead of getModifiedTime(). == Background == While some resources have observeable timestamps (e.g. files stored on disk), many other resources do not. E.g. config variables, and module definitions. For static file modules, one can e.g. revert one of more files in a module to a previous version and not affect the max timestamp. Wiki modules include pages only if they exist. The user module supports common.js and skin.js. By default neither exists. If a user has both, and then the less-recently modified one is deleted, the max-timestamp remains unchanged. For client-side caching, batch requests use "Math.max" on the relevant timestamps. Again, if a module changes but another module is more recent (e.g. out-of-order deployment, or out-of-order discovery), the change would not result in a cache miss. More scenarios can be found in the associated Phabricator tasks. == Version hash == Previously we virtually mapped these variables to a timestamp by storing the current time alongside a hash of the value in ObjectCache. Considering the number of possible request contexts (wikis * modules * users * skins * languages) this doesn't work well. It results in needless cache invalidation when the first time observation is purged due to LRU algorithms. It also has other minor bugs leading to fewer cache hits. All modules automatically get the benefits of version hashing with this change. The old getDefinitionMtime() and getHashMtime() have been replaced with dummies that return 1. These functions are often called from getModifiedTime() in subclasses. For backward-compatibility, their respective values (definition summary and hash) are now included in getVersionHash directly. As examples, the following modules have been updated to use getVersionHash directly. Other modules still work fine and can be updated later. 
* ResourceLoaderFileModule * ResourceLoaderEditToolbarModule * ResourceLoaderStartUpModule * ResourceLoaderWikiModule The presence of hashes in place of timestamps increases the startup module size on a default MediaWiki install from 4.4k to 5.8k (after gzip and minification). == ETag == Since timestamps are no longer tracked, we need a different way to implement caching for cache proxies (e.g. Varnish) and web browsers. Previously we used the Last-Modified header (in combination with Cache-Control and Expires). Instead of Last-Modified (and If-Modified-Since), we use ETag (and If-None-Match). Entity tags (new in HTTP/1.1) are much stricter than Last-Modified by default. They instruct browsers to allow usage of partial Range requests. Since our responses are dynamically generated, we need to use the Weak version of ETag. While this sounds bad, it's no different than Last-Modified. As reassured by RFC 2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.3> the specified behaviour behind Last-Modified follows the same "Weak" caching logic as Entity tags. It's just that entity tags are capable of a stricter mode (whereas Last-Modified is inherently weak). == File cache == If $wgUseFileCache is enabled, ResourceLoader uses ResourceFileCache to cache load.php responses. While the blind TTL handling (during the allowed expiry period) is still maxage/timestamp based, tryRespondNotModified() now requires the caller to know the expected ETag. For this to work, the FileCache handling had to be moved from the top of ResoureLoader::respond() to after the expected ETag is computed. This also allows us to remove the duplicate tryRespondNotModified() handling since that's is already handled by ResourceLoader::respond() meanwhile. == Misc == * Remove redundant modifiedTime cache in ResourceLoaderFileModule. * Change bugzilla references to Phabricator. * Centralised inclusion of wgCacheEpoch using getDefinitionSummary. 
Previously this logic was duplicated in each place the modified timestamp was used. * It's easy to forget calling the parent class in getDefinitionSummary(). Previously this method only tracked 'class' by default. As such, various extensions hardcoded that one value instead of calling the parent and extending the array. To better prevent this in the future, getVersionHash() now asserts that the '_cacheEpoch' property made it through. * tests: Don't use getDefinitionSummary() as an API. Fix ResourceLoaderWikiModuleTest to call getPages properly. * In tests, the default timestamp used to be 1388534400000 (which is the unix time of 20140101000000; the unit tests' CacheEpoch). The new version hash of these modules is "XyCC+PSK", which is the base64 encoded prefix of the SHA1 digest of: '{"_class":"ResourceLoaderTestModule","_cacheEpoch":"20140101000000"}' * Add sha1.js library for client-side hash generation. Compared various different implementations for code size (after minfication/gzip), and speed (when used for short hexidecimal strings). https://jsperf.com/sha1-implementations - CryptoJS <https://code.google.com/p/crypto-js/#SHA-1> (min+gzip: 2.5k) http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha1.js Chrome: 45k, Firefox: 89k, Safari: 92k - jsSHA <https://github.com/Caligatio/jsSHA> https://github.com/Caligatio/jsSHA/blob/3c1d4f2e/src/sha1.js (min+gzip: 1.8k) Chrome: 65k, Firefox: 53k, Safari: 69k - phpjs-sha1 <https://github.com/kvz/phpjs> (RL min+gzip: 0.8k) https://github.com/kvz/phpjs/blob/1eaab15d/functions/strings/sha1.js Chrome: 200k, Firefox: 280k, Safari: 78k Modern browsers implement the HTML5 Crypto API. However, this API is asynchronous, only enabled when on HTTPS in Chromium, and is quite low-level. It requires boilerplate code to actually use with TextEncoder, ArrayBuffer and Uint32Array. Due this being needed in the module loader, we'd have to load the fallback regardless. 
Considering this is not used in a critical path for performance, it's not worth shipping two implementations for this optimisation. May also resolve: * T44094 * T90411 * T94810 Bug: T94074 Change-Id: Ibb292d2416839327d1807a66c78fd96dac0637d0
2015-04-29 22:53:24 +00:00
* @param string $etag ETag header value
* @return bool If this found a cache file and handled the response
*/
protected function tryRespondFromFileCache(
	ResourceFileCache $fileCache,
	Context $context,
	$etag
) {
	// Buffer everything from here on so that stray PHP warnings can be
	// captured instead of being written straight into the response.
	ob_start();

	// Pick the maximum cache age depending on whether the request carries a version.
	if ( $context->getVersion() === null ) {
		$ttl = $this->maxageUnversioned;
	} else {
		$ttl = $this->maxageVersioned;
	}

	// The cache file must be at least as recent as this timestamp.
	$oldestAllowed = ConvertibleTimestamp::convert( TS_MW, time() - $ttl );
	$usable = $fileCache->isCacheGood( $oldestAllowed );

	if ( !$usable ) {
		// A file cache miss results in database access anyway, so probe the
		// replica connection now. If it cannot be reached, fall back to
		// whatever cache file exists, regardless of its age.
		try {
			wfGetDB( DB_REPLICA );
		} catch ( DBConnectionError $e ) {
			// Existence-only check (no minimum timestamp).
			$usable = $fileCache->isCacheGood();
		}
	}

	if ( !$usable ) {
		// Cache miss: discard the buffer and let the caller build the response.
		ob_end_clean();
		return false;
	}

	// Cache hit.
	$cachedAt = $fileCache->cacheTimestamp();

	// Send content type and cache headers before any body output.
	$this->sendResponseHeaders( $context, $etag, false );
	$body = $fileCache->fetchText();

	// In debug mode, prepend any PHP warnings collected in the output
	// buffer to the response, wrapped in a comment.
	if ( $context->getDebug() ) {
		$captured = ob_get_contents();
		if ( strlen( $captured ) ) {
			$body = self::makeComment( $captured ) . $body;
		}
	}

	// Drop the buffer, then emit the cached response with a trailing marker.
	ob_end_clean();
	echo $body . "\n/* Cached {$cachedAt} */";

	return true;
}
/**
 * Generate a CSS or JS comment block.
 *
 * Any comment terminator found inside the text is broken up by inserting
 * a space between the asterisk and the slash, so that the given text
 * cannot end the comment early.
 *
 * Only use this for public data, not error message details.
 *
 * @param string $text Text to place inside the comment
 * @return string The text wrapped in a block comment, followed by a line break
 */
public static function makeComment( $text ) {
	$encText = str_replace( '*/', '* /', $text );
	return "/*\n$encText\n*/\n";
}
/**
 * Render an exception as a CSS/JS comment.
 *
 * The message text is produced by formatExceptionNoComment() and then
 * wrapped by makeComment().
 *
 * @param Throwable $e Exception to be shown to the user
 * @return string Sanitized text in a CSS/JS comment that can be returned to the user
 */
public static function formatException( Throwable $e ) {
	$message = self::formatExceptionNoComment( $e );
	return self::makeComment( $message );
}
/**
 * Render an exception as plain text, without a surrounding comment.
 *
 * When exception details may be shown, the result is the log message
 * followed by a redacted backtrace. Otherwise only the public log message
 * is returned.
 *
 * @since 1.25
 * @param Throwable $e Exception to be shown to the user
 * @return string Sanitized text that can be returned to the user
 */
protected static function formatExceptionNoComment( Throwable $e ) {
	if ( MWExceptionRenderer::shouldShowExceptionDetails() ) {
		$message = MWExceptionHandler::getLogMessage( $e );
		$trace = MWExceptionHandler::getRedactedTraceAsString( $e );
		return $message . "\nBacktrace:\n" . $trace;
	}

	return MWExceptionHandler::getPublicLogMessage( $e );
}
/**
 * Generate code for a response.
 *
 * Calling this method also populates the `errors` and `extraHeaders` members
 * of this object: errors are appended when image generation or module
 * generation fails, and any 'headers' entry in a module's content is merged
 * into the extra headers.
 *
 * The response is built as follows:
 * - With no modules and no missing names, a fixed explanatory comment is returned.
 * - If the context has an image object, only that image's data is returned.
 * - Otherwise each module's content is generated, formatted according to the
 *   context's "only" value, minified unless in debug mode, and concatenated.
 *   A module that throws is given the 'error' state instead.
 * - Finally, a state script is appended for modules that did not get an
 *   mw.loader.implement() call, when scripts are included and the request
 *   is not raw.
 *
 * @param Context $context Context in which to generate a response
 * @param Module[] $modules List of module objects keyed by module name
 * @param string[] $missing List of requested module names that are unregistered (optional)
 * @return string Response data
 */
public function makeModuleResponse( Context $context,
	array $modules, array $missing = []
) {
	if ( $modules === [] && $missing === [] ) {
		return <<<MESSAGE
/* This file is the Web entry point for MediaWiki's ResourceLoader:
<https://www.mediawiki.org/wiki/ResourceLoader>. In this request,
no modules were requested. Max made me put this here. */
MESSAGE;
	}

	$image = $context->getImageObj();
	if ( $image ) {
		$data = $image->getImageData( $context );
		if ( $data === false ) {
			$data = '';
			$this->errors[] = 'Image generation failed';
		}
		return $data;
	}

	$states = [];
	foreach ( $missing as $name ) {
		$states[$name] = 'missing';
	}

	$only = $context->getOnly();
	$filter = $only === 'styles' ? 'minify-css' : 'minify-js';
	$debug = (bool)$context->getDebug();

	$out = '';
	foreach ( $modules as $name => $module ) {
		try {
			$content = $module->getModuleContent( $context );
			$implementKey = $name . '@' . $module->getVersionHash( $context );
			$strContent = '';

			if ( isset( $content['headers'] ) ) {
				$this->extraHeaders = array_merge( $this->extraHeaders, $content['headers'] );
			}

			// Append output
			switch ( $only ) {
				case 'scripts':
					$scripts = $content['scripts'];
					if ( is_string( $scripts ) ) {
						// Load scripts raw...
						$strContent = $scripts;
					} elseif ( is_array( $scripts ) ) {
						// ...except when $scripts is an array of URLs or an associative array
						$strContent = self::makeLoaderImplementScript(
							$implementKey,
							$scripts,
							[],
							null,
							[]
						);
					}
					break;
				case 'styles':
					$styles = $content['styles'];
					// We no longer separate into media, they are all combined now with
					// custom media type groups into @media .. {} sections as part of the css string.
					// Module returns either an empty array or a numerical array with css strings.
					$strContent = isset( $styles['css'] ) ? implode( '', $styles['css'] ) : '';
					break;
				default:
					$scripts = $content['scripts'] ?? '';
					if ( is_string( $scripts ) ) {
						if ( $name === 'site' || $name === 'user' ) {
							// Legacy scripts that run in the global scope without a closure.
							// mw.loader.implement will use eval if scripts is a string.
							// Minify manually here, because general response minification is
							// not effective due to it being a string literal, not a function.
							if ( !$debug ) {
								$scripts = self::filter( 'minify-js', $scripts ); // T107377
							}
						} else {
							$scripts = new HtmlJsCode( $scripts );
						}
					}
					$strContent = self::makeLoaderImplementScript(
						$implementKey,
						$scripts,
						$content['styles'] ?? [],
						isset( $content['messagesBlob'] ) ? new HtmlJsCode( $content['messagesBlob'] ) : null,
						$content['templates'] ?? []
					);
					break;
			}

			if ( $debug ) {
				// In debug mode, separate each response by a new line.
				// For example, between 'mw.loader.implement();' statements.
				$strContent = self::ensureNewline( $strContent );
			} else {
				$strContent = self::filter( $filter, $strContent, [
					// Important: Do not cache minifications of embedded modules
					// This is especially for the private 'user.options' module,
					// which varies on every pageview and would explode the cache (T84960)
					'cache' => !$module->shouldEmbedModule( $context )
				] );
			}

			if ( $only === 'scripts' ) {
				// Use a linebreak between module scripts (T162719)
				$out .= self::ensureNewline( $strContent );
			} else {
				$out .= $strContent;
			}
		} catch ( TimeoutException $e ) {
			// Timeouts are not treated as a per-module failure; let them propagate.
			throw $e;
		} catch ( Exception $e ) {
			$this->outputErrorAndLog( $e, 'Generating module package failed: {exception}' );

			// Respond to client with error-state instead of module implementation
			$states[$name] = 'error';
			unset( $modules[$name] );
		}
	}

	// Update module states
	if ( $context->shouldIncludeScripts() && !$context->getRaw() ) {
		if ( $modules && $only === 'scripts' ) {
			// Set the state of modules loaded as only scripts to ready as
			// they don't have an mw.loader.implement wrapper that sets the state
			foreach ( $modules as $name => $module ) {
				$states[$name] = 'ready';
			}
		}

		// Set the state of modules we didn't respond to with mw.loader.implement
		if ( $states ) {
			$stateScript = self::makeLoaderStateScript( $context, $states );
			if ( !$debug ) {
				$stateScript = self::filter( 'minify-js', $stateScript );
			}
			// Use a linebreak between module script and state script (T162719)
			$out = self::ensureNewline( $out ) . $stateScript;
		}
	} elseif ( $states ) {
		$this->errors[] = 'Problematic modules: '
			// Silently ignore invalid UTF-8 injected via 'modules' query
			// Don't issue server-side warnings for client errors. (T331641)
			// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
			. @$context->encodeJson( $states );
	}

	return $out;
}
/**
 * Append a line break to the string unless it is empty or already ends in one.
 *
 * @internal
 * @param string $str
 * @return string The input, terminated by "\n" if it was non-empty
 */
public static function ensureNewline( $str ) {
	$lastChar = substr( $str, -1 );
	// substr() yields false or '' when there is no last character to inspect
	$needsNewline = $lastChar !== false && $lastChar !== '' && $lastChar !== "\n";

	return $needsNewline ? $str . "\n" : $str;
}
/**
 * Get names of modules that use a certain message.
 *
 * Every registered module name is checked; a module matches when the given
 * key appears in the list returned by its getMessages() method.
 *
 * @param string $messageKey
 * @return string[] List of module names
 */
public function getModulesByMessage( $messageKey ) {
	$moduleNames = [];
	foreach ( $this->getModuleNames() as $moduleName ) {
		$module = $this->getModule( $moduleName );
		// Compare strictly: loose comparison treats numeric-looking strings
		// (e.g. "1e3" and "1000") as equal, which would report false matches.
		if ( in_array( $messageKey, $module->getMessages(), true ) ) {
			$moduleNames[] = $moduleName;
		}
	}
	return $moduleNames;
}
/**
 * Return JS code that calls mw.loader.implement with given module properties.
 *
 * @param string $name Module name used as implement key (format "`[name]@[version]`")
 * @param HtmlJsCode|array|string|string[] $scripts
 *  - HtmlJsCode: Concatenated scripts to be wrapped in a closure
 *  - array: Package files array containing HtmlJsCode for individual JS files,
 *    as produced by Module::getScript().
 *  - string: Script contents to eval in global scope (for site/user scripts).
 *  - string[]: List of URLs (for debug mode).
 * @param array<string,string|array<string,string[]>> $styles
 *   Under optional key "css", there is a concatenated CSS string.
 *   Under optional key "url", there is an array by media type with URLs to stylesheets (for debug mode).
 *   These come from Module::getStyles(), formatted by Module::buildContent().
 * @param HtmlJsCode|null $messages An already JSON-encoded map from message keys to values,
 *  wrapped in an HtmlJsCode object.
 * @param array<string,string> $templates Map from template name to template source.
 * @return string JavaScript code
 * @throws InvalidArgumentException If $scripts is neither HtmlJsCode, a string nor an array
 */
private static function makeLoaderImplementScript(
	$name, $scripts, $styles, $messages, $templates
) {
	if ( $scripts instanceof HtmlJsCode ) {
		if ( $scripts->value === '' ) {
			// No script content: pass null so trimArray() can drop it when nothing follows
			$scripts = null;
		} else {
			$scripts = new HtmlJsCode( "function ( $, jQuery, require, module ) {\n{$scripts->value}\n}" );
		}
	} elseif ( is_array( $scripts ) && isset( $scripts['files'] ) ) {
		$files = $scripts['files'];
		foreach ( $files as &$file ) {
			// $file is changed (by reference) from a descriptor array to the content of the file
			// All of these essentially do $file = $file['content'];, some just have wrapping around it
			if ( $file['type'] === 'script' ) {
				// Ensure that the script has a newline at the end to close any comment in the
				// last line.
				$content = self::ensureNewline( $file['content'] );
				// Provide CJS `exports` (in addition to CJS2 `module.exports`) to package modules (T284511).
				// $/jQuery are simply used as globals instead.
				// TODO: Remove $/jQuery param from traditional module closure too (and bump caching)
				$file = new HtmlJsCode( "function ( require, module, exports ) {\n$content}" );
			} else {
				$file = $file['content'];
			}
		}
		// Break the reference so that the last element of $files is no longer
		// aliased by $file after the loop.
		unset( $file );
		$scripts = HtmlJsCode::encodeObject( [
			'main' => $scripts['main'],
			'files' => HtmlJsCode::encodeObject( $files, true )
		], true );
	} elseif ( !is_string( $scripts ) && !is_array( $scripts ) ) {
		throw new InvalidArgumentException( 'Script must be a string or an array of URLs' );
	}

	// mw.loader.implement requires 'styles', 'messages' and 'templates' to be objects (not
	// arrays). json_encode considers empty arrays to be numerical and outputs "[]" instead
	// of "{}". Force them to objects.
	$module = [
		$name,
		$scripts,
		(object)$styles,
		$messages ?? (object)[],
		(object)$templates
	];
	// Drop trailing empty arguments from the call
	self::trimArray( $module );

	// We use pretty output unconditionally to make this method simpler.
	// Minification is taken care of closer to the output.
	return Xml::encodeJsCall( 'mw.loader.implement', $module, true );
}
/**
 * Combines an associative array mapping media type to CSS into a
 * list of stylesheets, wrapping each in an "@media" block where needed.
 *
 * Stylesheets that are empty after trimming are left out. A stylesheet whose
 * transformed media type is '' or 'all' is emitted as-is; one with any other
 * string media type is wrapped in "@media <type> { ... }"; one whose
 * transformed media type is not a string is skipped.
 *
 * @param array<string,string|string[]> $stylePairs Map from media type to CSS string(s)
 * @return string[] CSS strings
 */
public static function makeCombinedStyles( array $stylePairs ) {
	$out = [];
	foreach ( $stylePairs as $media => $styles ) {
		// FileModule::getStyle can return the styles as a string or an
		// array of strings. This is to allow separation in the front-end.
		$styles = (array)$styles;
		foreach ( $styles as $style ) {
			$style = trim( $style );
			// Don't output an empty "@media print { }" block (T42498)
			if ( $style === '' ) {
				continue;
			}
			// Transform the media type based on request params and config
			// The way that this relies on $wgRequest to propagate request params is slightly evil
			//
			// Keep the result in its own variable: overwriting $media here would feed
			// the already-transformed value back into the transform for the next
			// stylesheet of the same media group.
			$transformedMedia = OutputPage::transformCssMedia( $media );

			if ( $transformedMedia === '' || $transformedMedia == 'all' ) {
				$out[] = $style;
			} elseif ( is_string( $transformedMedia ) ) {
				$out[] = "@media $transformedMedia {\n" . str_replace( "\n", "\n\t", "\t" . $style ) . "}";
			}
			// else: skip
		}
	}
	return $out;
}
/**
 * Wrapper around json_encode that avoids needless escapes,
 * and pretty-prints in debug mode.
 *
 * @param mixed $data
 * @return string|false JSON string, false on error
 */
private static function encodeJsonForScript( $data ) {
	// Pretty-print only when debug mode is on.
	$prettyFlag = self::inDebugMode() ? JSON_PRETTY_PRINT : 0;

	// Keep the output small by turning off escape modes that PHP applies by
	// default but that are not needed here.
	//
	// Most module scripts are served in JavaScript HTTP responses, but some
	// modules can also be embedded in HTML as inline scripts. Exported strings
	// may hold user-generated content or labels that genuinely contain a
	// sequence like "</script>", so either '/' or '<' must be encoded.
	// PHP escapes '/' by default. Escape '<' instead, which is less common
	// and lets URLs mostly remain readable.
	return json_encode(
		$data,
		JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_HEX_TAG | JSON_HEX_AMP | $prettyFlag
	);
}
/**
 * Build the JavaScript statement that calls mw.loader.state() with the given states.
 *
 * @internal For use by StartUpModule
 * @param Context $context
 * @param array<string,string> $states
 * @return string JavaScript code
 */
public static function makeLoaderStateScript(
	Context $context, array $states
) {
	// Invalid UTF-8 can be injected via the 'modules' query parameter.
	// That is a client error, so the warning from encoding is silenced
	// rather than reported server-side. (T331641)
	// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
	$encodedStates = @$context->encodeJson( $states );
	return 'mw.loader.state(' . $encodedStates . ');';
}
/**
 * Check whether a plain object has no properties.
 *
 * @param stdClass $obj
 * @return bool True if $obj has no properties
 */
private static function isEmptyObject( stdClass $obj ) {
	return get_object_vars( $obj ) === [];
}
/**
 * Strip trailing empty values from a list, in place.
 *
 * Walks backwards from the last index and stops at the first value
 * that is not considered empty. The following count as empty:
 *
 * - null
 * - []
 * - new HtmlJsCode( '{}' )
 * - new stdClass()
 * - (object)[]
 *
 * @param array &$array
 */
private static function trimArray( array &$array ): void {
	for ( $pos = count( $array ) - 1; $pos >= 0; $pos-- ) {
		$item = $array[$pos];
		$isEmpty = $item === null
			|| $item === []
			|| ( $item instanceof HtmlJsCode && $item->value === '{}' )
			|| ( $item instanceof stdClass && self::isEmptyObject( $item ) );
		if ( !$isEmpty ) {
			// First meaningful value from the end; keep everything before it.
			break;
		}
		unset( $array[$pos] );
	}
}
/**
 * Format JS code which calls `mw.loader.register()` with the given parameters.
 *
 * @par Example
 * @code
 *
 *     ResourceLoader::makeLoaderRegisterScript( $context, [
 *         [ $name1, $version1, $dependencies1, $group1, $source1, $skip1 ],
 *         [ $name2, $version2, $dependencies2, $group2, $source2, $skip2 ],
 *         ...
 *     ] );
 * @endcode
 *
 * @internal For use by StartUpModule only
 * @param Context $context
 * @param array[] $modules Array of module registration arrays, each containing
 *  - string: module name
 *  - string: module version
 *  - array|null: List of dependencies (optional)
 *  - string|null: Module group (optional)
 *  - string|null: Name of foreign module source, or 'local' (optional)
 *  - string|null: Script body of a skip function (optional)
 * @phan-param array<int,array{0:string,1:string,2?:?array,3?:?string,4?:?string,5?:?string}> $modules
 * @return string JavaScript code
 */
public static function makeLoaderRegisterScript(
	Context $context, array $modules
) {
	// Optimisation: Transform dependency names into indexes when possible
	// to produce smaller output. They are expanded by mw.loader.register on
	// the other end.
	$index = [];
	foreach ( $modules as $i => $module ) {
		// Build module name index
		$index[$module[0]] = $i;
	}
	foreach ( $modules as &$module ) {
		if ( isset( $module[2] ) ) {
			foreach ( $module[2] as &$dependency ) {
				if ( isset( $index[$dependency] ) ) {
					// Replace module name in dependency list with index
					$dependency = $index[$dependency];
				}
			}
			// Break the reference so later writes to $dependency cannot
			// modify the last dependency of this module.
			unset( $dependency );
		}
		// Drop trailing optional fields that are empty
		self::trimArray( $module );
	}
	// Break the reference to the last element of $modules before it is encoded.
	unset( $module );
	return 'mw.loader.register('
		. $context->encodeJson( $modules )
		. ');';
}
/**
 * Format JS code which calls `mw.loader.addSource()` with the given parameters.
 *
 *   - ResourceLoader::makeLoaderSourcesScript( $context,
 *         [ $id1 => $loadUrl, $id2 => $loadUrl, ... ]
 *     );
 *     Register sources with the given IDs and properties.
 *
 * @internal For use by StartUpModule only
 * @param Context $context
 * @param array<string,string> $sources
 * @return string JavaScript code
 */
public static function makeLoaderSourcesScript(
	Context $context, array $sources
) {
	$encodedSources = $context->encodeJson( $sources );
	return 'mw.loader.addSource(' . $encodedSources . ');';
}
/**
 * Wrap JavaScript code so that it runs after the startup module.
 *
 * The code is trimmed and placed in a function that is pushed onto
 * the RLQ array, which is created on demand if it does not exist yet.
 *
 * @param string $script JavaScript code
 * @return string JavaScript code
 */
public static function makeLoaderConditionalScript( $script ) {
	$body = trim( $script );
	return '(RLQ=window.RLQ||[]).push(function(){' . $body . '});';
}
/**
 * Wrap JavaScript code so that it runs after the given module(s).
 *
 * Produces a push onto the RLQ array (created on demand) of a
 * two-element array: the JSON-encoded module name(s), and a function
 * containing the trimmed script.
 *
 * @since 1.32
 * @param string|string[] $modules Module name(s)
 * @param string $script JavaScript code
 * @return string JavaScript code
 */
public static function makeInlineCodeWithModule( $modules, $script ) {
	$encodedModules = self::encodeJsonForScript( $modules );
	$callback = 'function(){' . trim( $script ) . '}';
	return '(RLQ=window.RLQ||[]).push([' . $encodedModules . ',' . $callback . ']);';
}
/**
 * Make an HTML script that runs given JS code after startup and base modules.
 *
 * The code will be wrapped in a closure, and it will be executed by ResourceLoader's
 * startup module if the client has adequate support for MediaWiki JavaScript code.
 *
 * @param string $script JavaScript code
 * @param string|null|false $nonce Content-Security-Policy nonce
 *  (from `OutputPage->getCSP()->getNonce()`). False means CSP is disabled and
 *  no nonce attribute is needed; null triggers a warning.
 * @return string|WrappedString HTML
 */
public static function makeInlineScript( $script, $nonce = null ) {
	$js = self::makeLoaderConditionalScript( $script );
	$escNonce = '';
	if ( $nonce === null ) {
		wfWarn( __METHOD__ . " did not get nonce. Will break CSP" );
	} elseif ( $nonce !== false ) {
		// If it was false, CSP is disabled, so no nonce attribute.
		// Nonce should be only base64 characters, so should be safe,
		// but better to be safely escaped than sorry.
		$escNonce = ' nonce="' . htmlspecialchars( $nonce ) . '"';
	}
	// The second and third arguments repeat the exact opening and closing
	// text that surrounds the script body in the generated HTML.
	// NOTE(review): presumably WrappedString uses these to merge adjacent
	// inline scripts - confirm against the WrappedString documentation.
	return new WrappedString(
		Html::inlineScript( $js, $nonce ),
		"<script$escNonce>(RLQ=window.RLQ||[]).push(function(){",
		'});</script>'
	);
}
/**
 * Build JS code that sets the MediaWiki configuration to the given values.
 *
 * If the values cannot be serialized to JSON, an exception is created and
 * logged (not thrown), and the returned code reports that error through
 * mw.log.error() instead.
 *
 * @param array $configuration List of configuration values keyed by variable name
 * @return string JavaScript code
 */
public static function makeConfigSetScript( array $configuration ) {
	$json = self::encodeJsonForScript( $configuration );
	if ( $json !== false ) {
		return "mw.config.set($json);";
	}

	// Serialization failed: log it server-side and surface it to the client.
	$error = new Exception(
		'JSON serialization of config data failed. ' .
		'This usually means the config data is not valid UTF-8.'
	);
	MWExceptionHandler::logException( $error );
	return 'mw.log.error(' . self::encodeJsonForScript( $error->__toString() ) . ');';
}
/**
 * Pack a list of module names into a compact query string.
 *
 * Names are grouped by everything before their last dot. For example,
 * `[ 'foo.bar', 'foo.baz', 'bar.baz', 'bar.quux' ]`
 * becomes `'foo.bar,baz|bar.baz,quux'`.
 *
 * ResourceLoader::expandModuleNames() reverses this process.
 * See also mw.loader#buildModulesString(), a port of this method used
 * on the client-side.
 *
 * @param string[] $modules List of module names (strings)
 * @return string Packed query string
 */
public static function makePackedModulesString( array $modules ) {
	// [ prefix => [ suffixes ] ], where '' holds names without a dot
	$suffixesByPrefix = [];
	foreach ( $modules as $name ) {
		$dot = strrpos( $name, '.' );
		if ( $dot === false ) {
			$prefix = '';
			$suffix = $name;
		} else {
			$prefix = substr( $name, 0, $dot );
			$suffix = substr( $name, $dot + 1 );
		}
		$suffixesByPrefix[$prefix][] = $suffix;
	}

	$groups = [];
	foreach ( $suffixesByPrefix as $prefix => $suffixes ) {
		$joined = implode( ',', $suffixes );
		$groups[] = $prefix === '' ? $joined : "$prefix.$joined";
	}
	return implode( '|', $groups );
}
/**
 * Unpack a string of the form `jquery.foo,bar|jquery.ui.baz,quux` into
 * a list of module names like `[ 'jquery.foo', 'jquery.bar',
 * 'jquery.ui.baz', 'jquery.ui.quux' ]`.
 *
 * ResourceLoader::makePackedModulesString() reverses this process.
 *
 * @since 1.33
 * @param string $modules Packed module name list
 * @return string[] Array of module names
 */
public static function expandModuleNames( $modules ) {
	$names = [];
	foreach ( explode( '|', $modules ) as $group ) {
		$lastDot = strrpos( $group, '.' );
		if ( strpos( $group, ',' ) === false ) {
			// A single module, not a set in foo.bar,baz notation
			$names[] = $group;
		} elseif ( $lastDot === false ) {
			// A set of prefixless modules, i.e. names without dots
			foreach ( explode( ',', $group ) as $name ) {
				$names[] = $name;
			}
		} else {
			// A shared prefix ('foo') followed by suffixes ([ 'bar', 'baz' ])
			$prefix = substr( $group, 0, $lastDot );
			foreach ( explode( ',', substr( $group, $lastDot + 1 ) ) as $suffix ) {
				$names[] = "$prefix.$suffix";
			}
		}
	}
	return $names;
}
/**
 * Determine whether debug mode is on.
 *
 * Order of priority is:
 * - 1) Request parameter,
 * - 2) Cookie,
 * - 3) Site configuration.
 *
 * The result is computed once and cached in a static member;
 * use clearCache() to force re-evaluation.
 *
 * @return int
 */
public static function inDebugMode() {
	if ( self::$debugMode === null ) {
		global $wgRequest;
		$resourceLoaderDebug = MediaWikiServices::getInstance()->getMainConfig()->get(
			MainConfigNames::ResourceLoaderDebug );
		// Each source is the fallback of the one before it: the 'debug'
		// request parameter, then the cookie, then the site configuration.
		$str = $wgRequest->getRawVal( 'debug',
			$wgRequest->getCookie( 'resourceLoaderDebug', '', $resourceLoaderDebug ? 'true' : '' )
		);
		self::$debugMode = Context::debugFromString( $str );
	}
	return self::$debugMode;
}
/**
* Reset static members used for caching.
*
* Global state and $wgRequest are evil, but we're using it right
* now and sometimes we need to be able to force ResourceLoader to
* re-evaluate the context because it has changed (e.g. in the test suite).
*
* This clears the cached debug mode, so that the next call to
* inDebugMode() computes it again.
*
* @internal For use by unit tests
* @codeCoverageIgnore
*/
public static function clearCache() {
self::$debugMode = null;
}
/**
 * Build a load.php URL for the given source and context.
 *
 * The query parameters derived from the context (plus any extras) are
 * appended to the load script of the named source.
 *
 * @since 1.24
 * @param string $source Name of the ResourceLoader source
 * @param Context $context
 * @param array $extraQuery
 * @return string URL to load.php. May be protocol-relative if $wgLoadScript is, too.
 */
public function createLoaderURL( $source, Context $context,
	array $extraQuery = []
) {
	$queryParams = self::createLoaderQuery( $context, $extraQuery );
	$loadScript = $this->getLoadScript( $source );
	return wfAppendQuery( $loadScript, $queryParams );
}
/**
* Helper for createLoaderURL()
*
* Reads the modules, language, skin, user, version, debug mode and "only"
* filter from the context, plus the 'printable' flag from the context's
* request, and forwards them to makeLoaderQuery().
*
* @since 1.24
* @see makeLoaderQuery
* @param Context $context
* @param array $extraQuery Additional query parameters, passed through to makeLoaderQuery()
* @return array
*/
protected static function createLoaderQuery(
Context $context, array $extraQuery = []
) {
return self::makeLoaderQuery(
$context->getModules(),
$context->getLanguage(),
$context->getSkin(),
$context->getUser(),
$context->getVersion(),
$context->getDebug(),
$context->getOnly(),
$context->getRequest()->getBool( 'printable' ),
// $handheld: always null here
null,
$extraQuery
);
}
/**
 * Build the query parameters for a load.php request, as an array.
 * Helper function for createLoaderURL().
 *
 * @param string[] $modules
 * @param string $lang
 * @param string $skin
 * @param string|null $user
 * @param string|null $version
 * @param int $debug
 * @param string|null $only
 * @param bool $printable
 * @param bool|null $handheld Unused as of MW 1.38
 * @param array $extraQuery Extra parameters; these never replace the ones set here
 * @return array Query parameters, sorted by key
 */
public static function makeLoaderQuery( array $modules, $lang, $skin, $user = null,
	$version = null, $debug = Context::DEBUG_OFF, $only = null,
	$printable = false, $handheld = null, array $extraQuery = []
) {
	$params = [ 'modules' => self::makePackedModulesString( $modules ) ];

	// To keep URLs short, a parameter is left out when it equals the
	// default that Context assumes for it.
	// Note: This only works while those defaults are insignificant or never
	// change; otherwise cached URLs would change meaning along with them.
	if ( $lang !== Context::DEFAULT_LANG ) {
		$params['lang'] = $lang;
	}
	if ( $skin !== Context::DEFAULT_SKIN ) {
		$params['skin'] = $skin;
	}
	if ( $debug !== Context::DEBUG_OFF ) {
		$params['debug'] = strval( $debug );
	}

	// These have no default value to compare against; null means "not set".
	if ( $user !== null ) {
		$params['user'] = $user;
	}
	if ( $version !== null ) {
		$params['version'] = $version;
	}
	if ( $only !== null ) {
		$params['only'] = $only;
	}
	if ( $printable ) {
		$params['printable'] = 1;
	}

	// Array union: keys that are already present win over $extraQuery.
	$params = $params + $extraQuery;

	// Sort by key so that equivalent queries always look the same
	ksort( $params );
	return $params;
}
/**
 * Check whether a module name is valid.
 *
 * A valid name is at most 255 bytes long and contains no pipe (|),
 * comma (,) or exclamation mark (!).
 *
 * @param string $moduleName Module name to check
 * @return bool Whether $moduleName is a valid module name
 */
public static function isValidModuleName( $moduleName ) {
	$byteLength = strlen( $moduleName );
	if ( $byteLength > 255 ) {
		return false;
	}
	// The run of characters free of '!', ',' and '|' must span the whole name
	return strcspn( $moduleName, '!,|', 0, $byteLength ) === $byteLength;
}
/**
 * Create a LESS parser configured for use with MediaWiki.
 *
 * The parser gets the given variables, has the 'relativeUrls' option turned off, and
 * searches the given import directories plus MediaWiki's own
 * "resources/src/mediawiki.less" directory. A remapping callback is registered as
 * well, which redirects a few import prefixes to directories under "resources/lib".
 *
 * @since 1.27
 * @param array $vars Associative array of variables that should be used
 *  for compilation. Since 1.32, this method no longer automatically includes
 *  global LESS vars from ResourceLoader::getLessVars (T191937).
 * @param array $importDirs Additional directories to look in for @import (since 1.36)
 * @return Less_Parser
 * @throws RuntimeException If the Less_Parser class is not available, or (from the
 *  import callback) when a stylesheet imports from '@wikimedia/codex-design-tokens/'
 */
public function getLessCompiler( array $vars = [], array $importDirs = [] ) {
	global $IP;

	// The installer may call this while a required PHP extension is still missing
	// (at least for now; see T49564). Throwing here, which the installer catches,
	// avoids a fatal error further down.
	if ( !class_exists( Less_Parser::class ) ) {
		throw new RuntimeException( 'MediaWiki requires the less.php parser' );
	}

	$importDirs[] = "$IP/resources/src/mediawiki.less";

	$parser = new Less_Parser;
	$parser->ModifyVars( $vars );
	$parser->SetOption( 'relativeUrls', false );

	// SetImportDirs expects an array like [ 'path1' => '', 'path2' => '' ]
	$formattedImportDirs = array_fill_keys( $importDirs, '' );

	// A callable entry in the import dirs array is used for path remapping.
	// It returns [ resolved path, directory of the requested path ], or
	// [ null, null ] when it has nothing to offer for $path.
	$formattedImportDirs[] = static function ( $path ) {
		global $IP;

		// Import prefix => replacement directory, or a callable that receives the
		// remainder of the path after the prefix.
		$importMap = [
			'@wikimedia/codex-icons/' => "$IP/resources/lib/codex-icons/",
			'mediawiki.skin.codex-design-tokens/' => "$IP/resources/lib/codex-design-tokens/",
			'@wikimedia/codex-design-tokens/' => /** @return never */ static function ( $unused_path ) {
				throw new RuntimeException(
					'Importing from @wikimedia/codex-design-tokens is not supported. ' .
					"To use the Codex tokens, use `@import 'mediawiki.skin.variables.less';` instead."
				);
			}
		];

		foreach ( $importMap as $importPath => $substPath ) {
			if ( !str_starts_with( $path, $importPath ) ) {
				continue;
			}

			$restOfPath = substr( $path, strlen( $importPath ) );
			if ( is_callable( $substPath ) ) {
				$resolvedPath = $substPath( $restOfPath );
			} else {
				// Try the mapped path as given first, then with ".less" appended.
				$filePath = $substPath . $restOfPath;
				$resolvedPath = null;
				foreach ( [ $filePath, "$filePath.less" ] as $candidate ) {
					if ( file_exists( $candidate ) ) {
						$resolvedPath = $candidate;
						break;
					}
				}
			}

			if ( $resolvedPath === null ) {
				// The prefix matched but nothing was found; stop looking at other prefixes.
				break;
			}

			return [
				Less_Environment::normalizePath( $resolvedPath ),
				Less_Environment::normalizePath( dirname( $path ) )
			];
		}

		return [ null, null ];
	};

	$parser->SetImportDirs( $formattedImportDirs );

	return $parser;
}
resourceloader: Fix debug mode for RL-to-RL cross-wiki module loads The native "foreign module source" feature, as used by the GlobalCssJs extension, did not work correctly in debug mode as the urls returned by the remote wiki were formatted as "/w/load.php...", which would be interpreted by the browser relative to the host document, instead of relative to the parent script. For example: 1. Page view on en.wikipedia.org. 2. Script call to meta.wikimedia.org/w/load.php?debug=true&modules=ext.globalCssJs.user&user This URL is formatted by getScriptURLsForDebug on en.wikipedia.org, when building the article HTML. It knows the modules is on Meta, and formats it as such. So far so good. 3. meta.wikimedia.org responds with an array of urls for sub resources. That array contained URLs like "/w/load.php...only=scripts". These were formatted by getScriptURLsForDebug running on Meta, no longer with a reason to make it a Meta-Wiki URL as it isn't perceived as cross-wiki. It is indistinguishable from debugging a Meta-Wiki page view from its perspective. This patch affects scenario 3 by always expanding it relative to the current-request's wgServer. We still only do this in debug mode. There is not yet a need to do this in non-debug mode, and if there was we'd likely want to find a way to avoid it in the common case to keep embedded URLs short. The ResourceLoader::expandUrl() method is similar to the one in Wikimedia\Minify\CSSMin. Test Plan: * view-source:http://mw.localhost:8080/w/load.php?debug=1&modules=site For Module base class. Before, the array entries were relative. After, they are full. * view-source:http://mw.localhost:8080/w/load.php?debug=1&modules=jquery For FileModule. Before, the array entries were relative. After, they are full. * view-source:http://mw.localhost:8080/wiki/Main_Page?debug=true Unchanged. * view-source:http://mw.localhost:8080/wiki/Main_Page Unchanged. Bug: T255367 Change-Id: I83919744b2677c7fb52b84089ecc60b89957d32a
2021-08-25 02:36:25 +00:00
/**
 * Resolve a possibly relative URL against a base URL.
 *
 * The base URL must have a server and should have a protocol.
 * A protocol-relative base (one starting with "//") is treated as HTTPS while
 * resolving, and the scheme is removed again from the result.
 *
 * This is a standalone version of MediaWiki's UrlUtils::expand (T32956).
 *
 * @internal For use by core ResourceLoader classes only
 * @param string $base
 * @param string $url
 * @return string URL
 */
public function expandUrl( string $base, string $url ): string {
	// Net_URL2::resolve() doesn't allow protocol-relative URLs, but we do.
	$protoRelative = str_starts_with( $base, '//' );
	$absoluteBase = $protoRelative ? "https:$base" : $base;

	// Net_URL2::resolve() takes care of throwing if $base doesn't have a server.
	$resolved = ( new Net_URL2( $absoluteBase ) )->resolve( $url );
	if ( $protoRelative ) {
		// Undo the temporary "https:" prefix added above.
		$resolved->setScheme( false );
	}

	return $resolved->getURL();
}
/**
 * Run JavaScript or CSS data through a filter, caching the filtered result for future calls.
 *
 * The filter name is handed to applyFilter(), which treats 'minify-css' as CSS
 * minification and any other name as JavaScript minification.
 *
 * $data is returned unmodified when it contains the self::FILTER_NOMIN marker, or
 * when filtering produced no result (null). In every other case the data has at
 * least been trimmed by applyFilter().
 *
 * @param string $filter Name of filter to run
 * @param string $data Text to filter, such as JavaScript or CSS text
 * @param array<string,bool> $options Keys:
 *  - (bool) cache: Whether to allow caching this data. Default: true.
 * @return string Filtered data or unfiltered data
 */
public static function filter( $filter, $data, array $options = [] ) {
	if ( str_contains( $data, self::FILTER_NOMIN ) ) {
		return $data;
	}

	// Only an explicit boolean false turns caching off.
	$cachingDisabled = ( $options['cache'] ?? null ) === false;
	if ( $cachingDisabled ) {
		$uncached = self::applyFilter( $filter, $data );
		return $uncached ?? $data;
	}

	$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
	$cache = ObjectCache::getLocalServerInstance( CACHE_ANYTHING );

	$cacheKey = $cache->makeGlobalKey(
		'resourceloader-filter',
		$filter,
		self::CACHE_VERSION,
		md5( $data )
	);

	// Set to true by the callback below, which is the only place that computes
	// a fresh value.
	$wasMiss = false;
	$result = $cache->getWithSetCallback(
		$cacheKey,
		BagOStuff::TTL_DAY,
		static function () use ( $filter, $data, &$wasMiss ) {
			$wasMiss = true;
			return self::applyFilter( $filter, $data );
		}
	);

	$statKey = $wasMiss
		? "resourceloader_cache.$filter.miss"
		: "resourceloader_cache.$filter.hit";
	$stats->increment( $statKey );

	// Use $data on cache failure
	return $result ?? $data;
}
/**
 * Trim the given text and minify it.
 *
 * 'minify-css' selects CSSMin; every other filter name selects JavaScriptMinifier.
 * Text that is empty (or otherwise falsy) after trimming is returned as-is without
 * being minified.
 *
 * @param string $filter
 * @param string $data
 * @return string|null Minified text, or null if the minifier threw an exception
 *  other than TimeoutException (the exception is logged in that case)
 */
private static function applyFilter( $filter, $data ) {
	$trimmed = trim( $data );
	if ( !$trimmed ) {
		return $trimmed;
	}

	try {
		if ( $filter === 'minify-css' ) {
			return CSSMin::minify( $trimmed );
		}
		return JavaScriptMinifier::minify( $trimmed );
	} catch ( TimeoutException $timeout ) {
		// Timeouts must propagate instead of being swallowed by the generic handler below.
		throw $timeout;
	} catch ( Exception $failure ) {
		MWExceptionHandler::logException( $failure );
		return null;
	}
}
resourceloader: Bundle `user.defaults` as part of `mediawiki.base` == Background == The `user.options` module is private, and thus has to be embedded in the page HTML. This data is quite large. For example, on enwiki the finalized mw.user.options object is about 3KB serialized/compressed (7KB uncompressed). The `user.defaults` module is an implementation detail of `user.options`, and was created to accomplish mainly two things: * Save significant data transfers by allowing it to be cached client-side without being part of the article. * Ensure consistency between articles and allow faster deployment of changes, by not being part of the cacheable article HTML. All our pageviews already load `user.defaults`, as a dependency of the popular `mediawiki.api` and `mediawiki.user` modules. These are used by `mediawiki.page.ready` (queued on all pages), and on Wikipedia these are also loaded on all pages by ULS, VisualEditor, EventLogging, and more. As such, in practice, bundling "user.defaults" with "mediawiki.base" will not cause the data to be loaded more often than before. == What == * Add virtual "user.json" package file with the same data that was previously exported by ResourceLoaderUserDefaultsModule, and pass it to mw.user.options.set() from base module's entry point. An alternative way would be to use a "user.js" file, which would return a generated "mw.user.options.set()" expression. I went for exporting it as JSON for improved maintainability (reducing the amount of JS code written in PHP), and because it performs slightly better. The JS file would implicitly come with a file closure (tiny bit more bytes), and would then be lazy executed (tiny bit more time). The chosen approach allows the browser to compile the JSON off-the-main-thread ahead of time while the module response downloads. Then when the module executes, we can reference the JSON object and use it directly. * Update internal dependency from `user.options`. 
* Remove `user.defaults` module without deprecation. It is an internal module with no direct use anywhere in Git (Codeseach), and no use anywhere on-wiki (Global Search). Change-Id: Id3916f94f75078808951863dea2b3a9c71b0e30c
2021-09-10 21:13:06 +00:00
/**
 * Get user default options to expose to JavaScript on all pages via `mw.user.options`.
 *
 * Starts from the defaults reported by the UserOptionsLookup and removes every
 * key that handlers of the ResourceLoaderExcludeUserOptions hook asked to exclude.
 *
 * @internal Exposed for use from Resources.php
 *
 * @param Context $context
 * @param HookContainer $hookContainer
 * @param UserOptionsLookup $userOptionsLookup
 *
 * @return array
 */
public static function getUserDefaults(
	Context $context,
	HookContainer $hookContainer,
	UserOptionsLookup $userOptionsLookup
): array {
	$defaults = $userOptionsLookup->getDefaultOptions();

	// Hook handlers append option names to this list (it is passed to the hook runner).
	$excludedKeys = [];
	( new HookRunner( $hookContainer ) )
		->onResourceLoaderExcludeUserOptions( $excludedKeys, $context );

	// Drop every default whose key was listed for exclusion.
	return array_diff_key( $defaults, array_fill_keys( $excludedKeys, true ) );
}
/**
 * Get site configuration settings to expose to JavaScript on all pages via `mw.config`.
 *
 * The result consists of a set of supported, stable variables followed by a set of
 * internal ones. Handlers of the ResourceLoaderGetConfigVars hook receive the
 * combined array before it is returned.
 *
 * @internal Exposed for use from Resources.php
 * @param Context $context
 * @param Config $conf
 * @return array
 */
public static function getSiteConfigSettings(
	Context $context, Config $conf
): array {
	$services = MediaWikiServices::getInstance();
	$contLang = $services->getContentLanguage();
	$nsInfo = $services->getNamespaceInfo();

	// Namespace related preparation
	// - wgNamespaceIds: Key-value pairs of all localized, canonical and aliases for namespaces.
	// - wgCaseSensitiveNamespaces: Array of namespaces that are case-sensitive.
	$namespaceIds = $contLang->getNamespaceIds();
	$caseSensitiveNamespaces = [];
	foreach ( $nsInfo->getCanonicalNamespaces() as $nsIndex => $canonicalName ) {
		$namespaceIds[$contLang->lc( $canonicalName )] = $nsIndex;
		if ( $nsInfo->isCapitalized( $nsIndex ) ) {
			continue;
		}
		$caseSensitiveNamespaces[] = $nsIndex;
	}

	$skin = $context->getSkin();

	// Supported and stable config vars (for use by extensions/gadgets).
	$stableVars = [
		'debug' => $context->getDebug(),
		'skin' => $skin,
		'stylepath' => $conf->get( MainConfigNames::StylePath ),
		'wgArticlePath' => $conf->get( MainConfigNames::ArticlePath ),
		'wgScriptPath' => $conf->get( MainConfigNames::ScriptPath ),
		'wgScript' => $conf->get( MainConfigNames::Script ),
		'wgSearchType' => $conf->get( MainConfigNames::SearchType ),
		'wgVariantArticlePath' => $conf->get( MainConfigNames::VariantArticlePath ),
		'wgServer' => $conf->get( MainConfigNames::Server ),
		'wgServerName' => $conf->get( MainConfigNames::ServerName ),
		'wgUserLanguage' => $context->getLanguage(),
		'wgContentLanguage' => $contLang->getCode(),
		'wgVersion' => MW_VERSION,
		'wgFormattedNamespaces' => $contLang->getFormattedNamespaces(),
		'wgNamespaceIds' => $namespaceIds,
		'wgContentNamespaces' => $nsInfo->getContentNamespaces(),
		'wgSiteName' => $conf->get( MainConfigNames::Sitename ),
		'wgDBname' => $conf->get( MainConfigNames::DBname ),
		'wgWikiID' => WikiMap::getCurrentWikiId(),
		'wgCaseSensitiveNamespaces' => $caseSensitiveNamespaces,
		'wgCommentCodePointLimit' => CommentStore::COMMENT_CHARACTER_LIMIT,
		'wgExtensionAssetsPath' => $conf->get( MainConfigNames::ExtensionAssetsPath ),
	];

	// Internal variables for use by MediaWiki core and/or ResourceLoader.
	$internalVars = [
		// @internal For mediawiki.widgets
		'wgUrlProtocols' => wfUrlProtocols(),
		// @internal For mediawiki.page.watch
		// Force object to avoid "empty" associative array from
		// becoming [] instead of {} in JS (T36604)
		'wgActionPaths' => (object)$conf->get( MainConfigNames::ActionPaths ),
		// @internal For mediawiki.language
		'wgTranslateNumerals' => $conf->get( MainConfigNames::TranslateNumerals ),
		// @internal For mediawiki.Title
		'wgExtraSignatureNamespaces' => $conf->get( MainConfigNames::ExtraSignatureNamespaces ),
		'wgLegalTitleChars' => Title::convertByteClassToUnicodeClass( Title::legalChars() ),
		'wgIllegalFileChars' => Title::convertByteClassToUnicodeClass(
			$conf->get( MainConfigNames::IllegalFileChars )
		),
	];

	// Array union keeps the stable entries first and never overwrites them.
	$vars = $stableVars + $internalVars;

	$hookRunner = new HookRunner( $services->getHookContainer() );
	$hookRunner->onResourceLoaderGetConfigVars( $vars, $skin, $conf );

	return $vars;
}
/**
 * Return the current value of this object's `errors` property.
 *
 * @internal For testing
 * @return array
 */
public function getErrors() {
return $this->errors;
}
}
// Register the un-namespaced name 'ResourceLoader' as an alias of the namespaced class.
// NOTE(review): presumably kept so that code referring to the old global class name
// keeps working — confirm before removing.
class_alias( ResourceLoader::class, 'ResourceLoader' );