From cf8e22e1e41073f2eda7b955d6012effbc4aaa48 Mon Sep 17 00:00:00 2001 From: Lucas Werkmeister Date: Fri, 21 Jul 2023 13:19:29 +0200 Subject: [PATCH] LocalisationCache: Load only core data if possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract loadCoreData(), which loads only the core, non-mergeable keys from the core messages files, not the extension messages files or the JSON files. Have loadItem() call this method, skipping the full initLanguage() + recache(), if possible. Because compiling the plural rules takes up a significant amount of the time spent loading the core-only data (see discussion on Gerrit), extract readPluralFilesAndRegisterDeps() from readSourceFilesAndRegisterDeps(), and only call readSourceFilesAndRegisterDeps() in loadCoreData() (while recache() calls both). Also remove a comment about readSourceFilesAndRegisterDeps() returning false if the localisation doesn’t exist, which AFAICT hasn’t been true since change I35bbb3a7a1 (commit 8e0c0a9fc9) in 2014. Note that the new “core-only data” path in loadItem() bypasses the underlying LCStore even if the core data (or indeed all data) happens to be present in it. Some investigation and benchmarks (see the discussion on this change on Gerrit) indicate that this is usually a performance win; in particular, unless manualRecache is set, just checking whether the LCStore is expired is relatively expensive. [For further discussion, see also changes I00f2018400 and I64822e050e on Gerrit, which were later squashed into this change.]
Bug: T342418 Change-Id: I7ec2d87c0f864c7dbfd629f0b47f22dc8a6fa552 --- includes/language/LocalisationCache.php | 151 +++++++++++++++--- .../language/LocalisationCacheTest.php | 13 ++ 2 files changed, 141 insertions(+), 23 deletions(-) diff --git a/includes/language/LocalisationCache.php b/includes/language/LocalisationCache.php index 0430d7ba711..01de9f03335 100644 --- a/includes/language/LocalisationCache.php +++ b/includes/language/LocalisationCache.php @@ -130,6 +130,18 @@ class LocalisationCache { */ private $recachedLangs = []; + /** + * An array indicating whether core data for a language has been loaded. + * If the entry for a language code $code is true, + * then {@link self::$data} is guaranteed to contain an array for $code, + * with at least an entry (possibly null) for each of the {@link self::CORE_ONLY_KEYS}, + * and all the core-only keys will be marked as loaded in {@link self::$loadedItems} too. + * Additionally, there will be a 'deps' entry for $code with the dependencies tracked so far. 
+ * + * @var array + */ + private $coreDataLoaded = []; + /** * All item keys */ @@ -438,15 +450,31 @@ class LocalisationCache { * @param string $key */ private function loadItem( $code, $key ) { - if ( !isset( $this->initialisedLangs[$code] ) ) { - $this->initLanguage( $code ); - } - - // Check to see if initLanguage() loaded it for us if ( isset( $this->loadedItems[$code][$key] ) ) { return; } + if ( + in_array( $key, self::CORE_ONLY_KEYS, true ) || + // "synthetic" keys added by loadCoreData based on "fallback" + $key === 'fallbackSequence' || + $key === 'originalFallbackSequence' + ) { + if ( $this->langNameUtils->isValidBuiltInCode( $code ) ) { + $this->loadCoreData( $code ); + return; + } + } + + if ( !isset( $this->initialisedLangs[$code] ) ) { + $this->initLanguage( $code ); + + // Check to see if initLanguage() loaded it for us + if ( isset( $this->loadedItems[$code][$key] ) ) { + return; + } + } + if ( isset( $this->shallowFallbacks[$code] ) ) { $this->loadItem( $this->shallowFallbacks[$code], $key ); @@ -640,6 +668,7 @@ class LocalisationCache { $this->loadedItems[$primaryCode] =& $this->loadedItems[$fallbackCode]; $this->loadedSubitems[$primaryCode] =& $this->loadedSubitems[$fallbackCode]; $this->shallowFallbacks[$primaryCode] = $fallbackCode; + $this->coreDataLoaded[$primaryCode] =& $this->coreDataLoaded[$fallbackCode]; } /** @@ -808,8 +837,7 @@ class LocalisationCache { /** * Read the data from the source files for a given language, and register - * the relevant dependencies in the $deps array. If the localisation - * exists, the data array is returned, otherwise false is returned. + * the relevant dependencies in the $deps array. 
* * @param string $code * @param array &$deps @@ -825,12 +853,26 @@ class LocalisationCache { $data = $this->readPHPFile( $fileName, 'core' ); } - // Load CLDR plural rules for JavaScript - $data['pluralRules'] = $this->getPluralRules( $code ); - // And for PHP - $data['compiledPluralRules'] = $this->getCompiledPluralRules( $code ); - // Load plural rule types - $data['pluralRuleTypes'] = $this->getPluralRuleTypes( $code ); + return $data; + } + + /** + * Read and compile the plural data for a given language, + * and register the relevant dependencies in the $deps array. + * + * @param string $code + * @param array &$deps + * @return array + */ + private function readPluralFilesAndRegisterDeps( $code, &$deps ) { + $data = [ + // Load CLDR plural rules for JavaScript + 'pluralRules' => $this->getPluralRules( $code ), + // And for PHP + 'compiledPluralRules' => $this->getCompiledPluralRules( $code ), + // Load plural rule types + 'pluralRuleTypes' => $this->getPluralRuleTypes( $code ), + ]; foreach ( self::PLURAL_FILES as $fileName ) { $deps[] = new FileDependency( $fileName ); @@ -910,20 +952,24 @@ class LocalisationCache { } /** - * Load localisation data for a given language for both core and extensions - * and save it to the persistent cache store and the process cache - * @param string $code - * @throws MWException + * Load the core localisation data for a given language code, + * without extensions, using only the process cache. + * See {@link self::$coreDataLoaded} for what this guarantees. + * + * In addition to the core-only keys, + * {@link self::$data} may contain additional entries for $code, + * but those must not be used outside of {@link self::recache()} + * (and accordingly, they are not marked as loaded yet). 
*/ - public function recache( $code ) { + private function loadCoreData( string $code ) { if ( !$code ) { throw new MWException( "Invalid language code requested" ); } - $this->recachedLangs[ $code ] = true; + if ( $this->coreDataLoaded[$code] ?? false ) { + return; + } - # Initial values - $initialData = array_fill_keys( self::ALL_KEYS, null ); - $coreData = $initialData; + $coreData = array_fill_keys( self::CORE_ONLY_KEYS, null ); $deps = []; # Load the primary localisation from the source file @@ -958,6 +1004,63 @@ class LocalisationCache { } } + foreach ( $coreData['fallbackSequence'] as $fbCode ) { + // load core fallback data + $fbData = $this->readSourceFilesAndRegisterDeps( $fbCode, $deps ); + foreach ( self::CORE_ONLY_KEYS as $key ) { + // core-only keys are not mergeable, only set if not present in core data yet + if ( isset( $fbData[$key] ) && !isset( $coreData[$key] ) ) { + $coreData[$key] = $fbData[$key]; + } + } + } + + $coreData['deps'] = $deps; + foreach ( $coreData as $key => $item ) { + $this->data[$code][$key] ??= null; + // @phan-suppress-next-line PhanTypeArraySuspiciousNullable -- we just set a default null + $this->mergeItem( $key, $this->data[$code][$key], $item ); + if ( + in_array( $key, self::CORE_ONLY_KEYS, true ) || + // "synthetic" keys based on "fallback" (see above) + $key === 'fallbackSequence' || + $key === 'originalFallbackSequence' + ) { + // only mark core-only keys as loaded; + // we may have loaded additional ones from the source file, + // but they are not fully loaded yet, since recache() + // may have to merge in additional values from fallback languages + $this->loadedItems[$code][$key] = true; + } + } + + $this->coreDataLoaded[$code] = true; + } + + /** + * Load localisation data for a given language for both core and extensions + * and save it to the persistent cache store and the process cache + * @param string $code + * @throws MWException + */ + public function recache( $code ) { + if ( !$code ) { + throw new 
MWException( "Invalid language code requested" ); + } + $this->recachedLangs[ $code ] = true; + + # Initial values + $initialData = array_fill_keys( self::ALL_KEYS, null ); + $this->data[$code] = []; + $this->loadedItems[$code] = []; + $this->loadedSubitems[$code] = []; + $this->coreDataLoaded[$code] = false; + $this->loadCoreData( $code ); + $coreData = $this->data[$code]; + // @phan-suppress-next-line PhanTypeArraySuspiciousNullable -- guaranteed by loadCoreData() + $deps = $coreData['deps']; + $coreData += $this->readPluralFilesAndRegisterDeps( $code, $deps ); + $codeSequence = array_merge( [ $code ], $coreData['fallbackSequence'] ); $messageDirs = $this->getMessagesDirs(); @@ -1029,13 +1132,14 @@ class LocalisationCache { # Load the secondary localisation from the source file to # avoid infinite cycles on cyclic fallbacks $fbData = $this->readSourceFilesAndRegisterDeps( $csCode, $deps ); + $fbData += $this->readPluralFilesAndRegisterDeps( $csCode, $deps ); # Only merge the keys that make sense to merge foreach ( self::ALL_KEYS as $key ) { if ( !isset( $fbData[ $key ] ) ) { continue; } - if ( ( $coreData[ $key ] ) === null || self::isMergeableKey( $key ) ) { + if ( !isset( $coreData[ $key ] ) || self::isMergeableKey( $key ) ) { $this->mergeItem( $key, $csData[ $key ], $fbData[ $key ] ); } } @@ -1180,6 +1284,7 @@ class LocalisationCache { unset( $this->initialisedLangs[$code] ); unset( $this->shallowFallbacks[$code] ); unset( $this->sourceLanguage[$code] ); + unset( $this->coreDataLoaded[$code] ); foreach ( $this->shallowFallbacks as $shallowCode => $fbCode ) { if ( $fbCode === $code ) { diff --git a/tests/phpunit/includes/language/LocalisationCacheTest.php b/tests/phpunit/includes/language/LocalisationCacheTest.php index a6e0bf1d10c..635721a2316 100644 --- a/tests/phpunit/includes/language/LocalisationCacheTest.php +++ b/tests/phpunit/includes/language/LocalisationCacheTest.php @@ -3,6 +3,7 @@ use MediaWiki\Config\ServiceOptions; use 
MediaWiki\Tests\Unit\DummyServicesTrait; use Psr\Log\NullLogger; +use Wikimedia\TestingAccessWrapper; /** * @group Database @@ -150,6 +151,18 @@ class LocalisationCacheTest extends MediaWikiIntegrationTestCase { ); } + public function testLoadCoreDataAvoidsInitLanguage(): void { + $lc = $this->getMockLocalisationCache(); + + $lc->getItem( 'de', 'fallback' ); + $lc->getItem( 'de', 'rtl' ); + $lc->getItem( 'de', 'fallbackSequence' ); + $lc->getItem( 'de', 'originalFallbackSequence' ); + + $this->assertArrayNotHasKey( 'de', + TestingAccessWrapper::newFromObject( $lc )->initialisedLangs ); + } + public function testShallowFallbackForInvalidCode(): void { $lc = $this->getMockLocalisationCache(); $invalidCode = '!invalid!';