2015-06-09 21:24:15 +00:00
|
|
|
<?php
|
|
|
|
|
/**
|
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
|
|
|
*
|
|
|
|
|
* @file
|
|
|
|
|
*/
|
|
|
|
|
|
2018-08-19 10:17:52 +00:00
|
|
|
use Wikimedia\StaticArrayWriter;
|
|
|
|
|
|
2015-06-09 21:24:15 +00:00
|
|
|
/**
|
language: Add missing `@ingroup`, subgroup "Languages" and ungroup files
== Ungroup file blocks
Remove `@ingroup` from `@file` blocks and keep only the class block.
This matches similar changes previously applied to API, Skins, Profile,
and ResourceLoader.
This helps make the API documentation easier to navigate.
E.g. Modules -> Language in the sidebar of
<https://doc.wikimedia.org/mediawiki-core/master/php/> as well as
<https://doc.wikimedia.org/mediawiki-core/master/php/group__Language.html>
These are currently cluttered with tons of duplicate entries for files
and classes both. We only need to group files that aren't also
documented as a class (e.g. message files, entry points, other scripts
or files that we mainly consider a data file). This has the helpful
side-effect that we don't encourage duplication of the class
description (or worse, place useful docs only in the file block), and
makes the class files consistently start with a mentally ignorable
block. Basically, unless there's something other than a class, don't
describe or group the file itself.
== Missing group
Various classes in this subtree were missing the `Language` group,
or were using different group from before T225756.
== Subgroup
For ease of navigation, move Converter subclasses to a group called
"Languages", which for documentation purposes is a subgroup of
"Language". The next commit does the same for Messages* files,
and Language subclasses (done separately for ease of review).
Change-Id: I301f471f86ba2dee924fece29a16dc3c20b5bebe
2022-06-22 22:37:31 +00:00
|
|
|
* Localisation cache storage based on PHP files and static arrays.
|
|
|
|
|
*
|
2015-06-09 21:24:15 +00:00
|
|
|
* @since 1.26
|
language: Add missing `@ingroup`, subgroup "Languages" and ungroup files
== Ungroup file blocks
Remove `@ingroup` from `@file` blocks and keep only the class block.
This matches similar changes previously applied to API, Skins, Profile,
and ResourceLoader.
This helps make the API documentation easier to navigate.
E.g. Modules -> Language in the sidebar of
<https://doc.wikimedia.org/mediawiki-core/master/php/> as well as
<https://doc.wikimedia.org/mediawiki-core/master/php/group__Language.html>
These are currently cluttered with tons of duplicate entries for files
and classes both. We only need to group files that aren't also
documented as a class (e.g. message files, entry points, other scripts
or files that we mainly consider a data file). This has the helpful
side-effect that we don't encourage duplication of the class
description (or worse, place useful docs only in the file block), and
makes the class files consistently start with a mentally ignorable
block. Basically, unless there's something other than a class, don't
describe or group the file itself.
== Missing group
Various classes in this subtree were missing the `Language` group,
or were using different group from before T225756.
== Subgroup
For ease of navigation, move Converter subclasses to a group called
"Languages", which for documentation purposes is a subgroup of
"Language". The next commit does the same for Messages* files,
and Language subclasses (done separately for ease of review).
Change-Id: I301f471f86ba2dee924fece29a16dc3c20b5bebe
2022-06-22 22:37:31 +00:00
|
|
|
* @ingroup Language
|
2015-06-09 21:24:15 +00:00
|
|
|
*/
|
|
|
|
|
class LCStoreStaticArray implements LCStore {
	/** @var string|null Code of the language being written, or null when no write is in progress. */
	private $currentLang = null;

	/** @var array Localisation data, keyed by language code, then by cache key (values are encoded). */
	private $data = [];

	/** @var string|null Path of the file being written, or null when no write is in progress. */
	private $fname = null;

	/** @var string Directory for cache files. */
	private $directory;

	/**
	 * @param array $conf Configuration; the 'directory' key is read and used as
	 *  the directory in which the per-language "<code>.l10n.php" files live.
	 */
	public function __construct( $conf = [] ) {
		$this->directory = $conf['directory'];
	}

	/**
	 * Begin writing the data of one language.
	 *
	 * Creates the cache directory if needed and preloads the existing file
	 * for this language (if there is one), so that subsequent set() calls
	 * are applied on top of the previously stored entries.
	 *
	 * @param string $code Language code
	 * @throws MWException If the cache directory cannot be created
	 */
	public function startWrite( $code ) {
		if ( !is_dir( $this->directory ) && !wfMkdirParents( $this->directory, null, __METHOD__ ) ) {
			throw new MWException( "Unable to create the localisation store " .
				"directory \"{$this->directory}\"" );
		}

		$this->currentLang = $code;
		$this->fname = $this->directory . '/' . $code . '.l10n.php';
		$this->data[$code] = [];
		if ( is_file( $this->fname ) ) {
			$this->data[$code] = require $this->fname;
		}
	}

	/**
	 * Store a value, in encoded form, for the language passed to startWrite().
	 *
	 * @param string $key
	 * @param mixed $value
	 * @throws RuntimeException If the value cannot be encoded
	 */
	public function set( $key, $value ) {
		$this->data[$this->currentLang][$key] = self::encode( $value );
	}

	/**
	 * Determine whether this array can be written out as a plain value.
	 *
	 * That is the case when every element is a scalar, null, or a nested
	 * array for which the same holds. An empty array qualifies.
	 *
	 * @param array $arr
	 * @return bool
	 */
	private static function isValueArray( array $arr ) {
		foreach ( $arr as $value ) {
			if ( is_scalar( $value )
				|| $value === null
				|| ( is_array( $value ) && self::isValueArray( $value ) )
			) {
				continue;
			}
			return false;
		}
		return true;
	}

	/**
	 * Encodes a value into an array format
	 *
	 * Scalars and null are returned as-is. Arrays and objects are wrapped in
	 * a two-element array of the form [ type, data ], see decode().
	 *
	 * @param mixed $value
	 * @return array|mixed
	 * @throws RuntimeException If the value is neither an array, an object,
	 *  a scalar, nor null
	 */
	public static function encode( $value ) {
		if ( is_array( $value ) && self::isValueArray( $value ) ) {
			// Type: scalar [v]alue.
			// Optimization: Write large arrays as one value to avoid recursive decoding cost.
			return [ 'v', $value ];
		}
		if ( is_array( $value ) || is_object( $value ) ) {
			// Type: [s]erialized.
			// Optimization: Avoid recursive decoding cost. Write arrays that contain
			// objects as one serialized value.
			return [ 's', serialize( $value ) ];
		}
		if ( is_scalar( $value ) || $value === null ) {
			// Optimization: Reduce file size by not wrapping scalar values.
			return $value;
		}

		throw new RuntimeException( 'Cannot encode ' . var_export( $value, true ) );
	}

	/**
	 * Decode something that was encoded with encode
	 *
	 * @param mixed $encoded
	 * @return array|mixed
	 * @throws RuntimeException If the type marker of a wrapped value is not recognised
	 */
	public static function decode( $encoded ) {
		if ( !is_array( $encoded ) ) {
			// Unwrapped scalar value
			return $encoded;
		}

		// Every wrapped value has the form [ type, data ].
		list( $type, $data ) = $encoded;

		switch ( $type ) {
			case 'v':
				// Value array (1.35+) or wrapped scalar value (1.32 and earlier)
				return $data;
			case 's':
				return unserialize( $data );
			case 'a':
				// Support: MediaWiki 1.34 and earlier (older file format)
				return array_map( [ __CLASS__, 'decode' ], $data );
			default:
				throw new RuntimeException(
					'Unable to decode ' . var_export( $encoded, true ) );
		}
	}

	/**
	 * Write the data collected since startWrite() to the cache file and
	 * release it from memory.
	 *
	 * @throws MWException If the cache file cannot be written
	 */
	public function finishWrite() {
		try {
			$writer = new StaticArrayWriter();
			$out = $writer->create(
				$this->data[$this->currentLang],
				'Generated by LCStoreStaticArray.php -- do not edit!'
			);

			// Do not write to the final path directly: a concurrent get() could
			// then require a half-written file. Write a temporary file in the
			// same directory and rename it into place instead.
			$tmpFname = $this->fname . '.tmp.' . getmypid() . '.' . mt_rand();
			if ( file_put_contents( $tmpFname, $out ) === false
				|| !rename( $tmpFname, $this->fname )
			) {
				// Do not leave a stray temporary file behind.
				if ( is_file( $tmpFname ) ) {
					unlink( $tmpFname );
				}
				throw new MWException( "Unable to write the localisation store " .
					"file \"{$this->fname}\"" );
			}
		} finally {
			// Release the data to manage the memory in rebuildLocalisationCache
			unset( $this->data[$this->currentLang] );
			$this->currentLang = null;
			$this->fname = null;
		}
	}

	/**
	 * Get a decoded value from the cache.
	 *
	 * The file for the given language is loaded on first use and then kept
	 * in memory.
	 *
	 * @param string $code Language code
	 * @param string $key Cache key
	 * @return mixed The decoded value, or null if the language file or the
	 *  key does not exist
	 * @throws RuntimeException If the stored entry cannot be decoded
	 */
	public function get( $code, $key ) {
		if ( !array_key_exists( $code, $this->data ) ) {
			$fname = $this->directory . '/' . $code . '.l10n.php';
			if ( !is_file( $fname ) ) {
				return null;
			}
			$this->data[$code] = require $fname;
		}
		$data = $this->data[$code];
		if ( array_key_exists( $key, $data ) ) {
			return self::decode( $data[$key] );
		}
		return null;
	}
}
|