Make WebRequest::getAcceptLang() return floats instead of strings

This is a follow-up for Ifcf0bc4.

I checked the callers of this method. The majority of them do not
care about these numeric values, only about the language codes. Most
callers even do an array_keys() immediately. The only effect I could
find will be visible in the ApiQueryUserInfo API module. Example:
https://en.wikipedia.org/wiki/Special:ApiSandbox#action=query&meta=userinfo&formatversion=2&uiprop=acceptlang
In this response, the numbers are currently reported as strings, but
will be reported as floats. I would argue this counts as a bugfix and
doesn't need an announcement.

This patch also splits the responsible regular expression pattern for
improved readability, utilizing the /x extended mode. No functional
change is made to the pattern; the only textual difference is that
[0-9] is now written as \d.

Change-Id: I42de46f56d7544f9781b02a2f0ed49ef85ee44a1
This commit is contained in:
Thiemo Kreuz 2020-05-14 19:44:30 +02:00 committed by Umherirrender
parent 27346b8cba
commit e874f3fb17
2 changed files with 25 additions and 15 deletions

View file

@ -1218,7 +1218,17 @@ class WebRequest {
// Break up string into pieces (languages and q factors)
if ( !preg_match_all(
'/([a-z]{1,8}(?:-[a-z]{1,8})*|\*)\s*(?:;\s*q\s*=\s*(1(?:\.0{0,3})?|0(?:\.[0-9]{0,3})?)?)?/',
'/
# a language code or a star is required
([a-z]{1,8}(?:-[a-z]{1,8})*|\*)
# from here everything is optional
\s*
(?:
# this accepts only numbers in the range ;q=0.000 to ;q=1.000
;\s*q\s*=\s*
(1(?:\.0{0,3})?|0(?:\.\d{0,3})?)?
)?
/x',
$acceptLang,
$matches,
PREG_SET_ORDER
@ -1231,8 +1241,8 @@ class WebRequest {
foreach ( $matches as $match ) {
$languageCode = $match[1];
// When not present, the default value is 1
$qValue = $match[2] ?? 1;
if ( $qValue > 0 ) {
$qValue = (float)( $match[2] ?? 1.0 );
if ( $qValue ) {
$langs[$languageCode] = $qValue;
}
}

View file

@ -604,33 +604,33 @@ class WebRequestTest extends MediaWikiIntegrationTestCase {
public static function provideLanguageData() {
return [
[ '', [], 'Empty Accept-Language header' ],
[ 'en', [ 'en' => 1 ], 'One language' ],
[ 'en;q=', [ 'en' => 1 ], 'Empty q= defaults to 1' ],
[ 'en', [ 'en' => 1.0 ], 'One language' ],
[ 'en;q=', [ 'en' => 1.0 ], 'Empty q= defaults to 1' ],
[ 'en;q=0, de;q=0. pt;q=0.0 it;q=0.0000', [], 'Zeros to be skipped' ],
[ 'EN;Q=1.0009', [ 'en' => '1.000' ], 'Limited to max. 3 decimal places' ],
[ 'en, ar', [ 'en' => 1, 'ar' => 1 ], 'Two languages listed in appearance order.' ],
[ 'EN;Q=1.0009', [ 'en' => 1.000 ], 'Limited to max. 3 decimal places' ],
[ 'en, ar', [ 'en' => 1.0, 'ar' => 1.0 ], 'Two languages listed in appearance order.' ],
[
'zh-cn,zh-tw',
[ 'zh-cn' => 1, 'zh-tw' => 1 ],
[ 'zh-cn' => 1.0, 'zh-tw' => 1.0 ],
'Two equally prefered languages, listed in appearance order per rfc3282. Checks c9119'
],
[
'es, en; q=0.5',
[ 'es' => 1, 'en' => '0.5' ],
[ 'es' => 1.0, 'en' => 0.5 ],
'Spanish as first language and English and second'
],
[ 'en; q=0.5, es', [ 'es' => 1, 'en' => '0.5' ], 'Less prefered language first' ],
[ 'fr, en; q=0.5, es', [ 'fr' => 1, 'es' => 1, 'en' => '0.5' ], 'Three languages' ],
[ 'en; q=0.5, es', [ 'es' => 1, 'en' => '0.5' ], 'Two languages' ],
[ 'en, zh;q=0', [ 'en' => 1 ], "It's Chinese to me" ],
[ 'en; q=0.5, es', [ 'es' => 1.0, 'en' => 0.5 ], 'Less prefered language first' ],
[ 'fr, en; q=0.5, es', [ 'fr' => 1.0, 'es' => 1.0, 'en' => 0.5 ], 'Three languages' ],
[ 'en; q=0.5, es', [ 'es' => 1.0, 'en' => 0.5 ], 'Two languages' ],
[ 'en, zh;q=0', [ 'en' => 1.0 ], "It's Chinese to me" ],
[
'es; q=1, pt;q=0.7, it; q=0.6, de; q=0.1, ru;q=0',
[ 'es' => '1', 'pt' => '0.7', 'it' => '0.6', 'de' => '0.1' ],
[ 'es' => 1.0, 'pt' => 0.7, 'it' => 0.6, 'de' => 0.1 ],
'Preference for Romance languages'
],
[
'en-gb, en-us; q=1',
[ 'en-gb' => 1, 'en-us' => '1' ],
[ 'en-gb' => 1.0, 'en-us' => 1.0 ],
'Two equally prefered English variants'
],
[ '_', [], 'Invalid input' ],