From 8c4cea2ca2abc89218825605728e1876990a6367 Mon Sep 17 00:00:00 2001 From: Kevin Israel Date: Thu, 26 Jan 2023 06:32:45 -0500 Subject: [PATCH] Clean up old ICU version checks Since 1.36, intl has been a required PHP extension, and PHP 7.4 dropped support for ICU < 50.1 (Unicode 6.2), so: * In SpecialVersion, don't check whether INTL_ICU_VERSION is defined. * Remove check in the installer for outdated Unicode normalization. It was added over twelve years ago in r70126 (a21fb8651f20f1ef) with a comment that it should be kept up to date, but no one ever did. * Remove IcuCollation::getUnicodeVersionForICU(), which contained a long list of ICU versions and corresponding Unicode versions that had to be kept up to date manually. Instead use IntlChar::getUnicodeVersion(), which was added in PHP 7.0. There are no known callers outside core. * Remove LinkFilter::supportsIDN(), as ICU has had support for UTS#46 since version 4.6. There are no known callers outside core. Also remove $flags and $variant from the idn_to_utf8() call, which match PHP 7.4's defaults. (INTL_IDNA_VARIANT_2003 was the default in 7.3.) * Display the ICU and Unicode versions in the installer, just below the PHP version. The ICU version is shown on Special:Version near the PHP version, and it probably makes sense to show it there as well. 
Change-Id: Ibdfac1a6f46fd56b84de1140292e0ec863f043ee --- RELEASE-NOTES-1.40 | 2 + includes/LinkFilter.php | 14 +---- includes/collation/IcuCollation.php | 52 ------------------- includes/installer/Installer.php | 21 ++------ includes/installer/i18n/en.json | 2 +- includes/installer/i18n/qqq.json | 2 +- includes/specials/SpecialVersion.php | 5 +- .../language/generateCollationData.php | 37 ++----------- maintenance/refreshExternallinksIndex.php | 4 +- tests/phpunit/includes/LinkFilterTest.php | 16 +++--- 10 files changed, 23 insertions(+), 132 deletions(-) diff --git a/RELEASE-NOTES-1.40 b/RELEASE-NOTES-1.40 index d282104fa46..9b28c68e855 100644 --- a/RELEASE-NOTES-1.40 +++ b/RELEASE-NOTES-1.40 @@ -274,6 +274,8 @@ because of Phabricator reports. (current PHP default) instead of 17. Extension tests may need to be adjusted accordingly; string representations of floating-point numbers in serialized or JSON-encoded data may change. +* IcuCollation::getUnicodeVersionForICU() was removed without deprecation. +* LinkFilter::supportsIDN() was removed without deprecation. * … === Deprecations in 1.40 === diff --git a/includes/LinkFilter.php b/includes/LinkFilter.php index 9ae5ae9e8da..39bbaff447b 100644 --- a/includes/LinkFilter.php +++ b/includes/LinkFilter.php @@ -82,15 +82,6 @@ class LinkFilter { return $regex; } - /** - * Indicate whether LinkFilter IDN support is available - * @since 1.33 - * @return bool - */ - public static function supportsIDN() { - return is_callable( 'idn_to_utf8' ) && defined( 'INTL_IDNA_VARIANT_UTS46' ); - } - /** * Canonicalize a hostname for el_index * @param string $host @@ -101,9 +92,8 @@ class LinkFilter { // Canonicalize. 
$host = rawurldecode( $host ); - if ( $host !== '' && self::supportsIDN() ) { - // @todo Add a PHP fallback - $tmp = idn_to_utf8( $host, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46 ); + if ( $host !== '' ) { + $tmp = idn_to_utf8( $host ); if ( $tmp !== false ) { $host = $tmp; } diff --git a/includes/collation/IcuCollation.php b/includes/collation/IcuCollation.php index 72a6770b934..fd2b2b3fdbe 100644 --- a/includes/collation/IcuCollation.php +++ b/includes/collation/IcuCollation.php @@ -484,56 +484,4 @@ class IcuCollation extends Collation { } return false; } - - /** - * Return the version of Unicode appropriate for the version of ICU library - * currently in use, or false when it can't be determined. - * - * @since 1.21 - * @return string|bool - */ - public static function getUnicodeVersionForICU() { - $icuVersion = INTL_ICU_VERSION; - if ( !$icuVersion ) { - return false; - } - - $versionPrefix = substr( $icuVersion, 0, 3 ); - // Source: https://icu.unicode.org/download - $map = [ - '71.' => '14.0', - '70.' => '14.0', - '69.' => '13.0', - '68.' => '13.0', - '67.' => '13.0', - '66.' => '13.0', - '65.' => '12.0', - '64.' => '12.0', - '63.' => '11.0', - '62.' => '11.0', - '61.' => '10.0', - '60.' => '10.0', - '59.' => '9.0', - '58.' => '9.0', - '57.' => '8.0', - '56.' => '8.0', - '55.' => '7.0', - '54.' => '7.0', - '53.' => '6.3', - '52.' => '6.3', - '51.' => '6.2', - '50.' => '6.2', - '49.' => '6.1', - '4.8' => '6.0', - '4.6' => '6.0', - '4.4' => '5.2', - '4.2' => '5.1', - '4.0' => '5.1', - '3.8' => '5.0', - '3.6' => '5.0', - '3.4' => '4.1', - ]; - - return $map[$versionPrefix] ?? 
false; - } } diff --git a/includes/installer/Installer.php b/includes/installer/Installer.php index 09978f8d662..d43fc33b2ea 100644 --- a/includes/installer/Installer.php +++ b/includes/installer/Installer.php @@ -138,6 +138,7 @@ abstract class Installer { * @var array */ protected $envChecks = [ + 'envCheckLibicu', 'envCheckDB', 'envCheckPCRE', 'envCheckMemory', @@ -149,7 +150,6 @@ abstract class Installer { 'envCheckServer', 'envCheckPath', 'envCheckUploadsDirectory', - 'envCheckLibicu', 'envCheck64Bit', ]; @@ -1106,24 +1106,11 @@ abstract class Installer { } /** - * Check the libicu version + * Check and display the libicu and Unicode versions */ protected function envCheckLibicu() { - /** - * This needs to be updated something that the latest libicu - * will properly normalize. This normalization was found at - * https://www.unicode.org/versions/Unicode5.2.0/#Character_Additions - * Note that we use the hex representation to create the code - * points in order to avoid any Unicode-destroying during transit. 
- */ - $not_normal_c = "\u{FA6C}"; - $normal_c = "\u{242EE}"; - - $intl = normalizer_normalize( $not_normal_c, Normalizer::FORM_C ); - - if ( $intl !== $normal_c ) { - $this->showMessage( 'config-unicode-update-warning' ); - } + $unicodeVersion = implode( '.', array_slice( IntlChar::getUnicodeVersion(), 0, 3 ) ); + $this->showMessage( 'config-env-icu', INTL_ICU_VERSION, $unicodeVersion ); } /** diff --git a/includes/installer/i18n/en.json b/includes/installer/i18n/en.json index e90216a34c5..2076036aa0d 100644 --- a/includes/installer/i18n/en.json +++ b/includes/installer/i18n/en.json @@ -46,7 +46,7 @@ "config-env-good": "The environment has been checked.\nYou can install MediaWiki.", "config-env-bad": "The environment has been checked.\nYou cannot install MediaWiki.", "config-env-php": "PHP $1 is installed.", - "config-unicode-update-warning": "Warning: The installed version of the [https://php.net/manual/en/book.intl.php PHP intl extension]'s Unicode normalization wrapper uses an older version of [http://site.icu-project.org/ the ICU project's] library.\nYou should [https://www.mediawiki.org/wiki/Special:MyLanguage/Unicode_normalization_considerations upgrade] if you are at all concerned about using Unicode.", + "config-env-icu": "ICU $1 is installed (supports Unicode $2).", "config-no-db": "Could not find a suitable database driver! You need to install a database driver for PHP.\nThe following database {{PLURAL:$2|type is|types are}} supported: $1.\n\nIf you compiled PHP yourself, reconfigure it with a database client enabled, for example, using ./configure --with-mysqli.\nIf you installed PHP from a Debian or Ubuntu package, then you also need to install, for example, the php-mysql package.", "config-outdated-sqlite": "Warning: you have SQLite $2, which is lower than minimum required version $1. 
SQLite will be unavailable.", "config-no-fts3": "Warning: SQLite is compiled without the [//sqlite.org/fts3.html FTS3 module], search features will be unavailable on this backend.", diff --git a/includes/installer/i18n/qqq.json b/includes/installer/i18n/qqq.json index 8b1754ebd35..f5082f78894 100644 --- a/includes/installer/i18n/qqq.json +++ b/includes/installer/i18n/qqq.json @@ -71,7 +71,7 @@ "config-env-good": "See also:\n* {{msg-mw|Config-env-bad}}", "config-env-bad": "See also:\n* {{msg-mw|Config-env-good}}", "config-env-php": "Parameters:\n* $1 - the version of PHP that has been installed\nSee also:\n* {{msg-mw|config-env-php-toolow}}", - "config-unicode-update-warning": "ICU is a body producing standard software tools for support of Unicode and other internationalization aspects. This message warns the system administrator installing MediaWiki that the server's software is not up-to-date and MediaWiki will have problems handling some characters.", + "config-env-icu": "ICU refers to [[:wikipedia:International Components for Unicode|International Components for Unicode]], a set of libraries for support of Unicode and other internationalization aspects. Parameters:\n* $1 - the version of ICU that has been installed\n* $2 - the version of Unicode supported by the installed ICU version", "config-no-db": "{{doc-important|Do not translate \"./configure --with-mysqli\" and \"php-mysql\".}}\nParameters:\n* $1 is comma separated list of database types supported by MediaWiki.\n* $2 is the count of items in $1 - for use in plural.", "config-outdated-sqlite": "Used as warning. 
Parameters:\n* $2 - the version of SQLite that has been installed\n* $1 - minimum version", "config-no-fts3": "A \"[[:wikipedia:Front and back ends|backend]]\" is a system or component that ordinary users don't interact with directly and don't need to know about, and that is responsible for a distinct task or service - for example, a storage back-end is a generic system for storing data which other applications can use. Possible alternatives for back-end are \"system\" or \"service\", or (depending on context and language) even leave it untranslated.", diff --git a/includes/specials/SpecialVersion.php b/includes/specials/SpecialVersion.php index f396c44820f..3b716e470d7 100644 --- a/includes/specials/SpecialVersion.php +++ b/includes/specials/SpecialVersion.php @@ -283,13 +283,10 @@ class SpecialVersion extends SpecialPage { $software = [ '[https://www.mediawiki.org/ MediaWiki]' => self::getVersionLinked(), '[https://php.net/ PHP]' => PHP_VERSION . " (" . PHP_SAPI . ")", + '[https://icu.unicode.org/ ICU]' => INTL_ICU_VERSION, $dbr->getSoftwareLink() => $dbr->getServerInfo(), ]; - if ( defined( 'INTL_ICU_VERSION' ) ) { - $software['[https://icu.unicode.org/ ICU]'] = INTL_ICU_VERSION; - } - // Allow a hook to add/remove items. Hooks::runner()->onSoftwareInfo( $software ); diff --git a/maintenance/language/generateCollationData.php b/maintenance/language/generateCollationData.php index 5cc1fe1d469..10c45e50050 100644 --- a/maintenance/language/generateCollationData.php +++ b/maintenance/language/generateCollationData.php @@ -61,14 +61,9 @@ class GenerateCollationData extends Maintenance { $allkeysPresent = file_exists( "{$this->dataDir}/allkeys.txt" ); $ucdallPresent = file_exists( "{$this->dataDir}/ucd.all.grouped.xml" ); - // As of January 2013, these links work for all versions of Unicode - // between 5.1 and 6.2, inclusive. 
- $allkeysURL = "https://www.unicode.org/Public/UCA/<Unicode version>/allkeys.txt"; - $ucdallURL = "https://www.unicode.org/Public/<Unicode version>/ucdxml/ucd.all.grouped.zip"; - if ( !$allkeysPresent || !$ucdallPresent ) { $icuVersion = INTL_ICU_VERSION; - $unicodeVersion = IcuCollation::getUnicodeVersionForICU(); + $unicodeVersion = implode( '.', array_slice( IntlChar::getUnicodeVersion(), 0, 3 ) ); $error = ""; @@ -83,33 +78,11 @@ class GenerateCollationData extends Maintenance { . "\n\n"; } - $versionKnown = false; - if ( version_compare( $icuVersion, "4.0", "<" ) ) { - // Extra old version - $error .= "You are using outdated version of ICU ($icuVersion), intended for " - . ( $unicodeVersion ? "Unicode $unicodeVersion" : "an unknown version of Unicode" ) - . "; this file might not be available for it, and it's not supported by MediaWiki. " - . " You are on your own; consider upgrading PHP's intl extension or try " - . "one of the files available at:"; - } elseif ( version_compare( $icuVersion, "51.0", ">=" ) ) { - // Extra recent version - $error .= "You are using ICU $icuVersion, released after this script was last updated. " - . "Check what is the Unicode version it is using at http://site.icu-project.org/download . " - . "It can't be guaranteed everything will work, but appropriate file(s) should " - . "be available at:"; - } else { - // ICU 4.0 to 50.x - $versionKnown = true; - $error .= "You are using ICU $icuVersion, intended for " - . ( $unicodeVersion ? "Unicode $unicodeVersion" : "an unknown version of Unicode" ) - . ". Appropriate file(s) should be available at:"; - } - $error .= "\n"; + $error .= "You are using ICU $icuVersion, intended for Unicode $unicodeVersion. " + .
"Appropriate file(s) should be available at:\n"; - if ( $versionKnown && $unicodeVersion ) { - $allkeysURL = str_replace( "<Unicode version>", "$unicodeVersion.0", $allkeysURL ); - $ucdallURL = str_replace( "<Unicode version>", "$unicodeVersion.0", $ucdallURL ); - } + $allkeysURL = "https://www.unicode.org/Public/UCA/$unicodeVersion/allkeys.txt"; + $ucdallURL = "https://www.unicode.org/Public/$unicodeVersion/ucdxml/ucd.all.grouped.zip"; if ( !$allkeysPresent ) { $error .= "* $allkeysURL\n"; diff --git a/maintenance/refreshExternallinksIndex.php b/maintenance/refreshExternallinksIndex.php index 87e5f4cc6a4..49a09e44480 100644 --- a/maintenance/refreshExternallinksIndex.php +++ b/maintenance/refreshExternallinksIndex.php @@ -39,9 +39,7 @@ class RefreshExternallinksIndex extends LoggedUpdateMaintenance { } protected function getUpdateKey() { - return static::class - . ' v' . LinkFilter::VERSION - . ( LinkFilter::supportsIDN() ? '+' : '-' ) . 'IDN'; + return static::class . ' v' . LinkFilter::VERSION . '+IDN'; } protected function updateSkippedMessage() { diff --git a/tests/phpunit/includes/LinkFilterTest.php b/tests/phpunit/includes/LinkFilterTest.php index 4969c115345..0a47f524fc9 100644 --- a/tests/phpunit/includes/LinkFilterTest.php +++ b/tests/phpunit/includes/LinkFilterTest.php @@ -139,10 +139,10 @@ class LinkFilterTest extends MediaWikiLangTestCase { [ 'http://', '[2001:db8:0:0:*]', 'http://[2001:0DB8::]' ], [ 'http://', '[2001:db8:0:0:*]', 'http://[2001:0DB8::123]' ], [ 'http://', '[2001:db8:0:0:*]', 'http://[2001:0DB8::123:456]' ], - [ 'http://', 'xn--f-vgaa.example.com', 'http://fóó.example.com', [ 'idn' => true ] ], - [ 'http://', 'xn--f-vgaa.example.com', 'http://f%c3%b3%C3%B3.example.com', [ 'idn' => true ] ], - [ 'http://', 'fóó.example.com', 'http://xn--f-vgaa.example.com', [ 'idn' => true ] ], - [ 'http://', 'f%c3%b3%C3%B3.example.com', 'http://xn--f-vgaa.example.com', [ 'idn' => true ] ], + [ 'http://', 'xn--f-vgaa.example.com', 'http://fóó.example.com' ], + [ 'http://',
'xn--f-vgaa.example.com', 'http://f%c3%b3%C3%B3.example.com' ], + [ 'http://', 'fóó.example.com', 'http://xn--f-vgaa.example.com' ], + [ 'http://', 'f%c3%b3%C3%B3.example.com', 'http://xn--f-vgaa.example.com' ], [ 'http://', 'f%c3%b3%C3%B3.example.com', 'http://fóó.example.com' ], [ 'http://', 'fóó.example.com', 'http://f%c3%b3%C3%B3.example.com' ], @@ -207,13 +207,9 @@ class LinkFilterTest extends MediaWikiLangTestCase { * @param string $url URL to feed to LinkFilter::makeIndexes * @param array $options * - found: (bool) Should the URL be found? (defaults true) - * - idn: (bool) Does this test require the idn conversion (default false) */ public function testMakeLikeArrayWithValidPatterns( $protocol, $pattern, $url, $options = [] ) { - $options += [ 'found' => true, 'idn' => false ]; - if ( !empty( $options['idn'] ) && !LinkFilter::supportsIDN() ) { - $this->markTestSkipped( 'LinkFilter IDN support is not available' ); - } + $options += [ 'found' => true ]; $indexes = LinkFilter::makeIndexes( $url ); $likeArray = LinkFilter::makeLikeArray( $pattern, $protocol ); @@ -234,7 +230,7 @@ class LinkFilterTest extends MediaWikiLangTestCase { $debugmsg .= "\t'$indexString'\n"; } - if ( !empty( $options['found'] ) ) { + if ( $options['found'] ) { $this->assertTrue( $matches > 0, "Search pattern '$protocol$pattern' does not find url '$url' \n$debugmsg"