Speed up CIDR matching from $wgSquidServersNoPurge

This adds a new generic library class IPSet, which precomputes
a reasonably-efficient data structure from the input list of
addresses and CIDR net/mask specs for fast runtime matching,
and then uses it to check trusted XFF-setters.

See also: 32b4f19f173fc5cff1029eedee63a39a2d72dd3a
Bug: 57021
Change-Id: Ia3b12fb90c3e7e492374a128943b014481cc2730
This commit is contained in:
Brandon Black 2014-05-06 13:49:26 -05:00 committed by Ori.livneh
parent d2effb755b
commit e323bc3393
9 changed files with 593 additions and 24 deletions

View file

@ -685,6 +685,7 @@ $wgAutoloadLocalClasses = array(
'IEContentAnalyzer' => 'includes/libs/IEContentAnalyzer.php',
'IEUrlExtension' => 'includes/libs/IEUrlExtension.php',
'MappedIterator' => 'includes/libs/MappedIterator.php',
'IPSet' => 'includes/libs/IPSet.php',
'JavaScriptMinifier' => 'includes/libs/JavaScriptMinifier.php',
'JSCompilerContext' => 'includes/libs/jsminplus.php',
'JSMinPlus' => 'includes/libs/jsminplus.php',

View file

@ -1050,7 +1050,7 @@ class Block {
continue;
}
# Don't check trusted IPs (includes local squids which will be in every request)
if ( wfIsTrustedProxy( $ipaddr ) ) {
if ( IP::isTrustedProxy( $ipaddr ) ) {
continue;
}
# Check both the original IP (to check against single blocks), as well as build

View file

@ -4148,39 +4148,23 @@ function wfGetIP() {
* Checks if an IP is a trusted proxy provider.
* Useful to tell if X-Forwarded-For data is possibly bogus.
* Squid cache servers for the site are whitelisted.
* @deprecated Since 1.24, use IP::isTrustedProxy()
*
* @param string $ip
* @return bool
*/
function wfIsTrustedProxy( $ip ) {
	// Thin backward-compatibility wrapper: the configured-proxy check and the
	// 'IsTrustedProxy' hook are both run by IP::isTrustedProxy().
	return IP::isTrustedProxy( $ip );
}
/**
* Checks if an IP matches a proxy we've configured.
* @deprecated Since 1.24, use IP::isConfiguredProxy()
*
* @param string $ip
* @return bool
* @since 1.23 Supports CIDR ranges in $wgSquidServersNoPurge
*/
function wfIsConfiguredProxy( $ip ) {
	// Thin backward-compatibility wrapper. This must delegate to
	// IP::isConfiguredProxy(), not IP::isTrustedProxy(): the latter also runs
	// the 'IsTrustedProxy' hook, which lets extensions change the answer and
	// would make this function report more than the site's own configuration.
	return IP::isConfiguredProxy( $ip );
}

View file

@ -1123,6 +1123,7 @@ HTML;
# Append XFF
$forwardedFor = $this->getHeader( 'X-Forwarded-For' );
if ( $forwardedFor !== false ) {
$isConfigured = IP::isConfiguredProxy( $ip );
$ipchain = array_map( 'trim', explode( ',', $forwardedFor ) );
$ipchain = array_reverse( $ipchain );
if ( $ip ) {
@ -1139,13 +1140,13 @@ HTML;
continue;
}
$curIP = IP::sanitizeIP( IP::canonicalize( $curIP ) );
if ( wfIsTrustedProxy( $curIP ) && isset( $ipchain[$i + 1] ) ) {
if ( wfIsConfiguredProxy( $curIP ) || // bug 48919; treat IP as sane
if ( IP::isTrustedProxy( $curIP ) && isset( $ipchain[$i + 1] ) ) {
if ( IP::isConfiguredProxy( $curIP ) || // bug 48919; treat IP as sane
IP::isPublic( $ipchain[$i + 1] ) ||
$wgUsePrivateIPs
) {
$nextIP = IP::canonicalize( $ipchain[$i + 1] );
if ( !$nextIP && wfIsConfiguredProxy( $ip ) ) {
if ( !$nextIP && $isConfigured ) {
// We have not yet made it past CDN/proxy servers of this site,
// so either they are misconfigured or there is some IP spoofing.
throw new MWException( "Invalid IP given in XFF '$forwardedFor'." );

277
includes/libs/IPSet.php Normal file
View file

@ -0,0 +1,277 @@
<?php
/**
* @section LICENSE
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @author Brandon Black <blblack@gmail.com>
*/
/**
* Matches IP addresses against a set of CIDR specifications
*
* Usage:
* // At startup, calculate the optimized data structure for the set:
* $ipset = new IPSet( $wgSquidServersNoPurge );
* // runtime check against cached set (returns bool):
* $allowme = $ipset->match( $ip );
*
* In rough benchmarking, this takes about 80% more time than
* in_array() checks on a short (a couple hundred at most) array
* of addresses. It's fast either way at those levels, though,
* and IPSet would scale better than in_array if the array were
* much larger.
*
* For mixed-family CIDR sets, however, this code gives well over
* 100x speedup vs iterating IP::isInRange() over an array
* of CIDR specs.
*
* The basic implementation is two separate binary trees
* (IPv4 and IPv6) as nested php arrays with keys named 0 and 1.
* The values false and true are terminal match-fail and match-success,
* otherwise the value is a deeper node in the tree.
*
* A simple depth-compression scheme is also implemented: whole-byte
* tree compression at whole-byte boundaries only, where no branching
* occurs during that whole byte of depth. A compressed node has
* keys 'comp' (the byte to compare) and 'next' (the next node to
* recurse into if 'comp' matched successfully).
*
* For example, given these inputs:
* 25.0.0.0/9
* 25.192.0.0/10
*
* The v4 tree would look like:
* root4 => array(
* 'comp' => 25,
* 'next' => array(
* 0 => true,
* 1 => array(
* 0 => false,
* 1 => true,
* ),
* ),
* );
*
* (multi-byte compression nodes were attempted as well, but were
* a net loss in my test scenarios due to additional match complexity)
*
* @since 1.24
*/
class IPSet {
	/** @var array $root4: the root of the IPv4 matching tree */
	private $root4 = array( false, false );

	/** @var array $root6: the root of the IPv6 matching tree */
	private $root6 = array( false, false );

	/**
	 * Instantiate the object from an array of CIDR specs
	 *
	 * Every spec is inserted into the per-family bit tree first; each tree is
	 * then optimized (adjacent nets merged) and compressed (single-path bytes
	 * collapsed) exactly once.
	 *
	 * Invalid input network/mask values in $cfg will result in issuing
	 * E_WARNING and/or E_USER_WARNING and the bad values being ignored.
	 *
	 * @param array $cfg array of IPv[46] CIDR specs as strings
	 */
	public function __construct( array $cfg ) {
		foreach ( $cfg as $cidr ) {
			$this->addCidr( $cidr );
		}

		// The last argument is the deepest bit at which a whole byte still fits
		// in the address: 32 - 8 for IPv4, 128 - 8 for IPv6.
		self::recOptimize( $this->root4 );
		self::recCompress( $this->root4, 0, 24 );
		self::recOptimize( $this->root6 );
		self::recCompress( $this->root6, 0, 120 );
	}

	/**
	 * Add a single CIDR spec to the internal matching trees
	 *
	 * @param string $cidr string CIDR spec, IPv[46], optional /mask (def all-1's)
	 */
	private function addCidr( $cidr ) {
		// v4 or v6 check
		if ( strpos( $cidr, ':' ) === false ) {
			$node =& $this->root4;
			$defMask = 32;
		} else {
			$node =& $this->root6;
			$defMask = 128;
		}

		// Default to all-1's mask if no netmask in the input
		if ( strpos( $cidr, '/' ) === false ) {
			$net = $cidr;
			$mask = $defMask;
		} else {
			list( $net, $mask ) = explode( '/', $cidr, 2 );
			if ( !ctype_digit( $mask ) || intval( $mask ) > $defMask ) {
				trigger_error( "IPSet: Bad mask '$mask' from '$cidr', ignored", E_USER_WARNING );
				return;
			}
			$mask = intval( $mask ); // explicit integer convert, checked above
		}

		// convert $net to an array of integer bytes, length 4 or 16:
		$raw = inet_pton( $net );
		if ( $raw === false ) {
			return; // inet_pton() sends an E_WARNING for us
		}
		$rawOrd = array_map( 'ord', str_split( $raw ) );

		// special-case: zero mask overwrites the whole tree with a pair of terminal successes
		if ( $mask === 0 ) {
			$node = array( true, true );
			return;
		}

		// iterate the bits of the address while walking the tree structure for inserts
		$curBit = 0;
		while ( true ) {
			// pick bit number $curBit (most significant first) out of its byte
			$maskShift = 7 - ( $curBit & 7 );
			$node =& $node[( $rawOrd[$curBit >> 3] & ( 1 << $maskShift ) ) >> $maskShift];
			++$curBit;
			if ( $node === true ) {
				// already added a larger supernet, no need to go deeper
				return;
			} elseif ( $curBit === $mask ) {
				// this may wipe out deeper subnets from earlier
				$node = true;
				return;
			} elseif ( $node === false ) {
				// create new subarray to go deeper
				$node = array( false, false );
			}
		}
	}

	/**
	 * Match an IP address against the set
	 *
	 * If $ip is unparseable, inet_pton may issue an E_WARNING to that effect
	 *
	 * @param string $ip string IPv[46] address
	 * @return bool true is match success, false is match failure
	 */
	public function match( $ip ) {
		$raw = inet_pton( $ip );
		if ( $raw === false ) {
			return false; // inet_pton() sends an E_WARNING for us
		}

		// 4 raw bytes means IPv4; anything else inet_pton() accepts is IPv6
		$rawOrd = array_map( 'ord', str_split( $raw ) );
		if ( count( $rawOrd ) === 4 ) {
			$node =& $this->root4;
		} else {
			$node =& $this->root6;
		}

		$curBit = 0;
		while ( true ) {
			if ( isset( $node['comp'] ) ) {
				// compressed node, matches 1 whole byte on a byte boundary
				if ( $rawOrd[$curBit >> 3] !== $node['comp'] ) {
					return false;
				}
				$curBit += 8;
				$node =& $node['next'];
			} else {
				// uncompressed node, walk in the correct direction for the current bit-value
				$maskShift = 7 - ( $curBit & 7 );
				$node =& $node[( $rawOrd[$curBit >> 3] & ( 1 << $maskShift ) ) >> $maskShift];
				++$curBit;
			}

			// a boolean is a terminal: true is a match, false is a miss
			if ( $node === true || $node === false ) {
				return $node;
			}
		}
	}

	/**
	 * Recursively merges adjacent nets into larger supernets
	 *
	 * e.g.: 8.0.0.0/8 + 9.0.0.0/8 -> 8.0.0.0/7
	 *
	 * @param array &$node Tree node to optimize, by-reference
	 * @return bool true when both children of $node are terminal successes,
	 *   i.e. the caller may replace $node itself with true
	 */
	private static function recOptimize( &$node ) {
		if ( $node[0] !== false && $node[0] !== true && self::recOptimize( $node[0] ) ) {
			$node[0] = true;
		}
		if ( $node[1] !== false && $node[1] !== true && self::recOptimize( $node[1] ) ) {
			$node[1] = true;
		}

		return $node[0] === true && $node[1] === true;
	}

	/**
	 * Recursively compresses a tree
	 *
	 * This is a very simplistic compression scheme: if we go through a whole
	 * byte of address starting at a byte boundary with no real branching
	 * other than immediate false-vs-(node|true), compress that subtree down to a single
	 * byte-matching node.
	 * The $maxCompStart check elides recursing the final 7 levels of depth (family-dependent)
	 *
	 * @param array &$node Tree node to compress, by-reference
	 * @param int $curBit current depth in the tree
	 * @param int $maxCompStart maximum depth at which compression can start, family-specific
	 */
	private static function recCompress( &$node, $curBit, $maxCompStart ) {
		if ( !( $curBit & 7 ) ) { // byte boundary, check for depth-8 single path(s)
			$byte = 0;
			$cnode =& $node;
			$i = 8;
			while ( $i-- ) {
				if ( !is_array( $cnode ) ) {
					// The path hit a terminal (true/false) before a whole byte
					// was consumed, so there is nothing to compress. Indexing a
					// boolean here gave the same outcome, but raises "Trying to
					// access array offset on value of type bool" on PHP >= 7.4.
					break;
				}
				if ( $cnode[0] === false ) {
					$byte |= 1 << $i;
					$cnode =& $cnode[1];
				} elseif ( $cnode[1] === false ) {
					$cnode =& $cnode[0];
				} else {
					// partial-byte branching, give up
					break;
				}
			}
			if ( $i === -1 ) { // means we did not exit the while() via break
				$node = array(
					'comp' => $byte,
					'next' => &$cnode,
				);
				$curBit += 8;
				if ( $cnode !== true ) {
					self::recCompress( $cnode, $curBit, $maxCompStart );
				}
				return;
			}
		}

		++$curBit;
		if ( $curBit <= $maxCompStart ) {
			if ( $node[0] !== false && $node[0] !== true ) {
				self::recCompress( $node[0], $curBit, $maxCompStart );
			}
			if ( $node[1] !== false && $node[1] !== true ) {
				self::recCompress( $node[1], $curBit, $maxCompStart );
			}
		}
	}
}

View file

@ -65,6 +65,8 @@ define( 'IP_ADDRESS_STRING',
* and IP blocks.
*/
class IP {
/** @var IPSet */
private static $ipSet = null;
/**
* Determine if a string is a valid IP address or network (CIDR prefix).
* SIIT IPv4-translated addresses are rejected.
@ -772,4 +774,52 @@ class IP {
return "$start/$bits";
}
/**
* Checks if an IP is a trusted proxy provider.
* Useful to tell if X-Forwarded-For data is possibly bogus.
* Squid cache servers for the site are whitelisted.
* @since 1.24
*
* @param string $ip
* @return bool
*/
public static function isTrustedProxy( $ip ) {
	// Start from the site's own proxy configuration ...
	$isTrusted = self::isConfiguredProxy( $ip );

	// ... then hand both values, by reference, to 'IsTrustedProxy' hook handlers
	$hookArgs = array( &$ip, &$isTrusted );
	wfRunHooks( 'IsTrustedProxy', $hookArgs );

	return $isTrusted;
}
/**
* Checks if an IP matches a proxy we've configured
* @since 1.24
*
* @param string $ip
* @return bool
*/
public static function isConfiguredProxy( $ip ) {
	global $wgSquidServers, $wgSquidServersNoPurge;

	wfProfileIn( __METHOD__ );

	if ( in_array( $ip, $wgSquidServers ) ) {
		// Listed verbatim among the singular proxy servers
		$isProxy = true;
	} else {
		// Otherwise consult the addresses and CIDR nets of the NoPurge list;
		// the matcher is built on first use and kept in self::$ipSet afterwards
		if ( !self::$ipSet ) {
			self::$ipSet = new IPSet( $wgSquidServersNoPurge );
		}
		$isProxy = self::$ipSet->match( $ip );
	}

	wfProfileOut( __METHOD__ );

	return $isProxy;
}
/**
* Clears precomputed data used for proxy support.
* Use this only for unit tests.
*/
public static function clearCaches() {
	// Drop the cached IPSet so the next isConfiguredProxy() call rebuilds it
	// from the then-current value of $wgSquidServersNoPurge
	self::$ipSet = null;
}
}

View file

@ -10,10 +10,12 @@ class WebRequestTest extends MediaWikiTestCase {
parent::setUp();
$this->oldServer = $_SERVER;
IP::clearCaches();
}
protected function tearDown() {
	// Put back the $_SERVER contents that setUp() saved in $this->oldServer
	$_SERVER = $this->oldServer;
	// Reset IP's cached proxy set so it is rebuilt on next use
	IP::clearCaches();
	parent::tearDown();
}

View file

@ -0,0 +1,252 @@
<?php
/**
* @group IPSet
*/
class IPSetTest extends MediaWikiTestCase {
	/**
	 * Provides test cases for IPSetTest::testIPSet
	 *
	 * Returns an array of test cases. Each case is an array of (description,
	 * config, tests). Description is just text output for failure messages,
	 * config is an array constructor argument for IPSet, and the tests are
	 * an array of IP => expected (boolean) result against the config dataset.
	 *
	 * Declared static so the provider can be called without a test instance.
	 *
	 * @return array
	 */
	public static function provider() {
		return array(
			array(
				'old_list_subset',
				array(
					'208.80.152.162',
					'10.64.0.123',
					'10.64.0.124',
					'10.64.0.125',
					'10.64.0.126',
					'10.64.0.127',
					'10.64.0.128',
					'10.64.0.129',
					'10.64.32.104',
					'10.64.32.105',
					'10.64.32.106',
					'10.64.32.107',
					'91.198.174.45',
					'91.198.174.46',
					'91.198.174.47',
					'91.198.174.57',
					'2620:0:862:1:A6BA:DBFF:FE30:CFB3',
					'91.198.174.58',
					'2620:0:862:1:A6BA:DBFF:FE38:FFDA',
					'208.80.152.16',
					'208.80.152.17',
					'208.80.152.18',
					'208.80.152.19',
					'91.198.174.102',
					'91.198.174.103',
					'91.198.174.104',
					'91.198.174.105',
					'91.198.174.106',
					'91.198.174.107',
					'91.198.174.81',
					'2620:0:862:1:26B6:FDFF:FEF5:B2D4',
					'91.198.174.82',
					'2620:0:862:1:26B6:FDFF:FEF5:ABB4',
					'10.20.0.113',
					'2620:0:862:102:26B6:FDFF:FEF5:AD9C',
					'10.20.0.114',
					'2620:0:862:102:26B6:FDFF:FEF5:7C38',
				),
				array(
					'0.0.0.0' => false,
					'255.255.255.255' => false,
					'10.64.0.122' => false,
					'10.64.0.123' => true,
					'10.64.0.124' => true,
					'10.64.0.129' => true,
					'10.64.0.130' => false,
					'91.198.174.81' => true,
					'91.198.174.80' => false,
					'0::0' => false,
					'ffff:ffff:ffff:ffff:FFFF:FFFF:FFFF:FFFF' => false,
					'2001:db8::1234' => false,
					'2620:0:862:1:26b6:fdff:fef5:abb3' => false,
					'2620:0:862:1:26b6:fdff:fef5:abb4' => true,
					'2620:0:862:1:26b6:fdff:fef5:abb5' => false,
				),
			),
			array(
				'new_cidr_set',
				array(
					'208.80.154.0/26',
					'2620:0:861:1::/64',
					'208.80.154.128/26',
					'2620:0:861:2::/64',
					'208.80.154.64/26',
					'2620:0:861:3::/64',
					'208.80.155.96/27',
					'2620:0:861:4::/64',
					'10.64.0.0/22',
					'2620:0:861:101::/64',
					'10.64.16.0/22',
					'2620:0:861:102::/64',
					'10.64.32.0/22',
					'2620:0:861:103::/64',
					'10.64.48.0/22',
					'2620:0:861:107::/64',
					'91.198.174.0/25',
					'2620:0:862:1::/64',
					'10.20.0.0/24',
					'2620:0:862:102::/64',
					'10.128.0.0/24',
					'2620:0:863:101::/64',
					'10.2.4.26',
				),
				array(
					'0.0.0.0' => false,
					'255.255.255.255' => false,
					'10.2.4.25' => false,
					'10.2.4.26' => true,
					'10.2.4.27' => false,
					'10.20.0.255' => true,
					'10.128.0.0' => true,
					'10.64.17.55' => true,
					'10.64.20.0' => false,
					'10.64.27.207' => false,
					'10.64.31.255' => false,
					'0::0' => false,
					'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' => false,
					'2001:DB8::1' => false,
					'2620:0:861:106::45' => false,
					'2620:0:862:103::' => false,
					'2620:0:862:102:10:20:0:113' => true,
				),
			),
			array(
				'empty_set',
				array(),
				array(
					'0.0.0.0' => false,
					'255.255.255.255' => false,
					'10.2.4.25' => false,
					'10.2.4.26' => false,
					'10.2.4.27' => false,
					'10.20.0.255' => false,
					'10.128.0.0' => false,
					'10.64.17.55' => false,
					'10.64.20.0' => false,
					'10.64.27.207' => false,
					'10.64.31.255' => false,
					'0::0' => false,
					'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' => false,
					'2001:DB8::1' => false,
					'2620:0:861:106::45' => false,
					'2620:0:862:103::' => false,
					'2620:0:862:102:10:20:0:113' => false,
				),
			),
			array(
				'edge_cases',
				array(
					'0.0.0.0',
					'255.255.255.255',
					'::',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff',
					'10.10.10.10/25', // host bits intentional
				),
				array(
					'0.0.0.0' => true,
					'255.255.255.255' => true,
					'10.2.4.25' => false,
					'10.2.4.26' => false,
					'10.2.4.27' => false,
					'10.20.0.255' => false,
					'10.128.0.0' => false,
					'10.64.17.55' => false,
					'10.64.20.0' => false,
					'10.64.27.207' => false,
					'10.64.31.255' => false,
					'0::0' => true,
					'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' => true,
					'2001:DB8::1' => false,
					'2620:0:861:106::45' => false,
					'2620:0:862:103::' => false,
					'2620:0:862:102:10:20:0:113' => false,
					'10.10.9.255' => false,
					'10.10.10.0' => true,
					'10.10.10.1' => true,
					'10.10.10.10' => true,
					'10.10.10.126' => true,
					'10.10.10.127' => true,
					'10.10.10.128' => false,
					'10.10.10.177' => false,
					'10.10.10.255' => false,
					'10.10.11.0' => false,
				),
			),
			array(
				'exercise_optimizer',
				array(
					'ffff:ffff:ffff:ffff:ffff:ffff:ffff:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:fffe:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:fffd:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:fffc:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:fffb:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:fffa:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:fff9:8000/113',
					'ffff:ffff:ffff:ffff:ffff:ffff:fff9:0/113',
					'ffff:ffff:ffff:ffff:ffff:ffff:fff8:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:fff7:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:fff6:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:fff5:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:fff4:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:fff3:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:fff2:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:fff1:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:fff0:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffef:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffee:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffec:0/111',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffeb:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffea:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffe9:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffe8:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffe7:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffe6:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffe5:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffe4:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffe3:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffe2:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffe1:0/112',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffe0:0/110',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffc0:0/107',
					'ffff:ffff:ffff:ffff:ffff:ffff:ffa0:0/107',
				),
				array(
					'0.0.0.0' => false,
					'255.255.255.255' => false,
					'::' => false,
					'ffff:ffff:ffff:ffff:ffff:ffff:ff9f:ffff' => false,
					'ffff:ffff:ffff:ffff:ffff:ffff:ffa0:0' => true,
					'ffff:ffff:ffff:ffff:ffff:ffff:ffc0:1234' => true,
					'ffff:ffff:ffff:ffff:ffff:ffff:ffed:ffff' => true,
					'ffff:ffff:ffff:ffff:ffff:ffff:fff4:4444' => true,
					'ffff:ffff:ffff:ffff:ffff:ffff:fff9:8080' => true,
					'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' => true,
				),
			),
		);
	}

	/**
	 * Validates IPSet loading and matching code
	 *
	 * Builds one IPSet per dataset and checks every listed address against it.
	 * assertSame is used so that match() must return a real boolean, not just
	 * a value that compares loosely equal to the expected one.
	 *
	 * @covers IPSet
	 * @dataProvider provider
	 *
	 * @param string $desc dataset name, used in failure messages only
	 * @param array $cfg constructor argument for IPSet
	 * @param array $tests map of IP address => expected match() result
	 */
	public function testIPSet( $desc, array $cfg, array $tests ) {
		$ipset = new IPSet( $cfg );
		foreach ( $tests as $ip => $expected ) {
			$result = $ipset->match( $ip );
			$this->assertSame( $expected, $result, "Incorrect match() result for $ip in dataset $desc" );
		}
	}
}

View file

@ -60,6 +60,8 @@ class PHPUnitMaintClass extends Maintenance {
return false;
}
);
// xdebug's default of 100 is too low for MediaWiki
ini_set( 'xdebug.max_nesting_level', 1000 );
}
public function execute() {