Added PHP port of CDB, with abstraction layer. Tested for correctness with a differential fuzz tester, not yet benchmarked. The idea is to open up new applications for CDB, and benefit both shell and shared hosting users.

Ported existing uses of CDB to the new abstraction layer.
This commit is contained in:
Tim Starling 2009-06-20 15:59:56 +00:00
parent 3acae9fff5
commit d93ea4874e
7 changed files with 552 additions and 7 deletions

View file

@ -89,6 +89,9 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
* Add $wgRevokePermissions as a means of restricting a group's rights. The syntax is
identical to $wgGroupPermissions, but users in these groups will have these rights
stripped from them.
* Added a PHP port of CDB (constant database), for improved local caching when
the DBA extension is not available.
=== Bug fixes in 1.16 ===

View file

@ -27,6 +27,13 @@ $wgAutoloadLocalClasses = array(
'Categoryfinder' => 'includes/Categoryfinder.php',
'CategoryPage' => 'includes/CategoryPage.php',
'CategoryViewer' => 'includes/CategoryPage.php',
'CdbFunctions' => 'includes/Cdb_PHP.php',
'CdbReader' => 'includes/Cdb.php',
'CdbReader_DBA' => 'includes/Cdb.php',
'CdbReader_PHP' => 'includes/Cdb_PHP.php',
'CdbWriter' => 'includes/Cdb.php',
'CdbWriter_DBA' => 'includes/Cdb.php',
'CdbWriter_PHP' => 'includes/Cdb_PHP.php',
'ChangesList' => 'includes/ChangesList.php',
'ChangesFeed' => 'includes/ChangesFeed.php',
'ChangeTags' => 'includes/ChangeTags.php',

147
includes/Cdb.php Normal file
View file

@ -0,0 +1,147 @@
<?php
/**
* Read from a CDB file.
* Native and pure PHP implementations are provided.
* http://cr.yp.to/cdb.html
*/
abstract class CdbReader {
	/**
	 * Open a file and return a reader instance.
	 *
	 * The DBA-backed reader is used when haveExtension() reports that the
	 * native handlers are present; otherwise the pure PHP reader is used.
	 *
	 * @param $fileName String: path of the CDB file to read
	 * @return CdbReader_DBA or CdbReader_PHP
	 */
	public static function open( $fileName ) {
		if ( self::haveExtension() ) {
			return new CdbReader_DBA( $fileName );
		} else {
			// Newline-terminated so the message does not run into the next debug entry
			wfDebug( "Warning: no dba extension found, using emulation.\n" );
			return new CdbReader_PHP( $fileName );
		}
	}

	/**
	 * Returns true if the native extension is available.
	 *
	 * Both the 'cdb' (read) and 'cdb_make' (write) DBA handlers must be
	 * present; if either is missing the native implementation is not used.
	 *
	 * @return Boolean
	 */
	public static function haveExtension() {
		if ( !function_exists( 'dba_handlers' ) ) {
			return false;
		}
		$handlers = dba_handlers();
		if ( !in_array( 'cdb', $handlers ) || !in_array( 'cdb_make', $handlers ) ) {
			return false;
		}
		return true;
	}

	/**
	 * Construct the object and open the file
	 *
	 * @param $fileName String: path of the CDB file to read
	 */
	abstract function __construct( $fileName );

	/**
	 * Close the file. Optional, you can just let the variable go out of scope.
	 */
	abstract function close();

	/**
	 * Get a value with a given key. Only string values are supported.
	 *
	 * @param $key String
	 */
	abstract public function get( $key );
}
/**
* Write to a CDB file.
* Native and pure PHP implementations are provided.
*/
abstract class CdbWriter {
	/**
	 * Open a writer and return a writer instance.
	 * The user must have write access to the directory, for temporary file creation.
	 *
	 * The DBA-backed writer is used when CdbReader::haveExtension() reports
	 * that the native handlers are present; otherwise the pure PHP writer is used.
	 *
	 * @param $fileName String: path of the CDB file to create
	 * @return CdbWriter_DBA or CdbWriter_PHP
	 */
	public static function open( $fileName ) {
		if ( CdbReader::haveExtension() ) {
			return new CdbWriter_DBA( $fileName );
		} else {
			// Newline-terminated so the message does not run into the next debug entry
			wfDebug( "Warning: no dba extension found, using emulation.\n" );
			return new CdbWriter_PHP( $fileName );
		}
	}

	/**
	 * Create the object and open the file
	 *
	 * @param $fileName String: path of the CDB file to create
	 */
	abstract function __construct( $fileName );

	/**
	 * Set a key to a given value. The value will be converted to string.
	 *
	 * @param $key String
	 * @param $value String
	 */
	abstract public function set( $key, $value );

	/**
	 * Close the writer object. You should call this function before the object
	 * goes out of scope, to write out the final hashtables.
	 */
	abstract public function close();
}
/**
* Reader class which uses the DBA extension
*/
class CdbReader_DBA extends CdbReader {
	/** DBA handle returned by dba_open(); unset once the reader is closed */
	var $handle;

	/**
	 * Open the file read-only through the native 'cdb' DBA handler.
	 *
	 * @param $fileName String: path of the CDB file to read
	 * @throws MWException if the file cannot be opened
	 */
	function __construct( $fileName ) {
		$this->handle = dba_open( $fileName, 'r-', 'cdb' );
		if ( !$this->handle ) {
			throw new MWException( 'Unable to open DB file "' . $fileName . '"' );
		}
	}

	/**
	 * Close the DBA handle. Calling this more than once is harmless.
	 */
	function close() {
		if ( isset( $this->handle ) ) {
			dba_close( $this->handle );
		}
		unset( $this->handle );
	}

	/**
	 * Fetch the value stored under $key.
	 *
	 * @param $key String
	 * @return The result of dba_fetch() for this key
	 */
	function get( $key ) {
		return dba_fetch( $key, $this->handle );
	}
}
/**
* Writer class which uses the DBA extension
*/
class CdbWriter_DBA extends CdbWriter {
	/**
	 * $handle: DBA handle for the temporary file, set only while the writer is open
	 * $realFileName: the final destination path
	 * $tmpFileName: the randomly-suffixed file that is written first
	 */
	var $handle, $realFileName, $tmpFileName;

	/**
	 * Start writing a new database to a temporary file next to $fileName.
	 *
	 * @param $fileName String: final path of the CDB file
	 * @throws MWException if the temporary file cannot be opened
	 */
	function __construct( $fileName ) {
		$this->realFileName = $fileName;
		$this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff );
		$handle = dba_open( $this->tmpFileName, 'n', 'cdb_make' );
		if ( !$handle ) {
			throw new MWException( 'Unable to open DB file for write "' . $fileName . '"' );
		}
		// Only remember the handle on success, so that __destruct() does not
		// try to close a writer that was never opened.
		$this->handle = $handle;
	}

	/**
	 * Insert a key/value pair.
	 *
	 * @param $key String
	 * @param $value String
	 * @return The result of dba_insert()
	 */
	function set( $key, $value ) {
		return dba_insert( $key, $value, $this->handle );
	}

	/**
	 * Close the temporary file and move it over the real file name.
	 * Does nothing if the writer is already closed.
	 *
	 * @throws MWException if the temporary file cannot be renamed
	 */
	function close() {
		if ( !isset( $this->handle ) ) {
			return;
		}
		dba_close( $this->handle );
		// Forget the handle before anything below can throw, otherwise the
		// destructor would call close() again on an already-closed handle.
		unset( $this->handle );

		// NOTE(review): the unlink is presumably needed because rename() cannot
		// replace an existing file on Windows -- confirm.
		if ( wfIsWindows() && file_exists( $this->realFileName ) ) {
			unlink( $this->realFileName );
		}
		if ( !rename( $this->tmpFileName, $this->realFileName ) ) {
			throw new MWException( 'Unable to move the new CDB file into place.' );
		}
	}

	/**
	 * Make sure the database is finalised if the caller forgot to close it.
	 */
	function __destruct() {
		if ( isset( $this->handle ) ) {
			$this->close();
		}
	}
}

388
includes/Cdb_PHP.php Normal file
View file

@ -0,0 +1,388 @@
<?php
/**
* This is a port of D.J. Bernstein's CDB to PHP. It's based on the copy that
* appears in PHP 5.3. Changes are:
* * Error returns replaced with exceptions
* * Exception thrown if sizes or offsets are between 2GB and 4GB
* * Some variables renamed
*/
/**
* Common functions for readers and writers
*/
class CdbFunctions {
	/**
	 * Do a sum of 32-bit integers with 2's complement overflow.
	 *
	 * On 32-bit PHP the operands and the result are signed 32-bit integers.
	 * PHP's plus operator converts to float on overflow there, but the bitwise
	 * operators (&, |, ^, ~, <<, >>) work on the raw integer, so the sum is
	 * rebuilt from the low 30 bits and the top two bits separately.
	 *
	 * On 64-bit PHP the native sum of two 32-bit values can never overflow, so
	 * it is simply truncated to its low 32 bits. The result is then in the
	 * range 0..0xffffffff.
	 *
	 * @param $a Integer
	 * @param $b Integer
	 * @return Integer
	 */
	public static function sumWithOverflow( $a, $b ) {
		if ( PHP_INT_SIZE > 4 ) {
			// Without this mask the result would silently grow past 32 bits
			return ( $a + $b ) & 0xffffffff;
		}

		$sum = $a + $b;
		if ( is_float( $sum ) ) {
			// Use the plus operator to do a sum of the lowest 30 bits to produce a 31-bit result
			$lowA = $a & 0x3fffffff;
			$lowB = $b & 0x3fffffff;
			$sum = $lowA + $lowB;

			// Strip off the carry bit
			$carry = ( $sum & 0x40000000 ) >> 30;
			$sum = $sum & 0x3fffffff;

			// Get the last two bits
			$highA = self::unsignedShiftRight( $a, 30 );
			$highB = self::unsignedShiftRight( $b, 30 );

			// Add with carry
			$highSum = $carry + $highA + $highB;

			// Recombine
			$sum = $sum | ( $highSum << 30 );
		}
		return $sum;
	}

	/**
	 * Take a modulo of a signed integer as if it were an unsigned integer.
	 * $b must be less than 0x40000000 and greater than 0
	 *
	 * @param $a Integer
	 * @param $b Integer
	 * @return Integer
	 */
	public static function unsignedMod( $a, $b ) {
		if ( $a < 0 ) {
			// Reduce the low 31 bits and 2^31 (as 2 * 2^30) separately
			$m = ( $a & 0x7fffffff ) % $b + 2 * ( 0x40000000 % $b );
			return $m % $b;
		} else {
			return $a % $b;
		}
	}

	/**
	 * Shift a signed integer right as if it were unsigned
	 *
	 * @param $a Integer
	 * @param $b Integer: number of bits to shift by
	 * @return Integer
	 */
	public static function unsignedShiftRight( $a, $b ) {
		if ( $b == 0 ) {
			return $a;
		}
		if ( $a < 0 ) {
			// Shift the low 31 bits, then put the sign bit back at its shifted position
			return ( ( $a & 0x7fffffff ) >> $b ) | ( 0x40000000 >> ( $b - 1 ) );
		} else {
			return $a >> $b;
		}
	}

	/**
	 * Calculate the hash of a key: start from 5381 and, for each byte,
	 * set h = ( h + ( h << 5 ) ) ^ byte, using 32-bit wrapping addition.
	 *
	 * @param $s String
	 * @return Integer: 32-bit hash; signed on 32-bit PHP, 0..0xffffffff on 64-bit PHP
	 */
	public static function hash( $s ) {
		$h = 5381;
		$len = strlen( $s );
		for ( $i = 0; $i < $len; $i++ ) {
			$h = self::sumWithOverflow( $h, $h << 5 ) ^ ord( $s[$i] );
		}
		return $h;
	}
}
/**
* CDB reader class
*/
class CdbReader_PHP extends CdbReader {
	/** The file handle */
	var $handle;

	/* number of hash slots searched under this key */
	var $loop;

	/* initialized if loop is nonzero */
	var $khash;

	/* initialized if loop is nonzero */
	var $kpos;

	/* initialized if loop is nonzero */
	var $hpos;

	/* initialized if loop is nonzero */
	var $hslots;

	/* initialized if findNext() returns true */
	var $dpos;

	/* initialized if findNext() returns true */
	var $dlen;

	/**
	 * Open the file for binary reading and reset the search state.
	 *
	 * @param $fileName String: path of the CDB file to read
	 * @throws MWException if the file cannot be opened
	 */
	function __construct( $fileName ) {
		$this->handle = fopen( $fileName, 'rb' );
		if ( !$this->handle ) {
			throw new MWException( 'Unable to open DB file "' . $fileName . '"' );
		}
		$this->findStart();
	}

	/**
	 * Close the file. Calling this more than once is harmless.
	 */
	function close() {
		if ( isset( $this->handle ) ) {
			fclose( $this->handle );
		}
		unset( $this->handle );
	}

	/**
	 * Get the value stored under a key.
	 *
	 * @param $key String (other types are converted with strval())
	 * @return String value, or false if the key was not found
	 */
	public function get( $key ) {
		// strval is required
		if ( $this->find( strval( $key ) ) ) {
			return $this->read( $this->dlen, $this->dpos );
		} else {
			return false;
		}
	}

	/**
	 * Check whether the bytes stored at $pos are exactly $key.
	 *
	 * @param $key String
	 * @param $pos Integer: file offset
	 * @return Boolean
	 */
	protected function match( $key, $pos ) {
		$buf = $this->read( strlen( $key ), $pos );
		return $buf === $key;
	}

	/**
	 * Reset the search state so that the next findNext() starts a new lookup.
	 */
	protected function findStart() {
		$this->loop = 0;
	}

	/**
	 * Read exactly $length bytes starting at file offset $pos.
	 *
	 * @param $length Integer
	 * @param $pos Integer
	 * @return String
	 * @throws MWException if the seek fails or fewer bytes are available
	 */
	protected function read( $length, $pos ) {
		if ( fseek( $this->handle, $pos ) == -1 ) {
			// This can easily happen if the internal pointers are incorrect
			throw new MWException( __METHOD__.': seek failed, file may be corrupted.' );
		}

		if ( $length == 0 ) {
			return '';
		}

		$buf = fread( $this->handle, $length );
		if ( $buf === false || strlen( $buf ) !== $length ) {
			throw new MWException( __METHOD__.': read from cdb file failed, file may be corrupted' );
		}
		return $buf;
	}

	/**
	 * Unpack an unsigned integer and throw an exception if it needs more than 31 bits
	 *
	 * @param $s String: at least 4 bytes, little-endian
	 * @return Integer
	 * @throws MWException if the value does not fit in 31 bits
	 */
	protected function unpack31( $s ) {
		$data = unpack( 'V', $s );
		// Where PHP integers are 32 bits wide, unpack() reports values with
		// the top bit set as negative numbers, so check for both forms.
		if ( $data[1] > 0x7fffffff || $data[1] < 0 ) {
			throw new MWException( __METHOD__.': error in CDB file, integer too big' );
		}
		return $data[1];
	}

	/**
	 * Unpack a 32-bit integer from two little-endian 16-bit halves.
	 *
	 * @param $s String: at least 4 bytes
	 * @return Integer
	 */
	protected function unpackSigned( $s ) {
		$data = unpack( 'va/vb', $s );
		return $data['a'] | ( $data['b'] << 16 );
	}

	/**
	 * Continue (or, if $this->loop is zero, begin) the search for $key.
	 * On success $this->dpos and $this->dlen describe the value.
	 *
	 * @param $key String
	 * @return Boolean: true if a matching record was found
	 */
	protected function findNext( $key ) {
		if ( !$this->loop ) {
			$u = CdbFunctions::hash( $key );
			// The low 8 bits of the hash select an 8-byte entry in the first 2048 bytes
			$buf = $this->read( 8, ( $u << 3 ) & 2047 );
			$this->hslots = $this->unpack31( substr( $buf, 4 ) );
			if ( !$this->hslots ) {
				return false;
			}
			$this->hpos = $this->unpack31( substr( $buf, 0, 4 ) );
			$this->khash = $u;
			// The remaining bits of the hash select the starting slot
			$u = CdbFunctions::unsignedShiftRight( $u, 8 );
			$u = CdbFunctions::unsignedMod( $u, $this->hslots );
			$u <<= 3;
			$this->kpos = $this->hpos + $u;
		}

		while ( $this->loop < $this->hslots ) {
			$buf = $this->read( 8, $this->kpos );
			$pos = $this->unpack31( substr( $buf, 4 ) );
			if ( !$pos ) {
				// Empty slot: the key is not present
				return false;
			}
			$this->loop += 1;
			$this->kpos += 8;
			if ( $this->kpos == $this->hpos + ( $this->hslots << 3 ) ) {
				// Wrap around to the start of the hashtable
				$this->kpos = $this->hpos;
			}
			$u = $this->unpackSigned( substr( $buf, 0, 4 ) );
			if ( $u === $this->khash ) {
				$buf = $this->read( 8, $pos );
				$keyLen = $this->unpack31( substr( $buf, 0, 4 ) );
				if ( $keyLen == strlen( $key ) && $this->match( $key, $pos + 8 ) ) {
					// Found
					$this->dlen = $this->unpack31( substr( $buf, 4 ) );
					$this->dpos = $pos + 8 + $keyLen;
					return true;
				}
			}
		}
		return false;
	}

	/**
	 * Search for $key from the beginning.
	 *
	 * @param $key String
	 * @return Boolean: true if the key was found
	 */
	protected function find( $key ) {
		$this->findStart();
		return $this->findNext( $key );
	}
}
/**
* CDB writer class
*/
class CdbWriter_PHP extends CdbWriter {
	/**
	 * $handle: file handle of the temporary file, set only while the writer is open
	 * $realFileName: the final destination path
	 * $tmpFileName: the randomly-suffixed file that is written first
	 */
	var $handle, $realFileName, $tmpFileName;

	/** List of array( 'h' => hash, 'p' => file position ), one per record written */
	var $hplist;

	/**
	 * $numentries: number of records written so far
	 * $pos: file position at which the next record will be written
	 */
	var $numentries, $pos;

	/**
	 * Start writing a new database to a temporary file next to $fileName.
	 *
	 * @param $fileName String: final path of the CDB file
	 * @throws MWException if the temporary file cannot be opened or seeked
	 */
	function __construct( $fileName ) {
		$this->realFileName = $fileName;
		$this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff );
		$handle = fopen( $this->tmpFileName, 'wb' );
		if ( !$handle ) {
			throw new MWException( 'Unable to open DB file for write "' . $fileName . '"' );
		}
		// Only remember the handle on success, so that __destruct() does not
		// try to finish a file that was never opened.
		$this->handle = $handle;
		$this->hplist = array();
		$this->numentries = 0;
		$this->pos = 2048; // leaving space for the pointer array, 256 * 8
		if ( fseek( $this->handle, $this->pos ) == -1 ) {
			throw new MWException( __METHOD__.': fseek failed' );
		}
	}

	/**
	 * Make sure the database is finalised if the caller forgot to close it.
	 */
	function __destruct() {
		if ( isset( $this->handle ) ) {
			$this->close();
		}
	}

	/**
	 * Append a record. Empty keys are silently ignored.
	 *
	 * @param $key String
	 * @param $value String (other types are converted with strval())
	 */
	public function set( $key, $value ) {
		$key = strval( $key );
		if ( $key === '' ) {
			// DBA cross-check hack
			return;
		}
		$value = strval( $value );
		$this->addbegin( strlen( $key ), strlen( $value ) );
		$this->write( $key );
		$this->write( $value );
		$this->addend( strlen( $key ), strlen( $value ), CdbFunctions::hash( $key ) );
	}

	/**
	 * Write out the hashtables, close the temporary file and move it over
	 * the real file name. Does nothing if the writer is already closed.
	 *
	 * @throws MWException on write failure or if the file cannot be renamed
	 */
	public function close() {
		if ( !isset( $this->handle ) ) {
			return;
		}
		try {
			$this->finish();
		} catch ( Exception $e ) {
			// Release the handle so the destructor does not run finish() again
			fclose( $this->handle );
			unset( $this->handle );
			throw $e;
		}
		fclose( $this->handle );
		unset( $this->handle );

		// NOTE(review): the unlink is presumably needed because rename() cannot
		// replace an existing file on Windows -- confirm.
		if ( wfIsWindows() && file_exists( $this->realFileName ) ) {
			unlink( $this->realFileName );
		}
		if ( !rename( $this->tmpFileName, $this->realFileName ) ) {
			throw new MWException( 'Unable to move the new CDB file into place.' );
		}
	}

	/**
	 * Write a buffer at the current file position.
	 *
	 * @param $buf String
	 * @throws MWException if not all bytes could be written
	 */
	protected function write( $buf ) {
		$len = fwrite( $this->handle, $buf );
		if ( $len !== strlen( $buf ) ) {
			throw new MWException( 'Error writing to CDB file.' );
		}
	}

	/**
	 * Advance the tracked file position, refusing to go beyond 31 bits.
	 *
	 * @param $len Integer: number of bytes just written
	 * @throws MWException if the new position would exceed 0x7fffffff
	 */
	protected function posplus( $len ) {
		$newpos = $this->pos + $len;
		if ( $newpos > 0x7fffffff ) {
			throw new MWException( 'A value in the CDB file is too large' );
		}
		$this->pos = $newpos;
	}

	/**
	 * Register the record that was just written and advance the position past it.
	 *
	 * @param $keylen Integer
	 * @param $datalen Integer
	 * @param $h Integer: hash of the key
	 */
	protected function addend( $keylen, $datalen, $h ) {
		$this->hplist[] = array(
			'h' => $h,
			'p' => $this->pos
		);

		$this->numentries++;
		$this->posplus( 8 );
		$this->posplus( $keylen );
		$this->posplus( $datalen );
	}

	/**
	 * Write the 8-byte record header (key length, data length).
	 *
	 * @param $keylen Integer
	 * @param $datalen Integer
	 * @throws MWException if either length needs more than 31 bits
	 */
	protected function addbegin( $keylen, $datalen ) {
		if ( $keylen > 0x7fffffff ) {
			throw new MWException( __METHOD__.': key length too long' );
		}
		if ( $datalen > 0x7fffffff ) {
			throw new MWException( __METHOD__.': data length too long' );
		}
		$buf = pack( 'VV', $keylen, $datalen );
		$this->write( $buf );
	}

	/**
	 * Write the 256 hashtables after the records, then the pointer array
	 * at the start of the file.
	 *
	 * @throws MWException on write or rewind failure
	 */
	protected function finish() {
		// Hack for DBA cross-check
		$this->hplist = array_reverse( $this->hplist );

		// Calculate the number of items that will be in each hashtable
		$counts = array_fill( 0, 256, 0 );
		foreach ( $this->hplist as $item ) {
			++ $counts[ 255 & $item['h'] ];
		}

		// Fill in $starts with the *end* indexes
		$starts = array();
		$pos = 0;
		for ( $i = 0; $i < 256; ++$i ) {
			$pos += $counts[$i];
			$starts[$i] = $pos;
		}

		// Excessively clever and indulgent code to simultaneously fill $packedTables
		// with the packed hashtables, and adjust the elements of $starts
		// to actually point to the starts instead of the ends.
		// (array_fill() rejects a zero count on older PHP versions, hence the guard.)
		$packedTables = $this->numentries
			? array_fill( 0, $this->numentries, false )
			: array();
		foreach ( $this->hplist as $item ) {
			$packedTables[--$starts[255 & $item['h']]] = $item;
		}

		$final = '';
		for ( $i = 0; $i < 256; ++$i ) {
			$count = $counts[$i];

			// The size of the hashtable will be double the item count.
			// The rest of the slots will be empty.
			$len = $count + $count;
			$final .= pack( 'VV', $this->pos, $len );

			$hashtable = array();
			for ( $u = 0; $u < $len; ++$u ) {
				$hashtable[$u] = array( 'h' => 0, 'p' => 0 );
			}

			// Fill the hashtable, using the next empty slot if the hashed slot
			// is taken.
			for ( $u = 0; $u < $count; ++$u ) {
				$hp = $packedTables[$starts[$i] + $u];
				$where = CdbFunctions::unsignedMod(
					CdbFunctions::unsignedShiftRight( $hp['h'], 8 ), $len );
				while ( $hashtable[$where]['p'] ) {
					if ( ++$where == $len ) {
						$where = 0;
					}
				}
				$hashtable[$where] = $hp;
			}

			// Write the hashtable
			for ( $u = 0; $u < $len; ++$u ) {
				// The hash is written as two 16-bit halves, followed by the record position
				$buf = pack( 'vvV',
					$hashtable[$u]['h'] & 0xffff,
					CdbFunctions::unsignedShiftRight( $hashtable[$u]['h'], 16 ),
					$hashtable[$u]['p'] );
				$this->write( $buf );
				$this->posplus( 8 );
			}
		}

		// Write the pointer array at the start of the file
		rewind( $this->handle );
		if ( ftell( $this->handle ) != 0 ) {
			throw new MWException( __METHOD__.': Error rewinding to start of file' );
		}
		$this->write( $final );
	}
}

View file

@ -104,24 +104,24 @@ class Interwiki {
wfDebug( __METHOD__ . "( $prefix )\n" );
if( !$db ) {
$db = dba_open( $wgInterwikiCache, 'r', 'cdb' );
$db = CdbReader::open( $wgInterwikiCache );
}
/* Resolve site name */
if( $wgInterwikiScopes>=3 && !$site ) {
$site = dba_fetch( '__sites:' . wfWikiID(), $db );
$site = $db->get( '__sites:' . wfWikiID() );
if ( $site == '' ) {
$site = $wgInterwikiFallbackSite;
}
}
$value = dba_fetch( wfMemcKey( $prefix ), $db );
$value = $db->get( wfMemcKey( $prefix ) );
// Site level
if ( $value == '' && $wgInterwikiScopes >= 3 ) {
$value = dba_fetch( "_{$site}:{$prefix}", $db );
$value = $db->get( "_{$site}:{$prefix}" );
}
// Global Level
if ( $value == '' && $wgInterwikiScopes >= 2 ) {
$value = dba_fetch( "__global:{$prefix}", $db );
$value = $db->get( "__global:{$prefix}" );
}
if ( $value == 'undef' )
$value = '';

View file

@ -201,7 +201,7 @@ function makeLink( $entry, $source ) {
array_key_exists($entry['iw_prefix'],$prefixRewrites[$source]))
$entry['iw_prefix'] = $prefixRewrites[$source][$entry['iw_prefix']];
if ($dbFile)
dba_insert("{$source}:{$entry['iw_prefix']}", trim("{$entry['iw_local']} {$entry['iw_url']}"),$dbFile);
$dbFile->set( "{$source}:{$entry['iw_prefix']}", trim("{$entry['iw_local']} {$entry['iw_url']}") );
else
print "{$source}:{$entry['iw_prefix']} {$entry['iw_url']} {$entry['iw_local']}\n";

View file

@ -19,7 +19,7 @@ chdir( $oldCwd );
# Output
if ( isset( $options['o'] ) ) {
# To database specified with -o
$dbFile = dba_open( $options['o'], "n", "cdb_make" );
$dbFile = CdbWriter::open( $options['o'] );
}
getRebuildInterwikiDump();