[FileBackend]

* Added FileJournal class to log file changes for file backends. This can be used for migrations (like moving to Swift), syncing mirror repos, consistency checks, finishing/reverting operation batches, and such. The default journal is the "null" journal, which simply does nothing.
* Added the optional schema change required for using the DBFileJournal (MySQL, SQLite).
This commit is contained in:
Aaron Schulz 2012-03-13 01:46:33 +00:00
parent 19799ae5bb
commit d19f54602f
10 changed files with 377 additions and 19 deletions

View file

@ -507,6 +507,9 @@ $wgAutoloadLocalClasses = array(
'FSFileBackendFileList' => 'includes/filerepo/backend/FSFileBackend.php',
'SwiftFileBackend' => 'includes/filerepo/backend/SwiftFileBackend.php',
'SwiftFileBackendFileList' => 'includes/filerepo/backend/SwiftFileBackend.php',
'FileJournal' => 'includes/filerepo/backend/filejournal/FileJournal.php',
'DBFileJournal' => 'includes/filerepo/backend/filejournal/DBFileJournal.php',
'NullFileJournal' => 'includes/filerepo/backend/filejournal/FileJournal.php',
'LockManagerGroup' => 'includes/filerepo/backend/lockmanager/LockManagerGroup.php',
'LockManager' => 'includes/filerepo/backend/lockmanager/LockManager.php',
'ScopedLock' => 'includes/filerepo/backend/lockmanager/LockManager.php',

View file

@ -45,6 +45,8 @@ abstract class FileBackend {
protected $readOnly; // string; read-only explanation message
/** @var LockManager */
protected $lockManager;
/** @var FileJournal */
protected $fileJournal;
/**
* Create a new backend instance from configuration.
@ -73,6 +75,9 @@ abstract class FileBackend {
$this->lockManager = ( $config['lockManager'] instanceof LockManager )
? $config['lockManager']
: LockManagerGroup::singleton()->get( $config['lockManager'] );
$this->fileJournal = isset( $config['fileJournal'] )
? FileJournal::factory( $config['fileJournal'], $this->name )
: FileJournal::factory( array( 'class' => 'NullFileJournal' ), $this->name );
$this->readOnly = isset( $config['readOnly'] )
? (string)$config['readOnly']
: '';
@ -177,6 +182,8 @@ abstract class FileBackend {
* 'allowStale' : Don't require the latest available data.
* This can increase performance for non-critical writes.
* This has no effect unless the 'force' flag is set.
* 'nonJournaled' : Don't log this operation batch in the file journal.
* This limits the ability of recovery scripts.
*
* Remarks on locking:
* File system paths given to operations should refer to files that are

View file

@ -133,7 +133,7 @@ class FileBackendMultiWrite extends FileBackend {
}
// Actually attempt the operation batch...
$subStatus = FileOp::attemptBatch( $performOps, $opts );
$subStatus = FileOp::attemptBatch( $performOps, $opts, $this->fileJournal );
$success = array();
$failCount = 0;

View file

@ -708,7 +708,7 @@ abstract class FileBackendStore extends FileBackend {
$this->clearCache();
// Actually attempt the operation batch...
$subStatus = FileOp::attemptBatch( $performOps, $opts );
$subStatus = FileOp::attemptBatch( $performOps, $opts, $this->fileJournal );
// Merge errors into status fields
$status->merge( $subStatus );

View file

@ -24,6 +24,7 @@ abstract class FileOp {
protected $state = self::STATE_NEW; // integer
protected $failed = false; // boolean
protected $useLatest = true; // boolean
protected $batchId; // string
protected $sourceSha1; // string
protected $destSameAsSource; // boolean
@ -62,6 +63,16 @@ abstract class FileOp {
$this->params = $params;
}
/**
* Set the batch UUID this operation belongs to.
*
* attemptBatch() calls this on every operation of a batch before running
* its precheck. The stored ID is only read back when a failure is written
* to the 'FileOperation' debug log, so log lines can be tied to a batch.
*
* @param $batchId string UUID of the operation batch
* @return void
*/
final protected function setBatchId( $batchId ) {
$this->batchId = $batchId;
}
/**
* Whether to allow stale data for file reads and stat checks
*
@ -73,43 +84,57 @@ abstract class FileOp {
}
/**
* Attempt a series of file operations.
* Attempt to perform a series of file operations.
* Callers are responsible for handling file locking.
*
* $opts is an array of options, including:
* 'force' : Errors that would normally cause a rollback do not.
* The remaining operations are still attempted if any fail.
* 'allowStale' : Don't require the latest available data.
* This can increase performance for non-critical writes.
* This has no effect unless the 'force' flag is set.
*
* 'force' : Errors that would normally cause a rollback do not.
* The remaining operations are still attempted if any fail.
* 'allowStale' : Don't require the latest available data.
* This can increase performance for non-critical writes.
* This has no effect unless the 'force' flag is set.
* 'nonJournaled' : Don't log this operation batch in the file journal.
*
* The resulting Status will be "OK" unless:
* a) unexpected operation errors occurred (network partitions, disk full...)
* b) significant operation errors occurred and 'force' was not set
*
* @param $performOps Array List of FileOp operations
* @param $opts Array Batch operation options
* @param $journal FileJournal Journal to log operations to
* @return Status
*/
final public static function attemptBatch( array $performOps, array $opts ) {
final public static function attemptBatch(
array $performOps, array $opts, FileJournal $journal
) {
$status = Status::newGood();
$allowStale = !empty( $opts['allowStale'] );
$ignoreErrors = !empty( $opts['force'] );
$n = count( $performOps );
if ( $n > self::MAX_BATCH_SIZE ) {
$status->fatal( 'backend-fail-batchsize', $n, self::MAX_BATCH_SIZE );
return $status;
}
$batchId = $journal->getTimestampedUUID();
$allowStale = !empty( $opts['allowStale'] );
$ignoreErrors = !empty( $opts['force'] );
$journaled = empty( $opts['nonJournaled'] );
$entries = array(); // file journal entries
$predicates = FileOp::newPredicates(); // account for previous op in prechecks
// Do pre-checks for each operation; abort on failure...
foreach ( $performOps as $index => $fileOp ) {
$fileOp->setBatchId( $batchId );
$fileOp->allowStaleReads( $allowStale );
$subStatus = $fileOp->precheck( $predicates );
$oldPredicates = $predicates;
$subStatus = $fileOp->precheck( $predicates ); // updates $predicates
$status->merge( $subStatus );
if ( !$subStatus->isOK() ) { // operation failed?
if ( $subStatus->isOK() ) {
if ( $journaled ) { // journal log entry
$entries = array_merge( $entries,
self::getJournalEntries( $fileOp, $oldPredicates, $predicates ) );
}
} else { // operation failed?
$status->success[$index] = false;
++$status->failCount;
if ( !$ignoreErrors ) {
@ -118,8 +143,15 @@ abstract class FileOp {
}
}
if ( $ignoreErrors ) {
# Treat all precheck() fatals as merely warnings
// Log the operations in file journal...
if ( count( $entries ) ) {
$subStatus = $journal->logChangeBatch( $entries, $batchId );
if ( !$subStatus->isOK() ) {
return $subStatus; // abort
}
}
if ( $ignoreErrors ) { // treat precheck() fatals as mere warnings
$status->setResult( true, $status->value );
}
@ -154,6 +186,46 @@ abstract class FileOp {
return $status;
}
/**
 * Build the journal rows describing one file operation.
 *
 * Three kinds of rows are produced, and returned in this order:
 *   - 'null' rows: one per path the operation reads or changes, carrying
 *     the SHA-1 the file had *before* the operation (assertion for recovery)
 *   - 'create'/'update' rows: one per changed path that still has a SHA-1
 *     after the operation
 *   - 'delete' rows: one per changed path whose post-op SHA-1 is false
 *
 * @param $fileOp FileOp The operation to describe
 * @param $oPredicates Array File information from before the operation's precheck
 * @param $nPredicates Array File information from after the operation's precheck
 * @return Array List of entries, each with 'op', 'path' and 'newSha1' keys
 */
final protected static function getJournalEntries(
	FileOp $fileOp, array $oPredicates, array $nPredicates
) {
	// Record the pre-op state of every path this operation touches
	$assertions = array();
	$touchedPaths = array_merge(
		$fileOp->storagePathsRead(),
		$fileOp->storagePathsChanged()
	);
	foreach ( $touchedPaths as $touchedPath ) {
		$assertions[] = array(
			'op' => 'null',
			'path' => $touchedPath,
			'newSha1' => $fileOp->fileSha1( $touchedPath, $oPredicates )
		);
	}

	// Record the post-op state of every path this operation modifies
	$writes = array();
	$removals = array();
	foreach ( $fileOp->storagePathsChanged() as $changedPath ) {
		$finalSha1 = $nPredicates['sha1'][$changedPath];
		if ( $finalSha1 !== false ) { // created/updated
			if ( $fileOp->fileExists( $changedPath, $oPredicates ) ) {
				$opName = 'update';
			} else {
				$opName = 'create';
			}
			$writes[] = array(
				'op' => $opName,
				'path' => $changedPath,
				'newSha1' => $finalSha1
			);
		} else { // deleted
			$removals[] = array(
				'op' => 'delete',
				'path' => $changedPath,
				'newSha1' => ''
			);
		}
	}

	return array_merge( $assertions, $writes, $removals );
}
/**
* Get the value of the parameter with the given name
*
@ -352,8 +424,8 @@ abstract class FileOp {
$params = $this->params;
$params['failedAction'] = $action;
try {
wfDebugLog( 'FileOperation',
get_class( $this ) . ' failed: ' . FormatJson::encode( $params ) );
wfDebugLog( 'FileOperation', get_class( $this ) .
" failed (batch #{$this->batchId}): " . FormatJson::encode( $params ) );
} catch ( Exception $e ) {
// bad config? debug log error?
}

View file

@ -0,0 +1,112 @@
<?php
/**
* @file
* @ingroup FileJournal
* @author Aaron Schulz
*/
/**
 * Version of FileJournal that logs to a DB table ("filejournal").
 *
 * Every public entry point reports failures through the returned Status
 * instead of letting database exceptions escape.
 *
 * @since 1.20
 */
class DBFileJournal extends FileJournal {
	// string|bool; wiki DB name, handed unchanged to LoadBalancer::getConnection()
	protected $wiki = false;

	/**
	 * Construct a new instance from configuration.
	 * $config includes:
	 *     'wiki' : wiki name to use for LoadBalancer (optional; the
	 *              property default is kept when it is not given)
	 *
	 * @param $config Array
	 */
	protected function __construct( array $config ) {
		parent::__construct( $config );
		// Do not raise an "undefined index" notice (and clobber the default
		// with null) when the caller leaves 'wiki' out of the configuration.
		if ( isset( $config['wiki'] ) ) {
			$this->wiki = $config['wiki'];
		}
	}

	/**
	 * Insert one row per journal entry, all inside a single transaction.
	 *
	 * @see FileJournal::logChangeBatch()
	 * @param $entries Array List of entries with 'op', 'path' and 'newSha1' keys
	 * @param $batchId string UUID string that identifies the operation batch
	 * @return Status Fatal with 'filejournal-fail-dbconnect' if no master
	 *   connection is available, or 'filejournal-fail-dbquery' if the write fails
	 */
	protected function doLogChangeBatch( array $entries, $batchId ) {
		$status = Status::newGood();

		$dbw = $this->getMasterDB();
		if ( !$dbw ) {
			$status->fatal( 'filejournal-fail-dbconnect', $this->backend );
			return $status;
		}

		// Every row of the batch carries the same timestamp; format it once
		$dbTimestamp = $dbw->timestamp( wfTimestamp( TS_UNIX ) );

		$data = array();
		foreach ( $entries as $entry ) {
			$data[] = array(
				'fj_batch_uuid' => $batchId,
				'fj_backend' => $this->backend,
				'fj_op' => $entry['op'],
				'fj_path' => $entry['path'],
				// Fixed-width hash of the path, so it can be indexed
				'fj_path_sha1' => wfBaseConvert( sha1( $entry['path'] ), 16, 36, 31 ),
				'fj_new_sha1' => $entry['newSha1'],
				'fj_timestamp' => $dbTimestamp
			);
		}

		try {
			$dbw->begin();
			$dbw->insert( 'filejournal', $data, __METHOD__ );
			$dbw->commit();
		} catch ( DBError $e ) {
			// Do not leave a half-finished transaction open on the connection
			$this->rollbackQuietly( $dbw );
			$status->fatal( 'filejournal-fail-dbquery', $this->backend );
			return $status;
		}

		return $status;
	}

	/**
	 * Delete all journal rows older than the configured 'ttlDays'.
	 * Nothing is deleted when ttlDays is unset (false), zero or negative.
	 *
	 * @see FileJournal::purgeOldLogs()
	 * @return Status Fatal with 'filejournal-fail-dbconnect' if no master
	 *   connection is available, or 'filejournal-fail-dbquery' if the delete fails
	 */
	protected function doPurgeOldLogs() {
		$status = Status::newGood();
		if ( $this->ttlDays <= 0 ) {
			return $status; // nothing to do
		}

		$dbw = $this->getMasterDB();
		if ( !$dbw ) {
			$status->fatal( 'filejournal-fail-dbconnect', $this->backend );
			return $status;
		}
		// 86400 seconds per day
		$dbCutoff = $dbw->timestamp( time() - 86400 * $this->ttlDays );

		try {
			$dbw->begin();
			$dbw->delete( 'filejournal',
				array( 'fj_timestamp < ' . $dbw->addQuotes( $dbCutoff ) ),
				__METHOD__
			);
			$dbw->commit();
		} catch ( DBError $e ) {
			// Do not leave a half-finished transaction open on the connection
			$this->rollbackQuietly( $dbw );
			$status->fatal( 'filejournal-fail-dbquery', $this->backend );
			return $status;
		}

		return $status;
	}

	/**
	 * Get a master connection to the logging DB.
	 * A new load balancer is created on every call.
	 *
	 * @return DatabaseBase|null Null if the connection could not be made
	 */
	protected function getMasterDB() {
		try {
			$lb = wfGetLBFactory()->newMainLB();
			return $lb->getConnection( DB_MASTER, array(), $this->wiki );
		} catch ( DBConnectionError $e ) {
			return null;
		}
	}

	/**
	 * Roll back the open transaction on $dbw, ignoring any database error
	 * raised while doing so. The caller has already recorded the original
	 * failure in its Status, and journals must not throw.
	 *
	 * @param $dbw DatabaseBase
	 * @return void
	 */
	protected function rollbackQuietly( $dbw ) {
		try {
			$dbw->rollback();
		} catch ( DBError $e ) {
			// Connection is likely unusable; nothing more can be done here
		}
	}
}

View file

@ -0,0 +1,131 @@
<?php
/**
* @defgroup FileJournal File journal
* @ingroup FileBackend
*/
/**
* @file
* @ingroup FileJournal
* @author Aaron Schulz
*/
/**
 * @brief Class for handling file operation journaling.
 *
 * Instances are created through FileJournal::factory(), which also binds
 * the journal to the name of the file backend it logs for.
 *
 * Subclasses should avoid throwing exceptions at all costs.
 *
 * @ingroup FileJournal
 * @since 1.20
 */
abstract class FileJournal {
	protected $backend; // string; backend name, assigned by factory()
	protected $ttlDays; // integer|bool; days to keep entries, false if not configured

	/**
	 * Construct a new instance from configuration.
	 * $config includes:
	 *     'ttlDays' : days to keep log entries around (false means "forever")
	 *
	 * @param $config Array
	 */
	protected function __construct( array $config ) {
		$this->ttlDays = isset( $config['ttlDays'] ) ? $config['ttlDays'] : false;
	}

	/**
	 * Create an appropriate FileJournal object from config.
	 * $config must include:
	 *     'class' : name of the FileJournal subclass to instantiate
	 * The whole $config array is passed on to that class' constructor.
	 *
	 * @param $config Array
	 * @param $backend string A registered file backend name
	 * @return FileJournal
	 * @throws MWException If no class is configured or it is not a FileJournal
	 */
	final public static function factory( array $config, $backend ) {
		// Fail with a catchable, descriptive error rather than a PHP fatal
		// from "new" on an undefined class name.
		if ( !isset( $config['class'] ) ) {
			throw new MWException( "No FileJournal class was given in the configuration." );
		}
		$class = $config['class'];
		$jrn = new $class( $config );
		if ( !$jrn instanceof self ) {
			throw new MWException( "Class given is not an instance of FileJournal." );
		}
		$jrn->backend = $backend;
		return $jrn;
	}

	/**
	 * Get a statistically unique ID string
	 *
	 * @return string <9 char TS_MW timestamp in base 36><22 random base 36 chars>
	 */
	final public function getTimestampedUUID() {
		// Random part: hash of five concatenated mt_rand() values
		$s = '';
		for ( $i = 0; $i < 5; $i++ ) {
			$s .= mt_rand( 0, 2147483647 );
		}
		$s = wfBaseConvert( sha1( $s ), 16, 36, 31 );
		// Timestamp goes first; the combined string is cut to 31 characters
		return substr( wfBaseConvert( wfTimestamp( TS_MW ), 10, 36, 9 ) . $s, 0, 31 );
	}

	/**
	 * Log changes made by a batch file operation.
	 * $entries is an array of log entries, each of which contains:
	 *     op      : Basic operation name; FileOp::getJournalEntries() emits
	 *               'null' (pre-operation assertion), 'create', 'update'
	 *               and 'delete'
	 *     path    : The storage path of the file
	 *     newSha1 : The final base 36 SHA-1 of the file
	 * Note that 'false' should be used as the SHA-1 for non-existing files.
	 *
	 * An empty $entries list is accepted and results in a good Status
	 * without the subclass being called.
	 *
	 * @param $entries Array List of file operations (each an array of parameters)
	 * @param $batchId string UUID string that identifies the operation batch
	 * @return Status
	 */
	final public function logChangeBatch( array $entries, $batchId ) {
		if ( !count( $entries ) ) {
			return Status::newGood();
		}
		return $this->doLogChangeBatch( $entries, $batchId );
	}

	/**
	 * Subclass hook that actually stores the entries.
	 * Only called with a non-empty $entries list.
	 *
	 * @see FileJournal::logChangeBatch()
	 *
	 * @param $entries Array List of file operations (each an array of parameters)
	 * @param $batchId string UUID string that identifies the operation batch
	 * @return Status
	 */
	abstract protected function doLogChangeBatch( array $entries, $batchId );

	/**
	 * Purge any old log entries
	 *
	 * @return Status
	 */
	final public function purgeOldLogs() {
		return $this->doPurgeOldLogs();
	}

	/**
	 * Subclass hook that actually removes the old entries.
	 *
	 * @see FileJournal::purgeOldLogs()
	 * @return Status
	 */
	abstract protected function doPurgeOldLogs();
}
/**
 * No-op FileJournal: nothing is recorded and nothing is ever purged.
 * Both hooks simply report success.
 * @since 1.20
 */
class NullFileJournal extends FileJournal {
	/**
	 * Discard the given entries.
	 *
	 * @see FileJournal::logChangeBatch()
	 * @param $entries Array Ignored
	 * @param $batchId string Ignored
	 * @return Status Always good
	 */
	protected function doLogChangeBatch( array $entries, $batchId ) {
		$result = Status::newGood();
		return $result;
	}

	/**
	 * There are no stored entries, so there is nothing to purge.
	 *
	 * @see FileJournal::purgeOldLogs()
	 * @return Status Always good
	 */
	protected function doPurgeOldLogs() {
		$result = Status::newGood();
		return $result;
	}
}

View file

@ -2274,6 +2274,10 @@ If the problem persists, contact an [[Special:ListUsers/sysop|administrator]].',
'backend-fail-contenttype' => 'Could not determine the content type of the file to store at "$1".',
'backend-fail-batchsize' => 'Storage backend given a batch of $1 file {{PLURAL:$1|operation|operations}}; the limit is $2 {{PLURAL:$2|operation|operations}}.',
# File journal
'filejournal-fail-dbconnect' => 'Could not connect to the journal database for storage backend "$1".',
'filejournal-fail-dbquery' => 'Could not update the journal database for storage backend "$1".',
# Lock manager
'lockmanager-notlocked' => 'Could not unlock "$1"; it is not locked.',
'lockmanager-fail-closelock' => 'Could not close lock file for "$1".',

View file

@ -0,0 +1,24 @@
-- File backend operation journal
-- DBFileJournal::doLogChangeBatch() inserts one row per journal entry of a batch
CREATE TABLE /*_*/filejournal (
-- Unique ID for each journal entry (auto-assigned)
fj_id bigint unsigned NOT NULL PRIMARY KEY auto_increment,
-- UUID of the batch this operation belongs to
-- (31 characters as produced by FileJournal::getTimestampedUUID())
fj_batch_uuid varbinary(32) NOT NULL,
-- The registered file backend name
fj_backend varchar(255) NOT NULL,
-- The storage path that was affected (may be internal paths)
fj_path blob NOT NULL,
-- SHA-1 hash of fj_path in base-36, padded to 31 characters
fj_path_sha1 varbinary(32) NOT NULL default '',
-- Primitive operation description (create/update/delete), or 'null' for
-- the pre-operation assertion rows written by FileOp::getJournalEntries()
fj_op varchar(16) NOT NULL default '',
-- SHA-1 file content hash in base-36 as given in the entry's 'newSha1';
-- the empty string for delete entries
fj_new_sha1 varbinary(32) NOT NULL default '',
-- Timestamp at which the batch was logged (same value for all rows of a batch)
fj_timestamp varbinary(14) NOT NULL default ''
);
-- Look up all entries of one batch, in insertion order
CREATE INDEX /*i*/fj_batch_id ON /*_*/filejournal (fj_batch_uuid,fj_id);
-- Look up the history of one storage path, in insertion order
CREATE INDEX /*i*/fj_path_id ON /*_*/filejournal (fj_path_sha1,fj_id);
-- Look up entries by resulting file content hash, in insertion order
CREATE INDEX /*i*/fj_new_sha1 ON /*_*/filejournal (fj_new_sha1,fj_id);
-- DBFileJournal::doPurgeOldLogs() deletes rows by fj_timestamp cutoff
CREATE INDEX /*i*/fj_timestamp ON /*_*/filejournal (fj_timestamp);

View file

@ -1377,6 +1377,11 @@ $wgMessageStructure = array(
'backend-fail-batchsize'
),
'filejournal-errors' => array(
'filejournal-fail-dbconnect',
'filejournal-fail-dbquery'
),
'lockmanager-errors' => array(
'lockmanager-notlocked',
'lockmanager-fail-closelock',