[FileBackend]
* Added FileJournal class to log file changes for file backends. This can be used for migrations (like moving to Swift), syncing mirror repos, consistency checks, finishing/reverting operation batches, and such. The default journal is the "null" journal, which simply does nothing.
* Added the optional schema change required for using the DBFileJournal (MySQL, SQLite).

parent 19799ae5bb
commit d19f54602f
10 changed files with 377 additions and 19 deletions

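For orientation, this is roughly how a backend definition would opt into the new journal via the 'fileJournal' key that FileBackend::__construct() consumes below. Everything except the 'fileJournal' array (backend name, class, lock manager, the $wgFileBackends registration itself) is an illustrative assumption and not part of this commit; omitting 'fileJournal' falls back to the no-op NullFileJournal.

<?php
// Hypothetical backend registration; only the 'fileJournal' key is introduced by this commit.
$wgFileBackends[] = array(
	'name'        => 'local-backend', // assumed backend name
	'class'       => 'FSFileBackend', // assumed backend class (other FS settings omitted)
	'lockManager' => 'fsLockManager', // assumed lock manager
	'fileJournal' => array(
		'class'   => 'DBFileJournal', // log changes to the filejournal DB table
		'wiki'    => 'mywiki',        // wiki DB name used by DBFileJournal::getMasterDB()
		'ttlDays' => 30               // FileJournal: purge entries older than 30 days
	)
);
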
@@ -507,6 +507,9 @@ $wgAutoloadLocalClasses = array(
 	'FSFileBackendFileList' => 'includes/filerepo/backend/FSFileBackend.php',
 	'SwiftFileBackend' => 'includes/filerepo/backend/SwiftFileBackend.php',
 	'SwiftFileBackendFileList' => 'includes/filerepo/backend/SwiftFileBackend.php',
+	'FileJournal' => 'includes/filerepo/backend/filejournal/FileJournal.php',
+	'DBFileJournal' => 'includes/filerepo/backend/filejournal/DBFileJournal.php',
+	'NullFileJournal' => 'includes/filerepo/backend/filejournal/FileJournal.php',
 	'LockManagerGroup' => 'includes/filerepo/backend/lockmanager/LockManagerGroup.php',
 	'LockManager' => 'includes/filerepo/backend/lockmanager/LockManager.php',
 	'ScopedLock' => 'includes/filerepo/backend/lockmanager/LockManager.php',

@@ -45,6 +45,8 @@ abstract class FileBackend {
 	protected $readOnly; // string; read-only explanation message
 	/** @var LockManager */
 	protected $lockManager;
+	/** @var FileJournal */
+	protected $fileJournal;
 
 	/**
 	 * Create a new backend instance from configuration.

@@ -73,6 +75,9 @@ abstract class FileBackend {
 		$this->lockManager = ( $config['lockManager'] instanceof LockManager )
 			? $config['lockManager']
 			: LockManagerGroup::singleton()->get( $config['lockManager'] );
+		$this->fileJournal = isset( $config['fileJournal'] )
+			? FileJournal::factory( $config['fileJournal'], $this->name )
+			: FileJournal::factory( array( 'class' => 'NullFileJournal' ), $this->name );
 		$this->readOnly = isset( $config['readOnly'] )
 			? (string)$config['readOnly']
 			: '';

@@ -177,6 +182,8 @@ abstract class FileBackend {
 	 * 'allowStale' : Don't require the latest available data.
 	 * This can increase performance for non-critical writes.
 	 * This has no effect unless the 'force' flag is set.
+	 * 'nonJournaled' : Don't log this operation batch in the file journal.
+	 * This limits the ability of recovery scripts.
 	 *
 	 * Remarks on locking:
 	 * File system paths given to operations should refer to files that are

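A caller-side sketch of the batch options documented in the hunk above, assuming they are passed to the backend's doOperations() batch entry point; the backend instance, storage paths and option values are made up for illustration.

<?php
// Hypothetical batch call against a FileBackend instance ($backend).
$status = $backend->doOperations(
	array(
		array(
			'op'  => 'copy',
			'src' => 'mwstore://local-backend/some-container/a.png',
			'dst' => 'mwstore://local-backend/some-container/archive/a.png'
		)
	),
	array(
		'force'        => true, // keep going; downgrade precheck fatals to warnings
		'nonJournaled' => true  // new in this commit: skip the file journal for this batch
	)
);
if ( !$status->isOK() ) {
	// inspect $status->errors, retry, etc.
}
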
@@ -133,7 +133,7 @@ class FileBackendMultiWrite extends FileBackend {
 		}
 
 		// Actually attempt the operation batch...
-		$subStatus = FileOp::attemptBatch( $performOps, $opts );
+		$subStatus = FileOp::attemptBatch( $performOps, $opts, $this->fileJournal );
 
 		$success = array();
 		$failCount = 0;

@@ -708,7 +708,7 @@ abstract class FileBackendStore extends FileBackend {
 		$this->clearCache();
 
 		// Actually attempt the operation batch...
-		$subStatus = FileOp::attemptBatch( $performOps, $opts );
+		$subStatus = FileOp::attemptBatch( $performOps, $opts, $this->fileJournal );
 
 		// Merge errors into status fields
 		$status->merge( $subStatus );

@@ -24,6 +24,7 @@ abstract class FileOp {
 	protected $state = self::STATE_NEW; // integer
 	protected $failed = false; // boolean
 	protected $useLatest = true; // boolean
+	protected $batchId; // string
 
 	protected $sourceSha1; // string
 	protected $destSameAsSource; // boolean

@@ -62,6 +63,16 @@ abstract class FileOp {
 		$this->params = $params;
 	}
 
+	/**
+	 * Set the batch UUID this operation belongs to
+	 *
+	 * @param $batchId string
+	 * @return void
+	 */
+	final protected function setBatchId( $batchId ) {
+		$this->batchId = $batchId;
+	}
+
 	/**
 	 * Whether to allow stale data for file reads and stat checks
 	 *

@@ -73,43 +84,57 @@ abstract class FileOp {
 	}
 
 	/**
-	 * Attempt a series of file operations.
+	 * Attempt to perform a series of file operations.
 	 * Callers are responsible for handling file locking.
 	 *
 	 * $opts is an array of options, including:
-	 * 'force' : Errors that would normally cause a rollback do not.
-	 * The remaining operations are still attempted if any fail.
-	 * 'allowStale' : Don't require the latest available data.
-	 * This can increase performance for non-critical writes.
-	 * This has no effect unless the 'force' flag is set.
-	 *
+	 * 'force' : Errors that would normally cause a rollback do not.
+	 * The remaining operations are still attempted if any fail.
+	 * 'allowStale' : Don't require the latest available data.
+	 * This can increase performance for non-critical writes.
+	 * This has no effect unless the 'force' flag is set.
+	 * 'nonJournaled' : Don't log this operation batch in the file journal.
+	 *
 	 * The resulting Status will be "OK" unless:
 	 * a) unexpected operation errors occurred (network partitions, disk full...)
 	 * b) significant operation errors occured and 'force' was not set
 	 *
 	 * @param $performOps Array List of FileOp operations
 	 * @param $opts Array Batch operation options
+	 * @param $journal FileJournal Journal to log operations to
 	 * @return Status
 	 */
-	final public static function attemptBatch( array $performOps, array $opts ) {
+	final public static function attemptBatch(
+		array $performOps, array $opts, FileJournal $journal
+	) {
 		$status = Status::newGood();
 
-		$allowStale = !empty( $opts['allowStale'] );
-		$ignoreErrors = !empty( $opts['force'] );
-
 		$n = count( $performOps );
 		if ( $n > self::MAX_BATCH_SIZE ) {
 			$status->fatal( 'backend-fail-batchsize', $n, self::MAX_BATCH_SIZE );
 			return $status;
 		}
 
+		$batchId = $journal->getTimestampedUUID();
+		$allowStale = !empty( $opts['allowStale'] );
+		$ignoreErrors = !empty( $opts['force'] );
+		$journaled = empty( $opts['nonJournaled'] );
+
+		$entries = array(); // file journal entries
 		$predicates = FileOp::newPredicates(); // account for previous op in prechecks
 		// Do pre-checks for each operation; abort on failure...
 		foreach ( $performOps as $index => $fileOp ) {
+			$fileOp->setBatchId( $batchId );
 			$fileOp->allowStaleReads( $allowStale );
-			$subStatus = $fileOp->precheck( $predicates );
+			$oldPredicates = $predicates;
+			$subStatus = $fileOp->precheck( $predicates ); // updates $predicates
 			$status->merge( $subStatus );
-			if ( !$subStatus->isOK() ) { // operation failed?
+			if ( $subStatus->isOK() ) {
+				if ( $journaled ) { // journal log entry
+					$entries = array_merge( $entries,
+						self::getJournalEntries( $fileOp, $oldPredicates, $predicates ) );
+				}
+			} else { // operation failed?
 				$status->success[$index] = false;
 				++$status->failCount;
 				if ( !$ignoreErrors ) {

@@ -118,8 +143,15 @@ abstract class FileOp {
 			}
 		}
 
-		if ( $ignoreErrors ) {
-			# Treat all precheck() fatals as merely warnings
+		// Log the operations in file journal...
+		if ( count( $entries ) ) {
+			$subStatus = $journal->logChangeBatch( $entries, $batchId );
+			if ( !$subStatus->isOK() ) {
+				return $subStatus; // abort
+			}
+		}
+
+		if ( $ignoreErrors ) { // treat precheck() fatals as mere warnings
 			$status->setResult( true, $status->value );
 		}
 

@@ -154,6 +186,46 @@ abstract class FileOp {
 		return $status;
 	}
 
+	/**
+	 * Get the file journal entries for a single file operation
+	 *
+	 * @param $fileOp FileOp
+	 * @param $oPredicates Array Pre-op information about files
+	 * @param $nPredicates Array Post-op information about files
+	 * @return Array
+	 */
+	final protected static function getJournalEntries(
+		FileOp $fileOp, array $oPredicates, array $nPredicates
+	) {
+		$nullEntries = array();
+		$updateEntries = array();
+		$deleteEntries = array();
+		$pathsUsed = array_merge( $fileOp->storagePathsRead(), $fileOp->storagePathsChanged() );
+		foreach ( $pathsUsed as $path ) {
+			$nullEntries[] = array( // assertion for recovery
+				'op' => 'null',
+				'path' => $path,
+				'newSha1' => $fileOp->fileSha1( $path, $oPredicates )
+			);
+		}
+		foreach ( $fileOp->storagePathsChanged() as $path ) {
+			if ( $nPredicates['sha1'][$path] === false ) { // deleted
+				$deleteEntries[] = array(
+					'op' => 'delete',
+					'path' => $path,
+					'newSha1' => ''
+				);
+			} else { // created/updated
+				$updateEntries[] = array(
+					'op' => $fileOp->fileExists( $path, $oPredicates ) ? 'update' : 'create',
+					'path' => $path,
+					'newSha1' => $nPredicates['sha1'][$path]
+				);
+			}
+		}
+		return array_merge( $nullEntries, $updateEntries, $deleteEntries );
+	}
+
 	/**
 	 * Get the value of the parameter with the given name
 	 *

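To make the entry format concrete: for a single 'copy' operation whose destination did not previously exist, getJournalEntries() would produce something like the array below. The storage paths and SHA-1 string are invented for illustration; real 'newSha1' values are 31-character base-36 hashes, and false marks a file that does not exist.

<?php
// Illustrative output only; not produced by any test in this commit.
$entries = array(
	// 'null' rows assert the pre-operation state of every path read or changed
	array( 'op' => 'null', 'path' => 'mwstore://local-backend/cont/a.png',
		'newSha1' => 'q3z5o0m0bq9a7k2c1x8r4t6v5n7p2s1' ), // source existed
	array( 'op' => 'null', 'path' => 'mwstore://local-backend/cont/b.png',
		'newSha1' => false ), // destination did not exist yet
	// the actual change made to the destination
	array( 'op' => 'create', 'path' => 'mwstore://local-backend/cont/b.png',
		'newSha1' => 'q3z5o0m0bq9a7k2c1x8r4t6v5n7p2s1' )
);
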
@@ -352,8 +424,8 @@ abstract class FileOp {
 		$params = $this->params;
 		$params['failedAction'] = $action;
 		try {
-			wfDebugLog( 'FileOperation',
-				get_class( $this ) . ' failed: ' . FormatJson::encode( $params ) );
+			wfDebugLog( 'FileOperation', get_class( $this ) .
+				" failed (batch #{$this->batchId}): " . FormatJson::encode( $params ) );
 		} catch ( Exception $e ) {
 			// bad config? debug log error?
 		}

includes/filerepo/backend/filejournal/DBFileJournal.php (new file, 112 lines)
@@ -0,0 +1,112 @@
+<?php
+/**
+ * @file
+ * @ingroup FileJournal
+ * @author Aaron Schulz
+ */
+
+/**
+ * Version of FileJournal that logs to a DB table
+ * @since 1.20
+ */
+class DBFileJournal extends FileJournal {
+	protected $wiki = false; // string; wiki DB name
+
+	/**
+	 * Construct a new instance from configuration.
+	 * $config includes:
+	 * 'wiki' : wiki name to use for LoadBalancer
+	 *
+	 * @param $config Array
+	 */
+	protected function __construct( array $config ) {
+		parent::__construct( $config );
+
+		$this->wiki = $config['wiki'];
+	}
+
+	/**
+	 * @see FileJournal::logChangeBatch()
+	 * @return Status
+	 */
+	protected function doLogChangeBatch( array $entries, $batchId ) {
+		$status = Status::newGood();
+
+		$dbw = $this->getMasterDB();
+		if ( !$dbw ) {
+			$status->fatal( 'filejournal-fail-dbconnect', $this->backend );
+			return $status;
+		}
+		$now = wfTimestamp( TS_UNIX );
+
+		$data = array();
+		foreach ( $entries as $entry ) {
+			$data[] = array(
+				'fj_batch_uuid' => $batchId,
+				'fj_backend' => $this->backend,
+				'fj_op' => $entry['op'],
+				'fj_path' => $entry['path'],
+				'fj_path_sha1' => wfBaseConvert( sha1( $entry['path'] ), 16, 36, 31 ),
+				'fj_new_sha1' => $entry['newSha1'],
+				'fj_timestamp' => $dbw->timestamp( $now )
+			);
+		}
+
+		try {
+			$dbw->begin();
+			$dbw->insert( 'filejournal', $data, __METHOD__ );
+			$dbw->commit();
+		} catch ( DBError $e ) {
+			$status->fatal( 'filejournal-fail-dbquery', $this->backend );
+			return $status;
+		}
+
+		return $status;
+	}
+
+	/**
+	 * @see FileJournal::purgeOldLogs()
+	 * @return Status
+	 */
+	protected function doPurgeOldLogs() {
+		$status = Status::newGood();
+		if ( $this->ttlDays <= 0 ) {
+			return $status; // nothing to do
+		}
+
+		$dbw = $this->getMasterDB();
+		if ( !$dbw ) {
+			$status->fatal( 'filejournal-fail-dbconnect', $this->backend );
+			return $status;
+		}
+		$dbCutoff = $dbw->timestamp( time() - 86400 * $this->ttlDays );
+
+		try {
+			$dbw->begin();
+			$dbw->delete( 'filejournal',
+				array( 'fj_timestamp < ' . $dbw->addQuotes( $dbCutoff ) ),
+				__METHOD__
+			);
+			$dbw->commit();
+		} catch ( DBError $e ) {
+			$status->fatal( 'filejournal-fail-dbquery', $this->backend );
+			return $status;
+		}
+
+		return $status;
+	}
+
+	/**
+	 * Get a master connection to the logging DB
+	 *
+	 * @return DatabaseBase|null
+	 */
+	protected function getMasterDB() {
+		try {
+			$lb = wfGetLBFactory()->newMainLB();
+			return $lb->getConnection( DB_MASTER, array(), $this->wiki );
+		} catch ( DBConnectionError $e ) {
+			return null;
+		}
+	}
+}

includes/filerepo/backend/filejournal/FileJournal.php (new file, 131 lines)
@@ -0,0 +1,131 @@
+<?php
+/**
+ * @defgroup FileJournal File journal
+ * @ingroup FileBackend
+ */
+
+/**
+ * @file
+ * @ingroup FileJournal
+ * @author Aaron Schulz
+ */
+
+/**
+ * @brief Class for handling file operation journaling.
+ *
+ * Subclasses should avoid throwing exceptions at all costs.
+ *
+ * @ingroup FileJournal
+ * @since 1.20
+ */
+abstract class FileJournal {
+	protected $backend; // string
+	protected $ttlDays; // integer
+
+	/**
+	 * Construct a new instance from configuration.
+	 * $config includes:
+	 * 'ttlDays' : days to keep log entries around (false means "forever")
+	 *
+	 * @param $config Array
+	 */
+	protected function __construct( array $config ) {
+		$this->ttlDays = isset( $config['ttlDays'] ) ? $config['ttlDays'] : false;
+	}
+
+	/**
+	 * Create an appropriate FileJournal object from config
+	 *
+	 * @param $config Array
+	 * @param $backend string A registered file backend name
+	 * @return FileJournal
+	 */
+	final public static function factory( array $config, $backend ) {
+		$class = $config['class'];
+		$jrn = new $class( $config );
+		if ( !$jrn instanceof self ) {
+			throw new MWException( "Class given is not an instance of FileJournal." );
+		}
+		$jrn->backend = $backend;
+		return $jrn;
+	}
+
+	/**
+	 * Get a statistically unique ID string
+	 *
+	 * @return string <9 char TS_MW timestamp in base 36><22 random base 36 chars>
+	 */
+	final public function getTimestampedUUID() {
+		$s = '';
+		for ( $i = 0; $i < 5; $i++ ) {
+			$s .= mt_rand( 0, 2147483647 );
+		}
+		$s = wfBaseConvert( sha1( $s ), 16, 36, 31 );
+		return substr( wfBaseConvert( wfTimestamp( TS_MW ), 10, 36, 9 ) . $s, 0, 31 );
+	}
+
+	/**
+	 * Log changes made by a batch file operation.
+	 * $entries is an array of log entries, each of which contains:
+	 * op : Basic operation name (create, store, copy, delete)
+	 * path : The storage path of the file
+	 * newSha1 : The final base 36 SHA-1 of the file
+	 * Note that 'false' should be used as the SHA-1 for non-existing files.
+	 *
+	 * @param $entries Array List of file operations (each an array of parameters)
+	 * @param $batchId string UUID string that identifies the operation batch
+	 * @return Status
+	 */
+	final public function logChangeBatch( array $entries, $batchId ) {
+		if ( !count( $entries ) ) {
+			return Status::newGood();
+		}
+		return $this->doLogChangeBatch( $entries, $batchId );
+	}
+
+	/**
+	 * @see FileJournal::logChangeBatch()
+	 *
+	 * @param $entries Array List of file operations (each an array of parameters)
+	 * @param $batchId string UUID string that identifies the operation batch
+	 * @return Status
+	 */
+	abstract protected function doLogChangeBatch( array $entries, $batchId );
+
+	/**
+	 * Purge any old log entries
+	 *
+	 * @return Status
+	 */
+	final public function purgeOldLogs() {
+		return $this->doPurgeOldLogs();
+	}
+
+	/**
+	 * @see FileJournal::purgeOldLogs()
+	 * @return Status
+	 */
+	abstract protected function doPurgeOldLogs();
+}
+
+/**
+ * Simple version of FileJournal that does nothing
+ * @since 1.20
+ */
+class NullFileJournal extends FileJournal {
+	/**
+	 * @see FileJournal::logChangeBatch()
+	 * @return Status
+	 */
+	protected function doLogChangeBatch( array $entries, $batchId ) {
+		return Status::newGood();
+	}
+
+	/**
+	 * @see FileJournal::purgeOldLogs()
+	 * @return Status
+	 */
+	protected function doPurgeOldLogs() {
+		return Status::newGood();
+	}
+}

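A minimal usage sketch of the new API, using only methods added in this commit. Normally FileBackend::__construct() builds the journal itself via FileJournal::factory(), so constructing one by hand like this is purely illustrative; the backend name, wiki name, and file data are assumptions.

<?php
// Hypothetical standalone use of the journal API.
$journal = FileJournal::factory(
	array( 'class' => 'DBFileJournal', 'wiki' => 'mywiki', 'ttlDays' => 30 ),
	'local-backend' // registered backend name this journal is scoped to
);

$batchId = $journal->getTimestampedUUID(); // 31-char base-36 ID with a timestamp prefix

// Record a one-entry batch; a 'newSha1' of false would mean the file does not exist.
$status = $journal->logChangeBatch(
	array(
		array(
			'op'      => 'create',
			'path'    => 'mwstore://local-backend/some-container/example.txt',
			'newSha1' => wfBaseConvert( sha1( 'example contents' ), 16, 36, 31 )
		)
	),
	$batchId
);

if ( $status->isOK() ) {
	$journal->purgeOldLogs(); // prune rows older than 'ttlDays' (DBFileJournal only)
}
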
@@ -2274,6 +2274,10 @@ If the problem persists, contact an [[Special:ListUsers/sysop|administrator]].',
 'backend-fail-contenttype' => 'Could not determine the content type of the file to store at "$1".',
 'backend-fail-batchsize' => 'Storage backend given a batch of $1 file {{PLURAL:$1|operation|operations}}; the limit is $2 {{PLURAL:$2|operation|operations}}.',
 
+# File journal
+'filejournal-fail-dbconnect' => 'Could not connect to the journal database for storage backend "$1".',
+'filejournal-fail-dbquery' => 'Could not update the journal database for storage backend "$1".',
+
 # Lock manager
 'lockmanager-notlocked' => 'Could not unlock "$1"; it is not locked.',
 'lockmanager-fail-closelock' => 'Could not close lock file for "$1".',

maintenance/archives/patch-filejournal.sql (new file, 24 lines)
@@ -0,0 +1,24 @@
+-- File backend operation journal
+CREATE TABLE /*_*/filejournal (
+  -- Unique ID for each file operation
+  fj_id bigint unsigned NOT NULL PRIMARY KEY auto_increment,
+  -- UUID of the batch this operation belongs to
+  fj_batch_uuid varbinary(32) NOT NULL,
+  -- The registered file backend name
+  fj_backend varchar(255) NOT NULL,
+  -- The storage path that was affected (may be internal paths)
+  fj_path blob NOT NULL,
+  -- SHA-1 file path hash in base-36
+  fj_path_sha1 varbinary(32) NOT NULL default '',
+  -- Primitive operation description (create/update/delete)
+  fj_op varchar(16) NOT NULL default '',
+  -- SHA-1 file content hash in base-36
+  fj_new_sha1 varbinary(32) NOT NULL default '',
+  -- Timestamp of the batch operation
+  fj_timestamp varbinary(14) NOT NULL default ''
+);
+
+CREATE INDEX /*i*/fj_batch_id ON /*_*/filejournal (fj_batch_uuid,fj_id);
+CREATE INDEX /*i*/fj_path_id ON /*_*/filejournal (fj_path_sha1,fj_id);
+CREATE INDEX /*i*/fj_new_sha1 ON /*_*/filejournal (fj_new_sha1,fj_id);
+CREATE INDEX /*i*/fj_timestamp ON /*_*/filejournal (fj_timestamp);

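No recovery or sync script ships with this commit, but as a sketch of how one might read the table back with the core DatabaseBase select API (the backend name and batch ID variable are placeholders, and this assumes the journal lives in the current wiki's DB):

<?php
// Hypothetical consistency-check snippet: replay one batch in insertion order.
$dbr = wfGetDB( DB_SLAVE ); // DBFileJournal can be pointed at another wiki via its 'wiki' setting
$res = $dbr->select(
	'filejournal',
	array( 'fj_op', 'fj_path', 'fj_new_sha1', 'fj_timestamp' ),
	array(
		'fj_backend'    => 'local-backend', // assumed backend name
		'fj_batch_uuid' => $batchId         // UUID from FileJournal::getTimestampedUUID()
	),
	__METHOD__,
	array( 'ORDER BY' => 'fj_id' ) // served by the (fj_batch_uuid, fj_id) index
);
foreach ( $res as $row ) {
	// e.g. compare $row->fj_new_sha1 with the current SHA-1 of $row->fj_path on a mirror
}
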
@@ -1377,6 +1377,11 @@ $wgMessageStructure = array(
 		'backend-fail-batchsize'
 	),
 
+	'filejournal-errors' => array(
+		'filejournal-fail-dbconnect',
+		'filejournal-fail-dbquery'
+	),
+
 	'lockmanager-errors' => array(
 		'lockmanager-notlocked',
 		'lockmanager-fail-closelock',