Reinstated r94289 et al. - rev_sha1/ar_sha1 field for bug 21860
This commit is contained in:
parent
d6ac929842
commit
c326cab516
11 changed files with 178 additions and 14 deletions
|
|
@ -865,6 +865,7 @@ $wgAutoloadLocalClasses = array(
|
||||||
'PopulateLogUsertext' => 'maintenance/populateLogUsertext.php',
|
'PopulateLogUsertext' => 'maintenance/populateLogUsertext.php',
|
||||||
'PopulateParentId' => 'maintenance/populateParentId.php',
|
'PopulateParentId' => 'maintenance/populateParentId.php',
|
||||||
'PopulateRevisionLength' => 'maintenance/populateRevisionLength.php',
|
'PopulateRevisionLength' => 'maintenance/populateRevisionLength.php',
|
||||||
|
'PopulateRevisionSha1' => 'maintenance/populateRevisionSha1.php',
|
||||||
'SevenZipStream' => 'maintenance/7zip.inc',
|
'SevenZipStream' => 'maintenance/7zip.inc',
|
||||||
'Sqlite' => 'maintenance/sqlite.inc',
|
'Sqlite' => 'maintenance/sqlite.inc',
|
||||||
'UpdateCollation' => 'maintenance/updateCollation.php',
|
'UpdateCollation' => 'maintenance/updateCollation.php',
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ class Revision {
|
||||||
protected $mTimestamp;
|
protected $mTimestamp;
|
||||||
protected $mDeleted;
|
protected $mDeleted;
|
||||||
protected $mSize;
|
protected $mSize;
|
||||||
|
protected $mSha1;
|
||||||
protected $mParentId;
|
protected $mParentId;
|
||||||
protected $mComment;
|
protected $mComment;
|
||||||
protected $mText;
|
protected $mText;
|
||||||
|
|
@ -122,7 +123,8 @@ class Revision {
|
||||||
'minor_edit' => $row->ar_minor_edit,
|
'minor_edit' => $row->ar_minor_edit,
|
||||||
'text_id' => isset( $row->ar_text_id ) ? $row->ar_text_id : null,
|
'text_id' => isset( $row->ar_text_id ) ? $row->ar_text_id : null,
|
||||||
'deleted' => $row->ar_deleted,
|
'deleted' => $row->ar_deleted,
|
||||||
'len' => $row->ar_len
|
'len' => $row->ar_len,
|
||||||
|
'sha1' => $row->ar_sha1
|
||||||
);
|
);
|
||||||
if ( isset( $row->ar_text ) && !$row->ar_text_id ) {
|
if ( isset( $row->ar_text ) && !$row->ar_text_id ) {
|
||||||
// Pre-1.5 ar_text row
|
// Pre-1.5 ar_text row
|
||||||
|
|
@ -313,7 +315,8 @@ class Revision {
|
||||||
'rev_minor_edit',
|
'rev_minor_edit',
|
||||||
'rev_deleted',
|
'rev_deleted',
|
||||||
'rev_len',
|
'rev_len',
|
||||||
'rev_parent_id'
|
'rev_parent_id',
|
||||||
|
'rev_sha1'
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -375,6 +378,12 @@ class Revision {
|
||||||
$this->mSize = intval( $row->rev_len );
|
$this->mSize = intval( $row->rev_len );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( !isset( $row->rev_sha1 ) ) {
|
||||||
|
$this->mSha1 = null;
|
||||||
|
} else {
|
||||||
|
$this->mSha1 = $row->rev_sha1;
|
||||||
|
}
|
||||||
|
|
||||||
if( isset( $row->page_latest ) ) {
|
if( isset( $row->page_latest ) ) {
|
||||||
$this->mCurrent = ( $row->rev_id == $row->page_latest );
|
$this->mCurrent = ( $row->rev_id == $row->page_latest );
|
||||||
$this->mTitle = Title::newFromRow( $row );
|
$this->mTitle = Title::newFromRow( $row );
|
||||||
|
|
@ -402,7 +411,7 @@ class Revision {
|
||||||
$this->mOrigUserText = $row->rev_user_text;
|
$this->mOrigUserText = $row->rev_user_text;
|
||||||
} elseif( is_array( $row ) ) {
|
} elseif( is_array( $row ) ) {
|
||||||
// Build a new revision to be saved...
|
// Build a new revision to be saved...
|
||||||
global $wgUser;
|
global $wgUser; // ugh
|
||||||
|
|
||||||
$this->mId = isset( $row['id'] ) ? intval( $row['id'] ) : null;
|
$this->mId = isset( $row['id'] ) ? intval( $row['id'] ) : null;
|
||||||
$this->mPage = isset( $row['page'] ) ? intval( $row['page'] ) : null;
|
$this->mPage = isset( $row['page'] ) ? intval( $row['page'] ) : null;
|
||||||
|
|
@ -414,6 +423,7 @@ class Revision {
|
||||||
$this->mDeleted = isset( $row['deleted'] ) ? intval( $row['deleted'] ) : 0;
|
$this->mDeleted = isset( $row['deleted'] ) ? intval( $row['deleted'] ) : 0;
|
||||||
$this->mSize = isset( $row['len'] ) ? intval( $row['len'] ) : null;
|
$this->mSize = isset( $row['len'] ) ? intval( $row['len'] ) : null;
|
||||||
$this->mParentId = isset( $row['parent_id'] ) ? intval( $row['parent_id'] ) : null;
|
$this->mParentId = isset( $row['parent_id'] ) ? intval( $row['parent_id'] ) : null;
|
||||||
|
$this->mSha1 = isset( $row['sha1'] ) ? strval( $row['sha1'] ) : null;
|
||||||
|
|
||||||
// Enforce spacing trimming on supplied text
|
// Enforce spacing trimming on supplied text
|
||||||
$this->mComment = isset( $row['comment'] ) ? trim( strval( $row['comment'] ) ) : null;
|
$this->mComment = isset( $row['comment'] ) ? trim( strval( $row['comment'] ) ) : null;
|
||||||
|
|
@ -422,10 +432,14 @@ class Revision {
|
||||||
|
|
||||||
$this->mTitle = null; # Load on demand if needed
|
$this->mTitle = null; # Load on demand if needed
|
||||||
$this->mCurrent = false;
|
$this->mCurrent = false;
|
||||||
# If we still have no len_size, see it we have the text to figure it out
|
# If we still have no length, see if we have the text to figure it out
|
||||||
if ( !$this->mSize ) {
|
if ( !$this->mSize ) {
|
||||||
$this->mSize = is_null( $this->mText ) ? null : strlen( $this->mText );
|
$this->mSize = is_null( $this->mText ) ? null : strlen( $this->mText );
|
||||||
}
|
}
|
||||||
|
# Same for sha1
|
||||||
|
if ( $this->mSha1 === null ) {
|
||||||
|
$this->mSha1 = is_null( $this->mText ) ? null : self::base36Sha1( $this->mText );
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
throw new MWException( 'Revision constructor passed invalid row format.' );
|
throw new MWException( 'Revision constructor passed invalid row format.' );
|
||||||
}
|
}
|
||||||
|
|
@ -468,6 +482,15 @@ class Revision {
|
||||||
return $this->mSize;
|
return $this->mSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Accessor for the content hash held by this Revision object.
 *
 * The value is the base-36 SHA-1 of the revision text. It is null when the
 * object was built without a hash (for example from a database row that
 * carried no rev_sha1 field).
 *
 * @return String|null
 */
public function getSha1() {
	return $this->mSha1;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the title of the page associated with this entry.
|
* Returns the title of the page associated with this entry.
|
||||||
*
|
*
|
||||||
|
|
@ -938,8 +961,12 @@ class Revision {
|
||||||
'rev_timestamp' => $dbw->timestamp( $this->mTimestamp ),
|
'rev_timestamp' => $dbw->timestamp( $this->mTimestamp ),
|
||||||
'rev_deleted' => $this->mDeleted,
|
'rev_deleted' => $this->mDeleted,
|
||||||
'rev_len' => $this->mSize,
|
'rev_len' => $this->mSize,
|
||||||
'rev_parent_id' => is_null($this->mParentId) ?
|
'rev_parent_id' => is_null( $this->mParentId )
|
||||||
$this->getPreviousRevisionId( $dbw ) : $this->mParentId
|
? $this->getPreviousRevisionId( $dbw )
|
||||||
|
: $this->mParentId,
|
||||||
|
'rev_sha1' => is_null( $this->mSha1 )
|
||||||
|
? Revision::base36Sha1( $this->mText )
|
||||||
|
: $this->mSha1
|
||||||
), __METHOD__
|
), __METHOD__
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -951,6 +978,15 @@ class Revision {
|
||||||
return $this->mId;
|
return $this->mId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Hash a string of text with SHA-1 and express the digest in base 36.
 *
 * @param $text String Text to hash
 * @return String The hexadecimal digest re-encoded by wfBaseConvert()
 */
public static function base36Sha1( $text ) {
	$hexDigest = sha1( $text );
	// Convert base 16 -> base 36. The trailing 31 is presumably the minimum
	// width of the result -- confirm against wfBaseConvert().
	return wfBaseConvert( $hexDigest, 16, 36, 31 );
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lazy-load the revision's text.
|
* Lazy-load the revision's text.
|
||||||
* Currently hardcoded to the 'text' table storage engine.
|
* Currently hardcoded to the 'text' table storage engine.
|
||||||
|
|
|
||||||
|
|
@ -1667,7 +1667,8 @@ class WikiPage extends Page {
|
||||||
'ar_flags' => '\'\'', // MySQL's "strict mode"...
|
'ar_flags' => '\'\'', // MySQL's "strict mode"...
|
||||||
'ar_len' => 'rev_len',
|
'ar_len' => 'rev_len',
|
||||||
'ar_page_id' => 'page_id',
|
'ar_page_id' => 'page_id',
|
||||||
'ar_deleted' => $bitfield
|
'ar_deleted' => $bitfield,
|
||||||
|
'ar_sha1' => 'rev_sha1'
|
||||||
), array(
|
), array(
|
||||||
'page_id' => $id,
|
'page_id' => $id,
|
||||||
'page_id = rev_page'
|
'page_id = rev_page'
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,9 @@ abstract class DatabaseUpdater {
|
||||||
|
|
||||||
protected $postDatabaseUpdateMaintenance = array(
|
protected $postDatabaseUpdateMaintenance = array(
|
||||||
'DeleteDefaultMessages',
|
'DeleteDefaultMessages',
|
||||||
'PopulateRevisionLength'
|
'PopulateRevisionLength',
|
||||||
|
'PopulateRevisionSha1',
|
||||||
|
'PopulateImageSha1'
|
||||||
);
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -187,7 +187,8 @@ class MysqlUpdater extends DatabaseUpdater {
|
||||||
array( 'addIndex', 'logging', 'type_action', 'patch-logging-type-action-index.sql'),
|
array( 'addIndex', 'logging', 'type_action', 'patch-logging-type-action-index.sql'),
|
||||||
array( 'doMigrateUserOptions' ),
|
array( 'doMigrateUserOptions' ),
|
||||||
array( 'dropField', 'user', 'user_options', 'patch-drop-user_options.sql' ),
|
array( 'dropField', 'user', 'user_options', 'patch-drop-user_options.sql' ),
|
||||||
|
array( 'addField', 'revision', 'rev_sha1', 'patch-rev_sha1.sql' ),
|
||||||
|
array( 'addField', 'archive', 'ar_sha1', 'patch-ar_sha1.sql' )
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -65,6 +65,8 @@ class SqliteUpdater extends DatabaseUpdater {
|
||||||
array( 'addIndex', 'logging', 'type_action', 'patch-logging-type-action-index.sql'),
|
array( 'addIndex', 'logging', 'type_action', 'patch-logging-type-action-index.sql'),
|
||||||
array( 'doMigrateUserOptions' ),
|
array( 'doMigrateUserOptions' ),
|
||||||
array( 'dropField', 'user', 'user_options', 'patch-drop-user_options.sql' ),
|
array( 'dropField', 'user', 'user_options', 'patch-drop-user_options.sql' ),
|
||||||
|
array( 'addField', 'revision', 'rev_sha1', 'patch-rev_sha1.sql' ),
|
||||||
|
array( 'addField', 'archive', 'ar_sha1', 'patch-ar_sha1.sql' )
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -116,7 +116,7 @@ class PageArchive {
|
||||||
$res = $dbr->select( 'archive',
|
$res = $dbr->select( 'archive',
|
||||||
array(
|
array(
|
||||||
'ar_minor_edit', 'ar_timestamp', 'ar_user', 'ar_user_text',
|
'ar_minor_edit', 'ar_timestamp', 'ar_user', 'ar_user_text',
|
||||||
'ar_comment', 'ar_len', 'ar_deleted', 'ar_rev_id'
|
'ar_comment', 'ar_len', 'ar_deleted', 'ar_rev_id', 'ar_sha1'
|
||||||
),
|
),
|
||||||
array( 'ar_namespace' => $this->title->getNamespace(),
|
array( 'ar_namespace' => $this->title->getNamespace(),
|
||||||
'ar_title' => $this->title->getDBkey() ),
|
'ar_title' => $this->title->getDBkey() ),
|
||||||
|
|
@ -460,7 +460,8 @@ class PageArchive {
|
||||||
'ar_text_id',
|
'ar_text_id',
|
||||||
'ar_deleted',
|
'ar_deleted',
|
||||||
'ar_page_id',
|
'ar_page_id',
|
||||||
'ar_len' ),
|
'ar_len',
|
||||||
|
'ar_sha1' ),
|
||||||
/* WHERE */ array(
|
/* WHERE */ array(
|
||||||
'ar_namespace' => $this->title->getNamespace(),
|
'ar_namespace' => $this->title->getNamespace(),
|
||||||
'ar_title' => $this->title->getDBkey(),
|
'ar_title' => $this->title->getDBkey(),
|
||||||
|
|
|
||||||
3
maintenance/archives/patch-ar_sha1.sql
Normal file
3
maintenance/archives/patch-ar_sha1.sql
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
-- Add the ar_sha1 column to the archive table: the SHA-1 hash of the
-- archived revision's text, in base 36. Existing rows receive the
-- empty-string default; maintenance/populateRevisionSha1.php fills them in.
|
||||||
|
ALTER TABLE /*$wgDBprefix*/archive
|
||||||
|
ADD ar_sha1 varbinary(32) NOT NULL default '';
|
||||||
3
maintenance/archives/patch-rev_sha1.sql
Normal file
3
maintenance/archives/patch-rev_sha1.sql
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
-- Add the rev_sha1 column to the revision table: the SHA-1 hash of the
-- revision's text, in base 36. Existing rows receive the empty-string
-- default; maintenance/populateRevisionSha1.php fills them in.
|
||||||
|
ALTER TABLE /*$wgDBprefix*/revision
|
||||||
|
ADD rev_sha1 varbinary(32) NOT NULL default '';
|
||||||
108
maintenance/populateRevisionSha1.php
Normal file
108
maintenance/populateRevisionSha1.php
Normal file
|
|
@ -0,0 +1,108 @@
|
||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* Fills the rev_sha1 and ar_sha1 columns of revision
|
||||||
|
* and archive tables for revisions created before MW 1.19.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along
|
||||||
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
* http://www.gnu.org/copyleft/gpl.html
|
||||||
|
*
|
||||||
|
* @ingroup Maintenance
|
||||||
|
*/
|
||||||
|
|
||||||
|
require_once( dirname( __FILE__ ) . '/Maintenance.php' );
|
||||||
|
|
||||||
|
/**
 * Maintenance script that back-fills the rev_sha1 column of the revision
 * table and the ar_sha1 column of the archive table for rows whose hash is
 * still the empty string.
 */
class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
	/**
	 * Set the script description and a batch size of 200 ids per pass.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Populates the rev_sha1 and ar_sha1 fields";
		$this->setBatchSize( 200 );
	}

	/**
	 * @return String Key identifying this update; presumably used by the
	 *   parent class to record that the update has already run (confirm
	 *   against LoggedUpdateMaintenance)
	 */
	protected function getUpdateKey() {
		return 'populate rev_sha1';
	}

	/**
	 * Populate rev_sha1 and then ar_sha1, and report how many rows of each
	 * table were updated.
	 *
	 * @return Boolean Always true once both tables have been processed
	 */
	protected function doDBUpdates() {
		$db = $this->getDB( DB_MASTER );
		// NOTE(review): the second argument to error() is presumably "die
		// after printing" -- confirm against Maintenance::error().
		if ( !$db->tableExists( 'revision' ) ) {
			$this->error( "revision table does not exist", true );
		}
		if ( !$db->tableExists( 'archive' ) ) {
			$this->error( "archive table does not exist", true );
		}

		$this->output( "Populating rev_sha1 column\n" );
		$rc = $this->doSha1Updates( $db, 'revision', 'rev_id', 'rev' );

		$this->output( "Populating ar_sha1 column\n" );
		$ac = $this->doSha1Updates( $db, 'archive', 'ar_rev_id', 'ar' );

		$this->output( "rev_sha1 and ar_sha1 population complete [$rc revision rows, $ac archive rows].\n" );
		return true;
	}

	/**
	 * Fill the "{$prefix}_sha1" column of one table, walking the id column
	 * in ranges of mBatchSize ids and committing once per range.
	 *
	 * Only rows whose hash column is still '' are touched. Rows whose text
	 * cannot be loaded are reported and left unchanged.
	 *
	 * NOTE(review): rows whose $idCol is NULL can never match the BETWEEN
	 * range below, so they are not populated by this method. If the archive
	 * table can hold rows with a NULL ar_rev_id, they need separate handling.
	 *
	 * @param $db Database connection used for both the reads and the updates
	 * @param $table String Table name ('revision' or 'archive')
	 * @param $idCol String Numeric id column used to walk the table
	 * @param $prefix String Column prefix of the table ('rev' or 'ar')
	 * @return Integer Rows changed
	 */
	protected function doSha1Updates( $db, $table, $idCol, $prefix ) {
		$start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ );
		$end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ );
		if ( !$start || !$end ) {
			$this->output( "...$table table seems to be empty.\n" );
			// Nothing was changed. Return an integer as documented, so the
			// summary printed by doDBUpdates() reads "0 ... rows" rather
			// than "1 ... rows" (which is what a boolean true rendered as).
			return 0;
		}

		$count = 0;
		# Extend the upper bound so the final, partially filled range is
		# still visited by the loop below
		$end += $this->mBatchSize - 1;
		$blockStart = $start;
		$blockEnd = $start + $this->mBatchSize - 1;
		while ( $blockEnd <= $end ) {
			$this->output( "...doing $idCol from $blockStart to $blockEnd\n" );
			$cond = "$idCol BETWEEN $blockStart AND $blockEnd" .
				" AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''";
			$res = $db->select( $table, '*', $cond, __METHOD__ );

			$db->begin();
			foreach ( $res as $row ) {
				if ( $table === 'archive' ) {
					$rev = Revision::newFromArchiveRow( $row );
				} else {
					$rev = new Revision( $row );
				}
				$text = $rev->getRawText();
				if ( !is_string( $text ) ) {
					# This should not happen, but sometimes does (bug 20757)
					$this->output( "Text of revision {$row->$idCol} unavailable!\n" );
				} else {
					$db->update( $table,
						array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ),
						array( $idCol => $row->$idCol ),
						__METHOD__ );
					$count++;
				}
			}
			$db->commit();

			$blockStart += $this->mBatchSize;
			$blockEnd += $this->mBatchSize;
			wfWaitForSlaves();
		}
		return $count;
	}
}
|
||||||
|
|
||||||
|
$maintClass = "PopulateRevisionSha1";
|
||||||
|
require_once( RUN_MAINTENANCE_IF_MAIN );
|
||||||
|
|
@ -311,7 +311,10 @@ CREATE TABLE /*_*/revision (
|
||||||
|
|
||||||
-- Key to revision.rev_id
|
-- Key to revision.rev_id
|
||||||
-- This field is used to add support for a tree structure (The Adjacency List Model)
|
-- This field is used to add support for a tree structure (The Adjacency List Model)
|
||||||
rev_parent_id int unsigned default NULL
|
rev_parent_id int unsigned default NULL,
|
||||||
|
|
||||||
|
-- SHA-1 text content hash in base-36
|
||||||
|
rev_sha1 varbinary(32) NOT NULL default ''
|
||||||
|
|
||||||
) /*$wgDBTableOptions*/ MAX_ROWS=10000000 AVG_ROW_LENGTH=1024;
|
) /*$wgDBTableOptions*/ MAX_ROWS=10000000 AVG_ROW_LENGTH=1024;
|
||||||
-- In case tables are created as MyISAM, use row hints for MySQL <5.0 to avoid 4GB limit
|
-- In case tables are created as MyISAM, use row hints for MySQL <5.0 to avoid 4GB limit
|
||||||
|
|
@ -418,7 +421,10 @@ CREATE TABLE /*_*/archive (
|
||||||
ar_page_id int unsigned,
|
ar_page_id int unsigned,
|
||||||
|
|
||||||
-- Original previous revision
|
-- Original previous revision
|
||||||
ar_parent_id int unsigned default NULL
|
ar_parent_id int unsigned default NULL,
|
||||||
|
|
||||||
|
-- SHA-1 text content hash in base-36
|
||||||
|
ar_sha1 varbinary(32) NOT NULL default ''
|
||||||
) /*$wgDBTableOptions*/;
|
) /*$wgDBTableOptions*/;
|
||||||
|
|
||||||
CREATE INDEX /*i*/name_title_timestamp ON /*_*/archive (ar_namespace,ar_title,ar_timestamp);
|
CREATE INDEX /*i*/name_title_timestamp ON /*_*/archive (ar_namespace,ar_title,ar_timestamp);
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue