Merge "REST PageHistoryCount: Add object caching with incremental updates."

This commit is contained in:
jenkins-bot 2019-12-03 04:15:46 +00:00 committed by Gerrit Code Review
commit faea842396
5 changed files with 219 additions and 110 deletions

View file

@ -13,6 +13,7 @@ use MediaWiki\Storage\NameTableStore;
use MediaWiki\Storage\NameTableStoreFactory;
use RequestContext;
use User;
use WANObjectCache;
use Wikimedia\Message\MessageValue;
use Wikimedia\Message\ParamType;
use Wikimedia\Message\ScalarParam;
@ -54,6 +55,9 @@ class PageHistoryCountHandler extends SimpleHandler {
/** @var ILoadBalancer */
private $loadBalancer;
/** @var WANObjectCache */
private $cache;
/** @var User */
private $user;
@ -62,17 +66,20 @@ class PageHistoryCountHandler extends SimpleHandler {
* @param NameTableStoreFactory $nameTableStoreFactory
* @param PermissionManager $permissionManager
* @param ILoadBalancer $loadBalancer
* @param WANObjectCache $cache
*/
public function __construct(
RevisionStore $revisionStore,
NameTableStoreFactory $nameTableStoreFactory,
PermissionManager $permissionManager,
ILoadBalancer $loadBalancer
ILoadBalancer $loadBalancer,
WANObjectCache $cache
) {
$this->revisionStore = $revisionStore;
$this->changeTagDefStore = $nameTableStoreFactory->getChangeTagDef();
$this->permissionManager = $permissionManager;
$this->loadBalancer = $loadBalancer;
$this->cache = $cache;
// @todo Inject this, when there is a good way to do that
$this->user = RequestContext::getMain()->getUser();
@ -138,7 +145,8 @@ class PageHistoryCountHandler extends SimpleHandler {
403
);
}
$count = $this->getCount( $titleObj->getArticleID(), $normalizedType );
$count = $this->getCount( $titleObj, $normalizedType );
$countLimit = self::COUNT_LIMITS[$normalizedType];
$response = $this->getResponseFactory()->createJson( [
'count' => $count > $countLimit ? $countLimit : $count,
@ -157,40 +165,87 @@ class PageHistoryCountHandler extends SimpleHandler {
}
/**
* @param int $pageId the id of the page to load history for
* @param Title $title the title of the page to load history for
* @param string $type the validated count type
* @return int the article count
* @throws LocalizedHttpException
*/
protected function getCount( $pageId, $type ) {
private function getCount( $title, $type ) {
switch ( $type ) {
case 'anonymous':
return $this->getAnonCount( $pageId );
return $this->getCachedCount( $title, $type,
function ( RevisionRecord $fromRev = null ) use ( $title ) {
return $this->getAnonCount( $title->getArticleID(), $fromRev );
}
);
case 'bot':
return $this->getBotCount( $pageId );
return $this->getCachedCount( $title, $type,
function ( RevisionRecord $fromRev = null ) use ( $title ) {
return $this->getBotCount( $title->getArticleID(), $fromRev );
}
);
case 'editors':
return $this->getEditorsCount( $pageId );
$from = $this->getValidatedParams()['from'] ?? null;
$to = $this->getValidatedParams()['to'] ?? null;
if ( $from || $to ) {
return $this->getEditorsCount(
$title->getArticleID(),
$from ? $this->getRevisionOrThrow( $from ) : null,
$to ? $this->getRevisionOrThrow( $to ) : null
);
} else {
return $this->getCachedCount( $title, $type,
function ( RevisionRecord $fromRev = null ) use ( $title ) {
return $this->getEditorsCount( $title->getArticleID(), $fromRev );
}, false
);
}
case 'edits':
return $this->getEditsCount( $pageId, self::COUNT_LIMITS[$type] );
$from = $this->getValidatedParams()['from'] ?? null;
$to = $this->getValidatedParams()['to'] ?? null;
if ( $from || $to ) {
return $this->getEditsCount(
$title->getArticleID(),
$from ? $this->getRevisionOrThrow( $from ) : null,
$to ? $this->getRevisionOrThrow( $to ) : null
);
} else {
return $this->getCachedCount( $title, $type,
function ( RevisionRecord $fromRev = null ) use ( $title ) {
return $this->getEditsCount( $title->getArticleID(), $fromRev );
}
);
}
case 'reverted':
return $this->getRevertedCount( $pageId );
return $this->getCachedCount( $title, $type,
function ( RevisionRecord $fromRev = null ) use ( $title ) {
return $this->getRevertedCount( $title->getArticleID(), $fromRev );
}
);
case 'minor':
// The query for minor counts is inefficient for the database for pages with many revisions.
// If the specified title contains more revisions than allowed, we will return an error.
// This may be fixed with object caching per T237430. If so, the check below can be removed.
$editsCount = $this->getEditsCount( $pageId, self::COUNT_LIMITS[$type] * 2 );
$editsCount = $this->getCachedCount( $title, 'edits',
function ( RevisionRecord $fromRev = null ) use ( $title ) {
return $this->getEditsCount( $title->getArticleID(), $fromRev );
}
);
if ( $editsCount > self::COUNT_LIMITS[$type] * 2 ) {
throw new LocalizedHttpException(
new MessageValue( 'rest-pagehistorycount-too-many-revisions' ),
500
);
}
return $this->getMinorCount( $pageId );
return $this->getCachedCount( $title, $type,
function ( RevisionRecord $fromRev = null ) use ( $title ) {
return $this->getMinorCount( $title->getArticleID(), $fromRev );
}
);
// Sanity check
default:
@ -203,20 +258,79 @@ class PageHistoryCountHandler extends SimpleHandler {
}
}
/**
* @param Title $title
* @param string $type
* @param callable $fetchCount
* @param bool $incrementalUpdate
* @return int
*/
private function getCachedCount( $title, $type,
callable $fetchCount, $incrementalUpdate = true
) {
$pageId = $title->getArticleID();
return $this->cache->getWithSetCallback(
$this->cache->makeKey( 'rest', 'pagehistorycount', $pageId, $type ),
WANObjectCache::TTL_WEEK,
function ( $oldValue ) use ( $title, $fetchCount, $incrementalUpdate ) {
$currentRev = $this->revisionStore->getKnownCurrentRevision( $title );
if ( $oldValue && $incrementalUpdate ) {
if ( $oldValue['revision'] === $currentRev->getId() ) {
// Should never happen, but just in case
return $oldValue;
}
$rev = $this->revisionStore->getRevisionById( $oldValue['revision'] );
if ( $rev ) {
$additionalCount = $fetchCount( $rev );
return [
'revision' => $currentRev->getId(),
'count' => $oldValue['count'] + $additionalCount
];
}
}
// Nothing was previously stored, or incremental update was done for too long,
// recalculate from scratch.
return [
'revision' => $currentRev->getId(),
'count' => $fetchCount()
];
},
[
'touchedCallback' => function () use ( $title ) {
return wfTimestampOrNull(
TS_UNIX,
$this->revisionStore->getKnownCurrentRevision( $title )->getTimestamp()
);
},
'checkKeys' => [
"RevDelRevisionList:page:$pageId",
"DerivedPageDataUpdater:restore:page:$pageId"
],
'version' => 1,
'lockTSE' => WANObjectCache::TTL_MINUTE * 5
]
)['count'];
}
/**
* @param int $pageId the id of the page to load history for
* @param RevisionRecord|null $fromRev
* @return int the count
*/
protected function getAnonCount( $pageId ) {
protected function getAnonCount( $pageId, RevisionRecord $fromRev = null ) {
$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );
$cond = [
'rev_page' => $pageId,
'actor_user IS NULL',
$dbr->bitAnd( 'rev_deleted',
RevisionRecord::DELETED_TEXT | RevisionRecord::DELETED_USER ) . " = 0"
];
$bitmask = $this->getBitmask();
if ( $bitmask ) {
$cond[] = $dbr->bitAnd( 'rev_deleted', $bitmask ) . " != $bitmask";
if ( $fromRev ) {
$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
"OR rev_timestamp > {$oldTs}";
}
$edits = $dbr->selectRowCount(
@ -245,13 +359,16 @@ class PageHistoryCountHandler extends SimpleHandler {
/**
* @param int $pageId the id of the page to load history for
* @param RevisionRecord|null $fromRev
* @return int the count
*/
protected function getBotCount( $pageId ) {
protected function getBotCount( $pageId, RevisionRecord $fromRev = null ) {
$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );
$cond = [
'rev_page=' . intval( $pageId ),
$dbr->bitAnd( 'rev_deleted',
RevisionRecord::DELETED_TEXT | RevisionRecord::DELETED_USER ) . " = 0",
'EXISTS(' .
$dbr->selectSQLText(
'user_groups',
@ -265,9 +382,10 @@ class PageHistoryCountHandler extends SimpleHandler {
) .
')'
];
$bitmask = $this->getBitmask();
if ( $bitmask ) {
$cond[] = $dbr->bitAnd( 'rev_deleted', $bitmask ) . " != $bitmask";
if ( $fromRev ) {
$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
"OR rev_timestamp > {$oldTs}";
}
$edits = $dbr->selectRowCount(
@ -296,33 +414,25 @@ class PageHistoryCountHandler extends SimpleHandler {
/**
* @param int $pageId the id of the page to load history for
* @param RevisionRecord|null $fromRev
* @param RevisionRecord|null $toRev
* @return int the count
* @throws LocalizedHttpException
*/
protected function getEditorsCount( $pageId ) {
$from = $this->getValidatedParams()['from'] ?? null;
$to = $this->getValidatedParams()['to'] ?? null;
$fromRev = $from ? $this->getRevisionOrThrow( $from ) : null;
$toRev = $to ? $this->getRevisionOrThrow( $to ) : null;
// Reorder from and to parameters if they are out of order.
if ( $fromRev && $toRev && ( $fromRev->getTimestamp() > $toRev->getTimestamp() ||
( $fromRev->getTimestamp() === $toRev->getTimestamp() && $from > $to ) )
) {
$tmp = $fromRev;
$fromRev = $toRev;
$toRev = $tmp;
}
protected function getEditorsCount( $pageId,
RevisionRecord $fromRev = null,
RevisionRecord $toRev = null
) {
list( $fromRev, $toRev ) = $this->orderRevisions( $fromRev, $toRev );
return $this->revisionStore->countAuthorsBetween( $pageId, $fromRev,
$toRev, $this->user, self::COUNT_LIMITS['editors'] );
}
/**
* @param int $pageId the id of the page to load history for
* @param RevisionRecord|null $fromRev
* @return int the count
*/
protected function getRevertedCount( $pageId ) {
protected function getRevertedCount( $pageId, RevisionRecord $fromRev = null ) {
$tagIds = [];
foreach ( self::REVERTED_TAG_NAMES as $tagName ) {
@ -337,6 +447,16 @@ class PageHistoryCountHandler extends SimpleHandler {
}
$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );
$cond = [
'rev_page' => $pageId,
$dbr->bitAnd( 'rev_deleted', RevisionRecord::DELETED_TEXT ) . " = 0"
];
if ( $fromRev ) {
$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
"OR rev_timestamp > {$oldTs}";
}
$edits = $dbr->selectRowCount(
[
'revision',
@ -364,15 +484,23 @@ class PageHistoryCountHandler extends SimpleHandler {
/**
* @param int $pageId the id of the page to load history for
* @param RevisionRecord|null $fromRev
* @return int the count
*/
protected function getMinorCount( $pageId ) {
protected function getMinorCount( $pageId, RevisionRecord $fromRev = null ) {
$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );
$cond = [
'rev_page' => $pageId,
'rev_minor_edit != 0',
$dbr->bitAnd( 'rev_deleted', RevisionRecord::DELETED_TEXT ) . " = 0"
];
if ( $fromRev ) {
$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
"OR rev_timestamp > {$oldTs}";
}
$edits = $dbr->selectRowCount( 'revision', '1',
[
'rev_page' => $pageId,
'rev_minor_edit != 0'
],
$cond,
__METHOD__,
[ 'LIMIT' => self::COUNT_LIMITS['minor'] + 1 ] // extra to detect truncation
);
@ -382,52 +510,24 @@ class PageHistoryCountHandler extends SimpleHandler {
/**
* @param int $pageId the id of the page to load history for
* @param int $limit
* @param RevisionRecord|null $fromRev
* @param RevisionRecord|null $toRev
* @return int the count
* @throws LocalizedHttpException
*/
protected function getEditsCount( $pageId, $limit ) {
$from = $this->getValidatedParams()['from'] ?? null;
$to = $this->getValidatedParams()['to'] ?? null;
$fromRev = $from ? $this->getRevisionOrThrow( $from ) : null;
$toRev = $to ? $this->getRevisionOrThrow( $to ) : null;
// Reorder from and to parameters if they are out of order.
if ( $fromRev && $toRev && ( $fromRev->getTimestamp() > $toRev->getTimestamp() ||
( $fromRev->getTimestamp() === $toRev->getTimestamp() && $from > $to ) )
) {
$tmp = $fromRev;
$fromRev = $toRev;
$toRev = $tmp;
}
protected function getEditsCount(
$pageId,
RevisionRecord $fromRev = null,
RevisionRecord $toRev = null
) {
list( $fromRev, $toRev ) = $this->orderRevisions( $fromRev, $toRev );
return $this->revisionStore->countRevisionsBetween(
$pageId,
$fromRev,
$toRev,
$limit // Will be increased by 1 to detect truncation
self::COUNT_LIMITS['edits'] // Will be increased by 1 to detect truncation
);
}
/**
* Helper function for rev_deleted/user rights query conditions
*
* @todo Factor out rev_deleted logic per T233222
*
* @return int
*/
private function getBitmask() {
if ( !$this->permissionManager->userHasRight( $this->user, 'deletedhistory' ) ) {
$bitmask = RevisionRecord::DELETED_USER;
} elseif ( !$this->permissionManager
->userHasAnyRight( $this->user, 'suppressrevision', 'viewsuppressed' )
) {
$bitmask = RevisionRecord::DELETED_USER | RevisionRecord::DELETED_RESTRICTED;
} else {
$bitmask = 0;
}
return $bitmask;
}
/**
* @param int $revId
* @return RevisionRecord
@ -444,6 +544,26 @@ class PageHistoryCountHandler extends SimpleHandler {
return $rev;
}
/**
* Reorders revisions if they are present
* @param RevisionRecord|null $fromRev
* @param RevisionRecord|null $toRev
* @return array
* @phan-return array{0:RevisionRecord|null,1:RevisionRecord|null}
*/
private function orderRevisions(
RevisionRecord $fromRev = null,
RevisionRecord $toRev = null
) {
if ( $fromRev && $toRev && ( $fromRev->getTimestamp() > $toRev->getTimestamp() ||
( $fromRev->getTimestamp() === $toRev->getTimestamp()
&& $fromRev->getId() > $toRev->getId() ) )
) {
return [ $toRev, $fromRev ];
}
return [ $fromRev, $toRev ];
}
public function needsWriteAccess() {
return false;
}

View file

@ -20,7 +20,8 @@
"RevisionStore",
"NameTableStoreFactory",
"PermissionManager",
"DBLoadBalancer"
"DBLoadBalancer",
"MainWANObjectCache"
]
},
{

View file

@ -38,7 +38,6 @@ use InvalidArgumentException;
use IP;
use LogicException;
use MediaWiki\Linker\LinkTarget;
use MediaWiki\MediaWikiServices;
use MediaWiki\Storage\BlobAccessException;
use MediaWiki\Storage\BlobStore;
use MediaWiki\Storage\NameTableAccessException;
@ -3190,7 +3189,7 @@ class RevisionStore
*
* @return RevisionRecord|bool Returns false if missing
*/
public function getKnownCurrentRevision( Title $title, $revId ) {
public function getKnownCurrentRevision( Title $title, $revId = 0 ) {
$db = $this->getDBConnectionRef( DB_REPLICA );
$pageId = $title->getArticleID();
@ -3279,27 +3278,6 @@ class RevisionStore
}
}
/**
* Returns a bitmask for the rev_deleted field visible to the provided user.
*
* @param UserIdentity $user
* @return int
*/
private function getRevisionDeletionBitmask( UserIdentity $user ) {
// TODO: We can't inject the permission manager since it would create
// a cyclic dependency! This RevisionRecord instead? See T233222 as well.
$pm = MediaWikiServices::getInstance()->getPermissionManager();
if ( !$pm->userHasRight( $user, 'deletedhistory' ) ) {
$bitmask = RevisionRecord::DELETED_USER;
} elseif ( !$pm->userHasAnyRight( $user, 'suppressrevision', 'viewsuppressed' )
) {
$bitmask = RevisionRecord::DELETED_USER | RevisionRecord::DELETED_RESTRICTED;
} else {
$bitmask = 0;
}
return $bitmask;
}
/**
* Converts revision limits to query conditions.
*
@ -3387,19 +3365,18 @@ class RevisionStore
if ( empty( $options ) ) {
return [];
} else {
return $user ? [ $new->getUser( RevisionRecord::FOR_THIS_USER, $user ) ] : [ $new->getUser() ];
return $user ? [ $new->getUser( RevisionRecord::FOR_PUBLIC, $user ) ] : [ $new->getUser() ];
}
}
$dbr = $this->getDBConnectionRef( DB_REPLICA );
$conds = array_merge(
[ 'rev_page' => $pageId ],
[
'rev_page' => $pageId,
$dbr->bitAnd( 'rev_deleted', RevisionRecord::DELETED_USER ) . " = 0"
],
$this->getRevisionLimitConditions( $dbr, $old, $new, $options )
);
$bitmask = $user ? $this->getRevisionDeletionBitmask( $user ) : 0;
if ( $bitmask ) {
$conds[] = $dbr->bitAnd( 'rev_deleted', $bitmask ) . " != $bitmask";
}
$queryOpts = [ 'DISTINCT' ];
if ( $max !== null ) {
@ -3492,7 +3469,10 @@ class RevisionStore
$dbr = $this->getDBConnectionRef( DB_REPLICA );
$conds = array_merge(
[ 'rev_page' => $pageId ],
[
'rev_page' => $pageId,
$dbr->bitAnd( 'rev_deleted', RevisionRecord::DELETED_TEXT ) . " = 0"
],
$this->getRevisionLimitConditions( $dbr, $old, $new, $options )
);
if ( $max !== null ) {

View file

@ -1568,6 +1568,10 @@ class DerivedPageDataUpdater implements IDBAccessObject, LoggerAwareInterface {
WikiPage::onArticleCreate( $title );
} elseif ( $this->options['changed'] ) { // T52785
WikiPage::onArticleEdit( $title, $legacyRevision, $this->getTouchedSlotRoles() );
} elseif ( $this->options['restored'] ) {
MediaWikiServices::getInstance()->getMainWANObjectCache()->touchCheckKey(
"DerivedPageDataUpdater:restore:page:$id"
);
}
$oldRevision = $this->getParentRevision();

View file

@ -19,6 +19,7 @@
* @ingroup RevisionDelete
*/
use MediaWiki\MediaWikiServices;
use MediaWiki\Revision\RevisionRecord;
use Wikimedia\Rdbms\FakeResultWrapper;
use Wikimedia\Rdbms\IDatabase;
@ -180,6 +181,9 @@ class RevDelRevisionList extends RevDelList {
$this->title->purgeSquid();
// Extensions that require referencing previous revisions may need this
Hooks::run( 'ArticleRevisionVisibilitySet', [ $this->title, $this->ids, $visibilityChangeMap ] );
MediaWikiServices::getInstance()
->getMainWANObjectCache()
->touchCheckKey( "RevDelRevisionList:page:{$this->title->getArticleID()}}" );
return Status::newGood();
}
}