Merge "Write to multiple categorylinks tables on update"

This commit is contained in:
jenkins-bot 2022-01-10 18:34:25 +00:00 committed by Gerrit Code Review
commit fb80b943b6
5 changed files with 154 additions and 35 deletions

View file

@ -8857,6 +8857,17 @@ $wgCategoryPagingLimit = 200;
*/
$wgCategoryCollation = 'uppercase';
/**
* Additional category collations to store during LinksUpdate. This can be used
* to perform online migration of categories from one collation to another. An
* array of associative arrays each having the following keys:
* - table: (string) The table name
* - collation: (string) The collation to use for cl_sortkey
* - fakeCollation: (string) The collation name to insert into cl_collation
* @since 1.38
*/
$wgTempCategoryCollations = [];
/**
* Array holding default tracking category names.
*

View file

@ -143,14 +143,17 @@ class LinksUpdate extends DataUpdate {
$this->mRecursive = $recursive;
$services = MediaWikiServices::getInstance();
$config = $services->getMainConfig();
$this->tableFactory = new LinksTableGroup(
$services->getObjectFactory(),
$services->getDBLoadBalancerFactory(),
$services->getCollationFactory(),
$page,
$services->getMainConfig()->get( 'UpdateRowsPerQuery' ),
$config->get( 'UpdateRowsPerQuery' ),
function ( $table, $rows ) {
$this->getHookRunner()->onLinksUpdateAfterInsert( $this, $table, $rows );
}
},
$config->get( 'TempCategoryCollations' )
);
// TODO: this does not have to be called in LinksDeletionUpdate
$this->tableFactory->setParserOutput( $parserOutput );
@ -190,6 +193,14 @@ class LinksUpdate extends DataUpdate {
return;
}
// Do any setup that needs to be done prior to acquiring the lock
// Calling getAll() here has the side-effect of calling
// LinksUpdateBatch::setParserOutput() on all subclasses, allowing
// those methods to also do pre-lock operations.
foreach ( $this->tableFactory->getAll() as $batch ) {
$batch->beforeLock();
}
if ( $this->ticket ) {
// Make sure all links update threads see the changes of each other.
// This handles the case when updates have to be batched into several COMMITs.

View file

@ -2,7 +2,7 @@
namespace MediaWiki\Deferred\LinksUpdate;
use MediaWiki\Collation\CollationFactory;
use Collation;
use MediaWiki\DAO\WikiAwareEntity;
use MediaWiki\Languages\LanguageConverterFactory;
use MediaWiki\Page\PageReferenceValue;
@ -24,7 +24,8 @@ use Title;
class CategoryLinksTable extends TitleLinksTable {
/**
* @var array Associative array of new links, with the category name in the
* key and the sort key prefix in the value
* key. The value is a list consisting of the sort key prefix and the sort
* key.
*/
private $newLinks = [];
@ -43,6 +44,12 @@ class CategoryLinksTable extends TitleLinksTable {
/** @var string The collation name for cl_collation */
private $collationName;
/** @var string The table name */
private $tableName = 'categorylinks';
/** @var bool */
private $isTempTable;
/** @var string The category type, which depends on the source page */
private $categoryType;
@ -52,17 +59,31 @@ class CategoryLinksTable extends TitleLinksTable {
/** @var WikiPageFactory */
private $wikiPageFactory;
/**
* @param LanguageConverterFactory $converterFactory
* @param NamespaceInfo $namespaceInfo
* @param WikiPageFactory $wikiPageFactory
* @param Collation $collation
* @param string $collationName
* @param string $tableName
* @param bool $isTempTable
*/
public function __construct(
LanguageConverterFactory $converterFactory,
CollationFactory $collationFactory,
NamespaceInfo $namespaceInfo,
WikiPageFactory $wikiPageFactory
WikiPageFactory $wikiPageFactory,
Collation $collation,
$collationName,
$tableName,
$isTempTable
) {
$this->languageConverter = $converterFactory->getLanguageConverter();
$this->collation = $collationFactory->getCategoryCollation();
$this->collationName = $collationFactory->getDefaultCollationName();
$this->namespaceInfo = $namespaceInfo;
$this->wikiPageFactory = $wikiPageFactory;
$this->collation = $collation;
$this->collationName = $collationName;
$this->tableName = $tableName;
$this->isTempTable = $isTempTable;
}
/**
@ -75,16 +96,32 @@ class CategoryLinksTable extends TitleLinksTable {
public function setParserOutput( ParserOutput $parserOutput ) {
$this->newLinks = [];
foreach ( $parserOutput->getCategories() as $name => $sortKey ) {
// If the sortkey is longer than 255 bytes, it is truncated by DB, and then doesn't match
// when comparing existing vs current categories, causing T27254.
$sortKey = mb_strcut( $sortKey, 0, 255 );
$this->newLinks[(string)$name] = $sortKey;
$sourceTitle = Title::castFromPageIdentity( $this->getSourcePage() );
$sortKeyInputs = [];
foreach ( $parserOutput->getCategories() as $name => $sortKeyPrefix ) {
// If the sort key is longer than 255 bytes, it is truncated by DB,
// and then doesn't match when comparing existing vs current
// categories, causing T27254.
$sortKeyPrefix = mb_strcut( $sortKeyPrefix, 0, 255 );
$targetTitle = Title::makeTitleSafe( NS_CATEGORY, $name );
$this->languageConverter->findVariantLink( $name, $targetTitle, true );
// Treat custom sort keys as a prefix, so that if multiple
// things are forced to sort as '*' or something, they'll
// sort properly in the category rather than in page_id
// order or such.
$sortKeyInputs[$name] = $sourceTitle->getCategorySortkey( $sortKeyPrefix );
$this->newLinks[$name] = [ $sortKeyPrefix ];
}
$sortKeys = $this->collation->getSortKeys( $sortKeyInputs );
foreach ( $sortKeys as $name => $sortKey ) {
$this->newLinks[$name][1] = $sortKey;
}
}
protected function getTableName() {
return 'categorylinks';
return $this->tableName;
}
protected function getFromField() {
@ -102,8 +139,8 @@ class CategoryLinksTable extends TitleLinksTable {
* @return iterable<array>
*/
protected function getNewLinkIDs() {
foreach ( $this->newLinks as $name => $sortkey ) {
yield [ $name, $sortkey ];
foreach ( $this->newLinks as $name => [ $prefix, $sortKey ] ) {
yield [ $name, $prefix ];
}
}
@ -141,25 +178,16 @@ class CategoryLinksTable extends TitleLinksTable {
protected function isInNewSet( $linkId ) {
[ $name, $prefix ] = $linkId;
return \array_key_exists( $name, $this->newLinks )
&& $this->newLinks[$name] === $prefix;
&& $this->newLinks[$name][0] === $prefix;
}
protected function insertLink( $linkId ) {
[ $name, $prefix ] = $linkId;
$nt = Title::makeTitleSafe( NS_CATEGORY, $name );
$this->languageConverter->findVariantLink( $name, $nt, true );
// Treat custom sortkeys as a prefix, so that if multiple
// things are forced to sort as '*' or something, they'll
// sort properly in the category rather than in page_id
// order or such.
$sortkey = $this->collation->getSortKey(
Title::castFromPageIdentity( $this->getSourcePage() )
->getCategorySortkey( $prefix ) );
$sortKey = $this->newLinks[$name][1];
$this->insertRow( [
'cl_to' => $name,
'cl_sortkey' => $sortkey,
'cl_sortkey' => $sortKey,
'cl_timestamp' => $this->getDB()->timestamp(),
'cl_sortkey_prefix' => $prefix,
'cl_collation' => $this->collationName,
@ -190,6 +218,10 @@ class CategoryLinksTable extends TitleLinksTable {
}
/**
 * Run the post-update work for this table: invalidate the affected
 * categories and update the category counts. Both steps are skipped when
 * this object writes to a temporary collation table.
 */
protected function finishUpdate() {
	// Invalidations and count updates only apply to the real table
	if ( !$this->isTempTable ) {
		$this->invalidateCategories();
		$this->updateCategoryCounts();
	}
}

View file

@ -363,6 +363,15 @@ abstract class LinksTable {
$this->rowsToDelete[] = $conds;
}
/**
 * Subclasses can override this to do any necessary setup before the lock
 * is acquired. The base implementation does nothing.
 *
 * @stable to override
 */
public function beforeLock() {
}
/**
* Subclasses can override this to do any necessary setup before individual
* write operations begin.

View file

@ -2,6 +2,7 @@
namespace MediaWiki\Deferred\LinksUpdate;
use MediaWiki\Collation\CollationFactory;
use MediaWiki\Config\ServiceOptions;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageIdentity;
@ -21,16 +22,18 @@ class LinksTableGroup {
* - serviceOptions: An array of configuration variable names. If this is
* set, the specified configuration will be sent to the subclass
* constructor as a ServiceOptions object.
* - needCollation: If true, the following additional args will be added:
* Collation, collation name, table name and whether the table is temporary.
*/
private const CORE_LIST = [
'categorylinks' => [
'class' => CategoryLinksTable::class,
'services' => [
'LanguageConverterFactory',
'CollationFactory',
'NamespaceInfo',
'WikiPageFactory'
]
],
'needCollation' => true,
],
'externallinks' => [
'class' => ExternalLinksTable::class
@ -65,6 +68,9 @@ class LinksTableGroup {
/** @var LBFactory */
private $lbFactory;
/** @var CollationFactory */
private $collationFactory;
/** @var PageIdentity */
private $page;
@ -86,25 +92,37 @@ class LinksTableGroup {
/** @var LinksTable[] */
private $tables = [];
/** @var array */
private $tempCollations;
/**
* @param ObjectFactory $objectFactory
* @param LBFactory $lbFactory
* @param CollationFactory $collationFactory
* @param PageIdentity $page
* @param int $batchSize
* @param callable|null $afterUpdateHook
* @param array $tempCollations
*/
public function __construct(
ObjectFactory $objectFactory,
LBFactory $lbFactory,
CollationFactory $collationFactory,
PageIdentity $page,
$batchSize,
$afterUpdateHook
$afterUpdateHook,
array $tempCollations
) {
$this->objectFactory = $objectFactory;
$this->lbFactory = $lbFactory;
$this->collationFactory = $collationFactory;
$this->page = $page;
$this->batchSize = $batchSize;
$this->afterUpdateHook = $afterUpdateHook;
$this->tempCollations = [];
foreach ( $tempCollations as $info ) {
$this->tempCollations[$info['table']] = $info;
}
}
/**
@ -161,11 +179,46 @@ class LinksTableGroup {
* @return array
*/
private function getSpec( $tableName ) {
if ( !isset( self::CORE_LIST[$tableName] ) ) {
throw new \InvalidArgumentException(
__CLASS__ . ": unknown table name \"$tableName\"" );
if ( isset( self::CORE_LIST[$tableName] ) ) {
$spec = self::CORE_LIST[$tableName];
return $this->addCollationArgs( $spec, $tableName, false );
}
return self::CORE_LIST[$tableName];
if ( isset( $this->tempCollations[$tableName] ) ) {
$info = $this->tempCollations[$tableName];
$spec = self::CORE_LIST['categorylinks'];
return $this->addCollationArgs( $spec, $tableName, true, $info );
}
throw new \InvalidArgumentException(
__CLASS__ . ": unknown table name \"$tableName\"" );
}
/**
 * Add extra constructor args to the spec of a table that needs collation
 * information.
 *
 * A spec whose 'needCollation' flag is absent or falsy gets no args added.
 * Otherwise 'args' is set to, in order: the Collation object, the collation
 * name, the table name and the temporary-table flag. The 'needCollation'
 * key is always removed from the returned spec.
 *
 * @param array $spec
 * @param string $tableName
 * @param bool $isTempTable
 * @param array $info Temporary collation info. The keys read here are
 *   'collation' and 'fakeCollation'; both are optional.
 * @return array ObjectFactory spec
 */
private function addCollationArgs( $spec, $tableName, $isTempTable, $info = [] ) {
	// The flag is documented as "If true", so test its value rather than
	// its mere presence: 'needCollation' => false must not add any args.
	$needCollation = !empty( $spec['needCollation'] );
	unset( $spec['needCollation'] );
	if ( !$needCollation ) {
		return $spec;
	}

	if ( isset( $info['collation'] ) ) {
		$collation = $this->collationFactory->makeCollation( $info['collation'] );
		$collationName = $info['collation'];
	} else {
		$collation = $this->collationFactory->getCategoryCollation();
		$collationName = $this->collationFactory->getDefaultCollationName();
	}

	$spec['args'] = [
		$collation,
		// When given, fakeCollation replaces the collation name passed on,
		// whichever Collation object was selected above
		$info['fakeCollation'] ?? $collationName,
		$tableName,
		$isTempTable
	];
	return $spec;
}
/**
@ -214,5 +267,8 @@ class LinksTableGroup {
foreach ( self::CORE_LIST as $tableName => $spec ) {
yield $this->get( $tableName );
}
foreach ( $this->tempCollations as $tableName => $collation ) {
yield $this->get( $tableName );
}
}
}