Merge "Write to multiple categorylinks tables on update"
This commit is contained in:
commit
fb80b943b6
5 changed files with 154 additions and 35 deletions
|
|
@ -8857,6 +8857,17 @@ $wgCategoryPagingLimit = 200;
|
|||
*/
|
||||
$wgCategoryCollation = 'uppercase';
|
||||
|
||||
/**
|
||||
* Additional category collations to store during LinksUpdate. This can be used
|
||||
* to perform online migration of categories from one collation to another. An
|
||||
* array of associative arrays each having the following keys:
|
||||
* - table: (string) The table name
|
||||
* - collation: (string) The collation to use for cl_sortkey
|
||||
* - fakeCollation: (string) The collation name to insert into cl_collation. If omitted, the value of "collation" is used.
|
||||
* @since 1.38
|
||||
*/
|
||||
$wgTempCategoryCollations = [];
|
||||
|
||||
/**
|
||||
* Array holding default tracking category names.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -143,14 +143,17 @@ class LinksUpdate extends DataUpdate {
|
|||
$this->mRecursive = $recursive;
|
||||
|
||||
$services = MediaWikiServices::getInstance();
|
||||
$config = $services->getMainConfig();
|
||||
$this->tableFactory = new LinksTableGroup(
|
||||
$services->getObjectFactory(),
|
||||
$services->getDBLoadBalancerFactory(),
|
||||
$services->getCollationFactory(),
|
||||
$page,
|
||||
$services->getMainConfig()->get( 'UpdateRowsPerQuery' ),
|
||||
$config->get( 'UpdateRowsPerQuery' ),
|
||||
function ( $table, $rows ) {
|
||||
$this->getHookRunner()->onLinksUpdateAfterInsert( $this, $table, $rows );
|
||||
}
|
||||
},
|
||||
$config->get( 'TempCategoryCollations' )
|
||||
);
|
||||
// TODO: this does not have to be called in LinksDeletionUpdate
|
||||
$this->tableFactory->setParserOutput( $parserOutput );
|
||||
|
|
@ -190,6 +193,14 @@ class LinksUpdate extends DataUpdate {
|
|||
return;
|
||||
}
|
||||
|
||||
// Do any setup that needs to be done prior to acquiring the lock
|
||||
// Calling getAll() here has the side-effect of calling
|
||||
// LinksTable::setParserOutput() on all subclasses, allowing
|
||||
// those methods to also do pre-lock operations.
|
||||
foreach ( $this->tableFactory->getAll() as $batch ) {
|
||||
$batch->beforeLock();
|
||||
}
|
||||
|
||||
if ( $this->ticket ) {
|
||||
// Make sure all links update threads see the changes of each other.
|
||||
// This handles the case when updates have to be batched into several COMMITs.
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
namespace MediaWiki\Deferred\LinksUpdate;
|
||||
|
||||
use MediaWiki\Collation\CollationFactory;
|
||||
use Collation;
|
||||
use MediaWiki\DAO\WikiAwareEntity;
|
||||
use MediaWiki\Languages\LanguageConverterFactory;
|
||||
use MediaWiki\Page\PageReferenceValue;
|
||||
|
|
@ -24,7 +24,8 @@ use Title;
|
|||
class CategoryLinksTable extends TitleLinksTable {
|
||||
/**
|
||||
* @var array Associative array of new links, with the category name in the
|
||||
* key and the sort key prefix in the value
|
||||
* key. The value is a list consisting of the sort key prefix and the sort
|
||||
* key.
|
||||
*/
|
||||
private $newLinks = [];
|
||||
|
||||
|
|
@ -43,6 +44,12 @@ class CategoryLinksTable extends TitleLinksTable {
|
|||
/** @var string The collation name for cl_collation */
|
||||
private $collationName;
|
||||
|
||||
/** @var string The table name */
|
||||
private $tableName = 'categorylinks';
|
||||
|
||||
/** @var bool */
|
||||
private $isTempTable;
|
||||
|
||||
/** @var string The category type, which depends on the source page */
|
||||
private $categoryType;
|
||||
|
||||
|
|
@ -52,17 +59,31 @@ class CategoryLinksTable extends TitleLinksTable {
|
|||
/** @var WikiPageFactory */
|
||||
private $wikiPageFactory;
|
||||
|
||||
/**
|
||||
* @param LanguageConverterFactory $converterFactory
|
||||
* @param NamespaceInfo $namespaceInfo
|
||||
* @param WikiPageFactory $wikiPageFactory
|
||||
* @param Collation $collation
|
||||
* @param string $collationName
|
||||
* @param string $tableName
|
||||
* @param bool $isTempTable
|
||||
*/
|
||||
public function __construct(
|
||||
LanguageConverterFactory $converterFactory,
|
||||
CollationFactory $collationFactory,
|
||||
NamespaceInfo $namespaceInfo,
|
||||
WikiPageFactory $wikiPageFactory
|
||||
WikiPageFactory $wikiPageFactory,
|
||||
Collation $collation,
|
||||
$collationName,
|
||||
$tableName,
|
||||
$isTempTable
|
||||
) {
|
||||
$this->languageConverter = $converterFactory->getLanguageConverter();
|
||||
$this->collation = $collationFactory->getCategoryCollation();
|
||||
$this->collationName = $collationFactory->getDefaultCollationName();
|
||||
$this->namespaceInfo = $namespaceInfo;
|
||||
$this->wikiPageFactory = $wikiPageFactory;
|
||||
$this->collation = $collation;
|
||||
$this->collationName = $collationName;
|
||||
$this->tableName = $tableName;
|
||||
$this->isTempTable = $isTempTable;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -75,16 +96,32 @@ class CategoryLinksTable extends TitleLinksTable {
|
|||
|
||||
public function setParserOutput( ParserOutput $parserOutput ) {
|
||||
$this->newLinks = [];
|
||||
foreach ( $parserOutput->getCategories() as $name => $sortKey ) {
|
||||
// If the sortkey is longer then 255 bytes, it is truncated by DB, and then doesn't match
|
||||
// when comparing existing vs current categories, causing T27254.
|
||||
$sortKey = mb_strcut( $sortKey, 0, 255 );
|
||||
$this->newLinks[(string)$name] = $sortKey;
|
||||
$sourceTitle = Title::castFromPageIdentity( $this->getSourcePage() );
|
||||
$sortKeyInputs = [];
|
||||
foreach ( $parserOutput->getCategories() as $name => $sortKeyPrefix ) {
|
||||
// If the sort key is longer than 255 bytes, it is truncated by the DB,
|
||||
// and then doesn't match when comparing existing vs current
|
||||
// categories, causing T27254.
|
||||
$sortKeyPrefix = mb_strcut( $sortKeyPrefix, 0, 255 );
|
||||
|
||||
$targetTitle = Title::makeTitleSafe( NS_CATEGORY, $name );
|
||||
$this->languageConverter->findVariantLink( $name, $targetTitle, true );
|
||||
|
||||
// Treat custom sort keys as a prefix, so that if multiple
|
||||
// things are forced to sort as '*' or something, they'll
|
||||
// sort properly in the category rather than in page_id
|
||||
// order or such.
|
||||
$sortKeyInputs[$name] = $sourceTitle->getCategorySortkey( $sortKeyPrefix );
|
||||
$this->newLinks[$name] = [ $sortKeyPrefix ];
|
||||
}
|
||||
$sortKeys = $this->collation->getSortKeys( $sortKeyInputs );
|
||||
foreach ( $sortKeys as $name => $sortKey ) {
|
||||
$this->newLinks[$name][1] = $sortKey;
|
||||
}
|
||||
}
|
||||
|
||||
protected function getTableName() {
|
||||
return 'categorylinks';
|
||||
return $this->tableName;
|
||||
}
|
||||
|
||||
protected function getFromField() {
|
||||
|
|
@ -102,8 +139,8 @@ class CategoryLinksTable extends TitleLinksTable {
|
|||
* @return iterable<array>
|
||||
*/
|
||||
protected function getNewLinkIDs() {
|
||||
foreach ( $this->newLinks as $name => $sortkey ) {
|
||||
yield [ $name, $sortkey ];
|
||||
foreach ( $this->newLinks as $name => [ $prefix, $sortKey ] ) {
|
||||
yield [ $name, $prefix ];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -141,25 +178,16 @@ class CategoryLinksTable extends TitleLinksTable {
|
|||
protected function isInNewSet( $linkId ) {
|
||||
[ $name, $prefix ] = $linkId;
|
||||
return \array_key_exists( $name, $this->newLinks )
|
||||
&& $this->newLinks[$name] === $prefix;
|
||||
&& $this->newLinks[$name][0] === $prefix;
|
||||
}
|
||||
|
||||
protected function insertLink( $linkId ) {
|
||||
[ $name, $prefix ] = $linkId;
|
||||
$nt = Title::makeTitleSafe( NS_CATEGORY, $name );
|
||||
$this->languageConverter->findVariantLink( $name, $nt, true );
|
||||
|
||||
// Treat custom sortkeys as a prefix, so that if multiple
|
||||
// things are forced to sort as '*' or something, they'll
|
||||
// sort properly in the category rather than in page_id
|
||||
// order or such.
|
||||
$sortkey = $this->collation->getSortKey(
|
||||
Title::castFromPageIdentity( $this->getSourcePage() )
|
||||
->getCategorySortkey( $prefix ) );
|
||||
$sortKey = $this->newLinks[$name][1];
|
||||
|
||||
$this->insertRow( [
|
||||
'cl_to' => $name,
|
||||
'cl_sortkey' => $sortkey,
|
||||
'cl_sortkey' => $sortKey,
|
||||
'cl_timestamp' => $this->getDB()->timestamp(),
|
||||
'cl_sortkey_prefix' => $prefix,
|
||||
'cl_collation' => $this->collationName,
|
||||
|
|
@ -190,6 +218,10 @@ class CategoryLinksTable extends TitleLinksTable {
|
|||
}
|
||||
|
||||
protected function finishUpdate() {
	// Tables holding a temporary collation skip both the category
	// invalidation and the category count update.
	if ( !$this->isTempTable ) {
		$this->invalidateCategories();
		$this->updateCategoryCounts();
	}
}
|
||||
|
|
|
|||
|
|
@ -363,6 +363,15 @@ abstract class LinksTable {
|
|||
$this->rowsToDelete[] = $conds;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subclasses can override this to do any necessary setup before the lock
|
||||
* is acquired.
|
||||
*
|
||||
* @stable to override
|
||||
*/
|
||||
public function beforeLock() {
	// Intentionally empty: the base class has no pre-lock setup to do.
}
|
||||
|
||||
/**
|
||||
* Subclasses can override this to do any necessary setup before individual
|
||||
* write operations begin.
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
namespace MediaWiki\Deferred\LinksUpdate;
|
||||
|
||||
use MediaWiki\Collation\CollationFactory;
|
||||
use MediaWiki\Config\ServiceOptions;
|
||||
use MediaWiki\MediaWikiServices;
|
||||
use MediaWiki\Page\PageIdentity;
|
||||
|
|
@ -21,16 +22,18 @@ class LinksTableGroup {
|
|||
* - serviceOptions: An array of configuration variable names. If this is
|
||||
* set, the specified configuration will be sent to the subclass
|
||||
* constructor as a ServiceOptions object.
|
||||
* - needCollation: If true, the following additional args will be added:
|
||||
* Collation, collation name, table name and whether the table is temporary.
|
||||
*/
|
||||
private const CORE_LIST = [
|
||||
'categorylinks' => [
|
||||
'class' => CategoryLinksTable::class,
|
||||
'services' => [
|
||||
'LanguageConverterFactory',
|
||||
'CollationFactory',
|
||||
'NamespaceInfo',
|
||||
'WikiPageFactory'
|
||||
]
|
||||
],
|
||||
'needCollation' => true,
|
||||
],
|
||||
'externallinks' => [
|
||||
'class' => ExternalLinksTable::class
|
||||
|
|
@ -65,6 +68,9 @@ class LinksTableGroup {
|
|||
/** @var LBFactory */
|
||||
private $lbFactory;
|
||||
|
||||
/** @var CollationFactory */
|
||||
private $collationFactory;
|
||||
|
||||
/** @var PageIdentity */
|
||||
private $page;
|
||||
|
||||
|
|
@ -86,25 +92,37 @@ class LinksTableGroup {
|
|||
/** @var LinksTable[] */
|
||||
private $tables = [];
|
||||
|
||||
/** @var array */
|
||||
private $tempCollations;
|
||||
|
||||
/**
|
||||
* @param ObjectFactory $objectFactory
|
||||
* @param LBFactory $lbFactory
|
||||
* @param CollationFactory $collationFactory
|
||||
* @param PageIdentity $page
|
||||
* @param int $batchSize
|
||||
* @param callable|null $afterUpdateHook
|
||||
* @param array $tempCollations
|
||||
*/
|
||||
public function __construct(
|
||||
ObjectFactory $objectFactory,
|
||||
LBFactory $lbFactory,
|
||||
CollationFactory $collationFactory,
|
||||
PageIdentity $page,
|
||||
$batchSize,
|
||||
$afterUpdateHook
|
||||
$afterUpdateHook,
|
||||
array $tempCollations
|
||||
) {
|
||||
$this->objectFactory = $objectFactory;
|
||||
$this->lbFactory = $lbFactory;
|
||||
$this->collationFactory = $collationFactory;
|
||||
$this->page = $page;
|
||||
$this->batchSize = $batchSize;
|
||||
$this->afterUpdateHook = $afterUpdateHook;
|
||||
$this->tempCollations = [];
|
||||
foreach ( $tempCollations as $info ) {
|
||||
$this->tempCollations[$info['table']] = $info;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -161,11 +179,46 @@ class LinksTableGroup {
|
|||
* @return array
|
||||
*/
|
||||
private function getSpec( $tableName ) {
|
||||
if ( !isset( self::CORE_LIST[$tableName] ) ) {
|
||||
throw new \InvalidArgumentException(
|
||||
__CLASS__ . ": unknown table name \"$tableName\"" );
|
||||
if ( isset( self::CORE_LIST[$tableName] ) ) {
|
||||
$spec = self::CORE_LIST[$tableName];
|
||||
return $this->addCollationArgs( $spec, $tableName, false );
|
||||
}
|
||||
return self::CORE_LIST[$tableName];
|
||||
if ( isset( $this->tempCollations[$tableName] ) ) {
|
||||
$info = $this->tempCollations[$tableName];
|
||||
$spec = self::CORE_LIST['categorylinks'];
|
||||
return $this->addCollationArgs( $spec, $tableName, true, $info );
|
||||
}
|
||||
throw new \InvalidArgumentException(
|
||||
__CLASS__ . ": unknown table name \"$tableName\"" );
|
||||
}
|
||||
|
||||
/**
 * Add extra args to the spec of a table that needs collation information.
 *
 * When the spec has a true 'needCollation' flag, the flag is removed and
 * 'args' is set to a list of: the Collation object, the collation name,
 * the table name and the temporary-table flag. Specs whose flag is absent
 * or false get no 'args' and are returned with the flag removed.
 *
 * @param array $spec
 * @param string $tableName
 * @param bool $isTempTable
 * @param array $info Temporary collation info. The 'collation' key selects
 *   the collation to construct; the 'fakeCollation' key, if present,
 *   overrides the collation name passed to the table object.
 * @return array ObjectFactory spec
 */
private function addCollationArgs( $spec, $tableName, $isTempTable, $info = [] ) {
	// The flag is documented as "if true", so test its value rather than
	// its mere presence; it is never forwarded to the object factory.
	$needCollation = !empty( $spec['needCollation'] );
	unset( $spec['needCollation'] );
	if ( !$needCollation ) {
		return $spec;
	}

	if ( isset( $info['collation'] ) ) {
		$collation = $this->collationFactory->makeCollation( $info['collation'] );
		$defaultName = $info['collation'];
	} else {
		// No explicit collation configured: use the category collation
		// and default collation name supplied by the factory.
		$collation = $this->collationFactory->getCategoryCollation();
		$defaultName = $this->collationFactory->getDefaultCollationName();
	}

	$spec['args'] = [
		$collation,
		// A fake collation name, when given, always wins over the real one.
		$info['fakeCollation'] ?? $defaultName,
		$tableName,
		$isTempTable
	];

	return $spec;
}
|
||||
|
||||
/**
|
||||
|
|
@ -214,5 +267,8 @@ class LinksTableGroup {
|
|||
foreach ( self::CORE_LIST as $tableName => $spec ) {
|
||||
yield $this->get( $tableName );
|
||||
}
|
||||
foreach ( $this->tempCollations as $tableName => $collation ) {
|
||||
yield $this->get( $tableName );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue