2009-02-16 14:26:34 +00:00
|
|
|
<?php
|
|
|
|
|
/**
|
2012-05-22 18:06:30 +00:00
|
|
|
* Class for fetching backlink lists, approximate backlink counts and
|
|
|
|
|
* partitions.
|
|
|
|
|
*
|
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
|
|
|
*
|
2011-03-18 18:12:58 +00:00
|
|
|
* @file
|
2012-05-22 18:06:30 +00:00
|
|
|
* @author Tim Starling
|
|
|
|
|
* @copyright © 2009, Tim Starling, Domas Mituzas
|
|
|
|
|
* @copyright © 2010, Max Sem
|
|
|
|
|
* @copyright © 2011, Antoine Musso
|
2011-03-18 18:12:58 +00:00
|
|
|
*/
|
|
|
|
|
|
2021-08-03 16:01:36 +00:00
|
|
|
use MediaWiki\Cache\CacheKeyHelper;
|
2023-10-02 18:10:11 +00:00
|
|
|
use MediaWiki\Config\ServiceOptions;
|
2022-11-19 21:37:44 +00:00
|
|
|
use MediaWiki\HookContainer\HookContainer;
|
|
|
|
|
use MediaWiki\HookContainer\HookRunner;
|
2023-10-02 18:10:11 +00:00
|
|
|
use MediaWiki\Linker\LinksMigration;
|
2022-04-26 15:48:03 +00:00
|
|
|
use MediaWiki\MainConfigNames;
|
2022-11-14 17:55:42 +00:00
|
|
|
use MediaWiki\Page\PageIdentity;
|
2021-08-03 16:01:36 +00:00
|
|
|
use MediaWiki\Page\PageIdentityValue;
|
|
|
|
|
use MediaWiki\Page\PageReference;
|
2023-03-01 20:33:26 +00:00
|
|
|
use MediaWiki\Title\Title;
|
2023-09-18 13:56:39 +00:00
|
|
|
use MediaWiki\Title\TitleValue;
|
2022-05-19 05:02:12 +00:00
|
|
|
use Wikimedia\Rdbms\Database;
|
2023-07-05 08:57:11 +00:00
|
|
|
use Wikimedia\Rdbms\IConnectionProvider;
|
2023-06-09 07:46:03 +00:00
|
|
|
use Wikimedia\Rdbms\IReadableDatabase;
|
2019-06-06 19:56:18 +00:00
|
|
|
use Wikimedia\Rdbms\IResultWrapper;
|
Use SelectQueryBuilder in BacklinkCache
Mostly replace the traditional / ANSI-89 JOINs in this class (i.e.,
select from two tables, empty $joinConds, actual join condition in
$conds) with proper JOINs, and also mostly replace the “kind of janky”
$conds filter in queryLinks(): instead, initQueryBuilderForTable(), the
new replacement for getConditions(), now knows whether the page table
needs to be joined or not.
However, the hook still needs to be supported (there is one extension in
codesearch that actually uses it and seems to be maintained), and in
that hook we don’t know which part of the $conds are JOIN conditions; in
this case, we have to keep the “kind of janky” filter, and also join the
page table using table() instead of join().
Bug: T311866
Change-Id: Ic49bf5d8fd136d296752e8807f6032bfae74dcdf
2022-11-28 16:08:48 +00:00
|
|
|
use Wikimedia\Rdbms\SelectQueryBuilder;
|
2017-02-19 05:03:13 +00:00
|
|
|
|
2011-03-18 18:12:58 +00:00
|
|
|
/**
|
|
|
|
|
* Class for fetching backlink lists, approximate backlink counts and
|
|
|
|
|
* partitions. This is a shared cache.
|
|
|
|
|
*
|
|
|
|
|
* Instances of this class should typically be fetched with the method
|
2021-09-08 22:07:01 +00:00
|
|
|
* ::getBacklinkCache() from the BacklinkCacheFactory service.
|
2011-03-18 18:12:58 +00:00
|
|
|
*
|
|
|
|
|
* Ideally you should only get your backlinks from here when you think
|
2021-09-08 22:07:01 +00:00
|
|
|
* there is some advantage in caching them. Otherwise, it's just a waste
|
2011-03-18 18:12:58 +00:00
|
|
|
* of memory.
|
2009-02-16 14:26:34 +00:00
|
|
|
*
|
2011-03-18 18:12:58 +00:00
|
|
|
* Introduced by r47317
|
2009-02-16 14:26:34 +00:00
|
|
|
*/
|
|
|
|
|
class BacklinkCache {
|
2023-10-02 18:10:11 +00:00
|
|
|
/**
|
|
|
|
|
* @internal Used by ServiceWiring.php
|
|
|
|
|
*/
|
|
|
|
|
public const CONSTRUCTOR_OPTIONS = [
|
|
|
|
|
MainConfigNames::UpdateRowsPerJob,
|
|
|
|
|
];
|
Hooks::run() call site migration
Migrate all callers of Hooks::run() to use the new
HookContainer/HookRunner system.
General principles:
* Use DI if it is already used. We're not changing the way state is
managed in this patch.
* HookContainer is always injected, not HookRunner. HookContainer
is a service, it's a more generic interface, it is the only
thing that provides isRegistered() which is needed in some cases,
and a HookRunner can be efficiently constructed from it
(confirmed by benchmark). Because HookContainer is needed
for object construction, it is also needed by all factories.
* "Ask your friendly local base class". Big hierarchies like
SpecialPage and ApiBase have getHookContainer() and getHookRunner()
methods in the base class, and classes that extend that base class
are not expected to know or care where the base class gets its
HookContainer from.
* ProtectedHookAccessorTrait provides protected getHookContainer() and
getHookRunner() methods, getting them from the global service
container. The point of this is to ease migration to DI by ensuring
that call sites ask their local friendly base class rather than
getting a HookRunner from the service container directly.
* Private $this->hookRunner. In some smaller classes where accessor
methods did not seem warranted, there is a private HookRunner property
which is accessed directly. Very rarely (two cases), there is a
protected property, for consistency with code that conventionally
assumes protected=private, but in cases where the class might actually
be overridden, a protected accessor is preferred over a protected
property.
* The last resort: Hooks::runner(). Mostly for static, file-scope and
global code. In a few cases it was used for objects with broken
construction schemes, out of horror or laziness.
Constructors with new required arguments:
* AuthManager
* BadFileLookup
* BlockManager
* ClassicInterwikiLookup
* ContentHandlerFactory
* ContentSecurityPolicy
* DefaultOptionsManager
* DerivedPageDataUpdater
* FullSearchResultWidget
* HtmlCacheUpdater
* LanguageFactory
* LanguageNameUtils
* LinkRenderer
* LinkRendererFactory
* LocalisationCache
* MagicWordFactory
* MessageCache
* NamespaceInfo
* PageEditStash
* PageHandlerFactory
* PageUpdater
* ParserFactory
* PermissionManager
* RevisionStore
* RevisionStoreFactory
* SearchEngineConfig
* SearchEngineFactory
* SearchFormWidget
* SearchNearMatcher
* SessionBackend
* SpecialPageFactory
* UserNameUtils
* UserOptionsManager
* WatchedItemQueryService
* WatchedItemStore
Constructors with new optional arguments:
* DefaultPreferencesFactory
* Language
* LinkHolderArray
* MovePage
* Parser
* ParserCache
* PasswordReset
* Router
setHookContainer() now required after construction:
* AuthenticationProvider
* ResourceLoaderModule
* SearchEngine
Change-Id: Id442b0dbe43aba84bd5cf801d86dedc768b082c7
2020-03-19 02:42:09 +00:00
|
|
|
|
2011-03-18 18:12:58 +00:00
|
|
|
/**
|
|
|
|
|
* Multi-dimensional array representing batches. Keys are:
|
|
|
|
|
* > (string) links table name
|
2013-11-09 20:33:59 +00:00
|
|
|
* > (int) batch size
|
2011-03-18 18:12:58 +00:00
|
|
|
* > 'numRows' : Number of rows for this link table
|
2016-08-13 01:10:40 +00:00
|
|
|
* > 'batches' : [ $start, $end ]
|
2011-03-18 18:12:58 +00:00
|
|
|
*
|
|
|
|
|
* @see BacklinkCache::partitionResult()
|
2015-11-03 10:56:30 +00:00
|
|
|
* @var array[]
|
2011-03-18 18:12:58 +00:00
|
|
|
*/
|
2016-02-17 09:09:32 +00:00
|
|
|
protected $partitionCache = [];
|
2011-03-18 18:12:58 +00:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Contains the full set of links from a database result.
|
|
|
|
|
* This is raw data that will be partitioned in $partitionCache
|
|
|
|
|
*
|
2022-11-12 21:05:08 +00:00
|
|
|
* Initialized with BacklinkCache::queryLinks()
|
2023-02-20 17:04:32 +00:00
|
|
|
*
|
2019-06-06 19:56:18 +00:00
|
|
|
* @var IResultWrapper[]
|
2011-03-18 18:12:58 +00:00
|
|
|
*/
|
2016-02-17 09:09:32 +00:00
|
|
|
protected $fullResultCache = [];
|
2011-03-18 18:12:58 +00:00
|
|
|
|
2021-09-08 17:19:11 +00:00
|
|
|
/** @var WANObjectCache */
|
2018-01-10 18:02:02 +00:00
|
|
|
protected $wanCache;
|
|
|
|
|
|
2022-11-19 21:37:44 +00:00
|
|
|
/** @var HookRunner */
|
|
|
|
|
private $hookRunner;
|
|
|
|
|
|
2011-03-18 18:12:58 +00:00
|
|
|
/**
|
2021-08-03 16:01:36 +00:00
|
|
|
* Local copy of a PageReference object
|
|
|
|
|
* @var PageReference
|
2011-03-18 18:12:58 +00:00
|
|
|
*/
|
2021-08-03 16:01:36 +00:00
|
|
|
protected $page;
|
2011-03-18 18:12:58 +00:00
|
|
|
|
2020-05-15 22:16:46 +00:00
|
|
|
private const CACHE_EXPIRY = 3600;
|
2023-07-05 08:57:11 +00:00
|
|
|
private IConnectionProvider $dbProvider;
|
2023-10-02 18:10:11 +00:00
|
|
|
private ServiceOptions $options;
|
|
|
|
|
private LinksMigration $linksMigration;
|
2009-02-16 14:26:34 +00:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Create a new BacklinkCache
|
2012-09-03 19:45:50 +00:00
|
|
|
*
|
2023-10-02 18:10:11 +00:00
|
|
|
* @param ServiceOptions $options
|
|
|
|
|
* @param LinksMigration $linksMigration
|
2021-09-08 17:19:11 +00:00
|
|
|
* @param WANObjectCache $wanCache
|
2022-11-19 21:37:44 +00:00
|
|
|
* @param HookContainer $hookContainer
|
2023-07-05 08:57:11 +00:00
|
|
|
* @param IConnectionProvider $dbProvider
|
2021-08-03 16:01:36 +00:00
|
|
|
* @param PageReference $page Page to create a backlink cache for
|
2009-02-16 14:26:34 +00:00
|
|
|
*/
|
2022-11-19 21:37:44 +00:00
|
|
|
public function __construct(
	ServiceOptions $options,
	LinksMigration $linksMigration,
	WANObjectCache $wanCache,
	HookContainer $hookContainer,
	IConnectionProvider $dbProvider,
	PageReference $page
) {
	// Fail fast if the caller did not supply every option this class declares.
	$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );

	// The page whose backlinks this instance describes.
	$this->page = $page;

	// Injected collaborators.
	$this->options = $options;
	$this->linksMigration = $linksMigration;
	$this->wanCache = $wanCache;
	$this->dbProvider = $dbProvider;

	// Only the container is injected; the runner is built locally from it.
	$this->hookRunner = new HookRunner( $hookContainer );
}
|
|
|
|
|
|
2021-09-08 17:19:11 +00:00
|
|
|
/**
|
|
|
|
|
* @since 1.37
|
|
|
|
|
* @return PageReference
|
|
|
|
|
*/
|
|
|
|
|
public function getPage(): PageReference {
	// Hand back the page reference that was supplied to the constructor.
	$page = $this->page;

	return $page;
}
|
|
|
|
|
|
2011-03-18 18:12:58 +00:00
|
|
|
/**
|
2016-09-05 20:21:26 +00:00
|
|
|
* Get the replica DB connection to the database
|
2022-05-19 05:02:12 +00:00
|
|
|
*
|
2023-06-09 07:46:03 +00:00
|
|
|
* @return IReadableDatabase
|
2011-03-18 18:12:58 +00:00
|
|
|
*/
|
2009-02-16 14:26:34 +00:00
|
|
|
protected function getDB() {
	// Reads in this class go through the replica handle of the injected provider.
	$replica = $this->dbProvider->getReplicaDatabase();

	return $replica;
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Get the backlinks for a given table. Cached in process memory only.
|
2013-12-03 13:59:09 +00:00
|
|
|
* @param string $table
|
|
|
|
|
* @param int|bool $startId
|
|
|
|
|
* @param int|bool $endId
|
2021-08-03 16:01:36 +00:00
|
|
|
* @param int|float $max Integer, or INF for no max
|
2022-11-14 17:55:42 +00:00
|
|
|
* @return Iterator<PageIdentity>
|
2021-08-03 16:01:36 +00:00
|
|
|
* @since 1.37
|
|
|
|
|
*/
|
|
|
|
|
public function getLinkPages(
	string $table, $startId = false, $endId = false, $max = INF
): Iterator {
	// This method is a generator, so the query below only runs once the
	// caller starts iterating.
	$rows = $this->queryLinks( $table, $startId, $endId, $max );

	foreach ( $rows as $linkRow ) {
		// Turn each result row into a page identity for the local wiki.
		yield PageIdentityValue::localIdentity(
			$linkRow->page_id,
			$linkRow->page_namespace,
			$linkRow->page_title
		);
	}
}
|
|
|
|
|
|
2013-04-04 18:51:22 +00:00
|
|
|
/**
|
|
|
|
|
* Get the backlinks for a given table. Cached in process memory only.
|
2014-04-18 23:19:46 +00:00
|
|
|
* @param string $table
|
|
|
|
|
* @param int|bool $startId
|
|
|
|
|
* @param int|bool $endId
|
2015-08-27 09:20:54 +00:00
|
|
|
* @param int|float $max Integer, or INF for no max
|
2014-04-18 23:19:46 +00:00
|
|
|
* @param string $select 'all' or 'ids'
|
2019-06-06 19:56:18 +00:00
|
|
|
* @return IResultWrapper
|
2013-04-04 18:51:22 +00:00
|
|
|
*/
|
2013-11-09 20:33:59 +00:00
|
|
|
protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
	// True when the caller asked for no lower or upper bound on the source page ID.
	$unbounded = !$startId && !$endId;

	// An unbounded, unlimited request can be answered from the per-table
	// in-process cache if an earlier call filled it.
	if ( $unbounded && is_infinite( $max ) && isset( $this->fullResultCache[$table] ) ) {
		wfDebug( __METHOD__ . ": got results from cache" );

		return $this->fullResultCache[$table];
	}

	wfDebug( __METHOD__ . ": got results from DB" );
	$queryBuilder = $this->initQueryBuilderForTable( $table, $select );
	$fromField = $this->getPrefix( $table ) . '_from';

	// Range conditions are placed on the links table's *_from column rather
	// than on the joined page_id, since the database may not carry the index
	// over to the joined column.
	foreach ( [ '>=' => $startId, '<=' => $endId ] as $operator => $bound ) {
		if ( $bound ) {
			$queryBuilder->where(
				$this->getDB()->expr( $fromField, $operator, $bound )
			);
		}
	}

	$queryBuilder->orderBy( $fromField );

	// A LIMIT is only applied for a finite, positive maximum.
	if ( is_finite( $max ) && $max > 0 ) {
		$queryBuilder->limit( $max );
	}

	$res = $queryBuilder->caller( __METHOD__ )->fetchResultSet();

	if ( $select === 'all' && $unbounded && $res->numRows() < $max ) {
		// The complete result set fit below the limit, so keep it for later calls.
		$this->fullResultCache[$table] = $res;
	} else {
		wfDebug( __METHOD__ . ": results from DB were uncacheable" );
	}

	return $res;
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Get the field name prefix for a given table
|
2014-04-18 23:19:46 +00:00
|
|
|
* @param string $table
|
2012-02-09 21:33:27 +00:00
|
|
|
* @return string
|
2009-02-16 14:26:34 +00:00
|
|
|
*/
|
|
|
|
|
protected function getPrefix( $table ) {
	// Column-name prefixes of the links tables this class knows natively.
	static $knownPrefixes = [
		'pagelinks' => 'pl',
		'imagelinks' => 'il',
		'categorylinks' => 'cl',
		'templatelinks' => 'tl',
		'redirect' => 'rd',
	];

	if ( isset( $knownPrefixes[$table] ) ) {
		return $knownPrefixes[$table];
	}

	// Unknown table: let hook handlers supply a prefix through the by-reference argument.
	$prefix = null;
	// @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
	$this->hookRunner->onBacklinkCacheGetPrefix( $table, $prefix );

	if ( !$prefix ) {
		// No handler claimed the table, so the caller passed an unsupported name.
		throw new LogicException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $prefix;
}
|
|
|
|
|
|
|
|
|
|
/**
|
Use SelectQueryBuilder in BacklinkCache
Mostly replace the traditional / ANSI-89 JOINs in this class (i.e.,
select from two tables, empty $joinConds, actual join condition in
$conds) with proper JOINs, and also mostly replace the “kind of janky”
$conds filter in queryLinks(): instead, initQueryBuilderForTable(), the
new replacement for getConditions(), now knows whether the page table
needs to be joined or not.
However, the hook still needs to be supported (there is one extension in
codesearch that actually uses it and seems to be maintained), and in
that hook we don’t know which part of the $conds are JOIN conditions; in
this case, we have to keep the “kind of janky” filter, and also join the
page table using table() instead of join().
Bug: T311866
Change-Id: Ic49bf5d8fd136d296752e8807f6032bfae74dcdf
2022-11-28 16:08:48 +00:00
|
|
|
* Initialize a new SelectQueryBuilder for selecting backlinks,
|
|
|
|
|
* with a join on the page table if needed.
|
|
|
|
|
*
|
2014-04-18 23:19:46 +00:00
|
|
|
* @param string $table
|
Use SelectQueryBuilder in BacklinkCache
Mostly replace the traditional / ANSI-89 JOINs in this class (i.e.,
select from two tables, empty $joinConds, actual join condition in
$conds) with proper JOINs, and also mostly replace the “kind of janky”
$conds filter in queryLinks(): instead, initQueryBuilderForTable(), the
new replacement for getConditions(), now knows whether the page table
needs to be joined or not.
However, the hook still needs to be supported (there is one extension in
codesearch that actually uses it and seems to be maintained), and in
that hook we don’t know which part of the $conds are JOIN conditions; in
this case, we have to keep the “kind of janky” filter, and also join the
page table using table() instead of join().
Bug: T311866
Change-Id: Ic49bf5d8fd136d296752e8807f6032bfae74dcdf
2022-11-28 16:08:48 +00:00
|
|
|
* @param string $select
|
|
|
|
|
* @return SelectQueryBuilder
|
2009-02-16 14:26:34 +00:00
|
|
|
*/
|
Use SelectQueryBuilder in BacklinkCache
Mostly replace the traditional / ANSI-89 JOINs in this class (i.e.,
select from two tables, empty $joinConds, actual join condition in
$conds) with proper JOINs, and also mostly replace the “kind of janky”
$conds filter in queryLinks(): instead, initQueryBuilderForTable(), the
new replacement for getConditions(), now knows whether the page table
needs to be joined or not.
However, the hook still needs to be supported (there is one extension in
codesearch that actually uses it and seems to be maintained), and in
that hook we don’t know which part of the $conds are JOIN conditions; in
this case, we have to keep the “kind of janky” filter, and also join the
page table using table() instead of join().
Bug: T311866
Change-Id: Ic49bf5d8fd136d296752e8807f6032bfae74dcdf
2022-11-28 16:08:48 +00:00
|
|
|
private function initQueryBuilderForTable( string $table, string $select ): SelectQueryBuilder {
|
2009-02-16 14:26:34 +00:00
|
|
|
$prefix = $this->getPrefix( $table );
|
Use SelectQueryBuilder in BacklinkCache
Mostly replace the traditional / ANSI-89 JOINs in this class (i.e.,
select from two tables, empty $joinConds, actual join condition in
$conds) with proper JOINs, and also mostly replace the “kind of janky”
$conds filter in queryLinks(): instead, initQueryBuilderForTable(), the
new replacement for getConditions(), now knows whether the page table
needs to be joined or not.
However, the hook still needs to be supported (there is one extension in
codesearch that actually uses it and seems to be maintained), and in
that hook we don’t know which part of the $conds are JOIN conditions; in
this case, we have to keep the “kind of janky” filter, and also join the
page table using table() instead of join().
Bug: T311866
Change-Id: Ic49bf5d8fd136d296752e8807f6032bfae74dcdf
2022-11-28 16:08:48 +00:00
|
|
|
$queryBuilder = $this->getDB()->newSelectQueryBuilder();
|
|
|
|
|
$joinPageTable = $select !== 'ids';
|
|
|
|
|
|
|
|
|
|
if ( $select === 'ids' ) {
|
|
|
|
|
$queryBuilder->select( [ 'page_id' => $prefix . '_from' ] );
|
|
|
|
|
} else {
|
|
|
|
|
$queryBuilder->select( [ 'page_namespace', 'page_title', 'page_id' ] );
|
|
|
|
|
}
|
|
|
|
|
$queryBuilder->from( $table );
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* If the table is one of the tables known to this method,
|
|
|
|
|
* we can use a nice join() method later, always joining on page_id={$prefix}_from.
|
|
|
|
|
* If the table is unknown here, and only supported via a hook,
|
|
|
|
|
* the hook only produces a single $conds array,
|
|
|
|
|
* so we have to use a traditional / ANSI-89 JOIN,
|
|
|
|
|
* with the page table just added to the list of tables and the join conds in the WHERE part.
|
|
|
|
|
*/
|
|
|
|
|
$knownTable = true;
|
2010-02-14 22:07:30 +00:00
|
|
|
|
2009-02-16 14:26:34 +00:00
|
|
|
switch ( $table ) {
|
|
|
|
|
case 'pagelinks':
|
2022-04-05 20:13:27 +00:00
|
|
|
case 'templatelinks':
|
Use SelectQueryBuilder in BacklinkCache
Mostly replace the traditional / ANSI-89 JOINs in this class (i.e.,
select from two tables, empty $joinConds, actual join condition in
$conds) with proper JOINs, and also mostly replace the “kind of janky”
$conds filter in queryLinks(): instead, initQueryBuilderForTable(), the
new replacement for getConditions(), now knows whether the page table
needs to be joined or not.
However, the hook still needs to be supported (there is one extension in
codesearch that actually uses it and seems to be maintained), and in
that hook we don’t know which part of the $conds are JOIN conditions; in
this case, we have to keep the “kind of janky” filter, and also join the
page table using table() instead of join().
Bug: T311866
Change-Id: Ic49bf5d8fd136d296752e8807f6032bfae74dcdf
2022-11-28 16:08:48 +00:00
|
|
|
$queryBuilder->where(
|
2023-10-02 18:10:11 +00:00
|
|
|
$this->linksMigration->getLinksConditions( $table, TitleValue::newFromPage( $this->page ) )
|
|
|
|
|
);
|
2022-04-05 20:13:27 +00:00
|
|
|
break;
|
2009-02-16 14:26:34 +00:00
|
|
|
case 'redirect':
|
Use SelectQueryBuilder in BacklinkCache
Mostly replace the traditional / ANSI-89 JOINs in this class (i.e.,
select from two tables, empty $joinConds, actual join condition in
$conds) with proper JOINs, and also mostly replace the “kind of janky”
$conds filter in queryLinks(): instead, initQueryBuilderForTable(), the
new replacement for getConditions(), now knows whether the page table
needs to be joined or not.
However, the hook still needs to be supported (there is one extension in
codesearch that actually uses it and seems to be maintained), and in
that hook we don’t know which part of the $conds are JOIN conditions; in
this case, we have to keep the “kind of janky” filter, and also join the
page table using table() instead of join().
Bug: T311866
Change-Id: Ic49bf5d8fd136d296752e8807f6032bfae74dcdf
2022-11-28 16:08:48 +00:00
|
|
|
$queryBuilder->where( [
|
2021-08-03 16:01:36 +00:00
|
|
|
"{$prefix}_namespace" => $this->page->getNamespace(),
|
|
|
|
|
"{$prefix}_title" => $this->page->getDBkey(),
|
2023-07-17 20:17:57 +00:00
|
|
|
"{$prefix}_interwiki" => [ '', null ],
|
Use SelectQueryBuilder in BacklinkCache
Mostly replace the traditional / ANSI-89 JOINs in this class (i.e.,
select from two tables, empty $joinConds, actual join condition in
$conds) with proper JOINs, and also mostly replace the “kind of janky”
$conds filter in queryLinks(): instead, initQueryBuilderForTable(), the
new replacement for getConditions(), now knows whether the page table
needs to be joined or not.
However, the hook still needs to be supported (there is one extension in
codesearch that actually uses it and seems to be maintained), and in
that hook we don’t know which part of the $conds are JOIN conditions; in
this case, we have to keep the “kind of janky” filter, and also join the
page table using table() instead of join().
Bug: T311866
Change-Id: Ic49bf5d8fd136d296752e8807f6032bfae74dcdf
2022-11-28 16:08:48 +00:00
|
|
|
] );
|
2009-02-16 14:26:34 +00:00
|
|
|
break;
|
|
|
|
|
case 'imagelinks':
|
|
|
|
|
case 'categorylinks':
|
Use SelectQueryBuilder in BacklinkCache
Mostly replace the traditional / ANSI-89 JOINs in this class (i.e.,
select from two tables, empty $joinConds, actual join condition in
$conds) with proper JOINs, and also mostly replace the “kind of janky”
$conds filter in queryLinks(): instead, initQueryBuilderForTable(), the
new replacement for getConditions(), now knows whether the page table
needs to be joined or not.
However, the hook still needs to be supported (there is one extension in
codesearch that actually uses it and seems to be maintained), and in
that hook we don’t know which part of the $conds are JOIN conditions; in
this case, we have to keep the “kind of janky” filter, and also join the
page table using table() instead of join().
Bug: T311866
Change-Id: Ic49bf5d8fd136d296752e8807f6032bfae74dcdf
2022-11-28 16:08:48 +00:00
|
|
|
$queryBuilder->where( [
|
2021-08-03 16:01:36 +00:00
|
|
|
"{$prefix}_to" => $this->page->getDBkey(),
|
Use SelectQueryBuilder in BacklinkCache
Mostly replace the traditional / ANSI-89 JOINs in this class (i.e.,
select from two tables, empty $joinConds, actual join condition in
$conds) with proper JOINs, and also mostly replace the “kind of janky”
$conds filter in queryLinks(): instead, initQueryBuilderForTable(), the
new replacement for getConditions(), now knows whether the page table
needs to be joined or not.
However, the hook still needs to be supported (there is one extension in
codesearch that actually uses it and seems to be maintained), and in
that hook we don’t know which part of the $conds are JOIN conditions; in
this case, we have to keep the “kind of janky” filter, and also join the
page table using table() instead of join().
Bug: T311866
Change-Id: Ic49bf5d8fd136d296752e8807f6032bfae74dcdf
2022-11-28 16:08:48 +00:00
|
|
|
] );
|
2009-02-16 14:26:34 +00:00
|
|
|
break;
|
|
|
|
|
default:
|
Use SelectQueryBuilder in BacklinkCache
Mostly replace the traditional / ANSI-89 JOINs in this class (i.e.,
select from two tables, empty $joinConds, actual join condition in
$conds) with proper JOINs, and also mostly replace the “kind of janky”
$conds filter in queryLinks(): instead, initQueryBuilderForTable(), the
new replacement for getConditions(), now knows whether the page table
needs to be joined or not.
However, the hook still needs to be supported (there is one extension in
codesearch that actually uses it and seems to be maintained), and in
that hook we don’t know which part of the $conds are JOIN conditions; in
this case, we have to keep the “kind of janky” filter, and also join the
page table using table() instead of join().
Bug: T311866
Change-Id: Ic49bf5d8fd136d296752e8807f6032bfae74dcdf
2022-11-28 16:08:48 +00:00
|
|
|
$knownTable = false;
|
2011-08-13 22:42:09 +00:00
|
|
|
$conds = null;
|
2022-11-19 21:37:44 +00:00
|
|
|
$this->hookRunner->onBacklinkCacheGetConditions( $table,
|
2023-04-22 13:57:00 +00:00
|
|
|
Title::newFromPageReference( $this->page ),
|
2021-10-25 19:15:52 +00:00
|
|
|
// @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
|
|
|
|
|
$conds
|
|
|
|
|
);
|
2013-04-20 17:18:13 +00:00
|
|
|
if ( !$conds ) {
|
2023-06-08 22:30:57 +00:00
|
|
|
throw new LogicException( "Invalid table \"$table\" in " . __CLASS__ );
|
2012-09-27 19:46:22 +00:00
|
|
|
}
|
Use SelectQueryBuilder in BacklinkCache
Mostly replace the traditional / ANSI-89 JOINs in this class (i.e.,
select from two tables, empty $joinConds, actual join condition in
$conds) with proper JOINs, and also mostly replace the “kind of janky”
$conds filter in queryLinks(): instead, initQueryBuilderForTable(), the
new replacement for getConditions(), now knows whether the page table
needs to be joined or not.
However, the hook still needs to be supported (there is one extension in
codesearch that actually uses it and seems to be maintained), and in
that hook we don’t know which part of the $conds are JOIN conditions; in
this case, we have to keep the “kind of janky” filter, and also join the
page table using table() instead of join().
Bug: T311866
Change-Id: Ic49bf5d8fd136d296752e8807f6032bfae74dcdf
2022-11-28 16:08:48 +00:00
|
|
|
if ( $joinPageTable ) {
|
|
|
|
|
$queryBuilder->table( 'page' ); // join condition in $conds
|
|
|
|
|
} else {
|
|
|
|
|
// remove any page_id condition from $conds
|
|
|
|
|
$conds = array_filter( (array)$conds, static function ( $clause ) { // kind of janky
|
|
|
|
|
return !preg_match( '/(\b|=)page_id(\b|=)/', (string)$clause );
|
|
|
|
|
} );
|
|
|
|
|
}
|
|
|
|
|
$queryBuilder->where( $conds );
|
|
|
|
|
break;
|
2009-02-16 14:26:34 +00:00
|
|
|
}
|
2010-05-30 14:48:30 +00:00
|
|
|
|
Use SelectQueryBuilder in BacklinkCache
Mostly replace the traditional / ANSI-89 JOINs in this class (i.e.,
select from two tables, empty $joinConds, actual join condition in
$conds) with proper JOINs, and also mostly replace the “kind of janky”
$conds filter in queryLinks(): instead, initQueryBuilderForTable(), the
new replacement for getConditions(), now knows whether the page table
needs to be joined or not.
However, the hook still needs to be supported (there is one extension in
codesearch that actually uses it and seems to be maintained), and in
that hook we don’t know which part of the $conds are JOIN conditions; in
this case, we have to keep the “kind of janky” filter, and also join the
page table using table() instead of join().
Bug: T311866
Change-Id: Ic49bf5d8fd136d296752e8807f6032bfae74dcdf
2022-11-28 16:08:48 +00:00
|
|
|
if ( $knownTable && $joinPageTable ) {
|
|
|
|
|
$queryBuilder->join( 'page', null, "page_id={$prefix}_from" );
|
|
|
|
|
}
|
|
|
|
|
if ( $joinPageTable ) {
|
|
|
|
|
$queryBuilder->straightJoinOption();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return $queryBuilder;
|
2009-02-16 14:26:34 +00:00
|
|
|
}
|
|
|
|
|
|
2012-11-08 22:01:40 +00:00
|
|
|
/**
 * Tell whether at least one backlink exists in the given links table.
 *
 * The count is capped at one, so no more than a single backlink is ever
 * asked for.
 *
 * @param string $table
 * @return bool
 */
public function hasLinks( $table ) {
	$cappedCount = $this->getNumLinks( $table, 1 );

	return $cappedCount > 0;
}
|
|
|
|
|
|
2009-02-16 14:26:34 +00:00
|
|
|
/**
 * Get the approximate number of backlinks
 *
 * The number is taken from the in-process partition cache or full result
 * cache when one of them has data for $table; otherwise it is read from the
 * WAN cache, which computes it on a miss.
 *
 * @param string $table
 * @param int|float $max Only count up to this many backlinks, or INF for no max
 * @return int
 */
public function getNumLinks( $table, $max = INF ) {
	// Any partition already held in memory for this table carries the row count.
	if ( isset( $this->partitionCache[$table] ) ) {
		$firstPartition = reset( $this->partitionCache[$table] );

		return min( $max, $firstPartition['numRows'] );
	}

	// A fully loaded result set knows its own size.
	if ( isset( $this->fullResultCache[$table] ) ) {
		$cachedResult = $this->fullResultCache[$table];

		return min( $max, $cachedResult->numRows() );
	}

	$cacheKey = $this->wanCache->makeKey(
		'numbacklinks',
		CacheKeyHelper::getKeyForPage( $this->page ),
		$table
	);

	$computeCount = function ( $oldValue, &$ttl, array &$setOpts ) use ( $table, $max ) {
		$setOpts += Database::getCacheSetOptions( $this->getDB() );

		if ( !is_infinite( $max ) ) {
			// Fetch the full title info, since the caller will likely need it.
			$found = iterator_count( $this->getLinkPages( $table, false, false, $max ) );
			// Only cache the row count if the result set limit made no difference.
			if ( $found >= $max ) {
				$ttl = WANObjectCache::TTL_UNCACHEABLE;
			}

			return $found;
		}

		// Use partition() since it will batch the query and skip the JOIN.
		// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
		$rowsPerJob = $this->options->get( MainConfigNames::UpdateRowsPerJob );
		$this->partition( $table, $rowsPerJob );

		return $this->partitionCache[$table][$rowsPerJob]['numRows'];
	};

	$total = $this->wanCache->getWithSetCallback( $cacheKey, self::CACHE_EXPIRY, $computeCount );

	return min( $max, $total );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Partition the backlinks into batches.
 *
 * Returns an array giving the start and end of each range. The first
 * batch has a start of false, and the last batch has an end of false.
 *
 * The result is looked up in the in-process partition cache, then derived
 * from the in-process full result cache, and finally fetched from (or
 * computed and stored in) the WAN cache. The in-process entry is written
 * only once a complete result is available, so an exception thrown while
 * computing it cannot leave a placeholder behind that later calls would
 * mistake for a real entry.
 *
 * @param string $table The links table name
 * @param int $batchSize Number of rows per batch; must be at least 1
 * @return array List of [ start, end ] pairs
 * @throws InvalidArgumentException If $batchSize is less than 1
 */
public function partition( $table, $batchSize ) {
	// Zero would hit a modulo/division by zero below; a negative size yields
	// no batches, so $start would never advance in the chunk loop.
	if ( $batchSize < 1 ) {
		throw new InvalidArgumentException( __METHOD__ . ': $batchSize must be at least 1' );
	}

	if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
		wfDebug( __METHOD__ . ": got from partition cache" );

		return $this->partitionCache[$table][$batchSize]['batches'];
	}

	if ( isset( $this->fullResultCache[$table] ) ) {
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		$this->partitionCache[$table][$batchSize] = $cacheEntry;
		wfDebug( __METHOD__ . ": got from full result cache" );

		return $cacheEntry['batches'];
	}

	$cacheEntry = $this->wanCache->getWithSetCallback(
		$this->wanCache->makeKey(
			'backlinks',
			CacheKeyHelper::getKeyForPage( $this->page ),
			$table,
			$batchSize
		),
		self::CACHE_EXPIRY,
		function ( $oldValue, &$ttl, array &$setOpts ) use ( $table, $batchSize ) {
			$setOpts += Database::getCacheSetOptions( $this->getDB() );

			$value = [ 'numRows' => 0, 'batches' => [] ];

			// Do the selects in batches to avoid client-side OOMs (T45452).
			// Use a LIMIT that plays well with $batchSize to keep equal sized partitions:
			// the largest multiple of $batchSize not above 200000, but never less
			// than one full batch.
			$selectSize = max( $batchSize, 200_000 - ( 200_000 % $batchSize ) );
			$start = false;
			do {
				$res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
				$partitions = $this->partitionResult( $res, $batchSize, false );
				// Merge the link count and range partitions for this chunk
				$value['numRows'] += $partitions['numRows'];
				$value['batches'] = array_merge( $value['batches'], $partitions['batches'] );
				if ( count( $partitions['batches'] ) ) {
					[ , $lEnd ] = end( $partitions['batches'] );
					$start = $lEnd + 1; // pick up after this inclusive range
				}
				// A chunk shorter than the limit means there is nothing left to read
			} while ( $partitions['numRows'] >= $selectSize );

			// Make sure the first range has start=false and the last one has end=false
			if ( count( $value['batches'] ) ) {
				$value['batches'][0][0] = false;
				$value['batches'][count( $value['batches'] ) - 1][1] = false;
			}

			return $value;
		}
	);

	// Remember the finished result for later calls on this object
	$this->partitionCache[$table][$batchSize] = $cacheEntry;

	return $cacheEntry['batches'];
}
|
|
|
|
|
|
2010-02-14 22:07:30 +00:00
|
|
|
/**
 * Partition a DB result with backlinks in it into batches
 *
 * Each batch is a [ start, end ] pair of page_id values read from the rows
 * at the batch boundaries. When $isComplete is true, the first batch gets a
 * start of false and the last batch gets an end of false; otherwise every
 * boundary is a concrete page_id.
 *
 * @param IResultWrapper $res Database result
 * @param int $batchSize Number of rows per batch; must be at least 1
 * @param bool $isComplete Whether $res includes all the backlinks
 * @return array Map with 'numRows' (row count of $res) and 'batches'
 *  (list of [ start, end ] pairs)
 * @throws InvalidArgumentException If $batchSize is less than 1
 * @throws RuntimeException If a batch starts after it ends
 */
protected function partitionResult( $res, $batchSize, $isComplete = true ) {
	// A zero size would divide by zero below, and a negative one would
	// silently produce no batches for a non-empty result.
	if ( $batchSize < 1 ) {
		throw new InvalidArgumentException( __METHOD__ . ': $batchSize must be at least 1' );
	}

	$batches = [];
	$numRows = $res->numRows();
	// ceil() returns a float; use an integer for the loop bound and comparisons
	$numBatches = (int)ceil( $numRows / $batchSize );

	for ( $i = 0; $i < $numBatches; $i++ ) {
		if ( $i === 0 && $isComplete ) {
			$start = false;
		} else {
			// First row of this batch
			$rowNum = $i * $batchSize;
			$res->seek( $rowNum );
			$row = $res->fetchObject();
			$start = (int)$row->page_id;
		}

		if ( $i === ( $numBatches - 1 ) && $isComplete ) {
			$end = false;
		} else {
			// Last row of this batch, clamped to the last row of the result
			$rowNum = min( $numRows - 1, ( $i + 1 ) * $batchSize - 1 );
			$res->seek( $rowNum );
			$row = $res->fetchObject();
			$end = (int)$row->page_id;
		}

		# Check order
		if ( $start && $end && $start > $end ) {
			throw new RuntimeException( __METHOD__ . ': Internal error: query result out of order' );
		}

		$batches[] = [ $start, $end ];
	}

	return [ 'numRows' => $numRows, 'batches' => $batches ];
}
|
2015-02-12 23:03:24 +00:00
|
|
|
|
2021-08-03 16:01:36 +00:00
|
|
|
/**
 * Iterate over the cascade-protected template/file use backlinks as
 * PageIdentity objects.
 *
 * Each row returned by getCascadeProtectedLinksInternal() is turned into a
 * local PageIdentityValue built from its page_id, page_namespace and
 * page_title.
 *
 * @return Iterator<PageIdentity>
 * @since 1.37
 */
public function getCascadeProtectedLinkPages(): Iterator {
	$linkRows = $this->getCascadeProtectedLinksInternal();
	foreach ( $linkRows as $linkRow ) {
		$identity = PageIdentityValue::localIdentity(
			$linkRow->page_id,
			$linkRow->page_namespace,
			$linkRow->page_title
		);

		yield $identity;
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Collect the rows of cascade-protected pages that use this page as a
 * template or, when this page is in the file namespace, as an image.
 *
 * Runs one query against templatelinks and, for NS_FILE pages, a second one
 * against imagelinks, then merges both result sets with one row per page_id.
 *
 * @return stdClass[] Rows with page_namespace, page_title and page_id
 */
private function getCascadeProtectedLinksInternal(): array {
	$dbr = $this->getDB();
	$pageFields = [ 'page_namespace', 'page_title', 'page_id' ];

	// @todo: use UNION without breaking tests that use temp tables
	$templateConds = $this->linksMigration->getLinksConditions(
		'templatelinks',
		TitleValue::newFromPage( $this->page )
	);
	$templateQuery = $dbr->newSelectQueryBuilder()
		->select( $pageFields )
		->from( 'templatelinks' )
		->join( 'page_restrictions', null, 'tl_from = pr_page' )
		->join( 'page', null, 'page_id = tl_from' )
		->where( $templateConds )
		->andWhere( [ 'pr_cascade' => 1 ] )
		->distinct()
		->caller( __METHOD__ );
	$resultSets = [ $templateQuery->fetchResultSet() ];

	// Only pages in the file namespace are looked up in imagelinks
	if ( $this->page->getNamespace() === NS_FILE ) {
		$imageConds = [
			'il_to' => $this->page->getDBkey(),
			'pr_cascade' => 1,
		];
		$imageQuery = $dbr->newSelectQueryBuilder()
			->select( $pageFields )
			->from( 'imagelinks' )
			->join( 'page_restrictions', null, 'il_from = pr_page' )
			->join( 'page', null, 'page_id = il_from' )
			->where( $imageConds )
			->distinct()
			->caller( __METHOD__ );
		$resultSets[] = $imageQuery->fetchResultSet();
	}

	// Keying by page_id collapses a page that appears in both result sets
	$rowsByPageId = [];
	foreach ( $resultSets as $resultSet ) {
		foreach ( $resultSet as $linkRow ) {
			$rowsByPageId[$linkRow->page_id] = $linkRow;
		}
	}

	// The keys were only needed for de-duplication; hand back a plain list
	return array_values( $rowsByPageId );
}
|
2009-02-16 14:26:34 +00:00
|
|
|
}
|