RCFilters: Filter duplicates when filtering for multiple tags
When filtering for multiple tags, add DISTINCT to the query.
To make this not result in terrible query performance, we also
have to add the rc_id to the end of the ORDER BY, and add
a GROUP BY equal to the ORDER BY.
Make ChangeTags::modifyDisplayQuery() take an array of tags
instead of a pipe-separated string. This allows each caller
to explicitly opt into supporting multi-tag filters and the
conditional GROUP BY mess that that entails.
Only support multi-tag filters in SpecialRecentChanges and
SpecialRecentchangesLinked. This means we don't have to adapt
the queries in HistoryAction, ContribsPager, LogPager and
NewPagesPager to deal with a possible DISTINCT modifier.
Example query with no tag filters:
SELECT rc_id, ... FROM recentchanges ... ORDER BY rc_timestamp DESC
Example query with one tag filter:
SELECT rc_id, ... FROM recentchanges JOIN change_tag ON ...
WHERE ... AND ct_tag='foo' ORDER BY rc_timestamp DESC
Example query with two tag filters:
SELECT DISTINCT rc_id, ... FROM recentchanges JOIN change_tag ON ...
WHERE ... AND ct_tag IN ('foo', 'bar') GROUP BY rc_timestamp, rc_id
ORDER BY rc_timestamp DESC, rc_id DESC
Bug: T168501
Change-Id: I54a270a563d99b143b55ce83c7d6f70ac4861c64
This commit is contained in:
parent
024a0a9d84
commit
2a04f2dbf9
3 changed files with 51 additions and 15 deletions
|
|
@ -643,12 +643,18 @@ class ChangeTags {
|
|||
* Handles selecting tags, and filtering.
|
||||
* Needs $tables to be set up properly, so we can figure out which join conditions to use.
|
||||
*
|
||||
* WARNING: If $filter_tag contains more than one tag, this function will add DISTINCT,
|
||||
* which may cause performance problems for your query unless you put the ID field of your
|
||||
* table at the end of the ORDER BY, and set a GROUP BY equal to the ORDER BY. For example,
|
||||
* if you had ORDER BY foo_timestamp DESC, you will now need GROUP BY foo_timestamp, foo_id
|
||||
* ORDER BY foo_timestamp DESC, foo_id DESC.
|
||||
*
|
||||
* @param string|array $tables Table names, see Database::select
|
||||
* @param string|array $fields Fields used in query, see Database::select
|
||||
* @param string|array $conds Conditions used in query, see Database::select
|
||||
* @param array $join_conds Join conditions, see Database::select
|
||||
* @param array $options Options, see Database::select
|
||||
* @param bool|string $filter_tag Tag to select on
|
||||
* @param bool|string|array $filter_tag Tag(s) to select on
|
||||
*
|
||||
* @throws MWException When unable to determine appropriate JOIN condition for tagging
|
||||
*/
|
||||
|
|
@ -660,7 +666,7 @@ class ChangeTags {
|
|||
$filter_tag = $wgRequest->getVal( 'tagfilter' );
|
||||
}
|
||||
|
||||
// Figure out which conditions can be done.
|
||||
// Figure out which ID field to use
|
||||
if ( in_array( 'recentchanges', $tables ) ) {
|
||||
$join_cond = 'ct_rc_id=rc_id';
|
||||
} elseif ( in_array( 'logging', $tables ) ) {
|
||||
|
|
@ -683,7 +689,13 @@ class ChangeTags {
|
|||
|
||||
$tables[] = 'change_tag';
|
||||
$join_conds['change_tag'] = [ 'INNER JOIN', $join_cond ];
|
||||
$conds['ct_tag'] = explode( '|', $filter_tag );
|
||||
$conds['ct_tag'] = $filter_tag;
|
||||
if (
|
||||
is_array( $filter_tag ) && count( $filter_tag ) > 1 &&
|
||||
!in_array( 'DISTINCT', $options )
|
||||
) {
|
||||
$options[] = 'DISTINCT';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -962,7 +974,7 @@ class ChangeTags {
|
|||
|
||||
// tags cannot contain commas (used as a delimiter in tag_summary table),
|
||||
// pipe (used as a delimiter between multiple tags in
|
||||
// modifyDisplayQuery), or slashes (would break tag description messages in
|
||||
// SpecialRecentchanges and friends), or slashes (would break tag description messages in
|
||||
// MediaWiki namespace)
|
||||
if ( strpos( $tag, ',' ) !== false || strpos( $tag, '|' ) !== false
|
||||
|| strpos( $tag, '/' ) !== false ) {
|
||||
|
|
|
|||
|
|
@ -422,13 +422,14 @@ class SpecialRecentChanges extends ChangesListSpecialPage {
|
|||
$fields[] = 'page_latest';
|
||||
$join_conds['page'] = [ 'LEFT JOIN', 'rc_cur_id=page_id' ];
|
||||
|
||||
$tagFilter = explode( '|', $opts['tagfilter'] );
|
||||
ChangeTags::modifyDisplayQuery(
|
||||
$tables,
|
||||
$fields,
|
||||
$conds,
|
||||
$join_conds,
|
||||
$query_options,
|
||||
$opts['tagfilter']
|
||||
$tagFilter
|
||||
);
|
||||
|
||||
if ( !$this->runMainQueryHook( $tables, $fields, $conds, $query_options, $join_conds,
|
||||
|
|
@ -441,13 +442,24 @@ class SpecialRecentChanges extends ChangesListSpecialPage {
|
|||
return false;
|
||||
}
|
||||
|
||||
$orderByAndLimit = [
|
||||
'ORDER BY' => 'rc_timestamp DESC',
|
||||
'LIMIT' => $opts['limit']
|
||||
];
|
||||
if ( in_array( 'DISTINCT', $query_options ) ) {
|
||||
// ChangeTags::modifyDisplayQuery() adds DISTINCT when filtering on multiple tags.
|
||||
// In order to prevent DISTINCT from causing query performance problems,
|
||||
// we have to GROUP BY the primary key. This in turn requires us to add
|
||||
// the primary key to the end of the ORDER BY, and the old ORDER BY to the
|
||||
// start of the GROUP BY
|
||||
$orderByAndLimit['ORDER BY'] = 'rc_timestamp DESC, rc_id DESC';
|
||||
$orderByAndLimit['GROUP BY'] = 'rc_timestamp, rc_id';
|
||||
}
|
||||
// array_merge() is used intentionally here so that hooks can, should
|
||||
// they so desire, override the ORDER BY / LIMIT condition(s); prior to
|
||||
// MediaWiki 1.26 this used to use the plus operator instead, which meant
|
||||
// that extensions weren't able to change these conditions
|
||||
$query_options = array_merge( [
|
||||
'ORDER BY' => 'rc_timestamp DESC',
|
||||
'LIMIT' => $opts['limit'] ], $query_options );
|
||||
$query_options = array_merge( $orderByAndLimit, $query_options );
|
||||
$rows = $dbr->select(
|
||||
$tables,
|
||||
$fields,
|
||||
|
|
|
|||
|
|
@ -103,15 +103,33 @@ class SpecialRecentChangesLinked extends SpecialRecentChanges {
|
|||
$join_conds['page'] = [ 'LEFT JOIN', 'rc_cur_id=page_id' ];
|
||||
$select[] = 'page_latest';
|
||||
}
|
||||
|
||||
$tagFilter = explode( '|', $opts['tagfilter'] );
|
||||
ChangeTags::modifyDisplayQuery(
|
||||
$tables,
|
||||
$select,
|
||||
$conds,
|
||||
$join_conds,
|
||||
$query_options,
|
||||
$opts['tagfilter']
|
||||
$tagFilter
|
||||
);
|
||||
|
||||
if ( $dbr->unionSupportsOrderAndLimit() ) {
|
||||
if ( count( $tagFilter ) > 1 ) {
|
||||
// ChangeTags::modifyDisplayQuery() will have added DISTINCT.
|
||||
// To prevent this from causing query performance problems, we need to add
|
||||
// a GROUP BY, and add rc_id to the ORDER BY.
|
||||
$order = [
|
||||
'GROUP BY' => 'rc_timestamp, rc_id',
|
||||
'ORDER BY' => 'rc_timestamp DESC, rc_id DESC'
|
||||
];
|
||||
} else {
|
||||
$order = [ 'ORDER BY' => 'rc_timestamp DESC' ];
|
||||
}
|
||||
} else {
|
||||
$order = [];
|
||||
}
|
||||
|
||||
if ( !$this->runMainQueryHook( $tables, $select, $conds, $query_options, $join_conds,
|
||||
$opts )
|
||||
) {
|
||||
|
|
@ -181,12 +199,6 @@ class SpecialRecentChangesLinked extends SpecialRecentChanges {
|
|||
}
|
||||
}
|
||||
|
||||
if ( $dbr->unionSupportsOrderAndLimit() ) {
|
||||
$order = [ 'ORDER BY' => 'rc_timestamp DESC' ];
|
||||
} else {
|
||||
$order = [];
|
||||
}
|
||||
|
||||
$query = $dbr->selectSQLText(
|
||||
array_merge( $tables, [ $link_table ] ),
|
||||
$select,
|
||||
|
|
|
|||
Loading…
Reference in a new issue