SQLPlatform: Introduce buildComparison()

Builds a condition comparing multiple values, for use with indexes
that cover multiple fields, common when e.g. paging through results
or doing batch operations. Can also be to generate a simple comparison
without writing raw SQL (see T210206).

Update a few manually constructed conditions to use this method.
There are more maintenance scripts and API classes that use the
same patterns, but this is a start.

As you can see by the code I'm replacing, there are many ways to do
this. I picked the one used by maintenance/TableCleanup.php, since
I found it the easiest to understand.

Change-Id: Ic368a87fb5ce4c13608b03206cd68518ec9732d4
This commit is contained in:
Bartosz Dziewoński 2022-09-04 00:48:19 +02:00
parent af60bf699c
commit ec79aa3943
15 changed files with 192 additions and 162 deletions

View file

@ -2698,12 +2698,14 @@ class RevisionStore
return null;
}
}
$dbts = $db->addQuotes( $db->timestamp( $ts ) );
$revId = $db->selectField( 'revision', 'rev_id',
[
'rev_page' => $rev->getPageId( $this->wikiId ),
"rev_timestamp $op $dbts OR (rev_timestamp = $dbts AND rev_id $op $revisionIdValue )"
$db->buildComparison( $op, [
'rev_timestamp' => $db->timestamp( $ts ),
'rev_id' => $revisionIdValue,
] ),
],
__METHOD__,
[

View file

@ -188,15 +188,15 @@ class ApiQueryAllRevisions extends ApiQueryRevisionsBase {
}
if ( $params['continue'] !== null ) {
$op = ( $dir == 'newer' ? '>' : '<' );
$op = ( $dir == 'newer' ? '>=' : '<=' );
$cont = explode( '|', $params['continue'] );
$this->dieContinueUsageIf( count( $cont ) != 2 );
$ts = $db->addQuotes( $db->timestamp( $cont[0] ) );
$rev_id = (int)$cont[1];
$this->dieContinueUsageIf( strval( $rev_id ) !== $cont[1] );
$this->addWhere( "$tsField $op $ts OR " .
"($tsField = $ts AND " .
"$idField $op= $rev_id)" );
$this->addWhere( $db->buildComparison( $op, [
$tsField => $db->timestamp( $cont[0] ),
$idField => (int)$cont[1],
] ) );
}
$this->addOption( 'LIMIT', $this->limit + 1 );

View file

@ -122,14 +122,13 @@ class ApiQueryBlocks extends ApiQueryBase {
if ( $params['continue'] !== null ) {
$cont = explode( '|', $params['continue'] );
$this->dieContinueUsageIf( count( $cont ) != 2 );
$op = ( $params['dir'] == 'newer' ? '>' : '<' );
$continueTimestamp = $db->addQuotes( $db->timestamp( $cont[0] ) );
$op = ( $params['dir'] == 'newer' ? '>=' : '<=' );
$continueId = (int)$cont[1];
$this->dieContinueUsageIf( $continueId != $cont[1] );
$this->addWhere( "ipb_timestamp $op $continueTimestamp OR " .
"(ipb_timestamp = $continueTimestamp AND " .
"ipb_id $op= $continueId)"
);
$this->addWhere( $db->buildComparison( $op, [
'ipb_timestamp' => $db->timestamp( $cont[0] ),
'ipb_id' => (int)$cont[1],
] ) );
}
if ( $params['ids'] ) {

View file

@ -129,15 +129,14 @@ class ApiQueryCategoryMembers extends ApiQueryGeneratorBase {
if ( $params['continue'] !== null ) {
$cont = explode( '|', $params['continue'] );
$this->dieContinueUsageIf( count( $cont ) != 2 );
$op = ( $dir === 'newer' ? '>' : '<' );
$op = ( $dir === 'newer' ? '>=' : '<=' );
$db = $this->getDB();
$continueTimestamp = $db->addQuotes( $db->timestamp( $cont[0] ) );
$continueFrom = (int)$cont[1];
$this->dieContinueUsageIf( $continueFrom != $cont[1] );
$this->addWhere( "cl_timestamp $op $continueTimestamp OR " .
"(cl_timestamp = $continueTimestamp AND " .
"cl_from $op= $continueFrom)"
);
$this->addWhere( $db->buildComparison( $op, [
'cl_timestamp' => $db->timestamp( $cont[0] ),
'cl_from' => (int)$cont[1],
] ) );
}
$this->addOption( 'USE INDEX', [ 'categorylinks' => 'cl_timestamp' ] );

View file

@ -387,6 +387,10 @@ class DBConnRef implements IMaintainableDatabase {
return $this->__call( __FUNCTION__, func_get_args() );
}
public function buildComparison( string $op, array $conds ): string {
return $this->__call( __FUNCTION__, func_get_args() );
}
public function makeList( array $a, $mode = self::LIST_COMMA ) {
return $this->__call( __FUNCTION__, func_get_args() );
}

View file

@ -3839,6 +3839,10 @@ abstract class Database implements IDatabase, IMaintainableDatabase, LoggerAware
return $this->platform->selectSQLText( $table, $vars, $conds, $fname, $options, $join_conds );
}
public function buildComparison( string $op, array $conds ): string {
return $this->platform->buildComparison( $op, $conds );
}
public function makeList( array $a, $mode = self::LIST_COMMA ) {
return $this->platform->makeList( $a, $mode );
}

View file

@ -132,6 +132,34 @@ interface ISQLPlatform {
*/
public function buildLeast( $fields, $values );
/**
* Build a condition comparing multiple values, for use with indexes that cover
* multiple fields, common when e.g. paging through results or doing batch operations.
*
* For example, you might be displaying a list of people ordered alphabetically by their last
* and first name, split across multiple pages. The first page of the results ended at Jane Doe.
* When building the query for the next page, you would use:
*
* $queryBuilder->where( $db->buildComparison( '>', [ 'last' => 'Doe', 'first' => 'Jane' ] ) );
*
* This will return people whose last name follows Doe, or whose last name is Doe and first name
* follows Jane.
*
* Note that the order of keys in the associative array $conds is significant,
* and must match the order of fields used by the index.
*
* You might also use it to generate a simple comparison without writing raw SQL:
*
* $db->buildComparison( '<=', [ 'key' => $val ] )
* // equivalent to:
* 'key <= ' . $db->addQuotes( $val )
*
* @param string $op Comparison operator, one of '>', '>=', '<', '<='
* @param array $conds Map of field names to their values to use in the comparison
* @return string SQL code
*/
public function buildComparison( string $op, array $conds ): string;
/**
* Makes an encoded list of strings from an array
*

View file

@ -157,6 +157,49 @@ class SQLPlatform implements ISQLPlatform {
return $sqlfunc . '(' . implode( ',', $encValues ) . ')';
}
public function buildComparison( string $op, array $conds ): string {
if ( !in_array( $op, [ '>', '>=', '<', '<=' ] ) ) {
throw new InvalidArgumentException( "Comparison operator must be one of '>', '>=', '<', '<='" );
}
if ( count( $conds ) === 0 ) {
throw new InvalidArgumentException( "Empty input" );
}
// Construct a condition string by starting with the least significant part of the index, and
// adding more significant parts progressively to the left of the string.
//
// For example, given $conds = [ 'a' => 4, 'b' => 7, 'c' => 1 ], this will generate a condition
// like this:
//
// WHERE a > 4
// OR (a = 4 AND (b > 7
// OR (b = 7 AND (c > 1))))
//
// …which is equivalent to the following, which might be easier to understand:
//
// WHERE a > 4
// OR a = 4 AND b > 7
// OR a = 4 AND b = 7 AND c > 1
//
// …and also equivalent to the following, using tuple comparison syntax, which is most intuitive
// but apparently performs worse:
//
// WHERE (a, b, c) > (4, 7, 1)
$sql = '';
foreach ( array_reverse( $conds ) as $field => $value ) {
$encValue = $this->quoter->addQuotes( $value );
if ( $sql === '' ) {
$sql = "$field $op $encValue";
// Change '>=' to '>' etc. for remaining fields, as the equality is handled separately
$op = rtrim( $op, '=' );
} else {
$sql = "$field $op $encValue OR ($field = $encValue AND ($sql))";
}
}
return $sql;
}
public function makeList( array $a, $mode = self::LIST_COMMA ) {
$first = true;
$list = '';

View file

@ -511,29 +511,6 @@ abstract class IndexPager extends ContextSource implements Pager {
* single column or multiple columns. Where we paginate on multiple columns,
* the sort order is defined by the order of the columns in $mIndexField.
*
* Some examples, with up to three columns. Each condition consists of inner
* conditions, at least one of which must be true (joined by OR):
*
* - column X, with offset value 'x':
* WHERE X>'x'
*
* - columns X and Y, with offsets 'x' and 'y':
* WHERE X>'x'
* OR ( X='x' AND Y>'y' )
*
* - columns X, Y and Z, with offsets 'x', 'y' and 'z':
* WHERE X>'x'
* OR ( X='x' AND Y>'y' )
* OR ( X='x' AND Y='y' AND Z>'z' )
*
* - and so on...
*
* (The examples assume we want the next page and do not want to include the
* offset in the results; otherwise the operators will be slightly different,
* as handled in buildQueryInfo.)
*
* Note that the above performs better than: WHERE (X,Y,Z)>('x','y','z').
*
* @param string[] $offsets The offset for each index field
* @param string[] $columns The name of each index field
* @param string $operator Operator for the final part of each inner
@ -543,42 +520,8 @@ abstract class IndexPager extends ContextSource implements Pager {
* @return string The conditions for getting results from the offset
*/
private function buildOffsetConds( $offsets, $columns, $operator ) {
$innerConds = [];
// $offsets and $columns are the same length
for ( $i = 1; $i <= count( $offsets ); $i++ ) {
$innerConds[] = $this->buildOffsetInnerConds(
array_slice( $offsets, 0, $i ),
array_slice( $columns, 0, $i ),
// When weak inequality is requested, only use the weak operator for the last item, because
// (A, B) >= (1, 2)
// is equivalent to:
// ((A > 1) OR (A = 1 AND B >= 2))
// and not:
// ((A >= 1) OR (A = 1 AND B >= 2))
$i === count( $offsets ) ? $operator : rtrim( $operator, '=' )
);
}
return $this->mDb->makeList( $innerConds, IDatabase::LIST_OR );
}
/**
* Build an inner part of an offset condition, consisting of inequalities
* joined by AND, as described in buildOffsetConds.
*
* @param string[] $offsets
* @param string[] $columns
* @param string $operator
* @return string The inner condition; to be concatenated in buildOffsetConds
*/
private function buildOffsetInnerConds( $offsets, $columns, $operator ) {
$conds = [];
while ( count( $offsets ) > 1 ) {
$conds[] = $columns[0] . '=' . $this->mDb->addQuotes( $offsets[0] );
array_shift( $columns );
array_shift( $offsets );
}
$conds[] = $columns[0] . $operator . $this->mDb->addQuotes( $offsets[0] );
return $this->mDb->makeList( $conds, IDatabase::LIST_AND );
$conds = array_combine( $columns, $offsets );
return $this->mDb->buildComparison( $operator, $conds );
}
/**

View file

@ -258,11 +258,7 @@ class BatchRowIterator implements RecursiveIterator {
/**
* Uses the primary key list and the maximal result row from the
* previous iteration to build an SQL condition sufficient for
* selecting the next page of results. All except the final key use
* `=` conditions while the final key uses a `>` condition
*
* Example output:
* [ '( foo = 42 AND bar > 7 ) OR ( foo > 42 )' ]
* selecting the next page of results.
*
* @return array The SQL conditions necessary to select the next set
* of rows in the batched query
@ -276,50 +272,12 @@ class BatchRowIterator implements RecursiveIterator {
$maximumValues = [];
foreach ( $this->primaryKey as $alias => $column ) {
$name = is_numeric( $alias ) ? $column : $alias;
$maximumValues[$column] = $this->db->addQuotes( $maxRow->{$name} );
}
$pkConditions = [];
// For example: If we have 3 primary keys
// first run through will generate
// col1 = 4 AND col2 = 7 AND col3 > 1
// second run through will generate
// col1 = 4 AND col2 > 7
// and the final run through will generate
// col1 > 4
while ( $maximumValues ) {
$pkConditions[] = $this->buildGreaterThanCondition( $maximumValues );
array_pop( $maximumValues );
$maximumValues[$column] = $maxRow->$name;
}
$conditions = $this->conditions;
$conditions[] = sprintf( '( %s )', implode( ' ) OR ( ', $pkConditions ) );
$conditions[] = $this->db->buildComparison( '>', $maximumValues );
return $conditions;
}
/**
* Given an array of column names and their maximum value generate
* an SQL condition where all keys except the last match $quotedMaximumValues
* exactly and the last column is greater than the matching value in
* $quotedMaximumValues
*
* @param array $quotedMaximumValues The maximum values quoted with
* $this->db->addQuotes()
* @return string An SQL condition that will select rows where all
* columns match the maximum value exactly except the last column
* which must be greater than the provided maximum value
*/
protected function buildGreaterThanCondition( array $quotedMaximumValues ) {
$keys = array_keys( $quotedMaximumValues );
$lastColumn = end( $keys );
$lastValue = array_pop( $quotedMaximumValues );
$conditions = [];
foreach ( $quotedMaximumValues as $column => $value ) {
$conditions[] = "$column = $value";
}
$conditions[] = "$lastColumn > $lastValue";
return implode( ' AND ', $conditions );
}
}

View file

@ -148,20 +148,12 @@ class TableCleanup extends Maintenance {
}
// Update the conditions to select the next batch.
// Construct a condition string by starting with the least significant part
// of the index, and adding more significant parts progressively to the left
// of the string.
$nextCond = '';
foreach ( array_reverse( $index ) as $field ) {
$conds = [];
foreach ( $index as $field ) {
// @phan-suppress-next-line PhanPossiblyUndeclaredVariable $res has at at least one item
$encValue = $dbr->addQuotes( $row->$field );
if ( $nextCond === '' ) {
$nextCond = "$field > $encValue";
} else {
$nextCond = "$field > $encValue OR ($field = $encValue AND ($nextCond))";
}
$conds[ $field ] = $row->$field;
}
$indexConds = [ $nextCond ];
$indexConds = [ $dbr->buildComparison( '>', $conds ) ];
}
$this->output( "Finished $table... $this->updated of $this->processed rows updated\n" );

View file

@ -488,14 +488,13 @@ class NamespaceDupes extends Maintenance {
$this->resolvableLinks -= $dbw->affectedRows();
}
// @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
$encLastTitle = $dbw->addQuotes( $row->$titleField );
// @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
$encLastFrom = $dbw->addQuotes( $row->$fromField );
$batchConds = [
"$titleField > $encLastTitle " .
"OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)"
$dbw->buildComparison( '>', [
// @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
$titleField => $dbw->addQuotes( $row->$titleField ),
// @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
$fromField => $dbw->addQuotes( $row->$fromField ),
] )
];
$lbFactory->waitForReplication();

View file

@ -271,31 +271,19 @@ TEXT
} else {
$fields = [ 'cl_collation', 'cl_to', 'cl_type', 'cl_from' ];
}
$first = true;
$cond = false;
$prefix = false;
$conds = [];
foreach ( $fields as $field ) {
if ( $dbw->getType() === 'mysql' && $field === 'cl_type' ) {
// Range conditions with enums are weird in mysql
// This must be a numeric literal, or it won't work.
$encValue = intval( $row->cl_type_numeric );
$value = intval( $row->cl_type_numeric );
} else {
$encValue = $dbw->addQuotes( $row->$field );
}
$inequality = "$field > $encValue";
$equality = "$field = $encValue";
if ( $first ) {
$cond = $inequality;
$prefix = $equality;
$first = false;
} else {
// @phan-suppress-next-line PhanTypeSuspiciousStringExpression False positive
$cond .= " OR ($prefix AND $inequality)";
$prefix .= " AND $equality";
$value = $row->$field;
}
$conds[ $field ] = $value;
}
return $cond;
return $dbw->buildComparison( '>', $conds );
}
/**

View file

@ -1,5 +1,7 @@
<?php
use Wikimedia\Rdbms\Platform\SQLPlatform;
/**
* Tests for BatchRowUpdate and its components
*
@ -145,7 +147,7 @@ class BatchRowUpdateTest extends MediaWikiIntegrationTestCase {
[
"With single primary key must generate id > 'value'",
// Expected second iteration
[ "( id_field > '3' )" ],
[ "id_field > '3'" ],
// Primary key(s)
'id_field',
],
@ -154,7 +156,7 @@ class BatchRowUpdateTest extends MediaWikiIntegrationTestCase {
'With multiple primary keys the first conditions ' .
'must use >= and the final condition must use >',
// Expected second iteration
[ "( id_field = '3' AND foo > '103' ) OR ( id_field > '3' )" ],
[ "id_field > '3' OR (id_field = '3' AND (foo > '103'))" ],
// Primary key(s)
[ 'id_field', 'foo' ],
],
@ -234,11 +236,17 @@ class BatchRowUpdateTest extends MediaWikiIntegrationTestCase {
protected function mockDb( $methods = [] ) {
// @TODO: mock from Database
// FIXME: the constructor normally sets mAtomicLevels and mSrvCache
// FIXME: the constructor normally sets mAtomicLevels and mSrvCache, and platform
$databaseMysql = $this->getMockBuilder( Wikimedia\Rdbms\DatabaseMysqli::class )
->disableOriginalConstructor()
->onlyMethods( array_merge( [ 'isOpen', 'getApproximateLagStatus' ], $methods ) )
->getMock();
$reflection = new ReflectionClass( $databaseMysql );
$reflectionProperty = $reflection->getProperty( 'platform' );
$reflectionProperty->setAccessible( true );
$reflectionProperty->setValue( $databaseMysql, new SQLPlatform( $databaseMysql ) );
$databaseMysql->method( 'isOpen' )
->willReturn( true );
$databaseMysql->method( 'getApproximateLagStatus' )

View file

@ -91,6 +91,69 @@ class SQLPlatformTest extends PHPUnit\Framework\TestCase {
];
}
/**
* @dataProvider provideBuildComparison
* @covers Wikimedia\Rdbms\Database::buildComparison
*/
public function testBuildComparison( string $op, array $conds, string $sqlText ) {
$this->assertEquals(
$sqlText,
$this->platform->buildComparison( $op, $conds )
);
}
public static function provideBuildComparison() {
return [
"Simple '>'" => [
'>',
[ 'a' => 1 ],
'a > 1',
],
"Simple '>='" => [
'>=',
[ 'a' => 1 ],
'a >= 1',
],
"Simple '<'" => [
'<',
[ 'a' => 1 ],
'a < 1',
],
"Simple '<='" => [
'<=',
[ 'a' => 1 ],
'a <= 1',
],
"Complex '>'" => [
'>',
[ 'a' => 1, 'b' => 2, 'c' => 3 ],
'a > 1 OR (a = 1 AND (b > 2 OR (b = 2 AND (c > 3))))',
],
"Complex '>='" => [
'>=',
[ 'a' => 1, 'b' => 2, 'c' => 3 ],
'a > 1 OR (a = 1 AND (b > 2 OR (b = 2 AND (c >= 3))))',
],
"Complex '<'" => [
'<',
[ 'a' => 1, 'b' => 2, 'c' => 3 ],
'a < 1 OR (a = 1 AND (b < 2 OR (b = 2 AND (c < 3))))',
],
"Complex '<='" => [
'<=',
[ 'a' => 1, 'b' => 2, 'c' => 3 ],
'a < 1 OR (a = 1 AND (b < 2 OR (b = 2 AND (c <= 3))))',
],
"Quoting: fields are SQL identifiers, values are values" => [
// Note that the quoting here doesn't match any real database because
// SQLPlatformTestHelper overrides it
'>',
[ '`quoted\'as"field' => '`quoted\'as"value' ],
'`quoted\'as"field > \'`quoted\\\'as"value\'',
],
];
}
/**
* @dataProvider provideBuildLike
* @covers Wikimedia\Rdbms\Database::buildLike