wiki.techinc.nl/includes/api/ApiQueryAllPages.php
Brad Jorsch c394cdc04d API: Avoid MySQL filesort with list=allpages&apfilterlanglinks=withlanglinks
I'm not sure whether r44584 didn't go far enough or if MySQL's behavior
has changed since 2008, but MySQL is now filesorting when a
constant-in-WHERE field is included in GROUP BY.

If all our supported databases used the 1999 SQL standard rules for
GROUP BY[1] this would be an easy fix. But PostgreSQL before 9.1 uses
the older 1992 rules.[2] And then there's Oracle and MSSQL, which aren't
listed as supported[3] but are still in the code. Simplest thing to do
is probably to check if we're on MySQL, Sqlite, or Postgres >= 9.1 and
use the 1999 rules, and otherwise use the older rules.

 [1]: Basically "any non-aggregate field in the SELECT must be
      functionally dependent on the grouped-by fields", meaning if you
      include the primary key you're good.
 [2]: Basically "any non-aggregate field in the SELECT must be in the
      GROUP BY".
 [3]: https://www.mediawiki.org/wiki/Manual:Installation_requirements#Database_server

Bug: T78276
Change-Id: I80b515bb06d194b146897155b318a3d1c908e8b6
2014-12-19 00:08:36 +00:00

331 lines
9.5 KiB
PHP

<?php
/**
*
*
* Created on Sep 25, 2006
*
* Copyright © 2006 Yuri Astrakhan "<Firstname><Lastname>@gmail.com"
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
/**
 * Query module to enumerate all available pages.
 *
 * Can be used as a plain list module (list=allpages) or as a generator
 * (generator=allpages). Module parameters use the 'ap' prefix.
 *
 * @ingroup API
 */
class ApiQueryAllPages extends ApiQueryGeneratorBase {

	/**
	 * @param ApiQuery $query
	 * @param string $moduleName
	 */
	public function __construct( ApiQuery $query, $moduleName ) {
		parent::__construct( $query, $moduleName, 'ap' );
	}

	/**
	 * Run the module in list mode, writing the pages into the API result.
	 *
	 * @return void
	 */
	public function execute() {
		$this->run();
	}

	/**
	 * @param array $params
	 * @return string Always 'public'
	 */
	public function getCacheMode( $params ) {
		return 'public';
	}

	/**
	 * Run the module in generator mode, feeding rows into the page set.
	 *
	 * Dies with a usage error if the page set is resolving redirects;
	 * callers are told to use gapfilterredir=nonredirects instead.
	 *
	 * @param ApiPageSet $resultPageSet
	 * @return void
	 */
	public function executeGenerator( $resultPageSet ) {
		if ( $resultPageSet->isResolvingRedirects() ) {
			$this->dieUsage(
				'Use "gapfilterredir=nonredirects" option instead of "redirects" ' .
					'when using allpages as a generator',
				'params'
			);
		}

		$this->run( $resultPageSet );
	}

	/**
	 * Build and run the page query, then emit the rows.
	 *
	 * @param ApiPageSet|null $resultPageSet Page set to populate when running
	 *  as a generator, or null to add the rows to the API result directly.
	 * @return void
	 */
	private function run( $resultPageSet = null ) {
		$db = $this->getDB();
		$params = $this->extractRequestParams();
		$descending = ( $params['dir'] === 'descending' );

		// Page filters
		$this->addTables( 'page' );

		if ( $params['continue'] !== null ) {
			$cont = explode( '|', $params['continue'] );
			$this->dieContinueUsageIf( count( $cont ) !== 1 );
			$op = $descending ? '<' : '>';
			$cont_from = $db->addQuotes( $cont[0] );
			$this->addWhere( "page_title $op= $cont_from" );
		}

		if ( $params['filterredir'] === 'redirects' ) {
			$this->addWhereFld( 'page_is_redirect', 1 );
		} elseif ( $params['filterredir'] === 'nonredirects' ) {
			$this->addWhereFld( 'page_is_redirect', 0 );
		}

		$this->addWhereFld( 'page_namespace', $params['namespace'] );
		$dir = ( $descending ? 'older' : 'newer' );
		$from = ( $params['from'] === null
			? null
			: $this->titlePartToKey( $params['from'], $params['namespace'] ) );
		$to = ( $params['to'] === null
			? null
			: $this->titlePartToKey( $params['to'], $params['namespace'] ) );
		$this->addWhereRange( 'page_title', $dir, $from, $to );

		if ( isset( $params['prefix'] ) ) {
			$this->addWhere( 'page_title' . $db->buildLike(
				$this->titlePartToKey( $params['prefix'], $params['namespace'] ),
				$db->anyString() ) );
		}

		// In generator mode the page set decides which columns it needs.
		if ( $resultPageSet === null ) {
			$selectFields = array(
				'page_namespace',
				'page_title',
				'page_id'
			);
		} else {
			$selectFields = $resultPageSet->getPageTableFields();
		}
		$this->addFields( $selectFields );

		// The name_title index is only forced when none of the filters
		// below (size, protection, langlinks) is in use.
		$forceNameTitleIndex = true;
		if ( isset( $params['minsize'] ) ) {
			$this->addWhere( 'page_len>=' . intval( $params['minsize'] ) );
			$forceNameTitleIndex = false;
		}

		if ( isset( $params['maxsize'] ) ) {
			$this->addWhere( 'page_len<=' . intval( $params['maxsize'] ) );
			$forceNameTitleIndex = false;
		}

		// Page protection filtering.
		// prtype has no default and may be null when omitted, so test it with
		// empty() rather than count(), which rejects non-countable values on
		// newer PHP versions.
		$hasPrType = !empty( $params['prtype'] );
		if ( $hasPrType || $params['prexpiry'] !== 'all' ) {
			$this->addTables( 'page_restrictions' );
			$this->addWhere( 'page_id=pr_page' );
			$this->addWhere( "pr_expiry > {$db->addQuotes( $db->timestamp() )} OR pr_expiry IS NULL" );

			if ( $hasPrType ) {
				$this->addWhereFld( 'pr_type', $params['prtype'] );

				if ( isset( $params['prlevel'] ) ) {
					// Remove the empty string and '*' from the prlevel array
					$prlevel = array_diff( $params['prlevel'], array( '', '*' ) );

					if ( count( $prlevel ) ) {
						$this->addWhereFld( 'pr_level', $prlevel );
					}
				}
				if ( $params['prfiltercascade'] === 'cascading' ) {
					$this->addWhereFld( 'pr_cascade', 1 );
				} elseif ( $params['prfiltercascade'] === 'noncascading' ) {
					$this->addWhereFld( 'pr_cascade', 0 );
				}
			}
			$forceNameTitleIndex = false;

			if ( $params['prexpiry'] === 'indefinite' ) {
				$this->addWhere( "pr_expiry = {$db->addQuotes( $db->getInfinity() )} OR pr_expiry IS NULL" );
			} elseif ( $params['prexpiry'] === 'definite' ) {
				$this->addWhere( "pr_expiry != {$db->addQuotes( $db->getInfinity() )}" );
			}

			$this->addOption( 'DISTINCT' );
		} elseif ( isset( $params['prlevel'] ) ) {
			$this->dieUsage( 'prlevel may not be used without prtype', 'params' );
		}

		if ( $params['filterlanglinks'] === 'withoutlanglinks' ) {
			$this->addTables( 'langlinks' );
			$this->addJoinConds( array( 'langlinks' => array( 'LEFT JOIN', 'page_id=ll_from' ) ) );
			$this->addWhere( 'll_from IS NULL' );
			$forceNameTitleIndex = false;
		} elseif ( $params['filterlanglinks'] === 'withlanglinks' ) {
			$this->addTables( 'langlinks' );
			$this->addWhere( 'page_id=ll_from' );
			$this->addOption( 'STRAIGHT_JOIN' );

			// MySQL filesorts if we use a GROUP BY that works with the rules
			// in the 1992 SQL standard (it doesn't like having the
			// constant-in-WHERE page_namespace column in there). Using the
			// 1999 rules works fine, but that breaks other DBs. Sigh.
			/// @todo Once we drop support for 1992-rule DBs, we can simplify this.
			if ( $this->supportsSql1999GroupBy( $db ) ) {
				// 1999 rules, or screw-the-rules
				$this->addOption( 'GROUP BY', array( 'page_title', 'page_id' ) );
			} else {
				// 1992 rules
				$this->addOption( 'GROUP BY', $selectFields );
			}

			$forceNameTitleIndex = false;
		}

		if ( $forceNameTitleIndex ) {
			$this->addOption( 'USE INDEX', 'name_title' );
		}

		// Fetch one row more than requested so we can tell whether there
		// are further results to continue from.
		$limit = $params['limit'];
		$this->addOption( 'LIMIT', $limit + 1 );
		$res = $this->select( __METHOD__ );

		// Get gender information
		if ( MWNamespace::hasGenderDistinction( $params['namespace'] ) ) {
			$users = array();
			foreach ( $res as $row ) {
				$users[] = $row->page_title;
			}
			GenderCache::singleton()->doQuery( $users, __METHOD__ );
			$res->rewind(); // reset
		}

		$count = 0;
		$result = $this->getResult();
		foreach ( $res as $row ) {
			if ( ++$count > $limit ) {
				// We've reached the one extra which shows that there are
				// additional pages to be had. Stop here...
				$this->setContinueEnumParameter( 'continue', $row->page_title );
				break;
			}

			if ( $resultPageSet === null ) {
				$title = Title::makeTitle( $row->page_namespace, $row->page_title );
				$vals = array(
					'pageid' => intval( $row->page_id ),
					'ns' => intval( $title->getNamespace() ),
					'title' => $title->getPrefixedText()
				);
				$fit = $result->addValue( array( 'query', $this->getModuleName() ), null, $vals );
				if ( !$fit ) {
					// The result did not accept this row; resume from it next time.
					$this->setContinueEnumParameter( 'continue', $row->page_title );
					break;
				}
			} else {
				$resultPageSet->processDbRow( $row );
			}
		}

		if ( $resultPageSet === null ) {
			$result->setIndexedTagName_internal( array( 'query', $this->getModuleName() ), 'p' );
		}
	}

	/**
	 * Whether GROUP BY may list only the key columns (SQL:1999 rules)
	 * instead of every selected field (SQL-92 rules).
	 *
	 * True for MySQL, SQLite, and PostgreSQL 9.1 or later. The version is
	 * compared with version_compare() rather than as a float, so that
	 * dotted version strings such as "10.4.1" are ordered correctly.
	 * Anything not recognised falls back to the SQL-92 form.
	 *
	 * @param object $db Database handle as returned by getDB(); must provide
	 *  getType() and getServerVersion()
	 * @return bool
	 */
	private function supportsSql1999GroupBy( $db ) {
		$dbType = $db->getType();
		if ( $dbType === 'mysql' || $dbType === 'sqlite' ) {
			return true;
		}

		// NOTE(review): assumes getServerVersion() yields a dotted version
		// string (e.g. "9.1.3") - confirm for the postgres backend.
		return $dbType === 'postgres'
			&& version_compare( (string)$db->getServerVersion(), '9.1', '>=' );
	}

	/**
	 * @return array Parameter definitions keyed by unprefixed parameter name
	 */
	public function getAllowedParams() {
		return array(
			'from' => null,
			'continue' => array(
				ApiBase::PARAM_HELP_MSG => 'api-help-param-continue',
			),
			'to' => null,
			'prefix' => null,
			'namespace' => array(
				ApiBase::PARAM_DFLT => NS_MAIN,
				ApiBase::PARAM_TYPE => 'namespace',
			),
			'filterredir' => array(
				ApiBase::PARAM_DFLT => 'all',
				ApiBase::PARAM_TYPE => array(
					'all',
					'redirects',
					'nonredirects'
				)
			),
			'minsize' => array(
				ApiBase::PARAM_TYPE => 'integer',
			),
			'maxsize' => array(
				ApiBase::PARAM_TYPE => 'integer',
			),
			'prtype' => array(
				ApiBase::PARAM_TYPE => Title::getFilteredRestrictionTypes( true ),
				ApiBase::PARAM_ISMULTI => true
			),
			'prlevel' => array(
				ApiBase::PARAM_TYPE => $this->getConfig()->get( 'RestrictionLevels' ),
				ApiBase::PARAM_ISMULTI => true
			),
			'prfiltercascade' => array(
				ApiBase::PARAM_DFLT => 'all',
				ApiBase::PARAM_TYPE => array(
					'cascading',
					'noncascading',
					'all'
				),
			),
			'limit' => array(
				ApiBase::PARAM_DFLT => 10,
				ApiBase::PARAM_TYPE => 'limit',
				ApiBase::PARAM_MIN => 1,
				ApiBase::PARAM_MAX => ApiBase::LIMIT_BIG1,
				ApiBase::PARAM_MAX2 => ApiBase::LIMIT_BIG2
			),
			'dir' => array(
				ApiBase::PARAM_DFLT => 'ascending',
				ApiBase::PARAM_TYPE => array(
					'ascending',
					'descending'
				)
			),
			'filterlanglinks' => array(
				ApiBase::PARAM_TYPE => array(
					'withlanglinks',
					'withoutlanglinks',
					'all'
				),
				ApiBase::PARAM_DFLT => 'all'
			),
			'prexpiry' => array(
				ApiBase::PARAM_TYPE => array(
					'indefinite',
					'definite',
					'all'
				),
				ApiBase::PARAM_DFLT => 'all'
			),
		);
	}

	/**
	 * @return array Map of example query string => help message key
	 */
	protected function getExamplesMessages() {
		return array(
			'action=query&list=allpages&apfrom=B'
				=> 'apihelp-query+allpages-example-B',
			'action=query&generator=allpages&gaplimit=4&gapfrom=T&prop=info'
				=> 'apihelp-query+allpages-example-generator',
			'action=query&generator=allpages&gaplimit=2&' .
				'gapfilterredir=nonredirects&gapfrom=Re&prop=revisions&rvprop=content'
				=> 'apihelp-query+allpages-example-generator-revisions',
		);
	}

	/**
	 * @return string URL of the online documentation for this module
	 */
	public function getHelpUrls() {
		return 'https://www.mediawiki.org/wiki/API:Allpages';
	}
}