wiki.techinc.nl/includes/api/ApiQuerySearch.php

247 lines
7.3 KiB
PHP
Raw Normal View History

<?php
/**
* Created on July 30, 2007
*
* API for MediaWiki 1.8+
*
* Copyright © 2007 Yuri Astrakhan <Firstname><Lastname>@gmail.com
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
* http://www.gnu.org/copyleft/gpl.html
*/
if ( !defined( 'MEDIAWIKI' ) ) {
// Eclipse helper - will be ignored in production
require_once( 'ApiQueryBase.php' );
}
/**
* Query module to perform full text search within wiki titles and content
*
* @ingroup API
*/
class ApiQuerySearch extends ApiQueryGeneratorBase {
public function __construct( $query, $moduleName ) {
parent::__construct( $query, $moduleName, 'sr' );
}
public function execute() {
$this->run();
}
public function executeGenerator( $resultPageSet ) {
$this->run( $resultPageSet );
}
private function run( $resultPageSet = null ) {
global $wgContLang;
$params = $this->extractRequestParams();
// Extract parameters
$limit = $params['limit'];
$query = $params['search'];
$what = $params['what'];
$searchInfo = array_flip( $params['info'] );
$prop = array_flip( $params['prop'] );
if ( strval( $query ) === '' ) {
$this->dieUsage( 'empty search string is not allowed', 'param-search' );
}
// Create search engine instance and set options
$search = SearchEngine::create();
$search->setLimitOffset( $limit + 1, $params['offset'] );
$search->setNamespaces( $params['namespace'] );
$search->showRedirects = $params['redirects'];
// Perform the actual search
if ( $what == 'text' ) {
$matches = $search->searchText( $query );
} elseif ( $what == 'title' ) {
$matches = $search->searchTitle( $query );
} else {
// We default to title searches; this is a terrible legacy
// of the way we initially set up the MySQL fulltext-based
// search engine with separate title and text fields.
// In the future, the default should be for a combined index.
$what = 'title';
$matches = $search->searchTitle( $query );
// Not all search engines support a separate title search,
// for instance the Lucene-based engine we use on Wikipedia.
// In this case, fall back to full-text search (which will
// include titles in it!)
if ( is_null( $matches ) ) {
$what = 'text';
$matches = $search->searchText( $query );
}
}
if ( is_null( $matches ) ) {
$this->dieUsage( "{$what} search is disabled", "search-{$what}-disabled" );
}
// Add search meta data to result
if ( isset( $searchInfo['totalhits'] ) ) {
$totalhits = $matches->getTotalHits();
if ( $totalhits !== null ) {
$this->getResult()->addValue( array( 'query', 'searchinfo' ),
'totalhits', $totalhits );
}
* Added fields to list=search output: size, wordcount, timestamp, snippet * Where supported by backend, list=search adds a 'searchinfo' element with optional info: 'totalhits' count and 'suggestion' alternate query term Snippets added to result items earlier by Roan; extended this with the other byte size, word count, and timestamp available on the result items and exposed through the regular UI. Had to work out a backwards-compatible method for the search meta-information with Roan; added a second 'searchinfo' element since adding attributes to 'search' would break compatibility for JSON output (despite being safe in XML). 'searchinfo' is present only if the backend supports the extra info and has something available; 'totalhits' with a total hit count and 'suggestion' for an alternate query suggestion (exposed as "Did you mean X?" link in UI). Note that total hit counts can be enabled for MySQL backend now by setting the experimental option $wgSearchMySQLTotalHits, but did-you-mean suggestions are not yet supported and need to be tested with a hack or another backend. Sample XML and JSON output with the new searchinfo items (which can be present whether or not there are any result items): <?xml version="1.0"?> <api> <query> <searchinfo totalhits="0" suggestion="joe momma" /> <search /> </query> </api> { "query": { "searchinfo": { "totalhits": 0, "suggestion": "joe momma" }, "search": [ ] } } The suggestion value is suitable for plugging back in as a search term, if present.
2009-07-28 21:13:48 +00:00
}
if ( isset( $searchInfo['suggestion'] ) && $matches->hasSuggestion() ) {
$this->getResult()->addValue( array( 'query', 'searchinfo' ),
'suggestion', $matches->getSuggestionQuery() );
* Added fields to list=search output: size, wordcount, timestamp, snippet * Where supported by backend, list=search adds a 'searchinfo' element with optional info: 'totalhits' count and 'suggestion' alternate query term Snippets added to result items earlier by Roan; extended this with the other byte size, word count, and timestamp available on the result items and exposed through the regular UI. Had to work out a backwards-compatible method for the search meta-information with Roan; added a second 'searchinfo' element since adding attributes to 'search' would break compatibility for JSON output (despite being safe in XML). 'searchinfo' is present only if the backend supports the extra info and has something available; 'totalhits' with a total hit count and 'suggestion' for an alternate query suggestion (exposed as "Did you mean X?" link in UI). Note that total hit counts can be enabled for MySQL backend now by setting the experimental option $wgSearchMySQLTotalHits, but did-you-mean suggestions are not yet supported and need to be tested with a hack or another backend. Sample XML and JSON output with the new searchinfo items (which can be present whether or not there are any result items): <?xml version="1.0"?> <api> <query> <searchinfo totalhits="0" suggestion="joe momma" /> <search /> </query> </api> { "query": { "searchinfo": { "totalhits": 0, "suggestion": "joe momma" }, "search": [ ] } } The suggestion value is suitable for plugging back in as a search term, if present.
2009-07-28 21:13:48 +00:00
}
// Add the search results to the result
$terms = $wgContLang->convertForSearchResult( $matches->termMatches() );
$titles = array();
$count = 0;
while ( $result = $matches->next() ) {
if ( ++ $count > $limit ) {
// We've reached the one extra which shows that there are additional items to be had. Stop here...
$this->setContinueEnumParameter( 'offset', $params['offset'] + $params['limit'] );
break;
}
// Silently skip broken and missing titles
if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
continue;
}
$title = $result->getTitle();
if ( is_null( $resultPageSet ) ) {
$vals = array();
ApiQueryBase::addTitleInfo( $vals, $title );
if ( isset( $prop['snippet'] ) ) {
$vals['snippet'] = $result->getTextSnippet( $terms );
}
if ( isset( $prop['size'] ) ) {
$vals['size'] = $result->getByteSize();
}
if ( isset( $prop['wordcount'] ) ) {
$vals['wordcount'] = $result->getWordCount();
}
if ( isset( $prop['timestamp'] ) ) {
$vals['timestamp'] = wfTimestamp( TS_ISO_8601, $result->getTimestamp() );
}
// Add item to results and see whether it fits
$fit = $this->getResult()->addValue( array( 'query', $this->getModuleName() ),
null, $vals );
if ( !$fit ) {
$this->setContinueEnumParameter( 'offset', $params['offset'] + $count - 1 );
* API: BREAKING CHANGE: (bug 11430) Return fewer results than the limit in some cases to prevent running out of memory * This means queries could possibly return fewer results than the limit and still set a query-continue * Add iicontinue, rvcontinue, cicontinue, incontinue, amfrom to faciliate query-continue for these modules * Implemented by blocking additions to the ApiResult object if they would make it too large ** Important things like query-continue values and warnings are exempt from this check ** RSS feeds and exported XML are also exempted (size-checking them would be too messy) ** Result size is checked against $wgAPIMaxResultSize, which defaults to 8 MB For those who really care, per-file details follow: ApiResult.php: * Introduced ApiResult::$mSize which keeps track of the result size. * Introduced ApiResult::size() which calculates an array's size (which is the sum of the strlen()s of its elements). * ApiResult::addValue() now checks that the result size stays below $wgAPIMaxResultSize. If the item won't fit, it won't be added and addValue() will return false. Callers should check the return value and set a query-continue if it's false. * Closed the back door that is ApiResult::getData(): callers can't manipulate the data array directly anymore so they can't bypass the result size limit. * Added ApiResult::setIndexedTagName_internal() which will call setIndexedTagName() on an array already in the result. This is needed for the 'new' order of adding results, which means addValue()ing one result at a time until you hit the limit or run out, then calling this function to set the tag name. * Added ApiResult::disableSizeCheck() and enableSizeCheck() which disable and enable size checking in addValue(). This is used for stuff like query-continue elements and warnings which shouldn't count towards the result size. * Added ApiResult::unsetValue() which removes an element from the result and decreases $mSize. ApiBase.php: * Like ApiResult::getData(), ApiBase::getResultData() no longer returns a reference. * Use ApiResult::disableSizeCheck() in ApiBase::setWarning() ApiQueryBase.php: * Added ApiQueryBase::addPageSubItem(), which adds page subitems one item at a time. * addPageSubItem() and addPageSubItems() now return whether the subitem fit in the result. * Use ApiResult::disableSizeCheck() in setContinueEnumParameter() ApiMain.php: * Use ApiResult::disableSizeCheck() in ApiMain::substituteResultWithError() * Use getParameter() rather than $mRequest to obtain requestid DefaultSettings.php: * Added $wgAPIMaxResultSize, with a default value of 8 MB ApiQuery*.php: * Added results one at a time, and set a query-continue if the result is full. ApiQueryLangLinks.php and friends: * Migrated from addPageSubItems() to addPageSubItem(). This eliminates the need for $lastId. ApiQueryAllLinks.php, ApiQueryWatchlist.php, ApiQueryAllimages.php, ApiQuerySearch.php: * Renamed $data to something more appropriate ($pageids, $ids or $titles) ApiQuerySiteinfo.php: * Abuse siprop as a query-continue parameter and set it to all props that couldn't be processed. ApiQueryRandom.php: * Doesn't do continuations, because the result is supposed to be random. * Be smart enough to not run the second query if the results of the first didn't fit. ApiQueryImageInfo.php, ApiQueryRevisions.php, ApiQueryCategoryInfo.php, ApiQueryInfo.php: * Added continue parameter which basically skips the first so many items ApiQueryBacklinks.php: * Throw the result in a big array first and addValue() that one element at a time if necessary ** This is necessary because the results aren't retrieved in order * Introduced $this->pageMap to map namespace and title to page ID * Rewritten extractRowInfo() and extractRedirRowInfo() a little * Declared all private member variables explicitly ApiQueryDeletedrevs.php: * Use a pagemap just like in Backlinks * Introduce fake page IDs and keep track of them so we know where to add what ** This doesn't change the output format, because the fake page IDs start at 0 and are consecutive ApiQueryAllmessages.php: * Add amfrom to facilitate query-continue ApiQueryUsers.php: * Rewrite: put the getOtherUsersInfo() code in execute()
2009-02-05 14:30:59 +00:00
break;
}
} else {
* API: BREAKING CHANGE: (bug 11430) Return fewer results than the limit in some cases to prevent running out of memory * This means queries could possibly return fewer results than the limit and still set a query-continue * Add iicontinue, rvcontinue, cicontinue, incontinue, amfrom to faciliate query-continue for these modules * Implemented by blocking additions to the ApiResult object if they would make it too large ** Important things like query-continue values and warnings are exempt from this check ** RSS feeds and exported XML are also exempted (size-checking them would be too messy) ** Result size is checked against $wgAPIMaxResultSize, which defaults to 8 MB For those who really care, per-file details follow: ApiResult.php: * Introduced ApiResult::$mSize which keeps track of the result size. * Introduced ApiResult::size() which calculates an array's size (which is the sum of the strlen()s of its elements). * ApiResult::addValue() now checks that the result size stays below $wgAPIMaxResultSize. If the item won't fit, it won't be added and addValue() will return false. Callers should check the return value and set a query-continue if it's false. * Closed the back door that is ApiResult::getData(): callers can't manipulate the data array directly anymore so they can't bypass the result size limit. * Added ApiResult::setIndexedTagName_internal() which will call setIndexedTagName() on an array already in the result. This is needed for the 'new' order of adding results, which means addValue()ing one result at a time until you hit the limit or run out, then calling this function to set the tag name. * Added ApiResult::disableSizeCheck() and enableSizeCheck() which disable and enable size checking in addValue(). This is used for stuff like query-continue elements and warnings which shouldn't count towards the result size. * Added ApiResult::unsetValue() which removes an element from the result and decreases $mSize. ApiBase.php: * Like ApiResult::getData(), ApiBase::getResultData() no longer returns a reference. * Use ApiResult::disableSizeCheck() in ApiBase::setWarning() ApiQueryBase.php: * Added ApiQueryBase::addPageSubItem(), which adds page subitems one item at a time. * addPageSubItem() and addPageSubItems() now return whether the subitem fit in the result. * Use ApiResult::disableSizeCheck() in setContinueEnumParameter() ApiMain.php: * Use ApiResult::disableSizeCheck() in ApiMain::substituteResultWithError() * Use getParameter() rather than $mRequest to obtain requestid DefaultSettings.php: * Added $wgAPIMaxResultSize, with a default value of 8 MB ApiQuery*.php: * Added results one at a time, and set a query-continue if the result is full. ApiQueryLangLinks.php and friends: * Migrated from addPageSubItems() to addPageSubItem(). This eliminates the need for $lastId. ApiQueryAllLinks.php, ApiQueryWatchlist.php, ApiQueryAllimages.php, ApiQuerySearch.php: * Renamed $data to something more appropriate ($pageids, $ids or $titles) ApiQuerySiteinfo.php: * Abuse siprop as a query-continue parameter and set it to all props that couldn't be processed. ApiQueryRandom.php: * Doesn't do continuations, because the result is supposed to be random. * Be smart enough to not run the second query if the results of the first didn't fit. ApiQueryImageInfo.php, ApiQueryRevisions.php, ApiQueryCategoryInfo.php, ApiQueryInfo.php: * Added continue parameter which basically skips the first so many items ApiQueryBacklinks.php: * Throw the result in a big array first and addValue() that one element at a time if necessary ** This is necessary because the results aren't retrieved in order * Introduced $this->pageMap to map namespace and title to page ID * Rewritten extractRowInfo() and extractRedirRowInfo() a little * Declared all private member variables explicitly ApiQueryDeletedrevs.php: * Use a pagemap just like in Backlinks * Introduce fake page IDs and keep track of them so we know where to add what ** This doesn't change the output format, because the fake page IDs start at 0 and are consecutive ApiQueryAllmessages.php: * Add amfrom to facilitate query-continue ApiQueryUsers.php: * Rewrite: put the getOtherUsersInfo() code in execute()
2009-02-05 14:30:59 +00:00
$titles[] = $title;
}
}
if ( is_null( $resultPageSet ) ) {
$this->getResult()->setIndexedTagName_internal( array(
'query', $this->getModuleName()
), 'p' );
} else {
$resultPageSet->populateFromTitles( $titles );
}
}
public function getAllowedParams() {
return array(
'search' => null,
'namespace' => array(
ApiBase::PARAM_DFLT => 0,
ApiBase::PARAM_TYPE => 'namespace',
ApiBase::PARAM_ISMULTI => true,
),
'what' => array(
ApiBase::PARAM_DFLT => null,
ApiBase::PARAM_TYPE => array(
'title',
'text',
)
),
'info' => array(
ApiBase::PARAM_DFLT => 'totalhits|suggestion',
ApiBase::PARAM_TYPE => array(
'totalhits',
'suggestion',
),
ApiBase::PARAM_ISMULTI => true,
),
'prop' => array(
ApiBase::PARAM_DFLT => 'size|wordcount|timestamp|snippet',
ApiBase::PARAM_TYPE => array(
'size',
'wordcount',
'timestamp',
'snippet',
),
ApiBase::PARAM_ISMULTI => true,
),
'redirects' => false,
'offset' => 0,
'limit' => array(
ApiBase::PARAM_DFLT => 10,
ApiBase::PARAM_TYPE => 'limit',
ApiBase::PARAM_MIN => 1,
ApiBase::PARAM_MAX => ApiBase::LIMIT_SML1,
ApiBase::PARAM_MAX2 => ApiBase::LIMIT_SML2
)
);
}
public function getParamDescription() {
return array(
'search' => 'Search for all page titles (or content) that has this value.',
'namespace' => 'The namespace(s) to enumerate.',
'what' => 'Search inside the text or titles.',
'info' => 'What metadata to return.',
'prop' => 'What properties to return.',
'redirects' => 'Include redirect pages in the search.',
'offset' => 'Use this value to continue paging (return by query)',
'limit' => 'How many total pages to return.'
);
}
public function getDescription() {
return 'Perform a full text search';
}
public function getPossibleErrors() {
return array_merge( parent::getPossibleErrors(), array(
array( 'code' => 'param-search', 'info' => 'empty search string is not allowed' ),
array( 'code' => 'search-text-disabled', 'info' => 'text search is disabled' ),
array( 'code' => 'search-title-disabled', 'info' => 'title search is disabled' ),
) );
}
protected function getExamples() {
return array(
'api.php?action=query&list=search&srsearch=meaning',
'api.php?action=query&list=search&srwhat=text&srsearch=meaning',
'api.php?action=query&generator=search&gsrsearch=meaning&prop=info',
);
}
public function getVersion() {
2007-12-06 18:33:18 +00:00
return __CLASS__ . ': $Id$';
}
}