SearchHandler: add support for completion search

This defines a new REST route, /coredev/v0/search/title

Bug: T246387
Change-Id: Ib8d64f3028d3fa2ce239f922181064a3fc91a488
This commit is contained in:
daniel 2020-03-25 18:21:12 +01:00
parent a749000d39
commit 76b703fe5c
4 changed files with 237 additions and 24 deletions

View file

@ -2,16 +2,21 @@
namespace MediaWiki\Rest\Handler;
use InvalidArgumentException;
use ISearchResultSet;
use MediaWiki\Permissions\PermissionManager;
use MediaWiki\Rest\Handler;
use MediaWiki\Rest\LocalizedHttpException;
use MediaWiki\Rest\RequestInterface;
use MediaWiki\Rest\Response;
use MediaWiki\Rest\ResponseFactory;
use MediaWiki\Rest\Router;
use RequestContext;
use SearchEngine;
use SearchEngineConfig;
use SearchEngineFactory;
use SearchResult;
use SearchSuggestion;
use Status;
use User;
use Wikimedia\Message\MessageValue;
@ -35,6 +40,26 @@ class SearchHandler extends Handler {
/** @var User */
private $user;
/**
* Search page body and titles.
*/
public const FULLTEXT_MODE = 'fulltext';
/**
* Search title completion matches.
*/
public const COMPLETION_MODE = 'completion';
/**
* Supported modes
*/
private const SUPPORTED_MODES = [ self::FULLTEXT_MODE, self::COMPLETION_MODE ];
/**
* @var string
*/
private $mode = null;
/** Limit results to 50 pages per default */
private const LIMIT = 50;
@ -62,6 +87,29 @@ class SearchHandler extends Handler {
$this->user = RequestContext::getMain()->getUser();
}
public function init(
Router $router,
RequestInterface $request,
array $config,
ResponseFactory $responseFactory
) {
parent::init(
$router,
$request,
$config,
$responseFactory
);
$this->mode = $config['mode'] ?? self::FULLTEXT_MODE;
if ( !in_array( $this->mode, self::SUPPORTED_MODES ) ) {
throw new InvalidArgumentException(
"Unsupported search mode `{$this->mode}` configured. Supported modes: " .
implode( ', ', self::SUPPORTED_MODES )
);
}
}
/**
* @return SearchEngine
*/
@ -84,7 +132,7 @@ class SearchHandler extends Handler {
* @return SearchResult[]
* @throws LocalizedHttpException
*/
private function getResultsOrThrow( $results ) {
private function getSearchResultsOrThrow( $results ) {
if ( $results ) {
if ( $results instanceof Status ) {
$status = $results;
@ -109,7 +157,8 @@ class SearchHandler extends Handler {
}
/**
* Execute search on both title and text fields
* Execute search and return results.
*
* @param SearchEngine $searchEngine
* @return SearchResult[]
* @throws LocalizedHttpException
@ -117,34 +166,72 @@ class SearchHandler extends Handler {
private function doSearch( $searchEngine ) {
$query = $this->getValidatedParams()['q'];
$titleSearch = $searchEngine->searchTitle( $query );
$textSearch = $searchEngine->searchText( $query );
if ( $this->mode == self::COMPLETION_MODE ) {
$completionSearch = $searchEngine->completionSearchWithVariants( $query );
return $this->buildOutputFromSuggestions( $completionSearch->getSuggestions() );
} else {
$titleSearch = $searchEngine->searchTitle( $query );
$textSearch = $searchEngine->searchText( $query );
$titleSearchResults = $this->getResultsOrThrow( $titleSearch );
$textSearchResults = $this->getResultsOrThrow( $textSearch );
$titleSearchResults = $this->getSearchResultsOrThrow( $titleSearch );
$textSearchResults = $this->getSearchResultsOrThrow( $textSearch );
$mergedResults = array_merge( $titleSearchResults, $textSearchResults );
return $mergedResults;
$mergedResults = array_merge( $titleSearchResults, $textSearchResults );
return $this->buildOutputFromSearchResults( $mergedResults );
}
}
/**
* Remove duplicate pages and turn results into response json objects
* @param SearchResult[] $textAndTitleResults
*
* @param SearchSuggestion[] $suggestions
*
* @return array page objects
*/
private function parseResults( $textAndTitleResults ) {
private function buildOutputFromSuggestions( array $suggestions ) {
$pages = [];
$foundPageIds = [];
foreach ( $textAndTitleResults as $result ) {
foreach ( $suggestions as $suggestion ) {
$title = $suggestion->getSuggestedTitle();
if ( $title && $title->exists() ) {
$pageID = $title->getArticleID();
if ( !isset( $foundPageIds[$pageID] ) &&
$this->permissionManager->quickUserCan( 'read', $this->user, $title )
) {
$page = [
'id' => $pageID,
'key' => $title->getPrefixedDBkey(),
'title' => $title->getPrefixedText(),
'excerpt' => $suggestion->getText() ?: null,
];
$pages[] = $page;
$foundPageIds[$pageID] = true;
}
}
}
return $pages;
}
/**
* Remove duplicate pages and turn results into response json objects
*
* @param SearchResult[] $searchResults
*
* @return array page objects
*/
private function buildOutputFromSearchResults( array $searchResults ) {
$pages = [];
$foundPageIds = [];
foreach ( $searchResults as $result ) {
if ( !$result->isBrokenTitle() && !$result->isMissingRevision() ) {
$title = $result->getTitle();
$pageID = $title->getArticleID();
if ( !isset( $foundPageIds[$pageID] ) &&
$this->permissionManager->userCan( 'read', $this->user, $title )
$this->permissionManager->quickUserCan( 'read', $this->user, $title )
) {
$page = [
'id' => $pageID,
'key' => $title->getPrefixedDbKey(),
'key' => $title->getPrefixedDBkey(),
'title' => $title->getPrefixedText(),
'excerpt' => $result->getTextSnippet() ?: null,
];
@ -163,8 +250,7 @@ class SearchHandler extends Handler {
public function execute() {
$searchEngine = $this->createSearchEngine();
$results = $this->doSearch( $searchEngine );
$pages = $this->parseResults( $results );
return $this->getResponseFactory()->createJson( [ 'pages' => $pages ] );
return $this->getResponseFactory()->createJson( [ 'pages' => $results ] );
}
public function getParamSettings() {

View file

@ -6,7 +6,18 @@
"PermissionManager",
"SearchEngineFactory",
"SearchEngineConfig"
]
],
"mode": "fulltext"
},
{
"path": "/coredev/v0/search/title",
"class": "MediaWiki\\Rest\\Handler\\SearchHandler",
"services": [
"PermissionManager",
"SearchEngineFactory",
"SearchEngineConfig"
],
"mode": "completion"
},
{
"path": "/coredev/v0/page/{title}/links/language",

View file

@ -2,8 +2,9 @@ const { action, assert, REST, utils } = require( 'api-testing' );
describe( 'Search', () => {
const client = new REST( 'rest.php/coredev/v0' );
const pageWithBothTerms = utils.title( 'XXX' );
const pageWithOneTerm = utils.title( 'YYY' );
const sharedTitleTerm = 'X' + utils.uniq(); // NOTE: must start with upper-case letter
const pageWithBothTerms = utils.title( `${sharedTitleTerm}XXX` );
const pageWithOneTerm = utils.title( `${sharedTitleTerm}YYY` );
const pageWithOwnTitle = utils.title( 'ZZZ' );
const searchTerm = utils.uniq();
const searchTerm2 = utils.uniq();
@ -57,7 +58,8 @@ describe( 'Search', () => {
const { body } = await client.get( `/search/page?q=${searchTerm2}` );
assert.lengthOf( body.pages, 2 );
const returnedPages = [ body.pages[ 0 ].title, body.pages[ 1 ].title ];
assert.sameMembers( returnedPages, [ pageWithBothTerms, pageWithOneTerm ] );
const expectedPages = [ pageWithBothTerms, pageWithOneTerm ];
assert.equal( expectedPages.sort().join( '|' ), returnedPages.sort().join( '|' ) );
} );
it( 'should return only one page when two pages match but limit is 1', async () => {
const { body } = await client.get( `/search/page?q=${searchTerm2}&limit=1` );
@ -76,4 +78,48 @@ describe( 'Search', () => {
assert.lengthOf( body.pages, 0 );
} );
} );
describe( 'GET /search/title?q={term}', () => {
it( 'should return empty array when search term has no title matches', async () => {
const nonExistentTerm = utils.uniq();
const { body } = await client.get( `/search/title?q=${nonExistentTerm}` );
assert.lengthOf( body.pages, 0 );
} );
it( 'should not return pages when there is only a text match', async () => {
const { body } = await client.get( `/search/title?q=${searchTerm}` );
assert.lengthOf( body.pages, 0 );
} );
it( 'should return array of pages when there is a title match', async () => {
const { body } = await client.get( `/search/title?q=${pageWithBothTerms}` );
assert.lengthOf( body.pages, 1 );
const returnPage = body.pages[ 0 ];
assert.nestedProperty( returnPage, 'title' );
assert.nestedProperty( returnPage, 'id' );
assert.nestedProperty( returnPage, 'key' );
assert.nestedProperty( returnPage, 'excerpt' );
} );
it( 'should return two pages when both pages match', async () => {
const { body } = await client.get( `/search/title?q=${sharedTitleTerm}` );
assert.lengthOf( body.pages, 2 );
const returnedPages = [ body.pages[ 0 ].title, body.pages[ 1 ].title ];
const expectedPages = [ pageWithBothTerms, pageWithOneTerm ];
assert.equal( expectedPages.sort().join( '|' ), returnedPages.sort().join( '|' ) );
} );
it( 'should return only one page when two pages match but limit is 1', async () => {
const { body } = await client.get( `/search/title?q=${sharedTitleTerm}&limit=1` );
assert.lengthOf( body.pages, 1 );
} );
it( 'should not return deleted page', async () => {
const deleteTerm = utils.uniq();
const pageToDelete = utils.title( `${deleteTerm}_test_` );
const { title } = await alice.edit( pageToDelete, { text: deleteTerm } );
await mindy.action( 'delete', {
title,
summary: 'testing',
token: await mindy.token( 'csrf' )
}, 'POST' );
const { body } = await client.get( `/search/title?q=${deleteTerm}` );
assert.lengthOf( body.pages, 0 );
} );
} );
} );

View file

@ -14,6 +14,9 @@ use PHPUnit\Framework\MockObject\MockObject;
use SearchEngine;
use SearchEngineFactory;
use SearchResult;
use SearchResultSet;
use SearchSuggestion;
use SearchSuggestionSet;
use Status;
use User;
use Wikimedia\Message\MessageValue;
@ -30,10 +33,19 @@ class SearchHandlerTest extends \MediaWikiUnitTestCase {
*/
private $searchEngine = null;
/**
* @param $query
* @param SearchResultSet|Status $titleResult
* @param SearchResultSet|Status $textResult
* @param SearchSuggestionSet|null $completionResult
*
* @return SearchHandler
*/
private function newHandler(
$query,
$titleResult,
$textResult
$textResult,
$completionResult = null
) {
$config = new HashConfig( [
'SearchType' => 'test',
@ -41,14 +53,15 @@ class SearchHandlerTest extends \MediaWikiUnitTestCase {
'NamespacesToBeSearchedDefault' => [ NS_MAIN => true ],
] );
/** @var Language|MockObject $language */
$language = $this->createNoOpMock( Language::class );
$searchEngineConfig = new \SearchEngineConfig( $config, $language );
/** @var PermissionManager|MockObject $permissionManager */
$permissionManager = $this->createNoOpMock(
PermissionManager::class, [ 'userCan' ]
PermissionManager::class, [ 'quickUserCan' ]
);
$permissionManager->method( 'userCan' )
$permissionManager->method( 'quickUserCan' )
->willReturnCallback( function ( $action, User $user, LinkTarget $page ) {
return !preg_match( '/Forbidden/', $page->getText() );
} );
@ -62,6 +75,12 @@ class SearchHandlerTest extends \MediaWikiUnitTestCase {
->with( $query )
->willReturn( $textResult );
if ( $completionResult ) {
$this->searchEngine->method( 'completionSearchWithVariants' )
->with( $query )
->willReturn( $completionResult );
}
/** @var SearchEngineFactory|MockObject $searchEngineFactory */
$searchEngineFactory = $this->createNoOpMock( SearchEngineFactory::class, [ 'create' ] );
$searchEngineFactory->method( 'create' )
@ -75,6 +94,11 @@ class SearchHandlerTest extends \MediaWikiUnitTestCase {
}
/**
* @param string $pageName
* @param string $textSnippet
* @param bool $broken
* @param bool $missing
*
* @return SearchResult
*/
private function makeMockSearchResult(
@ -97,7 +121,27 @@ class SearchHandlerTest extends \MediaWikiUnitTestCase {
return $result;
}
public function testExecute() {
/**
* @param string $pageName
*
* @return SearchSuggestion
*/
private function makeMockSearchSuggestion( $pageName ) {
$title = $this->makeMockTitle( $pageName );
/** @var SearchSuggestion|MockObject $suggestion */
$suggestion = $this->createNoOpMock(
SearchSuggestion::class,
[ 'getSuggestedTitle', 'getSuggestedTitleID', 'getText' ]
);
$suggestion->method( 'getSuggestedTitle' )->willReturn( $title );
$suggestion->method( 'getSuggestedTitleID' )->willReturn( $title->getArticleID() );
$suggestion->method( 'getText' )->willReturn( $title->getPrefixedText() );
return $suggestion;
}
public function testExecuteFulltextSearch() {
$titleResults = new MockSearchResultSet( [
$this->makeMockSearchResult( 'Foo', 'one' ),
$this->makeMockSearchResult( 'Forbidden Foo', 'two' ), // forbidden
@ -115,7 +159,8 @@ class SearchHandlerTest extends \MediaWikiUnitTestCase {
$request = new RequestData( [ 'queryParams' => [ 'q' => $query ] ] );
$handler = $this->newHandler( $query, $titleResults, $textResults );
$data = $this->executeHandlerAndGetBodyData( $handler, $request );
$config = [ 'mode' => SearchHandler::FULLTEXT_MODE ];
$data = $this->executeHandlerAndGetBodyData( $handler, $request, $config );
$this->assertArrayHasKey( 'pages', $data );
$this->assertCount( 4, $data['pages'] );
@ -129,6 +174,31 @@ class SearchHandlerTest extends \MediaWikiUnitTestCase {
$this->assertSame( 'three', $data['pages'][3]['excerpt'] );
}
public function testExecuteCompletionSearch() {
$titleResults = new MockSearchResultSet( [] );
$textResults = new MockSearchResultSet( [
$this->makeMockSearchResult( 'Quux', 'one' ),
] );
$completionResults = new SearchSuggestionSet( [
$this->makeMockSearchSuggestion( 'Frob' ),
$this->makeMockSearchSuggestion( 'Frobnitz' ),
] );
$query = 'foo';
$request = new RequestData( [ 'queryParams' => [ 'q' => $query ] ] );
$handler = $this->newHandler( $query, $titleResults, $textResults, $completionResults );
$config = [ 'mode' => SearchHandler::COMPLETION_MODE ];
$data = $this->executeHandlerAndGetBodyData( $handler, $request, $config );
$this->assertArrayHasKey( 'pages', $data );
$this->assertCount( 2, $data['pages'] );
$this->assertSame( 'Frob', $data['pages'][0]['title'] );
$this->assertSame( 'Frob', $data['pages'][0]['excerpt'] );
$this->assertSame( 'Frobnitz', $data['pages'][1]['title'] );
$this->assertSame( 'Frobnitz', $data['pages'][1]['excerpt'] );
}
public function testExecute_limit() {
$titleResults = new MockSearchResultSet( [
$this->makeMockSearchResult( 'Foo' ),