Partially revert I61dc536 that broke phrase search support. Fix phrase search by making explicit that there are two kinds of legalSearchChars() use cases: - the chars allowed to be part of the search query (including special syntax chars such as " and *). Used by SearchDatabase::filter() to clean up the whole query string (the default). - the chars allowed to be part of a search term (excluding special syntax chars). Used by search engine implementations when parsing with a regex. For future reference: Originally this distinction was made "explicit" by calling SearchEngine::legalSearchChars() directly during the parsing stage. This was broken by Iaabc10c by enabling inheritance. This patch adds a new optional param to legalSearchChars to make this more explicit. Also remove the function I introduced in I61dc536 (I wrongly assumed that the distinction made between legalSearchChars use cases was due to a difference in behavior between indexing and searching). Added more tests to prevent this from happening in the future. Bug: T167798 Change-Id: Ibdc796bb2881a2ed8194099d8c9f491980010f0f
313 lines
9.7 KiB
PHP
313 lines
9.7 KiB
PHP
<?php
|
||
|
||
/**
 * @group Search
 * @group Database
 *
 * @covers SearchEngine<extended>
 * @note Coverage will only ever show one of the Search* classes
 */
|
||
class SearchEngineTest extends MediaWikiLangTestCase {
|
||
|
||
/**
 * @var SearchEngine Engine under test; instantiated in setUp() and unset in tearDown()
 */
protected $search;
|
||
|
||
/**
 * Checks for database type & version.
 * Will skip current test if DB does not support search.
 *
 * When the database is supported, the matching search engine class is
 * looked up through SearchEngineFactory, stored in $wgSearchType and
 * instantiated into $this->search.
 */
protected function setUp() {
	parent::setUp();

	// Search tests require MySQL or SQLite with FTS
	$dbType = $this->db->getType();
	// Strict comparison: a module-less SQLite must never be mistaken for FTS3
	// through type juggling.
	$dbSupported = ( $dbType === 'mysql' )
		|| ( $dbType === 'sqlite' && $this->db->getFulltextSearchModule() === 'FTS3' );

	if ( !$dbSupported ) {
		$this->markTestSkipped( "MySQL or SQLite with FTS3 only" );
	}

	$searchType = SearchEngineFactory::getSearchEngineClass( $this->db );
	$this->setMwGlobals( [
		'wgSearchType' => $searchType
	] );

	$this->search = new $searchType( $this->db );
}
|
||
|
||
/**
 * Drops the search engine instance created in setUp() before delegating
 * to the parent tear-down.
 */
protected function tearDown() {
	unset( $this->search );

	parent::tearDown();
}
|
||
|
||
/**
 * Inserts the fixture pages used by the tests of this class.
 *
 * Nothing is inserted when the main namespace does not hold wikitext.
 *
 * The "Half*" pages contain ASCII (half-width) text, whereas the "Full*"
 * pages contain the full-width forms of the same characters; testFullWidth()
 * relies on both variants being present.
 */
public function addDBDataOnce() {
	if ( !$this->isWikitextNS( NS_MAIN ) ) {
		// @todo cover the case of non-wikitext content in the main namespace
		return;
	}

	// Reset the search type back to default - some extensions may have
	// overridden it.
	$this->setMwGlobals( [ 'wgSearchType' => null ] );

	$this->insertPage( 'Not_Main_Page', 'This is not a main page' );
	$this->insertPage(
		'Talk:Not_Main_Page',
		'This is not a talk page to the main page, see [[smithee]]'
	);
	$this->insertPage( 'Smithee', 'A smithee is one who smiths. See also [[Alan Smithee]]' );
	$this->insertPage( 'Talk:Smithee', 'This article sucks.' );
	$this->insertPage( 'Unrelated_page', 'Nothing in this page is about the S word.' );
	$this->insertPage( 'Another_page', 'This page also is unrelated.' );
	$this->insertPage( 'Help:Help', 'Help me!' );
	$this->insertPage( 'Thppt', 'Blah blah' );
	$this->insertPage( 'Alan_Smithee', 'yum' );
	$this->insertPage( 'Pages', 'are\'food' );

	// Half-width (ASCII) and full-width variants of the same content.
	$this->insertPage( 'HalfOneUp', 'AZ' );
	$this->insertPage( 'FullOneUp', 'ＡＺ' );
	$this->insertPage( 'HalfTwoLow', 'az' );
	$this->insertPage( 'FullTwoLow', 'ａｚ' );
	$this->insertPage( 'HalfNumbers', '1234567890' );
	$this->insertPage( 'FullNumbers', '１２３４５６７８９０' );

	$this->insertPage( 'DomainName', 'example.com' );
}
|
||
|
||
/**
 * Drains a search result set into a sorted list of prefixed page titles.
 *
 * Marks the current test incomplete when the main namespace does not hold
 * wikitext. The result set is freed once it has been fully read.
 *
 * @param object $results Result set returned by searchText()/searchTitle();
 *  must expose next() and free() (presumably a SearchResultSet - verify)
 * @return string[] Prefixed titles of all matches, sorted with sort()
 */
protected function fetchIds( $results ) {
	if ( !$this->isWikitextNS( NS_MAIN ) ) {
		$this->markTestIncomplete( __CLASS__ . " does no yet support non-wikitext content "
			. "in the main namespace" );
	}
	$this->assertTrue( is_object( $results ) );

	$matches = [];
	// next() yields a falsy value once the set is exhausted
	$row = $results->next();
	while ( $row ) {
		$matches[] = $row->getTitle()->getPrefixedText();
		$row = $results->next();
	}
	$results->free();

	# Search is not guaranteed to return results in a certain order;
	# sort the titles so we can simply compare that we received
	# the expected matches.
	sort( $matches );

	return $matches;
}
|
||
|
||
/**
 * Half-width and full-width spellings of the same query must return the
 * same set of pages, whatever width the indexed content was written in.
 */
public function testFullWidth() {
	$expected = [ 'FullOneUp', 'FullTwoLow', 'HalfOneUp', 'HalfTwoLow' ];

	// Query => assertion message. The last two queries are written with
	// full-width characters (U+FF21/U+FF3A and U+FF41/U+FF5A).
	$queries = [
		'AZ' => "Search for normalized from Half-width Upper",
		'az' => "Search for normalized from Half-width Lower",
		'ＡＺ' => "Search for normalized from Full-width Upper",
		'ａｚ' => "Search for normalized from Full-width Lower",
	];

	foreach ( $queries as $query => $message ) {
		$this->assertEquals(
			$expected,
			$this->fetchIds( $this->search->searchText( $query ) ),
			$message );
	}
}
|
||
|
||
/**
 * A single-word text search with the engine's default settings must
 * return only the 'Smithee' page.
 */
public function testTextSearch() {
	$this->assertEquals(
		[ 'Smithee' ],
		$this->fetchIds( $this->search->searchText( 'smithee' ) ),
		"Plain search" );
}
|
||
|
||
/**
 * Checks trailing-wildcard ("*") queries, alone and combined with plain
 * terms or a quoted phrase.
 */
public function testWildcardSearch() {
	$res = $this->search->searchText( 'smith*' );
	$this->assertEquals(
		[ 'Smithee' ],
		$this->fetchIds( $res ),
		"Search with wildcards" );

	$res = $this->search->searchText( 'smithson*' );
	$this->assertEquals(
		[],
		$this->fetchIds( $res ),
		"Search with wildcards must not find unrelated articles" );

	$res = $this->search->searchText( 'smith* smithee' );
	$this->assertEquals(
		[ 'Smithee' ],
		$this->fetchIds( $res ),
		"Search with wildcards can be combined with simple terms" );

	$res = $this->search->searchText( 'smith* "one who smiths"' );
	$this->assertEquals(
		[ 'Smithee' ],
		$this->fetchIds( $res ),
		"Search with wildcards can be combined with phrase search" );
}
|
||
|
||
/**
 * Checks quoted phrase queries: the words must appear adjacent and in the
 * given order for a page to match.
 */
public function testPhraseSearch() {
	$res = $this->search->searchText( '"smithee is one who smiths"' );
	$this->assertEquals(
		[ 'Smithee' ],
		$this->fetchIds( $res ),
		"Search a phrase" );

	// Same words with "one" dropped: no longer adjacent in the page text
	$res = $this->search->searchText( '"smithee is who smiths"' );
	$this->assertEquals(
		[],
		$this->fetchIds( $res ),
		"Phrase search is not sloppy, search terms must be adjacent" );

	// Same words, first two swapped
	$res = $this->search->searchText( '"is smithee one who smiths"' );
	$this->assertEquals(
		[],
		$this->fetchIds( $res ),
		"Phrase search is ordered" );
}
|
||
|
||
/**
 * The snippet of a phrase-search match must wrap the whole phrase in a
 * single 'searchmatch' span.
 */
public function testPhraseSearchHighlight() {
	$phrase = "smithee is one who smiths";
	$res = $this->search->searchText( "\"$phrase\"" );
	$match = $res->next();
	// Fail cleanly instead of calling a method on a non-object when the
	// phrase search returned nothing.
	$this->assertInstanceOf( 'SearchResult', $match,
		"Phrase search must return a result to highlight" );

	$snippet = "A <span class='searchmatch'>" . $phrase . "</span>";
	$this->assertStringStartsWith( $snippet,
		$match->getTextSnippet( $res->termMatches() ),
		"Highlight a phrase search" );
}
|
||
|
||
/**
 * A text search restricted to namespaces 0, 1 and 4 must also return the
 * talk page whose text mentions the term.
 */
public function testTextPowerSearch() {
	$this->search->setNamespaces( [ 0, 1, 4 ] );
	$this->assertEquals(
		[
			'Smithee',
			'Talk:Not Main Page',
		],
		$this->fetchIds( $this->search->searchText( 'smithee' ) ),
		"Power search" );
}
|
||
|
||
/**
 * A title search with the engine's default settings must return the two
 * pages whose title contains the term.
 */
public function testTitleSearch() {
	$this->assertEquals(
		[
			'Alan Smithee',
			'Smithee',
		],
		$this->fetchIds( $this->search->searchTitle( 'smithee' ) ),
		"Title search" );
}
|
||
|
||
/**
 * A title search restricted to namespaces 0, 1 and 4 must additionally
 * return the matching talk page.
 */
public function testTextTitlePowerSearch() {
	$this->search->setNamespaces( [ 0, 1, 4 ] );
	$this->assertEquals(
		[
			'Alan Smithee',
			'Smithee',
			'Talk:Smithee',
		],
		$this->fetchIds( $this->search->searchTitle( 'smithee' ) ),
		"Title power search" );
}
|
||
|
||
/**
 * Checks that getSearchIndexFields() returns the 'language' and 'category'
 * fields and picks up a field added through the SearchIndexFields hook.
 *
 * @covers SearchEngine::getSearchIndexFields
 */
public function testSearchIndexFields() {
	/**
	 * @var $mockEngine SearchEngine
	 */
	$mockEngine = $this->getMockBuilder( 'SearchEngine' )
		->setMethods( [ 'makeSearchFieldMapping' ] )->getMock();

	// Builds a mocked field definition whose mapping echoes its name and
	// type, and whose merge() returns the field itself.
	$mockFieldBuilder = function ( $name, $type ) {
		$mockField =
			$this->getMockBuilder( 'SearchIndexFieldDefinition' )->setConstructorArgs( [
				$name,
				$type
			] )->getMock();

		$mockField->expects( $this->any() )->method( 'getMapping' )->willReturn( [
			'testData' => 'test',
			'name' => $name,
			'type' => $type,
		] );

		$mockField->expects( $this->any() )
			->method( 'merge' )
			->willReturn( $mockField );

		return $mockField;
	};

	$mockEngine->expects( $this->atLeastOnce() )
		->method( 'makeSearchFieldMapping' )
		->willReturnCallback( $mockFieldBuilder );

	// Not using mock since PHPUnit mocks do not work properly with references in params
	$this->setTemporaryHook( 'SearchIndexFields',
		function ( &$fields, SearchEngine $engine ) use ( $mockFieldBuilder ) {
			$fields['testField'] =
				$mockFieldBuilder( "testField", SearchIndexField::INDEX_TYPE_TEXT );
			return true;
		} );

	$fields = $mockEngine->getSearchIndexFields();
	$this->assertArrayHasKey( 'language', $fields );
	$this->assertArrayHasKey( 'category', $fields );
	$this->assertInstanceOf( 'SearchIndexField', $fields['testField'] );

	$mapping = $fields['testField']->getMapping( $mockEngine );
	$this->assertArrayHasKey( 'testData', $mapping );
	$this->assertEquals( 'test', $mapping['testData'] );
}
|
||
|
||
/**
 * Hook-style handler that adds a 'testField' text field to $fields.
 *
 * NOTE(review): nothing in the visible part of this file calls this method;
 * testSearchIndexFields() registers an equivalent closure instead. Confirm
 * there is no external user before removing it.
 *
 * @param callable $mockFieldBuilder Called as ( $name, $type ) to build the field
 * @param array &$fields Field list to extend
 * @param SearchEngine $engine Unused
 * @return bool Always true
 */
public function hookSearchIndexFields( $mockFieldBuilder, &$fields, SearchEngine $engine ) {
	$fields['testField'] = $mockFieldBuilder( "testField", SearchIndexField::INDEX_TYPE_TEXT );
	return true;
}
|
||
|
||
/**
 * Checks that augmentSearchResults() attaches the data produced by the
 * set and row augmentors registered in addAugmentors() to every result.
 */
public function testAugmentorSearch() {
	$this->search->setNamespaces( [ 0, 1, 4 ] );
	$resultSet = $this->search->searchText( 'smithee' );
	// Not using mock since PHPUnit mocks do not work properly with references in params
	$this->mergeMwGlobalArrayValue( 'wgHooks',
		[ 'SearchResultsAugment' => [ [ $this, 'addAugmentors' ] ] ] );
	$this->search->augmentSearchResults( $resultSet );

	for ( $result = $resultSet->next(); $result; $result = $resultSet->next() ) {
		$id = $result->getTitle()->getArticleID();
		// Must mirror the strings built by the callbacks in addAugmentors()
		$augmentData = "Result:$id:" . $result->getTitle()->getText();
		$augmentData2 = "Result2:$id:" . $result->getTitle()->getText();
		$this->assertEquals( [ 'testSet' => $augmentData, 'testRow' => $augmentData2 ],
			$result->getExtensionData() );
	}
}
|
||
|
||
/**
 * SearchResultsAugment hook handler used by testAugmentorSearch().
 *
 * Registers a mocked set augmentor under 'testSet' (augmentAll() expected
 * exactly once) and a mocked row augmentor under 'testRow' (augment()
 * expected exactly twice).
 *
 * @param array &$setAugmentors Set augmentors, keyed by name
 * @param array &$rowAugmentors Row augmentors, keyed by name
 */
public function addAugmentors( &$setAugmentors, &$rowAugmentors ) {
	$setAugmentor = $this->createMock( 'ResultSetAugmentor' );
	$setAugmentor->expects( $this->once() )
		->method( 'augmentAll' )
		->willReturnCallback( function ( SearchResultSet $resultSet ) {
			$data = [];
			for ( $result = $resultSet->next(); $result; $result = $resultSet->next() ) {
				$id = $result->getTitle()->getArticleID();
				$data[$id] = "Result:$id:" . $result->getTitle()->getText();
			}
			// Leave the set readable for whoever iterates it next
			$resultSet->rewind();
			return $data;
		} );
	$setAugmentors['testSet'] = $setAugmentor;

	$rowAugmentor = $this->createMock( 'ResultAugmentor' );
	$rowAugmentor->expects( $this->exactly( 2 ) )
		->method( 'augment' )
		->willReturnCallback( function ( SearchResult $result ) {
			$id = $result->getTitle()->getArticleID();
			return "Result2:$id:" . $result->getTitle()->getText();
		} );
	$rowAugmentors['testRow'] = $rowAugmentor;
}
|
||
}
|