Silently drop unknown titles in completion search
This mimics how full text works by silenty dropping results returned from search that no longer exist. This could be because the search index is slightly out of sync with reality, or the search engine could simply be broken. Only silent from the users perspective. We maintain a count in statsd of the number of titles dropped. This can be monitored over time to recognize any increases. Bug: T115756 Change-Id: I2f29d73e258cd448a14d35a2b4902a4fb6f61c68
This commit is contained in:
parent
2a43939ffb
commit
83bae78c3a
7 changed files with 109 additions and 31 deletions
|
|
@ -675,13 +675,20 @@ abstract class SearchEngine {
|
|||
|
||||
$search = trim( $search );
|
||||
// preload the titles with LinkBatch
|
||||
$titles = $suggestions->map( function ( SearchSuggestion $sugg ) {
|
||||
$lb = new LinkBatch( $suggestions->map( function ( SearchSuggestion $sugg ) {
|
||||
return $sugg->getSuggestedTitle();
|
||||
} );
|
||||
$lb = new LinkBatch( $titles );
|
||||
} ) );
|
||||
$lb->setCaller( __METHOD__ );
|
||||
$lb->execute();
|
||||
|
||||
$diff = $suggestions->filter( function ( SearchSuggestion $sugg ) {
|
||||
return $sugg->getSuggestedTitle()->isKnown();
|
||||
} );
|
||||
if ( $diff > 0 ) {
|
||||
MediaWikiServices::getInstance()->getStatsdDataFactory()
|
||||
->updateCount( 'search.completion.missing', $diff );
|
||||
}
|
||||
|
||||
$results = $suggestions->map( function ( SearchSuggestion $sugg ) {
|
||||
return $sugg->getSuggestedTitle()->getPrefixedText();
|
||||
} );
|
||||
|
|
|
|||
|
|
@ -87,6 +87,18 @@ class SearchSuggestionSet {
|
|||
return array_map( $callback, $this->suggestions );
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter the suggestions array
|
||||
* @param callback $callback Callable accepting single SearchSuggestion
|
||||
* instance returning bool false to remove the item.
|
||||
* @return int The number of suggestions removed
|
||||
*/
|
||||
public function filter( $callback ) {
|
||||
$before = count( $this->suggestions );
|
||||
$this->suggestions = array_values( array_filter( $this->suggestions, $callback ) );
|
||||
return $before - count( $this->suggestions );
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new suggestion at the end.
|
||||
* If the score of the new suggestion is greater than the worst one,
|
||||
|
|
|
|||
|
|
@ -7,6 +7,23 @@
|
|||
* @covers ApiQueryPrefixSearch
|
||||
*/
|
||||
class ApiQueryPrefixSearchTest extends ApiTestCase {
|
||||
const TEST_QUERY = 'unittest';
|
||||
|
||||
public function setUp() {
|
||||
parent::setUp();
|
||||
$this->setMwGlobals( [
|
||||
'wgSearchType' => MockCompletionSearchEngine::class,
|
||||
] );
|
||||
MockCompletionSearchEngine::clearMockResults();
|
||||
$results = [];
|
||||
foreach ( range( 0, 10 ) as $i ) {
|
||||
$title = "Search_Result_$i";
|
||||
$results[] = $title;
|
||||
$this->editPage( $title, 'hi there' );
|
||||
}
|
||||
MockCompletionSearchEngine::addMockResults( self::TEST_QUERY, $results );
|
||||
}
|
||||
|
||||
public function offsetContinueProvider() {
|
||||
return [
|
||||
'no offset' => [ 2, 2, 0, 2 ],
|
||||
|
|
@ -20,11 +37,10 @@ class ApiQueryPrefixSearchTest extends ApiTestCase {
|
|||
* @dataProvider offsetContinueProvider
|
||||
*/
|
||||
public function testOffsetContinue( $expectedOffset, $expectedResults, $offset, $limit ) {
|
||||
$this->registerMockSearchEngine();
|
||||
$response = $this->doApiRequest( [
|
||||
'action' => 'query',
|
||||
'list' => 'prefixsearch',
|
||||
'pssearch' => 'example query terms',
|
||||
'pssearch' => self::TEST_QUERY,
|
||||
'psoffset' => $offset,
|
||||
'pslimit' => $limit,
|
||||
] );
|
||||
|
|
@ -39,10 +55,4 @@ class ApiQueryPrefixSearchTest extends ApiTestCase {
|
|||
$this->assertEquals( $expectedOffset, $result['continue']['psoffset'] );
|
||||
}
|
||||
}
|
||||
|
||||
private function registerMockSearchEngine() {
|
||||
$this->setMwGlobals( [
|
||||
'wgSearchType' => MockCompletionSearchEngine::class,
|
||||
] );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,6 +45,9 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
|
|||
$this->insertPage( 'Talk:Example' );
|
||||
|
||||
$this->insertPage( 'User:Example' );
|
||||
$this->insertPage( 'Barcelona' );
|
||||
$this->insertPage( 'Barbara' );
|
||||
$this->insertPage( 'External' );
|
||||
}
|
||||
|
||||
protected function setUp() {
|
||||
|
|
@ -238,7 +241,7 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
|
|||
],
|
||||
] ],
|
||||
[ [
|
||||
'Exact match not on top (T72958)',
|
||||
'Exact match not in first result should be moved to the first result (T72958)',
|
||||
'provision' => [
|
||||
'Barcelona',
|
||||
'Bar',
|
||||
|
|
@ -252,7 +255,7 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
|
|||
],
|
||||
] ],
|
||||
[ [
|
||||
'Exact match missing (T72958)',
|
||||
'Exact match missing from results should be added as first result (T72958)',
|
||||
'provision' => [
|
||||
'Barcelona',
|
||||
'Barbara',
|
||||
|
|
@ -266,7 +269,7 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
|
|||
],
|
||||
] ],
|
||||
[ [
|
||||
'Exact match missing and not existing',
|
||||
'Exact match missing and not existing pages should be dropped',
|
||||
'provision' => [
|
||||
'Exile',
|
||||
'Exist',
|
||||
|
|
@ -274,8 +277,6 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
|
|||
],
|
||||
'query' => 'Ex',
|
||||
'results' => [
|
||||
'Exile',
|
||||
'Exist',
|
||||
'External',
|
||||
],
|
||||
] ],
|
||||
|
|
|
|||
|
|
@ -307,4 +307,37 @@ class SearchEngineTest extends MediaWikiLangTestCase {
|
|||
} );
|
||||
$rowAugmentors['testRow'] = $rowAugmentor;
|
||||
}
|
||||
|
||||
public function testFiltersMissing() {
|
||||
$availableResults = [];
|
||||
foreach ( range( 0, 11 ) as $i ) {
|
||||
$title = "Search_Result_$i";
|
||||
$availableResults[] = $title;
|
||||
// pages not created must be filtered
|
||||
if ( $i % 2 == 0 ) {
|
||||
$this->editPage( $title );
|
||||
}
|
||||
}
|
||||
MockCompletionSearchEngine::addMockResults( 'foo', $availableResults );
|
||||
|
||||
$engine = new MockCompletionSearchEngine();
|
||||
$engine->setLimitOffset( 10, 0 );
|
||||
$results = $engine->completionSearch( 'foo' );
|
||||
$this->assertEquals( 5, $results->getSize() );
|
||||
$this->assertTrue( $results->hasMoreResults() );
|
||||
|
||||
$engine->setLimitOffset( 10, 10 );
|
||||
$results = $engine->completionSearch( 'foo' );
|
||||
$this->assertEquals( 1, $results->getSize() );
|
||||
$this->assertFalse( $results->hasMoreResults() );
|
||||
}
|
||||
|
||||
private function editPage( $title ) {
|
||||
$page = WikiPage::factory( Title::newFromText( $title ) );
|
||||
$page->doEditContent(
|
||||
new WikitextContent( 'UTContent' ),
|
||||
'UTPageSummary',
|
||||
EDIT_NEW | EDIT_SUPPRESS_RC
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,27 +1,42 @@
|
|||
<?php
|
||||
|
||||
use MediaWiki\MediaWikiServices;
|
||||
|
||||
/**
|
||||
* SearchEngine implementation for returning mocked completion search results.
|
||||
*/
|
||||
class MockCompletionSearchEngine extends SearchEngine {
|
||||
private static $completionSearchResult = [];
|
||||
/** @var string[][] */
|
||||
private static $results = [];
|
||||
|
||||
/**
|
||||
* Reset any mocked results
|
||||
*/
|
||||
public static function clearMockResults() {
|
||||
self::$results = [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Allows returning arbitrary lists of titles for completion search.
|
||||
* Provided results will be sliced based on offset/limit of query.
|
||||
*
|
||||
* For results to exit the search engine they must pass Title::isKnown.
|
||||
* Injecting into link cache is not enough, as LinkBatch will mark them
|
||||
* bad, they need to be injected into the DB.
|
||||
*
|
||||
* @param string $query Search term as seen in completionSearchBackend
|
||||
* @param string[] $result List of titles to respond to query with
|
||||
*/
|
||||
public static function addMockResults( $query, array $result ) {
|
||||
// Leading : ensures we don't treat another : as a namespace separator
|
||||
$normalized = Title::newFromText( ":$query" )->getText();
|
||||
self::$results[$normalized] = $result;
|
||||
}
|
||||
|
||||
public function completionSearchBackend( $search ) {
|
||||
if ( self::$completionSearchResult == null ) {
|
||||
self::$completionSearchResult = [];
|
||||
// TODO: Or does this have to be setup per-test?
|
||||
$lc = MediaWikiServices::getInstance()->getLinkCache();
|
||||
foreach ( range( 0, 10 ) as $i ) {
|
||||
$dbkey = "Search_Result_$i";
|
||||
$lc->addGoodLinkObj( 6543 + $i, new TitleValue( NS_MAIN, $dbkey ) );
|
||||
self::$completionSearchResult[] = "Search Result $i";
|
||||
}
|
||||
if ( !isset( self::$results[$search] ) ) {
|
||||
return SearchSuggestionSet::emptySuggestionSet();
|
||||
}
|
||||
$results = array_slice( self::$completionSearchResult, $this->offset, $this->limit );
|
||||
$results = array_slice( self::$results[$search], $this->offset, $this->limit );
|
||||
|
||||
return SearchSuggestionSet::fromStrings( $results );
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ class MockSearchEngine extends SearchEngine {
|
|||
self::$results[$query] = $results;
|
||||
$lc = MediaWikiServices::getInstance()->getLinkCache();
|
||||
foreach ( $results as $result ) {
|
||||
// TODO: better page ids?
|
||||
// TODO: better page ids? Does it matter?
|
||||
$lc->addGoodLinkObj( mt_rand(), $result->getTitle() );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue