Silently drop unknown titles in completion search

This mimics how full text works by silenty dropping results returned from
search that no longer exist. This could be because the search index is slightly
out of sync with reality, or the search engine could simply be broken.

Only silent from the users perspective. We maintain a count in statsd of
the number of titles dropped. This can be monitored over time to
recognize any increases.

Bug: T115756
Change-Id: I2f29d73e258cd448a14d35a2b4902a4fb6f61c68
This commit is contained in:
Erik Bernhardson 2017-12-13 17:03:20 -08:00
parent 2a43939ffb
commit 83bae78c3a
7 changed files with 109 additions and 31 deletions

View file

@ -675,13 +675,20 @@ abstract class SearchEngine {
$search = trim( $search );
// preload the titles with LinkBatch
$titles = $suggestions->map( function ( SearchSuggestion $sugg ) {
$lb = new LinkBatch( $suggestions->map( function ( SearchSuggestion $sugg ) {
return $sugg->getSuggestedTitle();
} );
$lb = new LinkBatch( $titles );
} ) );
$lb->setCaller( __METHOD__ );
$lb->execute();
$diff = $suggestions->filter( function ( SearchSuggestion $sugg ) {
return $sugg->getSuggestedTitle()->isKnown();
} );
if ( $diff > 0 ) {
MediaWikiServices::getInstance()->getStatsdDataFactory()
->updateCount( 'search.completion.missing', $diff );
}
$results = $suggestions->map( function ( SearchSuggestion $sugg ) {
return $sugg->getSuggestedTitle()->getPrefixedText();
} );

View file

@ -87,6 +87,18 @@ class SearchSuggestionSet {
return array_map( $callback, $this->suggestions );
}
/**
* Filter the suggestions array
* @param callback $callback Callable accepting single SearchSuggestion
* instance returning bool false to remove the item.
* @return int The number of suggestions removed
*/
public function filter( $callback ) {
$before = count( $this->suggestions );
$this->suggestions = array_values( array_filter( $this->suggestions, $callback ) );
return $before - count( $this->suggestions );
}
/**
* Add a new suggestion at the end.
* If the score of the new suggestion is greater than the worst one,

View file

@ -7,6 +7,23 @@
* @covers ApiQueryPrefixSearch
*/
class ApiQueryPrefixSearchTest extends ApiTestCase {
const TEST_QUERY = 'unittest';
public function setUp() {
parent::setUp();
$this->setMwGlobals( [
'wgSearchType' => MockCompletionSearchEngine::class,
] );
MockCompletionSearchEngine::clearMockResults();
$results = [];
foreach ( range( 0, 10 ) as $i ) {
$title = "Search_Result_$i";
$results[] = $title;
$this->editPage( $title, 'hi there' );
}
MockCompletionSearchEngine::addMockResults( self::TEST_QUERY, $results );
}
public function offsetContinueProvider() {
return [
'no offset' => [ 2, 2, 0, 2 ],
@ -20,11 +37,10 @@ class ApiQueryPrefixSearchTest extends ApiTestCase {
* @dataProvider offsetContinueProvider
*/
public function testOffsetContinue( $expectedOffset, $expectedResults, $offset, $limit ) {
$this->registerMockSearchEngine();
$response = $this->doApiRequest( [
'action' => 'query',
'list' => 'prefixsearch',
'pssearch' => 'example query terms',
'pssearch' => self::TEST_QUERY,
'psoffset' => $offset,
'pslimit' => $limit,
] );
@ -39,10 +55,4 @@ class ApiQueryPrefixSearchTest extends ApiTestCase {
$this->assertEquals( $expectedOffset, $result['continue']['psoffset'] );
}
}
private function registerMockSearchEngine() {
$this->setMwGlobals( [
'wgSearchType' => MockCompletionSearchEngine::class,
] );
}
}

View file

@ -45,6 +45,9 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
$this->insertPage( 'Talk:Example' );
$this->insertPage( 'User:Example' );
$this->insertPage( 'Barcelona' );
$this->insertPage( 'Barbara' );
$this->insertPage( 'External' );
}
protected function setUp() {
@ -238,7 +241,7 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
],
] ],
[ [
'Exact match not on top (T72958)',
'Exact match not in first result should be moved to the first result (T72958)',
'provision' => [
'Barcelona',
'Bar',
@ -252,7 +255,7 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
],
] ],
[ [
'Exact match missing (T72958)',
'Exact match missing from results should be added as first result (T72958)',
'provision' => [
'Barcelona',
'Barbara',
@ -266,7 +269,7 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
],
] ],
[ [
'Exact match missing and not existing',
'Exact match missing and not existing pages should be dropped',
'provision' => [
'Exile',
'Exist',
@ -274,8 +277,6 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
],
'query' => 'Ex',
'results' => [
'Exile',
'Exist',
'External',
],
] ],

View file

@ -307,4 +307,37 @@ class SearchEngineTest extends MediaWikiLangTestCase {
} );
$rowAugmentors['testRow'] = $rowAugmentor;
}
public function testFiltersMissing() {
$availableResults = [];
foreach ( range( 0, 11 ) as $i ) {
$title = "Search_Result_$i";
$availableResults[] = $title;
// pages not created must be filtered
if ( $i % 2 == 0 ) {
$this->editPage( $title );
}
}
MockCompletionSearchEngine::addMockResults( 'foo', $availableResults );
$engine = new MockCompletionSearchEngine();
$engine->setLimitOffset( 10, 0 );
$results = $engine->completionSearch( 'foo' );
$this->assertEquals( 5, $results->getSize() );
$this->assertTrue( $results->hasMoreResults() );
$engine->setLimitOffset( 10, 10 );
$results = $engine->completionSearch( 'foo' );
$this->assertEquals( 1, $results->getSize() );
$this->assertFalse( $results->hasMoreResults() );
}
private function editPage( $title ) {
$page = WikiPage::factory( Title::newFromText( $title ) );
$page->doEditContent(
new WikitextContent( 'UTContent' ),
'UTPageSummary',
EDIT_NEW | EDIT_SUPPRESS_RC
);
}
}

View file

@ -1,27 +1,42 @@
<?php
use MediaWiki\MediaWikiServices;
/**
* SearchEngine implementation for returning mocked completion search results.
*/
class MockCompletionSearchEngine extends SearchEngine {
private static $completionSearchResult = [];
/** @var string[][] */
private static $results = [];
/**
* Reset any mocked results
*/
public static function clearMockResults() {
self::$results = [];
}
/**
* Allows returning arbitrary lists of titles for completion search.
* Provided results will be sliced based on offset/limit of query.
*
* For results to exit the search engine they must pass Title::isKnown.
* Injecting into link cache is not enough, as LinkBatch will mark them
* bad, they need to be injected into the DB.
*
* @param string $query Search term as seen in completionSearchBackend
* @param string[] $result List of titles to respond to query with
*/
public static function addMockResults( $query, array $result ) {
// Leading : ensures we don't treat another : as a namespace separator
$normalized = Title::newFromText( ":$query" )->getText();
self::$results[$normalized] = $result;
}
public function completionSearchBackend( $search ) {
if ( self::$completionSearchResult == null ) {
self::$completionSearchResult = [];
// TODO: Or does this have to be setup per-test?
$lc = MediaWikiServices::getInstance()->getLinkCache();
foreach ( range( 0, 10 ) as $i ) {
$dbkey = "Search_Result_$i";
$lc->addGoodLinkObj( 6543 + $i, new TitleValue( NS_MAIN, $dbkey ) );
self::$completionSearchResult[] = "Search Result $i";
}
if ( !isset( self::$results[$search] ) ) {
return SearchSuggestionSet::emptySuggestionSet();
}
$results = array_slice( self::$completionSearchResult, $this->offset, $this->limit );
$results = array_slice( self::$results[$search], $this->offset, $this->limit );
return SearchSuggestionSet::fromStrings( $results );
}
}

View file

@ -22,7 +22,7 @@ class MockSearchEngine extends SearchEngine {
self::$results[$query] = $results;
$lc = MediaWikiServices::getInstance()->getLinkCache();
foreach ( $results as $result ) {
// TODO: better page ids?
// TODO: better page ids? Does it matter?
$lc->addGoodLinkObj( mt_rand(), $result->getTitle() );
}
}