Fix highlighting for phrase queries

I think the bug was introduced during a cleanup in Iaabc10c.
I don't think that " should be part of the legalSearchChars at query
time, it seems to break the regex.
The strategy here is to distinguish legalSearchChars used query time vs
the ones used at index time by introducing:
SearchEngine::legalSearchCharsForUpdate()

Bug: T167798
Change-Id: I61dc53665e26d3c6c48caed78dd3bbde9a33def7
This commit is contained in:
David Causse 2017-06-22 14:48:14 +02:00
parent b2b983a3bd
commit f230f5dcc7
6 changed files with 37 additions and 9 deletions

View file

@ -124,7 +124,7 @@ class SearchUpdate implements DeferrableUpdate {
# Language-specific strip/conversion
$text = $wgContLang->normalizeForSearch( $text );
$se = $se ?: MediaWikiServices::getInstance()->newSearchEngine();
$lc = $se->legalSearchChars() . '&#;';
$lc = $se->legalSearchCharsForUpdate() . '&#;';
$text = preg_replace( "/<\\/?\\s*[A-Za-z][^>]*?>/",
' ', $wgContLang->lc( " " . $text . " " ) ); # Strip HTML markup
@ -207,7 +207,7 @@ class SearchUpdate implements DeferrableUpdate {
$ns = $this->title->getNamespace();
$title = $this->title->getText();
$lc = $search->legalSearchChars() . '&#;';
$lc = $search->legalSearchCharsForUpdate() . '&#;';
$t = $wgContLang->normalizeForSearch( $title );
$t = preg_replace( "/[^{$lc}]+/", ' ', $t );
$t = $wgContLang->lc( $t );

View file

@ -206,7 +206,7 @@ abstract class SearchEngine {
}
/**
* Get chars legal for search.
* Get chars legal for search (at query time).
* NOTE: usage as static is deprecated and preserved only as BC measure
* @return string
*/
@ -214,6 +214,16 @@ abstract class SearchEngine {
return "A-Za-z_'.0-9\\x80-\\xFF\\-";
}
/**
* Get chars legal for search (at index time).
*
* @since 1.30
* @return string
*/
public function legalSearchCharsForUpdate() {
return static::legalSearchChars();
}
/**
* Set the maximum number of results to return
* and how many to skip before returning the first.

View file

@ -149,8 +149,8 @@ class SearchMySQL extends SearchDatabase {
return $regex;
}
public static function legalSearchChars() {
return "\"*" . parent::legalSearchChars();
public function legalSearchCharsForUpdate() {
return "\"*" . parent::legalSearchCharsForUpdate();
}
/**

View file

@ -266,7 +266,7 @@ class SearchOracle extends SearchDatabase {
[] );
}
public static function legalSearchChars() {
return "\"" . parent::legalSearchChars();
public function legalSearchCharsForUpdate() {
return "\"" . parent::legalSearchCharsForUpdate();
}
}

View file

@ -141,8 +141,8 @@ class SearchSqlite extends SearchDatabase {
return $regex;
}
public static function legalSearchChars() {
return "\"*" . parent::legalSearchChars();
public function legalSearchCharsForUpdate() {
return "\"*" . parent::legalSearchCharsForUpdate();
}
/**

View file

@ -124,6 +124,24 @@ class SearchEngineTest extends MediaWikiLangTestCase {
"Plain search failed" );
}
public function testPhraseSearch() {
$res = $this->search->searchText( '"smithee is one who smiths"' );
$this->assertEquals(
[ 'Smithee' ],
$this->fetchIds( $res ),
"Phrase search failed" );
$res = $this->search->searchText( '"smithee is one who smiths"' );
$match = $res->next();
$terms = [ 'smithee', 'is', 'one', 'who', 'smiths' ];
$snippet = "";
foreach ( $terms as $term ) {
$snippet .= " <span class='searchmatch'>" . $term . "</span>";
}
$this->assertRegexp( '/' . preg_quote( $snippet, '/' ) . '/',
$match->getTextSnippet( $res->termMatches() ),
"Phrase search failed to highlight" );
}
public function testTextPowerSearch() {
$this->search->setNamespaces( [ 0, 1, 4 ] );
$this->assertEquals(