Fixed r61214: moved MySQL munging to SearchEngine, updated calls. Can we kill $doStrip now?
This commit is contained in:
parent
42df7b3112
commit
eff719b75d
5 changed files with 95 additions and 101 deletions
|
|
@ -2366,18 +2366,6 @@ abstract class DatabaseBase {
|
|||
return "SearchMySQL";
|
||||
}
|
||||
|
||||
/**
 * Hook for database-specific munging of text that is about to be searched
 * for or written to the search index.
 *
 * This base implementation is a no-op: the input comes back untouched.
 * Derived classes may override it to apply their own conversions.
 *
 * @param $string string: Text to prepare for the search backend
 * @return string The same text, unmodified
 */
public function stripForSearch( $string ) {
	// Nothing to convert at this level.
	return $string;
}
|
||||
|
||||
/**
|
||||
* Allow or deny "big selects" for this session only. This is done by setting
|
||||
* the sql_big_selects session variable.
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* @see Database
|
||||
*/
|
||||
class DatabaseMysql extends DatabaseBase {
|
||||
static $mMinSearchLength;
|
||||
|
||||
function getType() {
|
||||
return 'mysql';
|
||||
}
|
||||
|
|
@ -368,84 +366,6 @@ class DatabaseMysql extends DatabaseBase {
|
|||
public function unlockTables( $method ) {
	// Issue UNLOCK TABLES on this connection; $method is handed to query()
	// as its second argument.
	$sql = "UNLOCK TABLES";
	$this->query( $sql, $method );
}
|
||||
|
||||
/**
 * Converts some characters for MySQL's indexing to grok it correctly,
 * and pads short words to overcome limitations.
 *
 * Three passes are applied in order:
 *  1. the text is lower-cased via $wgContLang and every byte sequence
 *     starting with a byte in 0xc0-0xff is replaced by 'u8' + its hex dump;
 *  2. words shorter than minSearchLength() get a 'u800' suffix;
 *  3. a period between two word characters (or before '*') becomes 'u82e'.
 *
 * @param $string string: Text to be indexed or searched for
 * @return string The armored text
 */
function stripForSearch( $string ) {
	global $wgContLang;

	wfProfileIn( __METHOD__ );

	// MySQL fulltext index doesn't grok utf-8, so we
	// need to fold cases and convert to hex
	$out = preg_replace_callback(
		"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'stripForSearchCallback' ),
		$wgContLang->lc( $string ) );

	// And to add insult to injury, the default indexing
	// ignores short words... Pad them so we can pass them
	// through without reconfiguring the server...
	$minLength = $this->minSearchLength();
	if( $minLength > 1 ) {
		$n = $minLength - 1;
		$out = preg_replace(
			"/\b(\w{1,$n})\b/",
			"$1u800",
			$out );
	}

	// Periods within things like hostnames and IP addresses
	// are also important -- we want a search for "example.com"
	// or "192.168.1.1" to work sanely.
	//
	// MySQL's search seems to ignore them, so you'd match on
	// "example.wikipedia.com" and "192.168.83.1" as well.
	$dotted = preg_replace(
		"/(\w)\.(\w|\*)/u",
		"$1u82e$2",
		$out );
	// With the /u modifier preg_replace() returns null when the subject is
	// not valid UTF-8. The first pass leaves a continuation byte
	// (0x80-0xbf) untouched if no lead byte precedes it, so that can
	// happen here. Keep the text from the earlier passes rather than
	// returning null and losing it all.
	if( !is_null( $dotted ) ) {
		$out = $dotted;
	}

	wfProfileOut( __METHOD__ );

	return $out;
}
|
||||
|
||||
/**
 * preg_replace_callback() handler that armors one matched run of
 * case-folded UTF-8 bytes so it survives MySQL's fulltext search
 * regardless of charset settings.
 *
 * @param $matches array: Regex match; element 1 holds the captured bytes
 * @return string 'u8' followed by the hex dump of the captured bytes
 */
protected function stripForSearchCallback( $matches ) {
	$hex = bin2hex( $matches[1] );
	return 'u8' . $hex;
}
|
||||
|
||||
/**
 * Look up the MySQL server's ft_min_word_len setting so we know
 * whether short words need padding.
 *
 * The value is fetched once and then kept in the static
 * self::$mMinSearchLength; 0 is stored when the variable is not reported.
 *
 * @return int
 */
protected function minSearchLength() {
	// Already looked up: reuse the cached value.
	if( !is_null( self::$mMinSearchLength ) ) {
		return self::$mMinSearchLength;
	}

	// Even though this query is pretty fast, let's not overload the master
	$dbr = wfGetDB( DB_SLAVE );
	$res = $dbr->query( "show global variables like 'ft\\_min\\_word\\_len'" );
	$row = $res->fetchObject();
	$res->free();

	$found = $row && $row->Variable_name == 'ft_min_word_len';
	self::$mMinSearchLength = $found ? intval( $row->Value ) : 0;

	return self::$mMinSearchLength;
}
|
||||
|
||||
public function setBigSelects( $value = true ) {
|
||||
if ( $value === 'default' ) {
|
||||
|
|
|
|||
|
|
@ -47,6 +47,18 @@ class SearchEngine {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
 * Hook for backend-specific conversions of text that is about to be
 * searched for or written to the search index.
 *
 * This base implementation is a no-op: the input comes back untouched.
 * Derived classes may override it to apply their own conversions.
 *
 * @param $string string: Text to prepare for the search backend
 * @return string The same text, unmodified
 */
public function normalizeText( $string ) {
	// Nothing to convert at this level.
	return $string;
}
|
||||
|
||||
/**
|
||||
* Transform search term in cases when parts of the query came as different GET params (when supported)
|
||||
* e.g. for prefix queries: search=test&prefix=Main_Page/Archive -> test prefix:Main Page/Archive
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@
|
|||
*/
|
||||
class SearchMySQL extends SearchEngine {
|
||||
var $strictMatching = true;
|
||||
static $mMinSearchLength;
|
||||
|
||||
/** @todo document */
|
||||
function __construct( $db ) {
|
||||
|
|
@ -91,6 +92,7 @@ class SearchMySQL extends SearchEngine {
|
|||
if( count( $strippedVariants) > 1 )
|
||||
$searchon .= '(';
|
||||
foreach( $strippedVariants as $stripped ) {
|
||||
$stripped = $this->normalizeText( $stripped );
|
||||
if( $nonQuoted && strpos( $stripped, ' ' ) !== false ) {
|
||||
// Hack for Chinese: we need to toss in quotes for
|
||||
// multiple-character phrases since stripForSearch()
|
||||
|
|
@ -292,8 +294,8 @@ class SearchMySQL extends SearchEngine {
|
|||
array( 'si_page' ),
|
||||
array(
|
||||
'si_page' => $id,
|
||||
'si_title' => $title,
|
||||
'si_text' => $text
|
||||
'si_title' => $this->normalizeText( $title ),
|
||||
'si_text' => $this->normalizeText( $text )
|
||||
), __METHOD__ );
|
||||
}
|
||||
|
||||
|
|
@ -308,11 +310,88 @@ class SearchMySQL extends SearchEngine {
|
|||
$dbw = wfGetDB( DB_MASTER );
|
||||
|
||||
$dbw->update( 'searchindex',
|
||||
array( 'si_title' => $title ),
|
||||
array( 'si_title' => $this->normalizeText( $title ) ),
|
||||
array( 'si_page' => $id ),
|
||||
__METHOD__,
|
||||
array( $dbw->lowPriorityOption() ) );
|
||||
}
|
||||
|
||||
/**
 * Converts some characters for MySQL's indexing to grok it correctly,
 * and pads short words to overcome limitations.
 *
 * Three passes are applied in order:
 *  1. the text is lower-cased via $wgContLang and every byte sequence
 *     starting with a byte in 0xc0-0xff is replaced by 'u8' + its hex dump;
 *  2. words shorter than minSearchLength() get a 'u800' suffix;
 *  3. a period between two word characters (or before '*') becomes 'u82e'.
 *
 * @param $string string: Text to be indexed or searched for
 * @return string The armored text
 */
function normalizeText( $string ) {
	global $wgContLang;

	wfProfileIn( __METHOD__ );

	// MySQL fulltext index doesn't grok utf-8, so we
	// need to fold cases and convert to hex
	$out = preg_replace_callback(
		"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'stripForSearchCallback' ),
		$wgContLang->lc( $string ) );

	// And to add insult to injury, the default indexing
	// ignores short words... Pad them so we can pass them
	// through without reconfiguring the server...
	$minLength = $this->minSearchLength();
	if( $minLength > 1 ) {
		$n = $minLength - 1;
		$out = preg_replace(
			"/\b(\w{1,$n})\b/",
			"$1u800",
			$out );
	}

	// Periods within things like hostnames and IP addresses
	// are also important -- we want a search for "example.com"
	// or "192.168.1.1" to work sanely.
	//
	// MySQL's search seems to ignore them, so you'd match on
	// "example.wikipedia.com" and "192.168.83.1" as well.
	$dotted = preg_replace(
		"/(\w)\.(\w|\*)/u",
		"$1u82e$2",
		$out );
	// With the /u modifier preg_replace() returns null when the subject is
	// not valid UTF-8. The first pass leaves a continuation byte
	// (0x80-0xbf) untouched if no lead byte precedes it, so that can
	// happen here. Keep the text from the earlier passes rather than
	// returning null and losing it all.
	if( !is_null( $dotted ) ) {
		$out = $dotted;
	}

	wfProfileOut( __METHOD__ );

	return $out;
}
|
||||
|
||||
/**
 * preg_replace_callback() handler that armors one matched run of
 * case-folded UTF-8 bytes so it survives MySQL's fulltext search
 * regardless of charset settings.
 *
 * @param $matches array: Regex match; element 1 holds the captured bytes
 * @return string 'u8' followed by the hex dump of the captured bytes
 */
protected function stripForSearchCallback( $matches ) {
	$hex = bin2hex( $matches[1] );
	return 'u8' . $hex;
}
|
||||
|
||||
/**
 * Look up the MySQL server's ft_min_word_len setting so we know
 * whether short words need padding.
 *
 * The value is fetched once and then kept in the static
 * self::$mMinSearchLength; 0 is stored when the variable is not reported.
 *
 * @return int
 */
protected function minSearchLength() {
	// Already looked up: reuse the cached value.
	if( !is_null( self::$mMinSearchLength ) ) {
		return self::$mMinSearchLength;
	}

	$dbr = wfGetDB( DB_SLAVE );
	$res = $dbr->query( "SHOW GLOBAL VARIABLES LIKE 'ft\\_min\\_word\\_len'" );
	$row = $res->fetchObject();
	$res->free();

	$found = $row && $row->Variable_name == 'ft_min_word_len';
	self::$mMinSearchLength = $found ? intval( $row->Value ) : 0;

	return self::$mMinSearchLength;
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1695,12 +1695,7 @@ class Language {
|
|||
* @return String
|
||||
*/
|
||||
function stripForSearch( $string, $doStrip = true ) {
|
||||
if ( !$doStrip ) {
|
||||
return $string;
|
||||
}
|
||||
|
||||
$dbr = wfGetDB( DB_SLAVE );
|
||||
return $dbr->stripForSearch( $string );
|
||||
return $string;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in a new issue