Reorganization of SearchEngine for legibility
* Removed 'fuzzy title search'; it has not been maintained and generally produces unexpected and unwanted results * Separated search guts (in SearchEngine) from display/control (in SpecialSearch) * Extracted MySQL3 and MySQL4 variants to subclasses * Added PHPUnit tests for the MySQL3 and MySQL4 search engines, which try to use temporary tables to fiddle in (if configured) * Added comments and other minor cleanup
This commit is contained in:
parent
64829524a8
commit
8ffff3e2e0
11 changed files with 919 additions and 604 deletions
|
|
@ -497,7 +497,6 @@ $wgDebugSquid = false; # Lots of debugging output from SquidUpdate.php
|
|||
|
||||
$wgDisableCounters = false;
|
||||
$wgDisableTextSearch = false;
|
||||
$wgDisableFuzzySearch = false;
|
||||
$wgDisableSearchUpdate = false; # If you've disabled search semi-permanently, this also disables updates to the table. If you ever re-enable, be sure to rebuild the search table.
|
||||
$wgDisableUploads = true; # Uploads have to be specially set up to be secure
|
||||
$wgRemoteUploads = false; # Set to true to enable the upload _link_ while local uploads are disabled. Assumes that the special page link will be bounced to another server where uploads do work.
|
||||
|
|
|
|||
|
|
@ -1,645 +1,203 @@
|
|||
<?php
|
||||
/**
|
||||
* Contain site class
|
||||
* See search.doc
|
||||
* @package MediaWiki
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
define( 'MW_SEARCH_OK', true );
|
||||
define( 'MW_SEARCH_BAD_QUERY', false );
|
||||
|
||||
/**
|
||||
* @todo document
|
||||
* @package MediaWiki
|
||||
*/
|
||||
class SearchEngine {
|
||||
/* private */ var $rawText, $filteredText, $searchTerms;
|
||||
/* private */ var $titleCond, $textCond;
|
||||
|
||||
var $doSearchRedirects = true;
|
||||
var $addToQuery = array();
|
||||
var $namespacesToSearch = array();
|
||||
var $alternateTitle;
|
||||
var $allTitles = false;
|
||||
|
||||
function SearchEngine( $text ) {
|
||||
$this->rawText = trim( $text );
|
||||
|
||||
# We display the query, so let's strip it for safety
|
||||
#
|
||||
global $wgDBmysql4;
|
||||
$lc = SearchEngine::legalSearchChars() . '()';
|
||||
if( $wgDBmysql4 ) {
|
||||
$lc .= "\"~<>*+-";
|
||||
}
|
||||
$this->filteredText = trim( preg_replace( "/[^{$lc}]/", " ", $text ) );
|
||||
$this->searchTerms = array();
|
||||
$this->strictMatching = true; # Google-style, add '+' on all terms
|
||||
|
||||
$this->db =& wfGetDB( DB_SLAVE );
|
||||
}
|
||||
|
||||
var $limit = 10;
|
||||
var $offset = 0;
|
||||
var $searchTerms = array();
|
||||
var $namespaces = array( 0 );
|
||||
var $showRedirects = false;
|
||||
|
||||
/**
|
||||
* Return a partial WHERE clause to limit the search to the given namespaces
|
||||
* Perform a full text search query and return a result set.
|
||||
*
|
||||
* @param string $term - Raw search term
|
||||
* @param array $namespaces - List of namespaces to search
|
||||
* @return ResultWrapper
|
||||
* @access public
|
||||
*/
|
||||
function queryNamespaces() {
|
||||
$namespaces = implode( ',', $this->namespacesToSearch );
|
||||
if ($namespaces == '') {
|
||||
$namespaces = '0';
|
||||
}
|
||||
return "AND cur_namespace IN (" . $namespaces . ')';
|
||||
function searchText( $term ) {
	# Clean the raw term, build the query with the fulltext flag set,
	# run it, and hand back the wrapped result.
	$filtered = $this->filter( $term );
	$sql = $this->getQuery( $filtered, true );
	$res = $this->db->query( $sql );
	return $this->db->resultObject( $res );
}
|
||||
|
||||
/**
|
||||
* Return a partial WHERE clause to include or exclude redirects from results
|
||||
* Perform a title-only search query and return a result set.
|
||||
*
|
||||
* @param string $term - Raw search term
|
||||
* @param array $namespaces - List of namespaces to search
|
||||
* @return ResultWrapper
|
||||
* @access public
|
||||
*/
|
||||
function searchRedirects() {
|
||||
if ( $this->doSearchRedirects ) {
|
||||
return '';
|
||||
} else {
|
||||
return 'AND cur_is_redirect=0 ';
|
||||
}
|
||||
function searchTitle( $term ) {
	# Same pipeline as searchText(), but with the fulltext flag cleared
	# so the engine builds a title-only query.
	$filtered = $this->filter( $term );
	$sql = $this->getQuery( $filtered, false );
	$res = $this->db->query( $sql );
	return $this->db->resultObject( $res );
}
|
||||
|
||||
|
||||
/**
|
||||
* If an exact title match can be found, or a very slightly close match,
|
||||
* return the title. If no match, returns NULL.
|
||||
*
|
||||
* @param string $term
|
||||
* @return Title
|
||||
* @access private
|
||||
*/ function initNamespaceCheckbox( $i ) {
|
||||
global $wgUser, $wgNamespacesToBeSearchedDefault;
|
||||
|
||||
if ($wgUser->getID()) {
|
||||
// User is logged in so we retrieve his default namespaces
|
||||
return $wgUser->getOption( 'searchNs'.$i );
|
||||
} else {
|
||||
// User is not logged in so we give him the global default namespaces
|
||||
return !empty($wgNamespacesToBeSearchedDefault[ $i ]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Display the "power search" footer. Does not actually perform the search,
|
||||
* that is done by showResults()
|
||||
*/
|
||||
function powersearch() {
|
||||
global $wgUser, $wgOut, $wgContLang, $wgTitle, $wgRequest;
|
||||
$sk =& $wgUser->getSkin();
|
||||
|
||||
$search = $this->rawText;
|
||||
$searchx = $wgRequest->getVal( 'searchx' );
|
||||
$listredirs = $wgRequest->getVal( 'redirs' );
|
||||
|
||||
$ret = wfMsg('powersearchtext'); # Text to be returned
|
||||
$tempText = ''; # Temporary text, for substitution into $ret
|
||||
|
||||
if( isset( $_REQUEST['searchx'] ) ) {
|
||||
$this->addToQuery['searchx'] = '1';
|
||||
}
|
||||
|
||||
# Do namespace checkboxes
|
||||
$namespaces = $wgContLang->getNamespaces();
|
||||
foreach ( $namespaces as $i => $namespace ) {
|
||||
# Skip virtual namespaces
|
||||
if ( $i < 0 ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$formVar = 'ns'.$i;
|
||||
|
||||
# Initialise checkboxValues, either from defaults or from
|
||||
# a previous invocation
|
||||
if ( !isset( $searchx ) ) {
|
||||
$checkboxValue = $this->initNamespaceCheckbox( $i );
|
||||
} else {
|
||||
$checkboxValue = $wgRequest->getVal( $formVar );
|
||||
}
|
||||
|
||||
$checked = '';
|
||||
if ( $checkboxValue == 1 ) {
|
||||
$checked = ' checked="checked"';
|
||||
$this->addToQuery['ns'.$i] = 1;
|
||||
array_push( $this->namespacesToSearch, $i );
|
||||
}
|
||||
$name = str_replace( '_', ' ', $namespaces[$i] );
|
||||
if ( '' == $name ) {
|
||||
$name = wfMsg( 'blanknamespace' );
|
||||
}
|
||||
|
||||
if ( $tempText !== '' ) {
|
||||
$tempText .= ' ';
|
||||
}
|
||||
$tempText .= "<input type='checkbox' value=\"1\" name=\"" .
|
||||
"ns{$i}\"{$checked} />{$name}\n";
|
||||
}
|
||||
$ret = str_replace ( '$1', $tempText, $ret );
|
||||
|
||||
# List redirects checkbox
|
||||
|
||||
$checked = '';
|
||||
if ( $listredirs == 1 ) {
|
||||
$this->addToQuery['redirs'] = 1;
|
||||
$checked = ' checked="checked"';
|
||||
}
|
||||
$tempText = "<input type='checkbox' value='1' name=\"redirs\"{$checked} />\n";
|
||||
$ret = str_replace( '$2', $tempText, $ret );
|
||||
|
||||
# Search field
|
||||
|
||||
$tempText = "<input type='text' name=\"search\" value=\"" .
|
||||
htmlspecialchars( $search ) ."\" width=\"80\" />\n";
|
||||
$ret = str_replace( "$3", $tempText, $ret );
|
||||
|
||||
# Searchx button
|
||||
|
||||
$tempText = '<input type="submit" name="searchx" value="' .
|
||||
wfMsg('powersearch') . "\" />\n";
|
||||
$ret = str_replace( '$9', $tempText, $ret );
|
||||
|
||||
$action = $sk->escapeSearchLink();
|
||||
$ret = "<br /><br />\n<form id=\"powersearch\" method=\"get\" " .
|
||||
"action=\"$action\">\n{$ret}\n</form>\n";
|
||||
|
||||
if ( isset ( $searchx ) ) {
|
||||
if ( ! $listredirs ) {
|
||||
$this->doSearchRedirects = false;
|
||||
}
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
|
||||
function setupPage() {
|
||||
global $wgOut;
|
||||
$wgOut->setPageTitle( wfMsg( 'searchresults' ) );
|
||||
$wgOut->setSubtitle( wfMsg( 'searchquery', htmlspecialchars( $this->rawText ) ) );
|
||||
$wgOut->setArticleRelated( false );
|
||||
$wgOut->setRobotpolicy( 'noindex,nofollow' );
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform the search and construct the results page
|
||||
*/
|
||||
function showResults() {
|
||||
global $wgUser, $wgTitle, $wgOut, $wgLang;
|
||||
global $wgDisableTextSearch, $wgInputEncoding;
|
||||
$fname = 'SearchEngine::showResults';
|
||||
|
||||
$search = $this->rawText;
|
||||
|
||||
$powersearch = $this->powersearch(); /* Need side-effects here? */
|
||||
|
||||
$this->setupPage();
|
||||
|
||||
$sk = $wgUser->getSkin();
|
||||
$wgOut->addWikiText( wfMsg( 'searchresulttext' ) );
|
||||
|
||||
if ( !$this->parseQuery() ) {
|
||||
$wgOut->addWikiText(
|
||||
'==' . wfMsg( 'badquery' ) . "==\n" .
|
||||
wfMsg( 'badquerytext' ) );
|
||||
return;
|
||||
}
|
||||
list( $limit, $offset ) = wfCheckLimits( 20, 'searchlimit' );
|
||||
|
||||
if ( $wgDisableTextSearch ) {
|
||||
$wgOut->addHTML( wfMsg( 'searchdisabled' ) );
|
||||
$wgOut->addHTML( wfMsg( 'googlesearch',
|
||||
htmlspecialchars( $this->rawText ),
|
||||
htmlspecialchars( $wgInputEncoding ) ) );
|
||||
return;
|
||||
}
|
||||
|
||||
$titleMatches = $this->getMatches( $this->titleCond, $limit, $offset );
|
||||
$textMatches = $this->getMatches( $this->textCond, $limit, $offset );
|
||||
|
||||
$sk = $wgUser->getSkin();
|
||||
|
||||
$num = count( $titleMatches ) + count( $textMatches );
|
||||
if ( $num >= $limit ) {
|
||||
$top = wfShowingResults( $offset, $limit );
|
||||
} else {
|
||||
$top = wfShowingResultsNum( $offset, $limit, $num );
|
||||
}
|
||||
$wgOut->addHTML( "<p>{$top}</p>\n" );
|
||||
|
||||
# For powersearch
|
||||
$a2l = '';
|
||||
$akk = array_keys( $this->addToQuery );
|
||||
foreach ( $akk AS $ak ) {
|
||||
$a2l .= "&{$ak}={$this->addToQuery[$ak]}" ;
|
||||
}
|
||||
|
||||
$prevnext = wfViewPrevNext( $offset, $limit, 'Special:Search',
|
||||
'search=' . wfUrlencode( $this->filteredText ) . $a2l );
|
||||
$wgOut->addHTML( "<br />{$prevnext}\n" );
|
||||
|
||||
$foundsome = $this->showMatches( $titleMatches, $offset, 'notitlematches', 'titlematches' )
|
||||
|| $this->showMatches( $textMatches, $offset, 'notextmatches', 'textmatches' );
|
||||
|
||||
if ( !$foundsome ) {
|
||||
$wgOut->addWikiText( wfMsg( 'nonefound' ) );
|
||||
}
|
||||
$wgOut->addHTML( "<p>{$prevnext}</p>\n" );
|
||||
$wgOut->addHTML( $powersearch );
|
||||
}
|
||||
|
||||
function legalSearchChars() {
|
||||
$lc = "A-Za-z_'0-9\\x80-\\xFF\\-";
|
||||
return $lc;
|
||||
}
|
||||
|
||||
function parseQuery() {
|
||||
global $wgDBmysql4;
|
||||
if (strlen($this->filteredText) < 1)
|
||||
return MW_SEARCH_BAD_QUERY;
|
||||
|
||||
if( $wgDBmysql4 ) {
|
||||
# Use cleaner boolean search if available
|
||||
return $this->parseQuery4();
|
||||
} else {
|
||||
# Fall back to ugly hack with multiple search clauses
|
||||
return $this->parseQuery3();
|
||||
}
|
||||
}
|
||||
|
||||
function parseQuery3() {
|
||||
global $wgDBminWordLen, $wgContLang;
|
||||
|
||||
# on non mysql4 database: get list of words we don't want to search for
|
||||
require_once( 'FulltextStoplist.php' );
|
||||
|
||||
$lc = SearchEngine::legalSearchChars() . '()';
|
||||
$q = preg_replace( "/([()])/", " \\1 ", $this->filteredText );
|
||||
$q = preg_replace( "/\\s+/", " ", $q );
|
||||
$w = explode( ' ', trim( $q ) );
|
||||
|
||||
$last = $cond = '';
|
||||
foreach ( $w as $word ) {
|
||||
$word = $wgContLang->stripForSearch( $word );
|
||||
if ( 'and' == $word || 'or' == $word || 'not' == $word
|
||||
|| '(' == $word || ')' == $word ) {
|
||||
$cond .= ' ' . strtoupper( $word );
|
||||
$last = '';
|
||||
} else if ( strlen( $word ) < $wgDBminWordLen ) {
|
||||
continue;
|
||||
} else if ( FulltextStoplist::inList( $word ) ) {
|
||||
continue;
|
||||
} else {
|
||||
if ( '' != $last ) { $cond .= ' AND'; }
|
||||
$cond .= " (MATCH (##field##) AGAINST ('" .
|
||||
$this->db->strencode( $word ). "'))";
|
||||
$last = $word;
|
||||
array_push( $this->searchTerms, "\\b" . $word . "\\b" );
|
||||
}
|
||||
}
|
||||
if ( 0 == count( $this->searchTerms ) ) {
|
||||
return MW_SEARCH_BAD_QUERY;
|
||||
}
|
||||
|
||||
$this->titleCond = '(' . str_replace( '##field##',
|
||||
'si_title', $cond ) . ' )';
|
||||
|
||||
$this->textCond = '(' . str_replace( '##field##',
|
||||
'si_text', $cond ) . ' AND (cur_is_redirect=0) )';
|
||||
|
||||
return MW_SEARCH_OK;
|
||||
}
|
||||
|
||||
function parseQuery4() {
|
||||
global $wgContLang;
|
||||
$lc = SearchEngine::legalSearchChars();
|
||||
$searchon = '';
|
||||
$this->searchTerms = array();
|
||||
|
||||
# FIXME: This doesn't handle parenthetical expressions.
|
||||
if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
|
||||
$this->filteredText, $m, PREG_SET_ORDER ) ) {
|
||||
foreach( $m as $terms ) {
|
||||
if( $searchon !== '' ) $searchon .= ' ';
|
||||
if( $this->strictMatching && ($terms[1] == '') ) {
|
||||
$terms[1] = '+';
|
||||
}
|
||||
$searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] );
|
||||
if( !empty( $terms[3] ) ) {
|
||||
$regexp = preg_quote( $terms[3] );
|
||||
if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+";
|
||||
} else {
|
||||
$regexp = preg_quote( str_replace( '"', '', $terms[2] ) );
|
||||
}
|
||||
$this->searchTerms[] = $regexp;
|
||||
}
|
||||
wfDebug( "Would search with '$searchon'\n" );
|
||||
wfDebug( "Match with /\b" . implode( '\b|\b', $this->searchTerms ) . "\b/\n" );
|
||||
} else {
|
||||
wfDebug( "Can't understand search query '{$this->filteredText}'\n" );
|
||||
}
|
||||
|
||||
$searchon = $this->db->strencode( $searchon );
|
||||
$this->titleCond = " MATCH(si_title) AGAINST('$searchon' IN BOOLEAN MODE)";
|
||||
$this->textCond = " (MATCH(si_text) AGAINST('$searchon' IN BOOLEAN MODE) AND cur_is_redirect=0)";
|
||||
return MW_SEARCH_OK;
|
||||
}
|
||||
|
||||
function &getMatches( $cond, $limit, $offset = 0 ) {
|
||||
$searchindex = $this->db->tableName( 'searchindex' );
|
||||
$cur = $this->db->tableName( 'cur' );
|
||||
$searchnamespaces = $this->queryNamespaces();
|
||||
$redircond = $this->searchRedirects();
|
||||
|
||||
$sql = "SELECT cur_id,cur_namespace,cur_title," .
|
||||
"cur_text FROM $cur,$searchindex " .
|
||||
"WHERE cur_id=si_page AND {$cond} " .
|
||||
"{$searchnamespaces} {$redircond} " .
|
||||
$this->db->limitResult( $limit, $offset );
|
||||
|
||||
$res = $this->db->query( $sql, 'SearchEngine::getMatches' );
|
||||
$matches = array();
|
||||
while ( $row = $this->db->fetchObject( $res ) ) {
|
||||
$matches[] = $row;
|
||||
}
|
||||
$this->db->freeResult( $res );
|
||||
|
||||
return $matches;
|
||||
}
|
||||
|
||||
function showMatches( &$matches, $offset, $msgEmpty, $msgFound ) {
|
||||
global $wgOut;
|
||||
if ( 0 == count( $matches ) ) {
|
||||
$wgOut->addHTML( "<h2>" . wfMsg( $msgEmpty ) .
|
||||
"</h2>\n" );
|
||||
return false;
|
||||
} else {
|
||||
$off = $offset + 1;
|
||||
$wgOut->addHTML( "<h2>" . wfMsg( $msgFound ) .
|
||||
"</h2>\n<ol start='{$off}'>" );
|
||||
|
||||
foreach( $matches as $row ) {
|
||||
$this->showHit( $row );
|
||||
}
|
||||
$wgOut->addHTML( "</ol>\n" );
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
function showHit( $row ) {
|
||||
global $wgUser, $wgOut, $wgContLang;
|
||||
|
||||
$t = Title::makeName( $row->cur_namespace, $row->cur_title );
|
||||
if( is_null( $t ) ) {
|
||||
$wgOut->addHTML( "<!-- Broken link in search result -->\n" );
|
||||
return;
|
||||
}
|
||||
$sk = $wgUser->getSkin();
|
||||
|
||||
$contextlines = $wgUser->getOption( 'contextlines' );
|
||||
if ( '' == $contextlines ) { $contextlines = 5; }
|
||||
$contextchars = $wgUser->getOption( 'contextchars' );
|
||||
if ( '' == $contextchars ) { $contextchars = 50; }
|
||||
|
||||
$link = $sk->makeKnownLink( $t, '' );
|
||||
$size = wfMsg( 'nbytes', strlen( $row->cur_text ) );
|
||||
$wgOut->addHTML( "<li>{$link} ({$size})" );
|
||||
|
||||
$lines = explode( "\n", $row->cur_text );
|
||||
$pat1 = "/(.*)(" . implode( "|", $this->searchTerms ) . ")(.*)/i";
|
||||
$lineno = 0;
|
||||
|
||||
foreach ( $lines as $line ) {
|
||||
if ( 0 == $contextlines ) {
|
||||
break;
|
||||
}
|
||||
--$contextlines;
|
||||
++$lineno;
|
||||
if ( ! preg_match( $pat1, $line, $m ) ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$pre = $wgContLang->truncate( $m[1], -$contextchars, '...' );
|
||||
|
||||
if ( count( $m ) < 3 ) {
|
||||
$post = '';
|
||||
} else {
|
||||
$post = $wgContLang->truncate( $m[3], $contextchars, '...' );
|
||||
}
|
||||
|
||||
$found = $m[2];
|
||||
|
||||
$line = htmlspecialchars( $pre . $found . $post );
|
||||
$pat2 = '/(' . implode( '|', $this->searchTerms ) . ")/i";
|
||||
$line = preg_replace( $pat2,
|
||||
"<span class='searchmatch'>\\1</span>", $line );
|
||||
|
||||
$wgOut->addHTML( "<br /><small>{$lineno}: {$line}</small>\n" );
|
||||
}
|
||||
$wgOut->addHTML( "</li>\n" );
|
||||
}
|
||||
|
||||
function getNearMatch() {
|
||||
function getNearMatch( $term ) {
|
||||
# Exact match? No need to look further.
|
||||
$title = Title::newFromText( $this->rawText );
|
||||
$title = Title::newFromText( $term );
|
||||
if ( $title->getNamespace() == NS_SPECIAL || 0 != $title->getArticleID() ) {
|
||||
return $title;
|
||||
}
|
||||
|
||||
# Now try all lower case (i.e. first letter capitalized)
|
||||
#
|
||||
$title = Title::newFromText( strtolower( $this->rawText ) );
|
||||
$title = Title::newFromText( strtolower( $term ) );
|
||||
if ( 0 != $title->getArticleID() ) {
|
||||
return $title;
|
||||
}
|
||||
|
||||
# Now try capitalized string
|
||||
#
|
||||
$title = Title::newFromText( ucwords( strtolower( $this->rawText ) ) );
|
||||
$title = Title::newFromText( ucwords( strtolower( $term ) ) );
|
||||
if ( 0 != $title->getArticleID() ) {
|
||||
return $title;
|
||||
}
|
||||
|
||||
# Now try all upper case
|
||||
#
|
||||
$title = Title::newFromText( strtoupper( $this->rawText ) );
|
||||
$title = Title::newFromText( strtoupper( $term ) );
|
||||
if ( 0 != $title->getArticleID() ) {
|
||||
return $title;
|
||||
}
|
||||
|
||||
# Entering an IP address goes to the contributions page
|
||||
if ( preg_match( '/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/', $this->rawText ) ) {
|
||||
$title = Title::makeTitle( NS_SPECIAL, "Contributions/" . $this->rawText );
|
||||
if ( preg_match( '/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/', $term ) ) {
|
||||
$title = Title::makeTitle( NS_SPECIAL, "Contributions/" . $term );
|
||||
return $title;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
function legalSearchChars() {
	# Body of a regex character class (no surrounding brackets); callers
	# splice it into their own patterns.
	$lc = "A-Za-z_'0-9\\x80-\\xFF\\-";
	return $lc;
}
|
||||
|
||||
function goResult() {
|
||||
global $wgOut, $wgGoToEdit;
|
||||
global $wgDisableTextSearch;
|
||||
$fname = 'SearchEngine::goResult';
|
||||
|
||||
# Try to go to page as entered.
|
||||
#
|
||||
$t = Title::newFromText( $this->rawText );
|
||||
|
||||
# If the string cannot be used to create a title
|
||||
if( is_null( $t ) ){
|
||||
$this->showResults();
|
||||
return;
|
||||
/**
|
||||
* Set the maximum number of results to return
|
||||
* and how many to skip before returning the first.
|
||||
*
|
||||
* @param int $limit
|
||||
* @param int $offset
|
||||
* @access public
|
||||
*/
|
||||
function setLimitOffset( $limit, $offset = 0 ) {
	# Both values are forced to integers before being stored; queryLimit()
	# later passes them to the database layer.
	$cleanLimit = intval( $limit );
	$cleanOffset = intval( $offset );
	$this->limit = $cleanLimit;
	$this->offset = $cleanOffset;
}
|
||||
|
||||
/**
|
||||
* Set which namespaces the search should include.
|
||||
* Give an array of namespace index numbers.
|
||||
*
|
||||
* @param array $namespaces
|
||||
* @access public
|
||||
*/
|
||||
function setNamespaces( $namespaces ) {
	# Stored as given; queryNamespaces() turns the list into an IN (...) clause.
	$this->namespaces = $namespaces;
}
|
||||
|
||||
/**
|
||||
* Make a list of searchable namespaces and their canonical names.
|
||||
* @return array
|
||||
* @access public
|
||||
*/
|
||||
function searchableNamespaces() {
|
||||
global $wgContLang;
|
||||
$arr = array();
|
||||
foreach( $wgContLang->getNamespaces() as $ns => $name ) {
|
||||
if( $ns >= 0 ) {
|
||||
$arr[$ns] = $name;
|
||||
}
|
||||
}
|
||||
|
||||
# If there's an exact or very near match, jump right there.
|
||||
$t = $this->getNearMatch();
|
||||
if( !is_null( $t ) ) {
|
||||
$wgOut->redirect( $t->getFullURL() );
|
||||
return;
|
||||
}
|
||||
|
||||
# No match, generate an edit URL
|
||||
$t = Title::newFromText( $this->rawText );
|
||||
|
||||
# If the feature is enabled, go straight to the edit page
|
||||
if ( $wgGoToEdit ) {
|
||||
$wgOut->redirect( $t->getFullURL( 'action=edit' ) );
|
||||
return;
|
||||
}
|
||||
|
||||
if( $t ) {
|
||||
$editurl = $t->escapeLocalURL( 'action=edit' );
|
||||
return $arr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch an array of regular expression fragments for matching
|
||||
* the search terms as parsed by this engine in a text extract.
|
||||
*
|
||||
* @return array
|
||||
* @access public
|
||||
*/
|
||||
function termMatches() {
	# Expose the regex fragments collected in $this->searchTerms.
	$fragments = $this->searchTerms;
	return $fragments;
}
|
||||
|
||||
/**
|
||||
* Return a 'cleaned up' search string
|
||||
*
|
||||
* @return string
|
||||
* @access public
|
||||
*/
|
||||
function filter( $text ) {
	# Every character outside the engine's legal set becomes a space,
	# then leading and trailing whitespace is trimmed.
	$pattern = '/[^' . $this->legalSearchChars() . ']/';
	$cleaned = preg_replace( $pattern, ' ', $text );
	return trim( $cleaned );
}
|
||||
|
||||
/**
|
||||
* Return a partial WHERE clause to exclude redirects, if so set
|
||||
* @return string
|
||||
* @access private
|
||||
*/
|
||||
function queryRedirect() {
	# The original test was inverted: it filtered redirects out when
	# $this->showRedirects was true and let them through when it was
	# false. Redirects are now excluded unless the caller asked for them.
	if( $this->showRedirects ) {
		return '';
	} else {
		return 'AND cur_is_redirect=0';
	}
}
|
||||
|
||||
|
||||
/**
|
||||
* @static
|
||||
* Return a partial WHERE clause to limit the search to the given namespaces
|
||||
* @return string
|
||||
* @access private
|
||||
*/
|
||||
function doFuzzyTitleSearch( $search, $namespace ){
|
||||
global $wgContLang, $wgOut;
|
||||
|
||||
$this->setupPage();
|
||||
|
||||
$sstr = ucfirst($search);
|
||||
$sstr = str_replace(' ', '_', $sstr);
|
||||
$fuzzymatches = SearchEngine::fuzzyTitles( $sstr, $namespace );
|
||||
$fuzzymatches = array_slice($fuzzymatches, 0, 10);
|
||||
$slen = strlen( $search );
|
||||
$wikitext = '';
|
||||
foreach($fuzzymatches as $res){
|
||||
$t = str_replace('_', ' ', $res[1]);
|
||||
$tfull = $wgContLang->getNsText( $namespace ) . ":$t|$t";
|
||||
if( $namespace == NS_MAIN )
|
||||
$tfull = $t;
|
||||
$distance = $res[0];
|
||||
$closeness = (strlen( $search ) - $distance) / strlen( $search );
|
||||
$percent = intval( $closeness * 100 ) . '%';
|
||||
$stars = str_repeat('*', ceil(5 * $closeness) );
|
||||
$wikitext .= "* [[$tfull]] $percent ($stars)\n";
|
||||
function queryNamespaces() {
|
||||
$namespaces = implode( ',', $this->namespaces );
|
||||
if ($namespaces == '') {
|
||||
$namespaces = '0';
|
||||
}
|
||||
if( $wikitext ){
|
||||
if( $namespace != NS_MAIN )
|
||||
$wikitext = '=== ' . $wgContLang->getNsText( $namespace ) . " ===\n" . $wikitext;
|
||||
$wgOut->addWikiText( $wikitext );
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return 'AND cur_namespace IN (' . $namespaces . ')';
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @static
|
||||
* Return a LIMIT clause to limit results on the query.
|
||||
* @return string
|
||||
* @access private
|
||||
*/
|
||||
function fuzzyTitles( $sstr, $namespace = NS_MAIN ){
|
||||
$span = 0.10; // weed on title length before doing levenshtein.
|
||||
$tolerance = 0.35; // allowed percentage of erronous characters
|
||||
$slen = strlen($sstr);
|
||||
$tolerance_count = ceil($tolerance * $slen);
|
||||
$spanabs = ceil($slen * (1 + $span)) - $slen;
|
||||
# print "Word: $sstr, len = $slen, range = [$min, $max], tolerance_count = $tolerance_count<BR>\n";
|
||||
$result = array();
|
||||
$cnt = 0;
|
||||
for( $i=0; $i <= $spanabs; $i++ ){
|
||||
$titles = SearchEngine::getTitlesByLength( $slen + $i, $namespace );
|
||||
if( $i != 0) {
|
||||
$titles = array_merge($titles, SearchEngine::getTitlesByLength( $slen - $i, $namespace ) );
|
||||
}
|
||||
foreach($titles as $t){
|
||||
$d = levenshtein($sstr, $t);
|
||||
if($d < $tolerance_count)
|
||||
$result[] = array($d, $t);
|
||||
$cnt++;
|
||||
}
|
||||
}
|
||||
usort($result, 'SearchEngine_pcmp');
|
||||
return $result;
|
||||
function queryLimit() {
	# The database layer builds the LIMIT clause from the stored
	# limit and offset.
	$clause = $this->db->limitResult( $this->limit, $this->offset );
	return $clause;
}
|
||||
|
||||
|
||||
/**
|
||||
* static
|
||||
* Construct the full SQL query to do the search.
|
||||
* The guts should be constructed in queryMain()
|
||||
* @param string $filteredTerm
|
||||
* @param bool $fulltext
|
||||
* @access private
|
||||
*/
|
||||
function getTitlesByLength($aLength, $aNamespace = 0){
|
||||
global $wgMemc, $wgDBname;
|
||||
$fname = 'SearchEngin::getTitlesByLength';
|
||||
|
||||
// to avoid multiple costly SELECTs in case of no memcached
|
||||
if( $this->allTitles ){
|
||||
if( isset( $this->allTitles[$aLength][$aNamespace] ) ){
|
||||
return $this->allTitles[$aLength][$aNamespace];
|
||||
} else {
|
||||
return array();
|
||||
}
|
||||
}
|
||||
function getQuery( $filteredTerm, $fulltext ) {
	# Assemble the statement from its four pieces, in order: the main
	# SELECT ... WHERE from the engine subclass, then the redirect,
	# namespace and limit clauses, joined by single spaces.
	$parts = array(
		$this->queryMain( $filteredTerm, $fulltext ),
		$this->queryRedirect(),
		$this->queryNamespaces(),
		$this->queryLimit()
	);
	return implode( ' ', $parts );
}
|
||||
|
||||
}
|
||||
|
||||
$mkey = "$wgDBname:titlesbylength:$aLength:$aNamespace";
|
||||
$mkeyts = "$wgDBname:titlesbylength:createtime";
|
||||
$ts = $wgMemc->get( $mkeyts );
|
||||
$result = $wgMemc->get( $mkey );
|
||||
|
||||
if( time() - $ts < 3600 ){
|
||||
// note: in case of insufficient memcached space, we return
|
||||
// an empty list instead of starting to hit the DB.
|
||||
return is_array( $result ) ? $result : array();
|
||||
}
|
||||
|
||||
$wgMemc->set( $mkeyts, time() );
|
||||
|
||||
$res = $this->db->select( 'cur', array( 'cur_title', 'cur_namespace' ), false, $fname );
|
||||
$titles = array(); // length, ns, [titles]
|
||||
while( $obj = $this->db->fetchObject( $res ) ){
|
||||
$title = $obj->cur_title;
|
||||
$ns = $obj->cur_namespace;
|
||||
$len = strlen( $title );
|
||||
$titles[$len][$ns][] = $title;
|
||||
}
|
||||
foreach($titles as $length => $length_arr){
|
||||
foreach($length_arr as $ns => $title_arr){
|
||||
$mkey = "$wgDBname:titlesbylength:$length:$ns";
|
||||
$wgMemc->set( $mkey, $title_arr, 3600 * 24 );
|
||||
}
|
||||
}
|
||||
$this->allTitles = $titles;
|
||||
if( isset( $titles[$aLength][$aNamespace] ) )
|
||||
return $titles[$aLength][$aNamespace];
|
||||
else
|
||||
return array();
|
||||
class SearchEngineDummy {
	/**
	 * Placeholder search: ignores the term and never produces results.
	 *
	 * @param string $term
	 * @return null
	 */
	function search( $term ) {
		$nothing = null;
		return $nothing;
	}
}
|
||||
|
||||
/**
|
||||
* @access private
|
||||
* @static
|
||||
*/
|
||||
function SearchEngine_pcmp($a, $b){ return $a[0] - $b[0]; }
|
||||
|
||||
?>
|
||||
?>
|
||||
86
includes/SearchMySQL3.php
Normal file
86
includes/SearchMySQL3.php
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
<?php
|
||||
# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
|
||||
# http://www.mediawiki.org/
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
# http://www.gnu.org/copyleft/gpl.html
|
||||
|
||||
/**
|
||||
* Search engine hook for MySQL 3.23.x
|
||||
* @package MediaWiki
|
||||
* @subpackage Search
|
||||
*/
|
||||
|
||||
require_once( 'SearchEngine.php' );
|
||||
|
||||
class SearchMySQL3 extends SearchEngine {
	/**
	 * Constructor.
	 *
	 * PHP 4 only runs a method as the constructor when it is named after
	 * the class. The previous name (SearchEngineMySQL3) was never invoked
	 * by "new SearchMySQL3( $db )", which left $this->db unset. The handle
	 * is taken by reference, matching SearchMySQL4.
	 *
	 * @param object $db database handle; must provide strencode() and tableName()
	 */
	function SearchMySQL3( &$db ) {
		$this->db =& $db;
	}

	/**
	 * Pick the searchindex column to match against.
	 *
	 * @param bool $fulltext true for page text, false for titles
	 * @return string
	 */
	function getIndexField( $fulltext ) {
		return $fulltext ? 'si_text' : 'si_title';
	}

	/**
	 * Turn a filtered search string into a chain of MATCH ... AGAINST
	 * conditions and record a highlighting regex fragment for each term
	 * in $this->searchTerms.
	 *
	 * @param string $filteredText search string, already passed through filter()
	 * @param bool $fulltext true to match page text, false to match titles
	 * @return string parenthesised SQL condition, or null when no usable
	 *                search term is left
	 */
	function parseQuery( $filteredText, $fulltext ) {
		global $wgDBminWordLen, $wgContLang;

		$field = $this->getIndexField( $fulltext );

		# on non mysql4 database: get list of words we don't want to search for
		require_once( 'FulltextStoplist.php' );

		# Start from a clean slate so terms left over from an earlier search
		# on this object cannot leak into the highlight list or defeat the
		# "no usable terms" check below.
		$this->searchTerms = array();

		# Put spaces around parentheses so they become separate tokens,
		# then collapse runs of whitespace.
		$q = preg_replace( "/([()])/", " \\1 ", $filteredText );
		$q = preg_replace( "/\\s+/", " ", $q );
		$w = explode( ' ', trim( $q ) );

		$last = $cond = '';
		foreach ( $w as $word ) {
			$word = $wgContLang->stripForSearch( $word );
			if ( 'and' == $word || 'or' == $word || 'not' == $word
			  || '(' == $word || ')' == $word ) {
				# Boolean operators and grouping pass straight through to SQL
				$cond .= ' ' . strtoupper( $word );
				$last = '';
			} else if ( strlen( $word ) < $wgDBminWordLen ) {
				continue;
			} else if ( FulltextStoplist::inList( $word ) ) {
				continue;
			} else {
				# Two search terms in a row get an implicit AND between them
				if ( '' != $last ) { $cond .= ' AND'; }
				$cond .= " (MATCH ($field) AGAINST ('" .
				  $this->db->strencode( $word ). "'))";
				$last = $word;
				$this->searchTerms[] = "\\b" . preg_quote( $word, '/' ) . "\\b";
			}
		}
		if ( 0 == count( $this->searchTerms ) ) {
			return null;
		}

		return '(' . $cond . ' )';
	}

	/**
	 * Build the SELECT ... FROM ... WHERE portion of the search query.
	 *
	 * @param string $filteredTerm search string, already passed through filter()
	 * @param bool $fulltext true to match page text, false to match titles
	 * @return string
	 */
	function queryMain( $filteredTerm, $fulltext ) {
		$match = $this->parseQuery( $filteredTerm, $fulltext );
		if ( is_null( $match ) ) {
			# No usable terms (e.g. only stop words). Use a condition that
			# can never be true so the query returns no rows; a dangling
			# "AND" would be a syntax error.
			$match = '(1=0)';
		}
		$cur = $this->db->tableName( 'cur' );
		$searchindex = $this->db->tableName( 'searchindex' );
		return 'SELECT cur_id, cur_namespace, cur_title, cur_text ' .
		  "FROM $cur,$searchindex " .
		  'WHERE cur_id=si_page AND ' . $match;
	}
}
|
||||
|
||||
?>
|
||||
83
includes/SearchMySQL4.php
Normal file
83
includes/SearchMySQL4.php
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
<?php
|
||||
# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
|
||||
# http://www.mediawiki.org/
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
# http://www.gnu.org/copyleft/gpl.html
|
||||
|
||||
/**
|
||||
* Search engine hook for MySQL 4+
|
||||
* @package MediaWiki
|
||||
* @subpackage Search
|
||||
*/
|
||||
|
||||
require_once( 'SearchEngine.php' );
|
||||
|
||||
class SearchMySQL4 extends SearchEngine {
	# When true, terms without an explicit operator get a '+' prefix
	# (every term is required).
	var $strictMatching = false;

	/**
	 * Constructor.
	 *
	 * @param object $db database handle; must provide strencode() and tableName()
	 */
	function SearchMySQL4( &$db ) {
		$this->db =& $db;
	}

	/**
	 * Pick the searchindex column to match against.
	 *
	 * @param bool $fulltext true for page text, false for titles
	 * @return string
	 */
	function getIndexField( $fulltext ) {
		return $fulltext ? 'si_text' : 'si_title';
	}

	/**
	 * Turn a filtered search string into a boolean-mode MATCH ... AGAINST
	 * condition and record a highlighting regex fragment for each term
	 * in $this->searchTerms.
	 *
	 * @param string $filteredText search string, already passed through filter()
	 * @param bool $fulltext true to match page text, false to match titles
	 * @return string SQL condition
	 */
	function parseQuery( $filteredText, $fulltext ) {
		global $wgContLang;
		$lc = SearchEngine::legalSearchChars();
		$searchon = '';
		$this->searchTerms = array();

		# FIXME: This doesn't handle parenthetical expressions.
		# Capture groups: 1 = optional operator, 2 = whole term,
		# 3 = bare word, 4 = trailing '*' wildcard. Groups 3 and 4 are
		# absent when the term is a quoted phrase.
		if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
			  $filteredText, $m, PREG_SET_ORDER ) ) {
			foreach( $m as $terms ) {
				if( $searchon !== '' ) $searchon .= ' ';
				if( $this->strictMatching && ($terms[1] == '') ) {
					$terms[1] = '+';
				}
				$searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] );
				if( !empty( $terms[3] ) ) {
					$regexp = preg_quote( $terms[3], '/' );
					# A wildcard term also highlights the rest of the word
					if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+";
				} else {
					# Quoted phrase: highlight it without the quote marks
					$regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
				}
				$this->searchTerms[] = $regexp;
			}
			wfDebug( "Would search with '$searchon'\n" );
			wfDebug( "Match with /\b" . implode( '\b|\b', $this->searchTerms ) . "\b/\n" );
		} else {
			# Log the text actually being parsed; this class has no
			# filteredText property.
			wfDebug( "Can't understand search query '{$filteredText}'\n" );
		}

		$searchon = $this->db->strencode( $searchon );
		$field = $this->getIndexField( $fulltext );
		return " MATCH($field) AGAINST('$searchon' IN BOOLEAN MODE) ";
	}

	/**
	 * Build the SELECT ... FROM ... WHERE portion of the search query.
	 *
	 * @param string $filteredTerm search string, already passed through filter()
	 * @param bool $fulltext true to match page text, false to match titles
	 * @return string
	 */
	function queryMain( $filteredTerm, $fulltext ) {
		$match = $this->parseQuery( $filteredTerm, $fulltext );
		$cur = $this->db->tableName( 'cur' );
		$searchindex = $this->db->tableName( 'searchindex' );
		return 'SELECT cur_id, cur_namespace, cur_title, cur_text ' .
		  "FROM $cur,$searchindex " .
		  'WHERE cur_id=si_page AND ' . $match;
	}
}
|
||||
|
||||
?>
|
||||
|
|
@ -18,21 +18,370 @@
|
|||
# http://www.gnu.org/copyleft/gpl.html
|
||||
|
||||
/**
|
||||
* Token special page for marking search requests properly in the skin.
|
||||
* Run text & title search and display the output
|
||||
* @package MediaWiki
|
||||
* @subpackage SpecialPage
|
||||
*/
|
||||
|
||||
/**
|
||||
* constructor
|
||||
*/
|
||||
function wfSpecialSearch( $par ) {
|
||||
global $wgOutput;
|
||||
require_once( 'SearchEngine.php' );
|
||||
$searchEngine = new SearchEngine( $par );
|
||||
$searchEngine->showResults();
|
||||
require_once( 'SearchEngine.php' );
|
||||
|
||||
/**
 * Entry point for the search special page.
 *
 * Reads the 'search' request value (falling back to $par) and either
 * lists results or attempts a direct jump to a matching page.
 *
 * @param string $par default search text when the request has none
 */
function wfSpecialSearch( $par='' ) {
	global $wgRequest, $wgUser;

	$term = $wgRequest->getText( 'search', $par );
	$page = new SpecialSearch( $wgRequest, $wgUser );

	# A results listing is wanted when 'fulltext' is set to something
	# truthy, or when 'offset' or 'searchx' is present at all.
	$wantsListing = $wgRequest->getVal( 'fulltext' )
		|| !is_null( $wgRequest->getVal( 'offset' ) )
		|| !is_null( $wgRequest->getVal( 'searchx' ) );

	if( !$wantsListing ) {
		$page->goResult( $term );
		return;
	}
	$page->showResults( $term );
}
|
||||
|
||||
# Hey, it could be worse. It could be /bin/true on Solaris!
|
||||
|
||||
class SpecialSearch {
|
||||
/**
 * Initialise paging and namespace selection from the request and the
 * user's settings. Typically called with $wgRequest and $wgUser.
 *
 * The namespace list comes from the request when 'searchx' is present
 * and from the user's options otherwise. Redirect searching is always
 * switched off here.
 *
 * @param WebRequest $request
 * @param User $user
 * @access public
 */
function SpecialSearch( &$request, &$user ) {
	$paging = $request->getLimitOffset( 20, 'searchlimit' );
	$this->limit = $paging[0];
	$this->offset = $paging[1];

	$this->namespaces = $request->getCheck( 'searchx' )
		? $this->powerSearch( $request )
		: $this->userNamespaces( $user );

	$this->searchRedirects = false;
}
|
||||
|
||||
/**
 * Jump straight to a page when the term has an exact or near title
 * match. Otherwise either redirect to the edit page of the
 * would-be title (when $wgGoToEdit is on) or print the 'nogomatch'
 * notice and fall back to the ordinary results listing.
 *
 * @param string $term
 * @access public
 */
function goResult( $term ) {
	global $wgOut, $wgGoToEdit;

	$this->setupPage( $term );

	# A term that cannot be turned into a title can only be searched for.
	if( is_null( Title::newFromText( $term ) ) ) {
		return $this->showResults( $term );
	}

	# Exact or very near match: go there directly.
	$nearMatch = SearchEngine::getNearMatch( $term );
	if( !is_null( $nearMatch ) ) {
		$wgOut->redirect( $nearMatch->getFullURL() );
		return;
	}

	# No match: work out where the page could be created.
	$target = Title::newFromText( $term );
	$editLink = '';
	if( !is_null( $target ) ) {
		if( $wgGoToEdit ) {
			$wgOut->redirect( $target->getFullURL( 'action=edit' ) );
			return;
		}
		$editLink = $target->escapeLocalURL( 'action=edit' );
	}

	# FIXME: HTML in wiki message
	$notice = wfMsg( 'nogomatch', $editLink, htmlspecialchars( $term ) );
	$wgOut->addHTML( '<p>' . $notice . "</p>\n" );

	return $this->showResults( $term );
}
|
||||
|
||||
/**
 * Run the title and text searches for $term and write the results
 * page: summary line, prev/next navigation, both match sections and
 * the power search form.
 *
 * A blank term produces only the 'badquery' notice; with
 * $wgDisableTextSearch set, only the 'searchdisabled' and
 * 'googlesearch' messages are printed.
 *
 * @param string $term
 * @access public
 */
function showResults( $term ) {
	global $wgOut, $wgDisableTextSearch;

	$this->setupPage( $term );
	$wgOut->addWikiText( wfMsg( 'searchresulttext' ) );

	if( '' === trim( $term ) ) {
		$wgOut->addWikiText(
			'==' . wfMsg( 'badquery' ) . "==\n" .
			wfMsg( 'badquerytext' ) );
		return;
	}

	if ( $wgDisableTextSearch ) {
		global $wgInputEncoding;
		$wgOut->addHTML( wfMsg( 'searchdisabled' ) );
		$wgOut->addHTML( wfMsg( 'googlesearch',
			htmlspecialchars( $term ),
			htmlspecialchars( $wgInputEncoding ) ) );
		return;
	}

	$search =& $this->getSearchEngine();
	$titleMatches = $search->searchTitle( $term );
	$textMatches = $search->searchText( $term );

	$num = $titleMatches->numRows() + $textMatches->numRows();
	if ( $num >= $this->limit ) {
		$top = wfShowingResults( $this->offset, $this->limit );
	} else {
		$top = wfShowingResultsNum( $this->offset, $this->limit, $num );
	}
	$wgOut->addHTML( "<p>{$top}</p>\n" );

	# Navigation is shown above and below the results, but only when
	# there is something to page through.
	$showPager = ( $num || $this->offset );
	$prevnext = '';
	if( $showPager ) {
		$prevnext = wfViewPrevNext( $this->offset, $this->limit,
			'Special:Search',
			wfArrayToCGI(
				$this->powerSearchOptions(),
				array( 'search' => $term ) ) );
		$wgOut->addHTML( "<br />{$prevnext}\n" );
	}

	# Alternation of the engine's term regexps, used for highlighting.
	$terms = implode( '|', $search->termMatches() );

	if( $titleMatches->numRows() ) {
		$wgOut->addWikiText( '==' . wfMsg( 'titlematches' ) . "==\n" );
		$wgOut->addHTML( $this->showMatches( $titleMatches, $terms ) );
	} else {
		$wgOut->addWikiText( '==' . wfMsg( 'notitlematches' ) . "==\n" );
	}

	if( $textMatches->numRows() ) {
		$wgOut->addWikiText( '==' . wfMsg( 'textmatches' ) . "==\n" );
		$wgOut->addHTML( $this->showMatches( $textMatches, $terms ) );
	} elseif( $num == 0 ) {
		# Don't show the 'no text matches' if we received title matches
		$wgOut->addWikiText( '==' . wfMsg( 'notextmatches' ) . "==\n" );
	}

	# Both result sets have been fully rendered; release them.
	$titleMatches->free();
	$textMatches->free();

	if ( $num == 0 ) {
		$wgOut->addWikiText( wfMsg( 'nonefound' ) );
	}
	if( $showPager ) {
		$wgOut->addHTML( "<p>{$prevnext}</p>\n" );
	}
	$wgOut->addHTML( $this->powerSearchBox( $term ) );
}
|
||||
|
||||
#------------------------------------------------------------------
|
||||
# Private methods below this line
|
||||
|
||||
/**
 * Set the page title, subtitle and indexing policy shared by every
 * search page.
 *
 * @param string $term search text; HTML-escaped before it is placed
 *  in the subtitle message
 * @access private
 */
function setupPage( $term ) {
	global $wgOut;

	$heading = wfMsg( 'searchresults' );
	$wgOut->setPageTitle( $heading );

	$subtitle = wfMsg( 'searchquery', htmlspecialchars( $term ) );
	$wgOut->setSubtitle( $subtitle );

	$wgOut->setArticleRelated( false );
	$wgOut->setRobotpolicy( 'noindex,nofollow' );
}
|
||||
|
||||
/**
 * Load the search engine class that fits the configured database
 * backend and return an instance set up with this page's limit,
 * offset and namespace list.
 *
 * @return SearchEngine
 * @access private
 */
function &getSearchEngine() {
	global $wgDBtype, $wgDBmysql4;

	if( $wgDBtype == 'mysql' ) {
		if( $wgDBmysql4 ) {
			$class = 'SearchMySQL4';
			require_once( 'SearchMySQL4.php' );
		} else {
			# Spelled like the file name and like the other callers of
			# this class (PHP itself resolves class names
			# case-insensitively).
			$class = 'SearchMySQL3';
			require_once( 'SearchMySQL3.php' );
		}
	} else {
		# Nothing is loaded for this branch here; presumably the class
		# is declared in SearchEngine.php - TODO confirm.
		$class = 'SearchEngineDummy';
	}

	$search = new $class( wfGetDB( DB_SLAVE ) );
	$search->setLimitOffset( $this->limit, $this->offset );
	$search->setNamespaces( $this->namespaces );
	return $search;
}
|
||||
|
||||
/**
 * Collect the namespaces the given user has chosen to search by
 * default: every searchable namespace whose 'searchNs<index>' option
 * is set to a truthy value.
 *
 * @param User $user
 * @return array list of namespace index numbers
 * @access private
 */
function userNamespaces( &$user ) {
	$selected = array();
	foreach( array_keys( SearchEngine::searchableNamespaces() ) as $index ) {
		if( !$user->getOption( 'searchNs' . $index ) ) {
			continue;
		}
		$selected[] = $index;
	}
	return $selected;
}
|
||||
|
||||
/**
 * Read the "power search" namespace checkboxes from the request:
 * every searchable namespace whose 'ns<index>' value is present.
 *
 * @param WebRequest $request
 * @return array list of namespace index numbers
 * @access private
 */
function powerSearch( &$request ) {
	$selected = array();
	foreach( array_keys( SearchEngine::searchableNamespaces() ) as $index ) {
		if( !$request->getCheck( 'ns' . $index ) ) {
			continue;
		}
		$selected[] = $index;
	}
	return $selected;
}
|
||||
|
||||
/**
 * Rebuild the 'power search' request parameters for use in links:
 * one 'ns<index>' flag per selected namespace, then 'redirs' and
 * 'searchx'.
 *
 * @return array parameter name => value
 * @access private
 */
function powerSearchOptions() {
	$params = array();
	foreach( $this->namespaces as $index ) {
		$params["ns{$index}"] = 1;
	}

	if( $this->searchRedirects ) {
		$params['redirs'] = 1;
	} else {
		$params['redirs'] = 0;
	}
	$params['searchx'] = 1;

	return $params;
}
|
||||
|
||||
/**
 * Render a result set as an ordered list whose numbering starts just
 * after the current offset.
 *
 * @param ResultWrapper $matches
 * @param string $terms partial regexp for highlighting terms
 * @return string HTML
 * @access private
 */
function showMatches( &$matches, $terms ) {
	$firstNumber = $this->offset + 1;

	$items = '';
	while( $hit = $matches->fetchObject() ) {
		$items .= $this->showHit( $hit, $terms );
	}

	return "<ol start='{$firstNumber}'>\n" . $items . "</ol>\n";
}
|
||||
|
||||
/**
 * Format a single hit: a link to the page, its size in bytes, and an
 * extract of matching lines with the search terms highlighted.
 *
 * The user's 'contextlines' option caps how many lines of the page
 * are examined (not how many are shown); 'contextchars' is the
 * length passed to the language object's truncate() for the text on
 * either side of a match. Blank options fall back to 5 and 50.
 *
 * @param object $row needs cur_namespace, cur_title and cur_text
 * @param string $terms partial regexp for highlighting terms
 * @return string HTML list item
 * @access private
 */
function showHit( $row, $terms ) {
	global $wgUser, $wgContLang;

	$title = Title::makeName( $row->cur_namespace, $row->cur_title );
	if( is_null( $title ) ) {
		return "<!-- Broken link in search result -->\n";
	}
	$skin = $wgUser->getSkin();

	$linesLeft = $wgUser->getOption( 'contextlines' );
	if ( '' == $linesLeft ) {
		$linesLeft = 5;
	}
	$charsEachSide = $wgUser->getOption( 'contextchars' );
	if ( '' == $charsEachSide ) {
		$charsEachSide = 50;
	}

	$link = $skin->makeKnownLink( $title, '' );
	$size = wfMsg( 'nbytes', strlen( $row->cur_text ) );

	$findPattern = "/(.*)($terms)(.*)/i";
	$markPattern = '/(' . $terms . ")/i";

	$snippets = '';
	$lineNumber = 0;
	foreach ( explode( "\n", $row->cur_text ) as $text ) {
		if ( 0 == $linesLeft ) {
			break;
		}
		# Every examined line uses up budget, matching or not.
		--$linesLeft;
		++$lineNumber;
		if ( ! preg_match( $findPattern, $text, $parts ) ) {
			continue;
		}

		$before = $wgContLang->truncate( $parts[1], -$charsEachSide, '...' );
		$after = ( count( $parts ) < 3 )
			? ''
			: $wgContLang->truncate( $parts[3], $charsEachSide, '...' );

		# Escape first, then wrap the terms, so that only the
		# highlighting markup is emitted as HTML.
		$escaped = htmlspecialchars( $before . $parts[2] . $after );
		$marked = preg_replace( $markPattern,
			"<span class='searchmatch'>\\1</span>", $escaped );

		$snippets .= "<br /><small>{$lineNumber}: {$marked}</small>\n";
	}

	return "<li>{$link} ({$size}){$snippets}</li>\n";
}
|
||||
|
||||
/**
 * Build the "power search" form: one checkbox per searchable
 * namespace, a redirects checkbox, the search field and the submit
 * button, laid out by the 'powersearchtext' message.
 *
 * @param string $term text to pre-fill the search field with
 * @return string HTML
 * @access private
 */
function powerSearchBox( $term ) {
	$boxes = '';
	foreach( SearchEngine::searchableNamespaces() as $ns => $name ) {
		$attr = '';
		if( in_array( $ns, $this->namespaces ) ) {
			$attr = ' checked="checked"';
		}

		$label = str_replace( '_', ' ', $name );
		if( '' == $label ) {
			$label = wfMsg( 'blanknamespace' );
		}

		$boxes .= " <label><input type='checkbox' value=\"1\" name=\"" .
			"ns{$ns}\"{$attr} />{$label}</label>\n";
	}

	$attr = '';
	if( $this->searchRedirects ) {
		$attr = ' checked="checked"';
	}
	$redirect = "<input type='checkbox' value='1' name=\"redirs\"{$attr} />\n";

	$searchField = "<input type='text' name=\"search\" value=\"" .
		htmlspecialchars( $term ) ."\" width=\"80\" />\n";

	$buttonLabel = htmlspecialchars( wfMsg('powersearch') );
	$searchButton = '<input type="submit" name="searchx" value="' .
		$buttonLabel . "\" />\n";

	$formBody = wfMsg( 'powersearchtext',
		$boxes, $redirect, $searchField,
		'', '', '', '', '', # Dummy placeholders
		$searchButton );

	$searchPage = Title::makeTitle( NS_SPECIAL, 'Search' );
	$action = $searchPage->escapeLocalURL();

	return "<br /><br />\n<form id=\"powersearch\" method=\"get\" " .
		"action=\"$action\">\n{$formBody}\n</form>\n";
}
|
||||
}
|
||||
|
||||
?>
|
||||
|
|
|
|||
11
index.php
11
index.php
|
|
@ -62,16 +62,9 @@ if ( !is_null( $wgTitle ) && !$wgTitle->userCanRead() ) {
|
|||
wfProfileIn( "main-action" );
|
||||
$search = $wgRequest->getText( 'search' );
|
||||
if( !is_null( $search ) && $search !== '' ) {
|
||||
require_once( 'includes/SearchEngine.php' );
|
||||
require_once( 'includes/SpecialSearch.php' );
|
||||
$wgTitle = Title::makeTitle( NS_SPECIAL, "Search" );
|
||||
$searchEngine = new SearchEngine( $search );
|
||||
if( $wgRequest->getVal( 'fulltext' ) ||
|
||||
!is_null( $wgRequest->getVal( 'offset' ) ) ||
|
||||
!is_null ($wgRequest->getVal( 'searchx' ) ) ) {
|
||||
$searchEngine->showResults();
|
||||
} else {
|
||||
$searchEngine->goResult();
|
||||
}
|
||||
wfSpecialSearch();
|
||||
} else if( !$wgTitle or $wgTitle->getDBkey() == "" ) {
|
||||
$wgTitle = Title::newFromText( wfMsgForContent( "badtitle" ) );
|
||||
$wgOut->errorpage( "badtitle", "badtitletext" );
|
||||
|
|
|
|||
6
tests/.cvsignore
Normal file
6
tests/.cvsignore
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
LocalTestSettings.php
|
||||
*~
|
||||
bin
|
||||
.classpath
|
||||
.project
|
||||
project.index
|
||||
|
|
@ -4,9 +4,33 @@ define( "MEDIAWIKI", true );
|
|||
|
||||
require_once( 'PHPUnit.php' );
|
||||
|
||||
# Connection settings for each database backend the tests may use.
# Everything is null here; LocalTestSettings.php, when present, is
# included afterwards and may overwrite them.
$testOptions = array();
$testOptions['mysql3'] = array(
	'server' => null,
	'user' => null,
	'password' => null,
	'database' => null );
$testOptions['mysql4'] = array(
	'server' => null,
	'user' => null,
	'password' => null,
	'database' => null );
$testOptions['postgresql'] = array(
	'server' => null,
	'user' => null,
	'password' => null,
	'database' => null );

if( file_exists( 'LocalTestSettings.php' ) ) {
	include( './LocalTestSettings.php' );
}

# Test suites to run; each name is also the name of the file
# (plus '.php') that gets loaded for it.
$tests = array();
$tests[] = 'GlobalTest';
$tests[] = 'DatabaseTest';
$tests[] = 'SearchMySQL3Test';
$tests[] = 'SearchMySQL4Test';
|
||||
foreach( $tests as $test ) {
|
||||
require_once( $test . '.php' );
|
||||
|
|
@ -15,4 +39,51 @@ foreach( $tests as $test ) {
|
|||
echo $result->toString();
|
||||
}
|
||||
|
||||
/**
 * Open a connection with the test settings for the given backend and
 * create temporary, prefixed copies of the listed tables on it.
 *
 * Sets the global $wgDBprefix to 'parsertest' as a side effect, and
 * terminates the script if a table definition cannot be rewritten
 * into its temporary form.
 *
 * @param string $serverType key into $testOptions
 * @param array $tables unprefixed names of the tables to copy
 * @return object the open Database, or null if the connection could
 *  not be opened
 */
function &buildTestDatabase( $serverType, $tables ) {
	global $testOptions, $wgDBprefix;
	$wgDBprefix = 'parsertest';
	$db =& new Database(
		$testOptions[$serverType]['server'],
		$testOptions[$serverType]['user'],
		$testOptions[$serverType]['password'],
		$testOptions[$serverType]['database'] );

	if( !$db->isOpen() ) {
		// Something amiss. This function returns by reference, so a
		// variable has to be handed back rather than a bare null.
		$db = null;
		return $db;
	}

	# version_compare() orders version numbers component by component;
	# a plain string comparison would rank e.g. "10.0" below "4.1".
	$oldMySQL = version_compare( $db->getServerVersion(), '4.1', '<' )
		&& stristr( $db->getSoftwareLink(), 'MySQL' );

	if ( !$oldMySQL ) {
		# Database that supports CREATE TABLE ... LIKE
		foreach ( $tables as $tbl ) {
			$newTableName = $db->tableName( $tbl );
			$tableName = $tbl;
			$db->query("CREATE TEMPORARY TABLE $newTableName (LIKE $tableName INCLUDING DEFAULTS)");
		}
	} else {
		# Hack for MySQL versions < 4.1, which don't support
		# "CREATE TABLE ... LIKE". Note that
		# "CREATE TEMPORARY TABLE ... SELECT * FROM ... LIMIT 0"
		# would not create the indexes we need....
		foreach ( $tables as $tbl ) {
			$res = $db->query("SHOW CREATE TABLE $tbl");
			$row = $db->fetchRow($res);
			$create = $row[1];
			$create_tmp = preg_replace('/CREATE TABLE `(.*?)`/', 'CREATE TEMPORARY TABLE `'
				. $wgDBprefix . '\\1`', $create);
			if ( $create === $create_tmp ) {
				# Couldn't do replacement
				die("could not create temporary table $tbl");
			}
			$db->query($create_tmp);
		}
	}
	return $db;
}
|
||||
|
||||
?>
|
||||
102
tests/SearchEngineTest.php
Normal file
102
tests/SearchEngineTest.php
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
<?php
|
||||
|
||||
$IP = '..';
|
||||
require_once( 'PHPUnit.php' );
|
||||
require_once( '../includes/Defines.php' );
|
||||
require_once( '../includes/DefaultSettings.php' );
|
||||
require_once( '../includes/Profiling.php' );
|
||||
require_once( '../includes/MagicWord.php' );
|
||||
require_once( '../languages/Language.php' );
|
||||
require_once( '../languages/LanguageUtf8.php' );
|
||||
|
||||
require_once( '../includes/SearchEngine.php' );
|
||||
|
||||
/**
 * Backend-independent search tests. Subclasses are expected to set
 * $this->db and $this->search in setUp(); each test reports a failure
 * and does nothing further when no database is available.
 */
class SearchEngine_TestCase extends PHPUnit_TestCase {
	var $db, $search;

	/**
	 * Fill the 'cur' and 'searchindex' tables with ten fixture pages.
	 * The table name is passed to safeQuery() as the argument for the
	 * '!' placeholder in each statement.
	 */
	function insertSearchData() {
		$pagesSql = <<<END
INSERT INTO ! (cur_id,cur_namespace,cur_title,cur_text)
VALUES (1, 0, 'Main_Page', 'This is a main page'),
(2, 1, 'Main_Page', 'This is a talk page to the main page, see [[smithee]]'),
(3, 0, 'Smithee', 'A smithee is one who smiths. See also [[Alan Smithee]]'),
(4, 1, 'Smithee', 'This article sucks.'),
(5, 0, 'Unrelated_page', 'Nothing in this page is about the S word.'),
(6, 0, 'Another_page', 'This page also is unrelated.'),
(7, 4, 'Help', 'Help me!'),
(8, 0, 'Thppt', 'Blah blah'),
(9, 0, 'Alan_Smithee', 'yum'),
(10, 0, 'Pages', 'are food')
END;
		$this->db->safeQuery( $pagesSql, $this->db->tableName( 'cur' ) );

		$indexSql = <<<END
INSERT INTO ! (si_page,si_title,si_text)
VALUES (1, 'main page', 'this is a main page'),
(2, 'main page', 'this is a talk page to the main page, see smithee'),
(3, 'smithee', 'a smithee is one who smiths see also alan smithee'),
(4, 'smithee', 'this article sucks'),
(5, 'unrelated page', 'nothing in this page is about the s word'),
(6, 'another page', 'this page also is unrelated'),
(7, 'help', 'help me'),
(8, 'thppt', 'blah blah'),
(9, 'alan smithee', 'yum'),
(10, 'pages', 'are food')
END;
		$this->db->safeQuery( $indexSql, $this->db->tableName( 'searchindex' ) );
	}

	/**
	 * Drain a result set into a list of integer page ids, in the
	 * order the rows come back, then free it.
	 *
	 * @param object $results result set offering fetchObject() and free()
	 * @return array
	 */
	function fetchIds( &$results ) {
		$ids = array();
		for( $row = $results->fetchObject(); $row; $row = $results->fetchObject() ) {
			$ids[] = intval( $row->cur_id );
		}
		$results->free();
		return $ids;
	}

	/** Text search with the engine's default namespace selection. */
	function testTextSearch() {
		$this->assertFalse( is_null( $this->db ), "Can't find a database to test with." );
		if( is_null( $this->db ) ) {
			return;
		}
		$this->assertEquals(
			array( 3 ),
			$this->fetchIds( $this->search->searchText( 'smithee' ) ),
			"Plain search failed" );
	}

	/** Text search across namespaces 0, 1 and 4. */
	function testTextPowerSearch() {
		$this->assertFalse( is_null( $this->db ), "Can't find a database to test with." );
		if( is_null( $this->db ) ) {
			return;
		}
		$this->search->setNamespaces( array( 0, 1, 4 ) );
		$this->assertEquals(
			array( 2, 3 ),
			$this->fetchIds( $this->search->searchText( 'smithee' ) ),
			"Power search failed" );
	}

	/** Title search with the engine's default namespace selection. */
	function testTitleSearch() {
		$this->assertFalse( is_null( $this->db ), "Can't find a database to test with." );
		if( is_null( $this->db ) ) {
			return;
		}
		$this->assertEquals(
			array( 3, 9 ),
			$this->fetchIds( $this->search->searchTitle( 'smithee' ) ),
			"Title search failed" );
	}

	/** Title search across namespaces 0, 1 and 4. */
	function testTextTitlePowerSearch() {
		$this->assertFalse( is_null( $this->db ), "Can't find a database to test with." );
		if( is_null( $this->db ) ) {
			return;
		}
		$this->search->setNamespaces( array( 0, 1, 4 ) );
		$this->assertEquals(
			array( 3, 4, 9 ),
			$this->fetchIds( $this->search->searchTitle( 'smithee' ) ),
			"Title power search failed" );
	}

}
|
||||
|
||||
|
||||
?>
|
||||
34
tests/SearchMySQL3Test.php
Normal file
34
tests/SearchMySQL3Test.php
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
<?php
|
||||
|
||||
require_once( 'SearchEngineTest.php' );
|
||||
require_once( '../includes/SearchMySQL3.php' );
|
||||
|
||||
/**
 * Runs the shared search tests against the SearchMySQL3 engine,
 * using the 'mysql3' test database settings.
 */
class SearchMySQL3Test extends SearchEngine_TestCase {
	var $db;

	function SearchMySQL3Test( $name ) {
		$this->PHPUnit_TestCase( $name );
	}

	/**
	 * Install a content language object, build the temporary tables
	 * and, when a database is available, load the fixture rows. The
	 * engine is created either way.
	 */
	function setUp() {
		global $wgContLang;
		$wgContLang = new LanguageUtf8;

		$tables = array( 'cur', 'searchindex' );
		$this->db =& buildTestDatabase( 'mysql3', $tables );
		if( $this->db ) {
			$this->insertSearchData();
		}
		$this->search =& new SearchMySQL3( $this->db );
	}

	/** Close the connection, if any, and drop both fixtures. */
	function tearDown() {
		if( !is_null( $this->db ) ) {
			$this->db->close();
		}
		unset( $this->db, $this->search );
	}

}
|
||||
|
||||
?>
|
||||
34
tests/SearchMySQL4Test.php
Normal file
34
tests/SearchMySQL4Test.php
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
<?php
|
||||
|
||||
require_once( 'SearchEngineTest.php' );
|
||||
require_once( '../includes/SearchMySQL4.php' );
|
||||
|
||||
/**
 * Runs the shared search tests against the SearchMySQL4 engine,
 * using the 'mysql4' test database settings.
 */
class SearchMySQL4Test extends SearchEngine_TestCase {
	var $db;

	function SearchMySQL4Test( $name ) {
		$this->PHPUnit_TestCase( $name );
	}

	/**
	 * Install a content language object, build the temporary tables
	 * and, when a database is available, load the fixture rows. The
	 * engine is created either way.
	 */
	function setUp() {
		global $wgContLang;
		$wgContLang = new LanguageUtf8;

		$tables = array( 'cur', 'searchindex' );
		$this->db =& buildTestDatabase( 'mysql4', $tables );
		if( $this->db ) {
			$this->insertSearchData();
		}
		$this->search =& new SearchMySQL4( $this->db );
	}

	/** Close the connection, if any, and drop both fixtures. */
	function tearDown() {
		if( !is_null( $this->db ) ) {
			$this->db->close();
		}
		unset( $this->db, $this->search );
	}

}
|
||||
|
||||
?>
|
||||
Loading…
Reference in a new issue