wiki.techinc.nl/includes/SearchEngine.php
Matthias Jordan 01515e038f Implemented the "default namespaces for search" feature. This includes
changes to the preferences form, a new key in the LanguageXX.php files
(defaultns) and a new global config variable in DefaultSettings.php
($wgNamespacesToBeSearchedDefault).

I also killed a bug that produced an erroneous SQL command when the user
failed to check any namespaces to search in the search form. The updated
implementation searches the main namespace if no checkbox is checked.
2003-07-01 08:27:32 +00:00

441 lines
12 KiB
PHP

<?
# See search.doc
class SearchEngine {
# $mUsertext: sanitized copy of the query text, set by the constructor.
# $mSearchterms: regex fragments (one per search word) that showHit()
# joins with "|" to locate and highlight matches.
/* private */ var $mUsertext, $mSearchterms;
# SQL conditions matching the query against si_title and si_text; both
# are filled in by parseQuery() / parseQuery4().
/* private */ var $mTitlecond, $mTextcond;
# When false, searchRedirects() adds a condition that excludes redirects.
var $doSearchRedirects = true;
# Extra name => value pairs that showResults() appends to the query
# string handed to wfViewPrevNext().
var $addtoquery = array();
# Namespace indexes ticked in the power search form; turned into an
# SQL IN (...) list by queryNamespaces().
var $namespacesToSearch = array();
# Not read or written by any method visible in this file.
var $alternateTitle;
# Constructor: stores a sanitized copy of the user's query.
#
# The query is echoed back to the user later, so every character that is
# not a legal search character or a parenthesis is replaced by a space.
# When $wgDBmysql4 is set, a few extra operator characters are allowed too.
function SearchEngine( $text )
{
	global $wgDBmysql4;

	$allowed = SearchEngine::legalSearchChars() . "()";
	if ( $wgDBmysql4 ) {
		$allowed .= "\"~<>*+-";
	}

	$pattern = "/[^" . $allowed . "]/";
	$cleaned = preg_replace( $pattern, " ", $text );

	$this->mUsertext = trim( $cleaned );
	$this->mSearchterms = array();
}
# Returns the SQL fragment that restricts a search to the namespaces in
# $this->namespacesToSearch. When that list is empty the search falls
# back to namespace 0 so the IN (...) list is never empty.
function queryNamespaces()
{
	$list = implode( ",", $this->namespacesToSearch );
	$inList = ( $list == "" ) ? "0" : $list;

	return "AND cur_namespace IN (" . $inList . ")";
}
# Returns the SQL fragment that filters out redirect pages, or an empty
# string when redirects are to be included in the results.
function searchRedirects()
{
	return $this->doSearchRedirects ? "" : "AND cur_is_redirect=0 ";
}
# Returns the initial state of the power search checkbox for namespace
# index $i: the user's "searchNs<i>" option when $wgUser->getRights() is
# non-empty, otherwise the site-wide default from
# $wgNamespacesToBeSearchedDefault.
#
# A namespace with no site-wide default configured yields 0 (unchecked)
# instead of reading an undefined array index.
/* private */ function initNamespaceCheckbox( $i )
{
	global $wgUser, $wgNamespacesToBeSearchedDefault;

	# NOTE(review): a non-empty rights list is used as the "is logged in"
	# test here -- confirm that logged-in users always have rights.
	if ( $wgUser->getRights() ) {
		// User is logged in so we retrieve his default namespaces
		return $wgUser->getOption( "searchNs" . $i );
	}

	// User is not logged in so we give him the global default namespaces
	if ( isset( $wgNamespacesToBeSearchedDefault[$i] ) ) {
		return $wgNamespacesToBeSearchedDefault[$i];
	}
	return 0;
}
# Builds the HTML of the "power search" form and, as a side effect,
# records the chosen search options on this object:
#   - $this->namespacesToSearch gets the index of every ticked namespace,
#   - $this->addtoquery gets the parameters paging links must carry,
#   - $this->doSearchRedirects is cleared when the form was submitted
#     with the "list redirects" box unticked.
#
# The form counts as submitted when the request has a "searchx"
# parameter; otherwise the checkboxes start from initNamespaceCheckbox()
# and redirects are listed.
#
# Returns the form HTML.
function powersearch()
{
	global $wgLang;

	$search = isset( $_REQUEST['search'] ) ? $_REQUEST['search'] : "";
	$submitted = isset( $_REQUEST['searchx'] );

	if ( $submitted ) {
		$listredirs = isset( $_REQUEST['redirs'] ) ? $_REQUEST['redirs'] : 0;
		# Carry the marker into the prev/next links; without it the
		# next page would be treated as a first visit and fall back to
		# the default namespaces and redirect setting.
		$this->addtoquery["searchx"] = 1;
	} else { /* First time here */
		$listredirs = 1;
	}
	# Kept for backward compatibility; nothing in this class reads it.
	$this->checkboxes["searchx"] = 1;

	$ret = wfMsg( "powersearchtext" );

	# Determine namespace checkboxes
	$ns = $wgLang->getNamespaces();
	array_shift( $ns ); /* Skip "Special" */
	$nsCount = count( $ns );

	$r1 = "";
	for ( $i = 0; $i < $nsCount; ++$i ) {
		# One checkbox is emitted per namespace, so one is read back per
		# namespace as well (not just a fixed ns0..ns7).
		if ( $submitted ) {
			$ticked = isset( $_REQUEST["ns{$i}"] ) ? $_REQUEST["ns{$i}"] : 0;
		} else {
			$ticked = $this->initNamespaceCheckbox( $i );
		}

		$checked = "";
		if ( $ticked == 1 ) {
			$checked = " checked";
			$this->addtoquery["ns{$i}"] = 1;
			array_push( $this->namespacesToSearch, $i );
		}

		$name = str_replace( "_", " ", $ns[$i] );
		if ( "" == $name ) { $name = "(Main)"; }

		if ( 0 != $i ) { $r1 .= " "; }
		$r1 .= "<input type=checkbox value=\"1\" name=\"" .
		  "ns{$i}\"{$checked}>{$name}\n";
	}
	$ret = str_replace( "$1", $r1, $ret );

	# List redirects checkbox
	$checked = "";
	if ( $listredirs == 1 ) {
		$this->addtoquery["redirs"] = 1;
		$checked = " checked";
	}
	$r2 = "<input type=checkbox value=1 name=\"redirs\"{$checked}>\n";
	$ret = str_replace( "$2", $r2, $ret );

	# Search field
	$r3 = "<input type=text name=\"search\" value=\"" .
	  htmlspecialchars( $search ) . "\" width=80>\n";
	$ret = str_replace( "$3", $r3, $ret );

	# Searchx button
	$r9 = "<input type=submit name=\"searchx\" value=\"" .
	  wfMsg( "powersearch" ) . "\">\n";
	$ret = str_replace( "$9", $r9, $ret );

	$ret = "<br><br>\n<form id=\"powersearch\" method=\"get\" " .
	  "action=\"" . wfLocalUrl( "" ) . "\">\n{$ret}\n</form>\n";

	if ( $submitted && ! $listredirs ) {
		$this->doSearchRedirects = false;
	}
	return $ret;
}
# Runs the title search and (unless $wgDisableTextSearch is set) the
# full-text search for the current query and writes the results page to
# $wgOut: heading, paging links, title matches, text matches and finally
# the power search form.
#
# If parseQuery() leaves either SQL condition empty, the "badquery"
# message is shown and nothing is queried.
function showResults()
{
	global $wgUser, $wgOut, $wgDisableTextSearch;
	$fname = "SearchEngine::showResults";

	$search = isset( $_REQUEST['search'] ) ? $_REQUEST['search'] : "";

	# Must run before the queries are built: it fills in
	# namespacesToSearch, addtoquery and doSearchRedirects.
	$powersearch = $this->powersearch();

	$wgOut->setPageTitle( wfMsg( "searchresults" ) );
	$q = wfMsg( "searchquery", htmlspecialchars( $this->mUsertext ) );
	$wgOut->setSubtitle( $q );
	$wgOut->setArticleFlag( false );
	$wgOut->setRobotpolicy( "noindex,nofollow" );

	$sk = $wgUser->getSkin();
	$text = wfMsg( "searchresulttext", $sk->makeKnownLink(
	  wfMsg( "searchhelppage" ), wfMsg( "searchingwikipedia" ) ) );
	$wgOut->addHTML( $text );

	$this->parseQuery();
	if ( "" == $this->mTitlecond || "" == $this->mTextcond ) {
		$wgOut->addHTML( "<h2>" . wfMsg( "badquery" ) . "</h2>\n" .
		  "<p>" . wfMsg( "badquerytext" ) );
		return;
	}
	list( $limit, $offset ) = wfCheckLimits( 20, "searchlimit" );

	$searchnamespaces = $this->queryNamespaces();
	$redircond = $this->searchRedirects();

	# Both queries differ only in the match condition.
	$select = "SELECT cur_id,cur_namespace,cur_title," .
	  "cur_text FROM cur,searchindex " .
	  "WHERE cur_id=si_page AND ";
	$tail = " {$searchnamespaces} {$redircond} " .
	  "LIMIT {$offset}, {$limit}";

	$res1 = wfQuery( $select . $this->mTitlecond . $tail, $fname );
	$num = wfNumRows( $res1 );

	if ( $wgDisableTextSearch ) {
		$res2 = 0;
	} else {
		$res2 = wfQuery( $select . $this->mTextcond . $tail, $fname );
		$num = $num + wfNumRows( $res2 );
	}

	if ( $num == $limit ) {
		$top = wfShowingResults( $offset, $limit );
	} else {
		$top = wfShowingResultsNum( $offset, $limit, $num );
	}
	$wgOut->addHTML( "<p>{$top}\n" );

	# For powersearch: carry the form state along in the paging links
	$a2l = "";
	foreach ( $this->addtoquery as $key => $value ) {
		$a2l .= "&{$key}={$value}";
	}

	$sl = wfViewPrevNext( $offset, $limit, "",
	  "search=" . wfUrlencode( $this->mUsertext ) . $a2l );
	$wgOut->addHTML( "<br>{$sl}\n" );

	$foundsome = $this->showHitList( $res1, $offset,
	  "titlematches", "notitlematches" );

	if ( $wgDisableTextSearch ) {
		$wgOut->addHTML( str_replace( "$1",
		  htmlspecialchars( $search ), wfMsg( "searchdisabled" ) ) );
	} else {
		if ( $this->showHitList( $res2, $offset,
		  "textmatches", "notextmatches" ) ) {
			$foundsome = true;
		}
	}

	if ( ! $foundsome ) {
		$wgOut->addHTML( "<p>" . wfMsg( "nonefound" ) . "\n" );
	}
	$wgOut->addHTML( "<p>{$sl}\n" );
	$wgOut->addHTML( $powersearch );
}

# Writes one result section to $wgOut: the $noneMsg heading when $res is
# empty, otherwise the $foundMsg heading followed by a numbered list with
# one showHit() entry per row. The result set is freed in both cases.
# Returns true when at least one row was shown.
/* private */ function showHitList( $res, $offset, $foundMsg, $noneMsg )
{
	global $wgOut;

	$rows = wfNumRows( $res );
	if ( 0 == $rows ) {
		$wgOut->addHTML( "<h2>" . wfMsg( $noneMsg ) . "</h2>\n" );
	} else {
		$off = $offset + 1;
		$wgOut->addHTML( "<h2>" . wfMsg( $foundMsg ) .
		  "</h2>\n<ol start='{$off}'>" );
		while ( $row = wfFetchObject( $res ) ) {
			$this->showHit( $row );
		}
		$wgOut->addHTML( "</ol>\n" );
	}
	wfFreeResult( $res );

	return ( 0 != $rows );
}
# Returns the body of a PCRE character class (the part between [ and ])
# listing the characters allowed in a search word: ASCII letters, digits,
# underscore, apostrophe, hyphen and the bytes 0x80-0xFF.
function legalSearchChars()
{
	return "A-Za-z_'0-9\\x80-\\xFF\\-";
}
# Translates the sanitized query in $this->mUsertext into the SQL
# conditions $this->mTitlecond and $this->mTextcond, and collects the
# regex fragments used for highlighting in $this->mSearchterms.
#
# With $wgDBmysql4 set the work is delegated to parseQuery4(). Otherwise
# the query is split into words; "and", "or", "not" and parentheses are
# passed through as SQL operators, words shorter than $wgDBminWordLen or
# found in the stop list are dropped, and adjacent words are joined with
# an implicit AND.
#
# All three properties are reset first, so calling this more than once
# for the same query (goResult() followed by showResults()) neither
# duplicates search terms nor keeps stale conditions. When no usable
# word remains, both conditions are left empty.
#
# NOTE(review): operators are copied through unchecked, so a query with a
# dangling operator or unbalanced parenthesis still appears to produce an
# invalid SQL condition -- confirm and validate if needed.
function parseQuery()
{
	global $wgDBminWordLen, $wgLang, $wgDBmysql4;

	if ( $wgDBmysql4 ) {
		# Use cleaner boolean search if available
		return $this->parseQuery4();
	}

	$this->mSearchterms = array();
	$this->mTitlecond = "";
	$this->mTextcond = "";

	# Make parentheses stand-alone tokens and collapse whitespace
	$q = preg_replace( "/([()])/", " \\1 ", $this->mUsertext );
	$q = preg_replace( "/\\s+/", " ", $q );
	$w = explode( " ", strtolower( trim( $q ) ) );

	$last = $cond = "";
	foreach ( $w as $word ) {
		$word = $wgLang->stripForSearch( $word );
		if ( "and" == $word || "or" == $word || "not" == $word
		  || "(" == $word || ")" == $word ) {
			$cond .= " " . strtoupper( $word );
			$last = "";
		} else if ( strlen( $word ) < $wgDBminWordLen ) {
			continue;
		} else if ( FulltextStoplist::inList( $word ) ) {
			continue;
		} else {
			if ( "" != $last ) { $cond .= " AND"; }
			$cond .= " (MATCH (##field##) AGAINST ('" .
			  wfStrencode( $word ) . "'))";
			$last = $word;
			# Quote the word so it is always taken literally by the
			# highlighting regex built in showHit().
			array_push( $this->mSearchterms,
			  "\\b" . preg_quote( $word, "/" ) . "\\b" );
		}
	}
	if ( 0 == count( $this->mSearchterms ) ) { return; }

	$this->mTitlecond = "(" . str_replace( "##field##",
	  "si_title", $cond ) . " )";
	$this->mTextcond = "(" . str_replace( "##field##",
	  "si_text", $cond ) . " AND (cur_is_redirect=0) )";
}
# Boolean-mode variant of parseQuery(): hands the sanitized query
# unchanged to MATCH ... AGAINST (... IN BOOLEAN MODE) for both the title
# and the text condition.
#
# Also fills $this->mSearchterms with one regex fragment per query word
# so showHit() has something to highlight; words prefixed with "-"
# (excluded terms) are skipped. An empty query leaves both conditions
# empty, which showResults() reports as a bad query.
#
# NOTE: the words are not passed through $wgLang->stripForSearch() here.
function parseQuery4()
{
	# FIXME: not ready yet! Do not use.
	$this->mSearchterms = array();
	$this->mTitlecond = "";
	$this->mTextcond = "";

	$q = $this->mUsertext;
	if ( "" == $q ) { return; }

	$lc = SearchEngine::legalSearchChars();
	if ( preg_match_all( "/([-+<>~]?)([{$lc}]+)/", $q, $matches,
	  PREG_SET_ORDER ) ) {
		foreach ( $matches as $m ) {
			if ( "-" == $m[1] ) { continue; }
			# $lc contains the hyphen; don't let stray ones into a term
			$word = trim( $m[2], "-" );
			if ( "" == $word ) { continue; }
			array_push( $this->mSearchterms,
			  "\\b" . preg_quote( $word, "/" ) . "\\b" );
		}
	}

	$qq = wfStrencode( $q );
	$this->mTitlecond = " MATCH(si_title) AGAINST('$qq' IN BOOLEAN MODE)";
	$this->mTextcond = " (MATCH(si_text) AGAINST('$qq' IN BOOLEAN MODE) AND cur_is_redirect=0)";
}
# Writes one search hit to $wgOut as a list item: a link to the page, its
# size in bytes, and the matching lines among the first "contextlines"
# lines of the text, each trimmed to "contextchars" characters on either
# side of the last match and with every search term shown in red.
#
# $row must provide cur_namespace, cur_title and cur_text.
#
# When the term list is empty only the link and size are shown, because
# an empty alternation would match everywhere.
function showHit( $row )
{
	global $wgUser, $wgOut;

	$t = Title::makeName( $row->cur_namespace, $row->cur_title );
	$sk = $wgUser->getSkin();

	$contextlines = $wgUser->getOption( "contextlines" );
	if ( "" == $contextlines ) { $contextlines = 5; }
	$contextchars = $wgUser->getOption( "contextchars" );
	if ( "" == $contextchars ) { $contextchars = 50; }

	$link = $sk->makeKnownLink( $t, "" );
	$size = str_replace( "$1", strlen( $row->cur_text ), wfMsg( "nbytes" ) );
	$wgOut->addHTML( "<li>{$link} ({$size})" );

	$terms = implode( "|", $this->mSearchterms );
	if ( $terms !== "" ) {
		$pat1 = "/(.*)(" . $terms . ")(.*)/i";
		$pat2 = "/(" . $terms . ")/i";

		$lineno = 0;
		foreach ( explode( "\n", $row->cur_text ) as $line ) {
			if ( 0 == $contextlines ) { break; }
			--$contextlines;
			++$lineno;
			if ( ! preg_match( $pat1, $line, $m ) ) { continue; }

			$pre = $m[1];
			$post = isset( $m[3] ) ? $m[3] : "";
			if ( 0 == $contextchars ) {
				$pre = "...";
				$post = "...";
			} else {
				if ( strlen( $pre ) > $contextchars ) {
					$pre = "..." . substr( $pre, -$contextchars );
				}
				if ( strlen( $post ) > $contextchars ) {
					$post = substr( $post, 0, $contextchars ) . "...";
				}
			}

			# Highlight on the raw text and escape afterwards; doing it
			# the other way round lets terms such as "amp" or "lt"
			# match inside the entities produced by escaping.
			$html = $this->highlightTerms( $pre . $m[2] . $post, $pat2 );
			$wgOut->addHTML( "<br><small>{$lineno}: {$html}</small>\n" );
		}
	}
	$wgOut->addHTML( "</li>\n" );
}

# Returns $text HTML-escaped, with every match of $pattern wrapped in a
# red <font> tag. $pattern must contain exactly one capturing group that
# encloses the whole match.
/* private */ function highlightTerms( $text, $pattern )
{
	$pieces = preg_split( $pattern, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( ! is_array( $pieces ) ) {
		# Pattern failed; show the text without highlighting
		return wfEscapeHTML( $text );
	}

	$html = "";
	foreach ( $pieces as $idx => $piece ) {
		$safe = wfEscapeHTML( $piece );
		# Odd positions hold the captured matches
		if ( $idx % 2 ) {
			$html .= "<font color='red'>{$safe}</font>";
		} else {
			$html .= $safe;
		}
	}
	return $html;
}
# "Go" button handler: tries to jump straight to a page for the search
# text and falls back to the normal results page.
#
# Titles are tried in this order, and the first one for which
# $wgArticle->getID() is non-zero is viewed:
#   1. the text as entered,
#   2. all lower case (i.e. first letter capitalized),
#   3. every word capitalized,
#   4. all upper case,
#   5. the first title-index match of the parsed query, ordered by
#      namespace.
# If none of these succeeds, the "nogomatch" message (with an edit link
# for the entered title) is shown, followed by showResults().
function goResult()
{
	global $wgOut, $wgArticle, $wgTitle;
	$fname = "SearchEngine::goResult";
	$search = $_REQUEST['search'];

	$wgArticle = new Article();

	$candidates = array(
		$search,
		strtolower( $search ),
		ucwords( strtolower( $search ) ),
		strtoupper( $search )
	);
	foreach ( $candidates as $candidate ) {
		$wgTitle = Title::newFromText( $candidate );
		if ( 0 != $wgArticle->getID() ) {
			$wgArticle->view();
			return;
		}
	}

	# Try a near match
	#
	$this->parseQuery();
	if ( "" != $this->mTitlecond ) {
		$sql = "SELECT cur_id,cur_title,cur_namespace,si_page FROM cur,searchindex " .
		  "WHERE cur_id=si_page AND {$this->mTitlecond} ORDER BY cur_namespace LIMIT 1";
		$res = wfQuery( $sql, $fname );

		if ( isset( $res ) && 0 != wfNumRows( $res ) ) {
			$s = wfFetchObject( $res );
			$wgTitle = Title::newFromDBkey( $s->cur_title );
			$wgTitle->setNamespace( $s->cur_namespace );
			$wgArticle->view();
			return;
		}
	}

	# Nothing to go to: offer to create the page, then list the results
	$editUrl = wfLocalUrl( ucfirst( $this->mUsertext ) . "&action=edit" );
	$notice = str_replace( "$1", $editUrl, wfMsg( "nogomatch" ) );
	$wgOut->addHTML( $notice . "\n<p>" );
	$this->showResults();
}
}