As of now, an input like "[" that contains nothing but invalid characters behaves oddly: the prefixsearch API returns the entire database, alphabetically ordered. This behavior has existed ever since this code was introduced in Iaffe30a (2014).

While this might be used as a "feature", I really don't think it is intentional:
* The way the code is arranged, it executes an SQL query with a pointless `LIKE '%'` that doesn't do anything but possibly waste CPU resources. This doesn't look intentional.
* There are much better APIs when you really want to list all pages.
* I would expect an input like "*", "%", or "" to return all pages. But none of these inputs does this. Only some very strange inputs like "[" do.
* I would argue that "[" should behave the same as "", i.e. as if no useful input is given.
* Since I877297f (2020) we normalize e.g. "[[foo]]" to "foo", for convenience. Since then such an input works as expected, instead of returning the entire database. This leaves titles that contain nothing but invalid characters as the only edge case.

Change-Id: Ib2cbf315ed7c3736391362be6d69599c58e690db
317 lines
10 KiB
PHP
317 lines
10 KiB
PHP
<?php
|
|
/**
|
|
* Prefix search of page names.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*
|
|
* @file
|
|
*/
|
|
|
|
use MediaWiki\MediaWikiServices;
|
|
|
|
/**
|
|
* Handles searching prefixes of titles and finding any page
|
|
* names that match. Used largely by the OpenSearch implementation.
|
|
* @deprecated Since 1.27, Use SearchEngine::defaultPrefixSearch or SearchEngine::completionSearch
|
|
*
|
|
* @stable to extend
|
|
* @ingroup Search
|
|
*/
|
|
abstract class PrefixSearch {
|
|
/**
 * Run a prefix search over page titles and return the matching page names.
 *
 * The term is trimmed first; an empty term yields an empty result without
 * touching any backend. If SearchEngine::parseNamespacePrefixes() detects a
 * namespace prefix in the term, the term and namespace list it returns
 * replace the ones passed in before the query is handed to searchBackend().
 *
 * @param string $search
 * @param int $limit
 * @param array $namespaces Used if query is not explicitly prefixed
 * @param int $offset How many results to offset from the beginning
 * @return (Title|string)[]
 */
public function search( $search, $limit, $namespaces = [], $offset = 0 ) {
	$term = trim( $search );
	if ( $term === '' ) {
		// Nothing to search for
		return [];
	}

	// Either false, or a [ remaining term, namespaces ] pair
	$parsed = SearchEngine::parseNamespacePrefixes( $term, false, true );
	if ( $parsed !== false ) {
		[ $term, $namespaces ] = $parsed;
	}

	return $this->searchBackend( $namespaces, $term, $limit, $offset );
}
|
|
|
|
/**
 * Do a prefix search for all possible variants of the prefix.
 *
 * The term is searched as given first. If that produces fewer than $limit
 * results, every distinct variant returned by the content language's
 * autoConvertToAllVariants() (other than the original term) is searched in
 * turn, and its results are appended until the remaining budget is used up.
 * Variant searches always start at offset 0; $offset only applies to the
 * search for the original term.
 *
 * @param string $search
 * @param int $limit
 * @param array $namespaces
 * @param int $offset How many results to offset from the beginning
 *
 * @return (Title|string)[]
 */
public function searchWithVariants( $search, $limit, array $namespaces, $offset = 0 ) {
	$searches = $this->search( $search, $limit, $namespaces, $offset );

	// if the content language has variants, try to retrieve fallback results
	$fallbackLimit = $limit - count( $searches );
	if ( $fallbackLimit <= 0 ) {
		return $searches;
	}

	$fallbackSearches = MediaWikiServices::getInstance()->getContentLanguage()->
		autoConvertToAllVariants( $search );
	$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

	foreach ( $fallbackSearches as $fbs ) {
		$fallbackSearchResult = $this->search( $fbs, $fallbackLimit, $namespaces );
		$searches = array_merge( $searches, $fallbackSearchResult );
		$fallbackLimit -= count( $fallbackSearchResult );

		// Stop on "<= 0" rather than "== 0": a backend that returns more rows
		// than requested would otherwise push the budget below zero and every
		// remaining variant would be searched with a negative limit.
		if ( $fallbackLimit <= 0 ) {
			break;
		}
	}

	return $searches;
}
|
|
|
|
/**
|
|
* When implemented in a descendant class, receives an array of Title objects and returns
|
|
* either an unmodified array or an array of strings corresponding to titles passed to it.
|
|
*
|
|
* @param Title[] $titles
|
|
* @return (Title|string)[]
|
|
*/
|
|
// Applied by searchBackend() to the results of specialSearch() and defaultSearchBackend().
abstract protected function titles( array $titles );
|
|
|
|
/**
|
|
* When implemented in a descendant class, receives an array of titles as strings and returns
|
|
* either an unmodified array or an array of Title objects corresponding to strings received.
|
|
*
|
|
* @param string[] $strings
|
|
* @return (Title|string)[]
|
|
*/
|
|
// Applied by searchBackend() to the results collected through the PrefixSearchBackend hook.
abstract protected function strings( array $strings );
|
|
|
|
/**
 * Route a prefix search to the backend that should answer it.
 *
 * When exactly one namespace is requested, NS_MEDIA is redirected to NS_FILE
 * and NS_SPECIAL is answered directly by specialSearch(). Every other query
 * is offered to the PrefixSearchBackend hook: if the hook run returns true,
 * defaultSearchBackend() is used; otherwise the results collected by the
 * hook handlers are post-processed and returned.
 *
 * @param int[] $namespaces
 * @param string $search
 * @param int $limit
 * @param int $offset How many results to offset from the beginning
 * @return (Title|string)[]
 */
protected function searchBackend( $namespaces, $search, $limit, $offset ) {
	if ( count( $namespaces ) == 1 ) {
		switch ( $namespaces[0] ) {
			case NS_MEDIA:
				$namespaces = [ NS_FILE ];
				break;
			case NS_SPECIAL:
				return $this->titles( $this->specialSearch( $search, $limit, $offset ) );
		}
	}

	// Filled in by reference by the hook handlers
	$hookResults = [];
	$useDefaultBackend = Hooks::runner()->onPrefixSearchBackend(
		$namespaces, $search, $limit, $hookResults, $offset
	);

	if ( !$useDefaultBackend ) {
		return $this->strings(
			$this->handleResultFromHook( $hookResults, $namespaces, $search, $limit, $offset )
		);
	}

	return $this->titles( $this->defaultSearchBackend( $namespaces, $search, $limit, $offset ) );
}
|
|
|
|
/**
 * Post-process results that were supplied through the PrefixSearchBackend hook.
 *
 * For an offset of exactly 0 the results are passed through
 * SearchExactMatchRescorer::rescore(); for any other offset they are
 * returned unchanged.
 *
 * @param array $srchres Results collected by the hook
 * @param array $namespaces
 * @param string $search
 * @param int $limit
 * @param int $offset
 * @return array
 */
private function handleResultFromHook( $srchres, $namespaces, $search, $limit, $offset ) {
	if ( $offset !== 0 ) {
		return $srchres;
	}

	// Only perform the exact db match on the first page of results.
	// This is still far from perfect, but at least we avoid returning the
	// same title again and again when the user is scrolling with a query
	// that matches a title in the db.
	return ( new SearchExactMatchRescorer() )->rescore( $search, $namespaces, $srchres, $limit );
}
|
|
|
|
/**
 * Prefix search special-case for Special: namespace.
 *
 * If the term contains a "/", the part before it is resolved to a special
 * page and the part after it is delegated to that page's
 * prefixSearchSubpages(). Otherwise the term is matched as a case-folded
 * prefix against all special page names (rank 0) and their aliases (ranked
 * by their position in the alias list). Results are ordered by rank, then
 * by case-folded name.
 *
 * @param string $search Term
 * @param int $limit Max number of items to return
 * @param int $offset Number of items to offset
 * @return array
 */
protected function specialSearch( $search, $limit, $offset ) {
	$searchParts = explode( '/', $search, 2 );
	$searchKey = $searchParts[0];
	$subpageSearch = $searchParts[1] ?? null;

	// Handle subpage search separately.
	$spFactory = MediaWikiServices::getInstance()->getSpecialPageFactory();
	if ( $subpageSearch !== null ) {
		// Try matching the full search string as a page name
		$specialTitle = Title::makeTitleSafe( NS_SPECIAL, $searchKey );
		if ( !$specialTitle ) {
			return [];
		}
		$special = $spFactory->getPage( $specialTitle->getText() );
		if ( !$special ) {
			return [];
		}
		$subpages = $special->prefixSearchSubpages( $subpageSearch, $limit, $offset );
		return array_map( static function ( $sub ) use ( $specialTitle ) {
			return $specialTitle->getSubpage( $sub );
		}, $subpages );
	}

	# normalize searchKey, so aliases with spaces can be found - T27675
	$contLang = MediaWikiServices::getInstance()->getContentLanguage();
	$searchKey = str_replace( ' ', '_', $searchKey );
	$searchKey = $contLang->caseFold( $searchKey );

	// Fetch the list of special page names once; it is needed by both loops
	// below and does not change while they run.
	$pageNames = $spFactory->getNames();

	// Unlike SpecialPage itself, we want the canonical forms of both
	// canonical and alias title forms...
	$keys = [];
	foreach ( $pageNames as $page ) {
		$keys[$contLang->caseFold( $page )] = [ 'page' => $page, 'rank' => 0 ];
	}

	foreach ( $contLang->getSpecialPageAliases() as $page => $aliases ) {
		if ( !in_array( $page, $pageNames ) ) { # T22885
			continue;
		}

		foreach ( $aliases as $key => $alias ) {
			$keys[$contLang->caseFold( $alias )] = [ 'page' => $alias, 'rank' => $key ];
		}
	}
	ksort( $keys );

	$matches = [];
	foreach ( $keys as $pageKey => $page ) {
		if ( $searchKey === '' || strpos( $pageKey, $searchKey ) === 0 ) {
			// T29671: Don't use SpecialPage::getTitleFor() here because it
			// localizes its input leading to searches for e.g. Special:All
			// returning Spezial:MediaWiki-Systemnachrichten and returning
			// Spezial:Alle_Seiten twice when $wgLanguageCode == 'de'
			$matches[$page['rank']][] = Title::makeTitleSafe( NS_SPECIAL, $page['page'] );

			if ( isset( $matches[0] ) && count( $matches[0] ) >= $limit + $offset ) {
				// We have enough items in primary rank, no use to continue
				break;
			}
		}
	}

	// Ensure keys are in order
	ksort( $matches );
	// Flatten the array
	$matches = array_reduce( $matches, 'array_merge', [] );

	return array_slice( $matches, $offset, $limit );
}
|
|
|
|
/**
 * Built-in title prefix search against the page table.
 *
 * Used unless overridden by the PrefixSearchBackend hook. This is
 * case-sensitive (the first character may be automatically capitalized by
 * Title normalization, depending on $wgCapitalLinks).
 *
 * If NS_SPECIAL is among the namespaces, the other namespaces are ignored
 * and specialSearch() answers the query. An empty term matches every page
 * in the given namespaces. For a non-empty term, characters matched by the
 * invalid-title regex are stripped first; if no namespace then yields a
 * valid title, the result is empty and no database query is made.
 *
 * @param int[]|null $namespaces Namespaces to search in
 * @param string $search Term
 * @param int $limit Max number of items to return
 * @param int $offset Number of items to skip
 * @return Title[]
 */
public function defaultSearchBackend( $namespaces, $search, $limit, $offset ) {
	if ( !$namespaces ) {
		$namespaces = [ NS_MAIN ];
	}

	if ( in_array( NS_SPECIAL, $namespaces ) ) {
		// For now, if special is included, ignore the other namespaces
		return $this->specialSearch( $search, $limit, $offset );
	}

	// Construct suitable prefix for each namespace. They differ in cases where
	// some namespaces always capitalize and some don't.
	$prefixes = [];
	// Allow to do a prefix search for e.g. "Talk:"
	if ( $search === '' ) {
		$prefixes[$search] = $namespaces;
	} else {
		// Don't just ignore input like "[[Foo]]", but try to search for "Foo"
		$search = preg_replace( MediaWikiTitleCodec::getTitleInvalidRegex(), '', $search );
		foreach ( $namespaces as $namespace ) {
			$title = Title::makeTitleSafe( $namespace, $search );
			if ( $title ) {
				$prefixes[ $title->getDBkey() ][] = $namespace;
			}
		}
	}
	if ( !$prefixes ) {
		return [];
	}

	$dbr = wfGetDB( DB_REPLICA );
	// Often there is only one prefix that applies to all requested namespaces,
	// but sometimes there are two if some namespaces do not always capitalize.
	$conds = [];
	foreach ( $prefixes as $prefix => $prefixNamespaces ) {
		// PHP converts numeric-string array keys such as "2020" to integers;
		// turn the key back into the string it came from before using it.
		$prefix = (string)$prefix;
		$condition = [ 'page_namespace' => $prefixNamespaces ];
		if ( $prefix !== '' ) {
			$condition[] = 'page_title' . $dbr->buildLike( $prefix, $dbr->anyString() );
		}
		$conds[] = $dbr->makeList( $condition, LIST_AND );
	}

	$table = 'page';
	$fields = [ 'page_id', 'page_namespace', 'page_title' ];
	$conds = $dbr->makeList( $conds, LIST_OR );
	$options = [
		'LIMIT' => $limit,
		'ORDER BY' => [ 'page_title', 'page_namespace' ],
		'OFFSET' => $offset
	];

	$res = $dbr->select( $table, $fields, $conds, __METHOD__, $options );

	return iterator_to_array( TitleArray::newFromResult( $res ) );
}
|
|
|
|
/**
 * Reduce a list of namespace indexes to those known to the content language.
 *
 * An entry is kept when it is numeric and is a key of the content
 * language's namespace list. If the input is not an array, is empty, or
 * contains no such entry, the result falls back to the main namespace.
 *
 * @param array $namespaces
 * @return array (default: contains only NS_MAIN)
 */
protected function validateNamespaces( $namespaces ) {
	// Namespaces of the content language, keyed by namespace index
	$knownNamespaces = MediaWikiServices::getInstance()->getContentLanguage()->getNamespaces();

	if ( !is_array( $namespaces ) || count( $namespaces ) === 0 ) {
		return [ NS_MAIN ];
	}

	$accepted = array_values( array_filter(
		$namespaces,
		static function ( $candidate ) use ( $knownNamespaces ) {
			return is_numeric( $candidate ) && array_key_exists( $candidate, $knownNamespaces );
		}
	) );

	return $accepted ?: [ NS_MAIN ];
}
|
|
}
|