api: Fix prefixsearch returning the entire database

As of now, an input like "[" that contains nothing but invalid
characters behaves oddly: The prefixsearch API returns the entire
database, alphabetically ordered.

This behavior has existed ever since this code was introduced in
Iaffe30a (2014).

While this might be used as a "feature", I really don't think it
is intentional:
* The way the code is arranged executes an SQL query with a
  pointless `LIKE '%'` that doesn't do anything but possibly
  waste CPU resources. This doesn't look intentional.
* There are much better APIs when you really want to list all
  pages.
* I would expect an input like "*", "%", or "" to return all
  pages. But none of these inputs does this. Only some very
  strange inputs like "[" do.
* I would argue that "[" should behave the same as "", i.e. as
  if no useful input is given.
* Since I877297f (2020) we normalize e.g. "[[foo]]" to "foo",
  for convenience. Since then such an input works as expected,
  instead of returning the entire database. This leaves titles
  that contain nothing but invalid characters as the only
  edge-case.

Change-Id: Ib2cbf315ed7c3736391362be6d69599c58e690db
This commit is contained in:
Thiemo Kreuz 2021-06-17 10:37:10 +02:00
parent 224fca0c6c
commit 26bd5ff256
2 changed files with 41 additions and 26 deletions

View file

@ -229,40 +229,40 @@ abstract class PrefixSearch {
* be automatically capitalized by Title::secureAndSpit()
* later on depending on $wgCapitalLinks)
*
* @param array|null $namespaces Namespaces to search in
* @param int[]|null $namespaces Namespaces to search in
* @param string $search Term
* @param int $limit Max number of items to return
* @param int $offset Number of items to skip
* @return Title[] Array of Title objects
* @return Title[]
*/
public function defaultSearchBackend( $namespaces, $search, $limit, $offset ) {
// Backwards compatability with old code. Default to NS_MAIN if no namespaces provided.
if ( $namespaces === null ) {
$namespaces = [];
}
if ( !$namespaces ) {
$namespaces[] = NS_MAIN;
$namespaces = [ NS_MAIN ];
}
if ( in_array( NS_SPECIAL, $namespaces ) ) {
// For now, if special is included, ignore the other namespaces
return $this->specialSearch( $search, $limit, $offset );
}
// Construct suitable prefix for each namespace. They differ in cases where
// some namespaces always capitalize and some don't.
$prefixes = [];
foreach ( $namespaces as $namespace ) {
// For now, if special is included, ignore the other namespaces
if ( $namespace == NS_SPECIAL ) {
return $this->specialSearch( $search, $limit, $offset );
// Allow to do a prefix search for e.g. "Talk:"
if ( $search === '' ) {
$prefixes[$search] = $namespaces;
} else {
// Don't just ignore input like "[[Foo]]", but try to search for "Foo"
$search = preg_replace( MediaWikiTitleCodec::getTitleInvalidRegex(), '', $search );
foreach ( $namespaces as $namespace ) {
$title = Title::makeTitleSafe( $namespace, $search );
if ( $title ) {
$prefixes[ $title->getDBkey() ][] = $namespace;
}
}
$title = Title::makeTitleSafe( $namespace, $search );
if ( !$title ) {
$title = Title::makeTitleSafe(
$namespace,
// Don't just ignore input like "[[Foo]]", but try to search for "Foo"
preg_replace( MediaWikiTitleCodec::getTitleInvalidRegex(), '', $search )
);
}
$prefix = $title ? $title->getDBkey() : '';
$prefixes[$prefix][] = $namespace;
}
if ( !$prefixes ) {
return [];
}
$dbr = wfGetDB( DB_REPLICA );
@ -270,10 +270,10 @@ abstract class PrefixSearch {
// but sometimes there are two if some namespaces do not always capitalize.
$conds = [];
foreach ( $prefixes as $prefix => $namespaces ) {
$condition = [
'page_namespace' => $namespaces,
'page_title' . $dbr->buildLike( $prefix, $dbr->anyString() ),
];
$condition = [ 'page_namespace' => $namespaces ];
if ( $prefix !== '' ) {
$condition[] = 'page_title' . $dbr->buildLike( $prefix, $dbr->anyString() );
}
$conds[] = $dbr->makeList( $condition, LIST_AND );
}

View file

@ -86,6 +86,11 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
'query' => '',
'results' => [],
] ],
[ [
'All invalid characters, effectively empty',
'query' => '[',
'results' => [],
] ],
[ [
'Main namespace with title prefix',
'query' => 'Sa',
@ -99,6 +104,16 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
'Sample Who',
],
] ],
[ [
'Some invalid characters',
'query' => '[[Sa]]',
'results' => [
'Sample',
'Sample Ban',
'Sample Eat',
],
'offsetresult' => [ 'Sample Who' ],
] ],
[ [
'Talk namespace prefix',
'query' => 'Talk:',