A regex in SearchUpdate was written for legacy pure ISO 8859-1 text and matched only bytes in the \xa0-\xff range -- this caused a match to be cut off partway through any character whose encoding contains a byte in the \x80-\x9f range. Fixed the regex to accept \x80-\xff instead. Added a test case to SearchUpdateTest that checks for this (example text is run through the update squash algorithm, then through preg_replace with the /u modifier so that it is treated as UTF-8, and the test checks whether that breaks).
90 lines
1.8 KiB
PHP
90 lines
1.8 KiB
PHP
<?php
|
|
|
|
/**
 * Search engine stub for tests: it performs no indexing and simply records
 * the arguments of the most recent update() call in static properties so the
 * caller can read them back afterwards.
 */
class MockSearch extends SearchEngine {
	/** Id passed to the most recent update() call. */
	public static $id;

	/** Title passed to the most recent update() call. */
	public static $title;

	/** Text passed to the most recent update() call. */
	public static $text;

	/**
	 * Intentionally empty: $db is accepted but never used, and the parent
	 * constructor is not invoked.
	 *
	 * @param $db Mixed: ignored
	 */
	public function __construct( $db ) {
	}

	/**
	 * Record the arguments instead of updating any index.
	 *
	 * @param $id Mixed: stored in self::$id
	 * @param $title Mixed: stored in self::$title
	 * @param $text Mixed: stored in self::$text
	 */
	public function update( $id, $title, $text ) {
		self::$text = $text;
		self::$title = $title;
		self::$id = $id;
	}
}
|
|
|
|
/**
 * Tests for the text that SearchUpdate hands to the search engine.
 *
 * setUp() points $wgSearchType at MockSearch; the tests then read back the
 * title/text that MockSearch recorded after SearchUpdate::doUpdate() ran.
 *
 * @group Search
 */
class SearchUpdateTest extends MediaWikiTestCase {
	/** Value of $wgSearchType saved by setUp() and restored by tearDown(). */
	public static $searchType;

	/**
	 * Run a SearchUpdate and return what MockSearch recorded.
	 *
	 * @param $text String: page text to feed to SearchUpdate
	 * @param $title String: title to pass along (default 'Test')
	 * @param $id Integer: page id to pass along (default 1)
	 * @return Array: ( title, text ) as stored on MockSearch after doUpdate()
	 */
	public function update( $text, $title = 'Test', $id = 1 ) {
		// Clear whatever a previous call recorded, so a doUpdate() that never
		// reaches the mock cannot make an assertion pass on stale data.
		MockSearch::$id = null;
		MockSearch::$title = null;
		MockSearch::$text = null;

		$u = new SearchUpdate( $id, $title, $text );
		$u->doUpdate();

		return array( MockSearch::$title, MockSearch::$text );
	}

	/**
	 * Run a SearchUpdate with default title/id and return only the text.
	 *
	 * @param $text String: page text to feed to SearchUpdate
	 * @return String: recorded text with surrounding whitespace trimmed
	 */
	public function updateText( $text ) {
		list( , $resultText ) = $this->update( $text );

		// Abstract from some implementation details (leading/trailing padding)
		return trim( $resultText );
	}

	/**
	 * Save the configured search type and replace it with MockSearch.
	 */
	public function setUp() {
		global $wgSearchType;

		// Chain to the base fixture so any setup it performs still runs.
		parent::setUp();

		self::$searchType = $wgSearchType;
		$wgSearchType = 'MockSearch';
	}

	/**
	 * Restore the search type saved by setUp().
	 */
	public function tearDown() {
		global $wgSearchType;

		$wgSearchType = self::$searchType;

		// Undo our own change first, then let the base fixture clean up.
		parent::tearDown();
	}

	/**
	 * Check the recorded text for several HTML-ish inputs.
	 */
	public function testUpdateText() {
		$this->assertEquals(
			'test',
			$this->updateText( '<div>TeSt</div>' ),
			'HTML stripped, text lowercased'
		);

		// The mismatched <tr>...</td> markup below is part of the fixture.
		$this->assertEquals(
			'foo bar boz quux',
			$this->updateText( <<<EOT
<table style="color:red; font-size:100px">
<tr class="scary"><td><div>foo</div></td><tr>bar</td></tr>
<tr><td>boz</td><tr>quux</td></tr>
</table>
EOT
			), 'Stripping HTML tables' );

		$this->assertEquals(
			'a b',
			$this->updateText( 'a > b' ),
			'Handle unclosed tags'
		);

		// A 10000-byte input containing an unclosed "<barbarbar" tag
		$text = str_pad( "foo <barbarbar \n", 10000, 'x' );

		$this->assertNotEquals(
			'',
			$this->updateText( $text ),
			'Bug 18609'
		);
	}

	/**
	 * Bug 32712: the recorded text for a URL wrapped in non-ASCII quotes
	 * must still be accepted by PCRE in UTF-8 mode.
	 */
	public function testBug32712() {
		$text = "text „http://example.com“ text";
		$result = $this->updateText( $text );

		// With the /u modifier preg_replace() returns null when the subject
		// is not valid UTF-8; the pattern itself is a no-op (Q -> Q).
		$processed = preg_replace( '/Q/u', 'Q', $result );

		$this->assertNotNull(
			$processed,
			'Link surrounded by unicode quotes should not fail UTF-8 validation'
		);
		$this->assertNotSame(
			'',
			$processed,
			'Processed text should not be empty'
		);
	}
}
|