diff --git a/RELEASE-NOTES-1.40 b/RELEASE-NOTES-1.40 index ca6fc944095..aa6b275cc26 100644 --- a/RELEASE-NOTES-1.40 +++ b/RELEASE-NOTES-1.40 @@ -66,6 +66,10 @@ For notes on 1.39.x and older releases, see HISTORY. passed to var_dump(), to make its use for debugging more feasible. * Added 'GetBlockErrorMessageKey' hook, allow extensions' block error messages to be received and displayed by BlockErrorFormatter. +* Added an optional RevisionRecord param to: + - ContentHandler::getParserOutputForIndexing + - ContentHandler::getDataForSearchIndex + - the SearchDataForIndex hook is deprecated in favor of SearchDataForIndex2 * … === External library changes in 1.40 === diff --git a/includes/HookContainer/DeprecatedHooks.php b/includes/HookContainer/DeprecatedHooks.php index b5ffa51abd3..25d78838de1 100644 --- a/includes/HookContainer/DeprecatedHooks.php +++ b/includes/HookContainer/DeprecatedHooks.php @@ -61,6 +61,7 @@ class DeprecatedHooks { 'SkinTemplateNavigation' => [ 'deprecatedVersion' => '1.39' ], 'SkinTemplateNavigation::SpecialPage' => [ 'deprecatedVersion' => '1.39' ], 'PersonalUrls' => [ 'deprecatedVersion' => '1.39' ], + 'SearchDataForIndex' => [ 'deprecatedVersion' => '1.40', 'silent' => true ], ]; /** diff --git a/includes/HookContainer/HookRunner.php b/includes/HookContainer/HookRunner.php index 576ce64d04c..3b550ec61ee 100644 --- a/includes/HookContainer/HookRunner.php +++ b/includes/HookContainer/HookRunner.php @@ -77,6 +77,7 @@ class HookRunner implements \MediaWiki\Content\Hook\PageContentLanguageHook, \MediaWiki\Content\Hook\PlaceNewSectionHook, \MediaWiki\Content\Hook\SearchDataForIndexHook, + \MediaWiki\Content\Hook\SearchDataForIndex2Hook, \MediaWiki\Specials\Contribute\Hook\ContributeCardsHook, \MediaWiki\Diff\Hook\AbortDiffCacheHook, \MediaWiki\Diff\Hook\ArticleContentOnDiffHook, @@ -3294,15 +3295,22 @@ class HookRunner implements ); } - public function onSearchDataForIndex( &$fields, $handler, $page, $output, - $engine - ) { + public function 
onSearchDataForIndex( &$fields, $handler, $page, $output, $engine ) { return $this->container->run( 'SearchDataForIndex', [ &$fields, $handler, $page, $output, $engine ] ); } + public function onSearchDataForIndex2( array &$fields, \ContentHandler $handler, + \WikiPage $page, \ParserOutput $output, \SearchEngine $engine, RevisionRecord $revision + ) { + return $this->container->run( + 'SearchDataForIndex2', + [ &$fields, $handler, $page, $output, $engine, $revision ] + ); + } + public function onSearchGetNearMatch( $term, &$title ) { return $this->container->run( 'SearchGetNearMatch', diff --git a/includes/content/ContentHandler.php b/includes/content/ContentHandler.php index 648425df372..8d8533bb559 100644 --- a/includes/content/ContentHandler.php +++ b/includes/content/ContentHandler.php @@ -39,6 +39,7 @@ use MediaWiki\Revision\RevisionRecord; use MediaWiki\Revision\SlotRecord; use MediaWiki\Revision\SlotRenderingProvider; use MediaWiki\Search\ParserOutputSearchDataExtractor; +use Wikimedia\Assert\Assert; use Wikimedia\ScopedCallback; /** @@ -1380,16 +1381,32 @@ abstract class ContentHandler { * @param WikiPage $page Page to index * @param ParserOutput $output * @param SearchEngine $engine Search engine for which we are indexing - * @return array Map of name=>value for fields + * @param RevisionRecord|null $revision Revision content to fetch if provided or use the latest revision + * from WikiPage::getRevisionRecord() if not + * @return array Map of name=>value for fields, an empty array is returned if the latest + * revision cannot be retrieved. * @since 1.28 */ public function getDataForSearchIndex( WikiPage $page, ParserOutput $output, - SearchEngine $engine + SearchEngine $engine, + RevisionRecord $revision = null ) { $fieldData = []; - $content = $page->getContent(); + $revision = $revision ?? 
$page->getRevisionRecord(); + if ( $revision === null ) { + LoggerFactory::getInstance( 'search' )->warning( + "Called getDataForSearchIndex on the page {page_id} for which the " . + "latest revision cannot be loaded.", + [ "page_id" => $page->getId() ] + ); + return []; + } + Assert::invariant( $revision->getPageId() === $page->getId(), + '$revision and $page must target the same page_id' ); + + $content = $revision->getContent( SlotRecord::MAIN ); if ( $content ) { $searchDataExtractor = new ParserOutputSearchDataExtractor(); @@ -1408,6 +1425,8 @@ abstract class ContentHandler { } $this->getHookRunner()->onSearchDataForIndex( $fieldData, $this, $page, $output, $engine ); + $this->getHookRunner()->onSearchDataForIndex2( $fieldData, $this, $page, $output, $engine, $revision ); + return $fieldData; } @@ -1426,10 +1445,15 @@ abstract class ContentHandler { * * @param WikiPage $page * @param ParserCache|null $cache deprecated since 1.38 and won't have any effect + * @param RevisionRecord|null $revision * @return ParserOutput|null null when the ParserOutput cannot be obtained * @see ParserOutputAccess::getParserOutput() for failure modes */ - public function getParserOutputForIndexing( WikiPage $page, ParserCache $cache = null ) { + public function getParserOutputForIndexing( + WikiPage $page, + ParserCache $cache = null, + RevisionRecord $revision = null + ) { // TODO: MCR: ContentHandler should be called per slot, not for the whole page. // See T190066. 
$parserOptions = $page->makeParserOptions( 'canonical' ); @@ -1437,7 +1461,7 @@ abstract class ContentHandler { return $parserOutputAccess->getParserOutput( $page, $parserOptions, - null, + $revision, ParserOutputAccess::OPT_NO_UPDATE_CACHE )->getValue(); } diff --git a/includes/content/FileContentHandler.php b/includes/content/FileContentHandler.php index f3f9a97f520..9fc90dabc71 100644 --- a/includes/content/FileContentHandler.php +++ b/includes/content/FileContentHandler.php @@ -1,6 +1,7 @@ value pairs for fields + * @param ContentHandler $handler ContentHandler for the content being indexed + * @param WikiPage $page WikiPage that is being indexed + * @param ParserOutput $output ParserOutput that is produced from the page + * @param SearchEngine $engine SearchEngine for which the indexing is intended + * @param RevisionRecord $revision RevisionRecord being indexed + * @return bool|void True or no return value to continue or false to abort + */ + public function onSearchDataForIndex2( + array &$fields, + ContentHandler $handler, + WikiPage $page, + ParserOutput $output, + SearchEngine $engine, + RevisionRecord $revision + ); +} diff --git a/includes/content/Hook/SearchDataForIndexHook.php b/includes/content/Hook/SearchDataForIndexHook.php index f5d7a8bd2de..09ddb0437fc 100644 --- a/includes/content/Hook/SearchDataForIndexHook.php +++ b/includes/content/Hook/SearchDataForIndexHook.php @@ -12,6 +12,7 @@ use WikiPage; * Use the hook name "SearchDataForIndex" to register handlers implementing this interface. * * @stable to implement + * @deprecated since 1.40, use SearchDataForIndex2Hook instead.
* @ingroup Hooks */ interface SearchDataForIndexHook { diff --git a/includes/content/TextContentHandler.php b/includes/content/TextContentHandler.php index d5f6ee4649b..6c68232b292 100644 --- a/includes/content/TextContentHandler.php +++ b/includes/content/TextContentHandler.php @@ -27,6 +27,7 @@ use MediaWiki\Content\Renderer\ContentParseParams; use MediaWiki\Content\Transform\PreSaveTransformParams; use MediaWiki\MainConfigNames; use MediaWiki\MediaWikiServices; +use MediaWiki\Revision\RevisionRecord; /** * Base content handler implementation for flat text contents. @@ -157,9 +158,10 @@ class TextContentHandler extends ContentHandler { public function getDataForSearchIndex( WikiPage $page, ParserOutput $output, - SearchEngine $engine + SearchEngine $engine, + ?RevisionRecord $revision = null ) { - $fields = parent::getDataForSearchIndex( $page, $output, $engine ); + $fields = parent::getDataForSearchIndex( $page, $output, $engine, $revision ); $fields['language'] = $this->getPageLanguage( $page->getTitle(), $page->getContent() )->getCode(); return $fields; diff --git a/includes/content/WikitextContentHandler.php b/includes/content/WikitextContentHandler.php index c81f61ac2f6..1da1e349c36 100644 --- a/includes/content/WikitextContentHandler.php +++ b/includes/content/WikitextContentHandler.php @@ -29,6 +29,7 @@ use MediaWiki\Content\Transform\PreSaveTransformParams; use MediaWiki\Languages\LanguageNameUtils; use MediaWiki\MediaWikiServices; use MediaWiki\Parser\ParserOutputFlags; +use MediaWiki\Revision\RevisionRecord; /** * Content handler for wiki text pages. 
@@ -157,9 +158,10 @@ class WikitextContentHandler extends TextContentHandler { public function getDataForSearchIndex( WikiPage $page, ParserOutput $parserOutput, - SearchEngine $engine + SearchEngine $engine, + ?RevisionRecord $revision = null ) { - $fields = parent::getDataForSearchIndex( $page, $parserOutput, $engine ); + $fields = parent::getDataForSearchIndex( $page, $parserOutput, $engine, $revision ); $structure = new WikiTextStructure( $parserOutput ); $fields['heading'] = $structure->headings(); @@ -172,7 +174,7 @@ class WikitextContentHandler extends TextContentHandler { // Until we have full first-class content handler for files, we invoke it explicitly here if ( $page->getTitle()->getNamespace() === NS_FILE ) { $fields = array_merge( $fields, - $this->getFileHandler()->getDataForSearchIndex( $page, $parserOutput, $engine ) ); + $this->getFileHandler()->getDataForSearchIndex( $page, $parserOutput, $engine, $revision ) ); } return $fields; } diff --git a/tests/phpunit/includes/content/ContentHandlerTest.php b/tests/phpunit/includes/content/ContentHandlerTest.php index 5309907179c..446da43961b 100644 --- a/tests/phpunit/includes/content/ContentHandlerTest.php +++ b/tests/phpunit/includes/content/ContentHandlerTest.php @@ -444,9 +444,9 @@ class ContentHandlerTest extends MediaWikiIntegrationTestCase { $fields['testDataField'] = 'test content'; } ); - $contentRenderer = $this->getServiceContainer()->getContentRenderer(); - $output = $contentRenderer->getParserOutput( $page->getContent(), $title ); - $data = $page->getContentHandler()->getDataForSearchIndex( $page, $output, $mockEngine ); + $revision = $page->getRevisionRecord(); + $output = $page->getContentHandler()->getParserOutputForIndexing( $page, null, $revision ); + $data = $page->getContentHandler()->getDataForSearchIndex( $page, $output, $mockEngine, $revision ); $this->assertArrayHasKey( 'text', $data ); $this->assertArrayHasKey( 'text_bytes', $data ); $this->assertArrayHasKey( 'language', $data ); 
@@ -461,8 +461,9 @@ class ContentHandlerTest extends MediaWikiIntegrationTestCase { public function testParserOutputForIndexing() { $title = Title::newFromText( 'Smithee', NS_MAIN ); $page = $this->getServiceContainer()->getWikiPageFactory()->newFromTitle( $title ); + $revision = $page->getRevisionRecord(); - $out = $page->getContentHandler()->getParserOutputForIndexing( $page ); + $out = $page->getContentHandler()->getParserOutputForIndexing( $page, null, $revision ); $this->assertInstanceOf( ParserOutput::class, $out ); $this->assertStringContainsString( 'one who smiths', $out->getRawText() ); }