Make the doc building for search aware of the revision
Added an optional RevisionRecord param to: - ContentHandler::getParserOutputForIndexing - ContentHandler::getDataForSearchIndex - the SearchDataForIndex hook So that they have a chance to build the content related to a specific revision. Ultimately we'd like to make this parameter mandatory. Bug: T317309 Depends-On: I8b220cd6c4aeeca1d924bdd527409b8602318944 Depends-On: I8616b611caab3f5fa97ff0e655b19c3034304597 Change-Id: I3298ce7591069eb32f624b2c9fbb6de58ae04a29
This commit is contained in:
parent
f3566aacb8
commit
9fbd8f500f
10 changed files with 105 additions and 18 deletions
|
|
@ -66,6 +66,10 @@ For notes on 1.39.x and older releases, see HISTORY.
|
|||
passed to var_dump(), to make its use for debugging more feasible.
|
||||
* Added 'GetBlockErrorMessageKey' hook, allowing extensions'
|
||||
block error messages to be received and displayed by BlockErrorFormatter.
|
||||
* Added an optional RevisionRecord param to:
|
||||
- ContentHandler::getParserOutputForIndexing
|
||||
- ContentHandler::getDataForSearchIndex
|
||||
- the new SearchDataForIndex2 hook (the SearchDataForIndex hook is deprecated in favor of it)
|
||||
* …
|
||||
|
||||
=== External library changes in 1.40 ===
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ class DeprecatedHooks {
|
|||
'SkinTemplateNavigation' => [ 'deprecatedVersion' => '1.39' ],
|
||||
'SkinTemplateNavigation::SpecialPage' => [ 'deprecatedVersion' => '1.39' ],
|
||||
'PersonalUrls' => [ 'deprecatedVersion' => '1.39' ],
|
||||
'SearchDataForIndex' => [ 'deprecatedVersion' => '1.40', 'silent' => true ],
|
||||
];
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@ class HookRunner implements
|
|||
\MediaWiki\Content\Hook\PageContentLanguageHook,
|
||||
\MediaWiki\Content\Hook\PlaceNewSectionHook,
|
||||
\MediaWiki\Content\Hook\SearchDataForIndexHook,
|
||||
\MediaWiki\Content\Hook\SearchDataForIndex2Hook,
|
||||
\MediaWiki\Specials\Contribute\Hook\ContributeCardsHook,
|
||||
\MediaWiki\Diff\Hook\AbortDiffCacheHook,
|
||||
\MediaWiki\Diff\Hook\ArticleContentOnDiffHook,
|
||||
|
|
@ -3294,15 +3295,22 @@ class HookRunner implements
|
|||
);
|
||||
}
|
||||
|
||||
public function onSearchDataForIndex( &$fields, $handler, $page, $output,
|
||||
$engine
|
||||
) {
|
||||
public function onSearchDataForIndex( &$fields, $handler, $page, $output, $engine ) {
|
||||
return $this->container->run(
|
||||
'SearchDataForIndex',
|
||||
[ &$fields, $handler, $page, $output, $engine ]
|
||||
);
|
||||
}
|
||||
|
||||
public function onSearchDataForIndex2( array &$fields, \ContentHandler $handler,
|
||||
\WikiPage $page, \ParserOutput $output, \SearchEngine $engine, RevisionRecord $revision
|
||||
) {
|
||||
return $this->container->run(
|
||||
'SearchDataForIndex2',
|
||||
[ &$fields, $handler, $page, $output, $engine, $revision ]
|
||||
);
|
||||
}
|
||||
|
||||
public function onSearchGetNearMatch( $term, &$title ) {
|
||||
return $this->container->run(
|
||||
'SearchGetNearMatch',
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ use MediaWiki\Revision\RevisionRecord;
|
|||
use MediaWiki\Revision\SlotRecord;
|
||||
use MediaWiki\Revision\SlotRenderingProvider;
|
||||
use MediaWiki\Search\ParserOutputSearchDataExtractor;
|
||||
use Wikimedia\Assert\Assert;
|
||||
use Wikimedia\ScopedCallback;
|
||||
|
||||
/**
|
||||
|
|
@ -1380,16 +1381,32 @@ abstract class ContentHandler {
|
|||
* @param WikiPage $page Page to index
|
||||
* @param ParserOutput $output
|
||||
* @param SearchEngine $engine Search engine for which we are indexing
|
||||
* @return array Map of name=>value for fields
|
||||
* @param RevisionRecord|null $revision Revision whose content is indexed; if null, the latest revision
|
||||
* from WikiPage::getRevisionRecord() is used
|
||||
* @return array Map of name=>value for fields, an empty array is returned if the latest
|
||||
* revision cannot be retrieved.
|
||||
* @since 1.28
|
||||
*/
|
||||
public function getDataForSearchIndex(
|
||||
WikiPage $page,
|
||||
ParserOutput $output,
|
||||
SearchEngine $engine
|
||||
SearchEngine $engine,
|
||||
RevisionRecord $revision = null
|
||||
) {
|
||||
$fieldData = [];
|
||||
$content = $page->getContent();
|
||||
$revision = $revision ?? $page->getRevisionRecord();
|
||||
if ( $revision === null ) {
|
||||
LoggerFactory::getInstance( 'search' )->warning(
|
||||
"Called getDataForSearchIndex on the page {page_id} for which the " .
|
||||
"latest revision cannot be loaded.",
|
||||
[ "page_id" => $page->getId() ]
|
||||
);
|
||||
return [];
|
||||
}
|
||||
Assert::invariant( $revision->getPageId() === $page->getId(),
|
||||
'$revision and $page must target the same page_id' );
|
||||
|
||||
$content = $revision->getContent( SlotRecord::MAIN );
|
||||
|
||||
if ( $content ) {
|
||||
$searchDataExtractor = new ParserOutputSearchDataExtractor();
|
||||
|
|
@ -1408,6 +1425,8 @@ abstract class ContentHandler {
|
|||
}
|
||||
|
||||
$this->getHookRunner()->onSearchDataForIndex( $fieldData, $this, $page, $output, $engine );
|
||||
$this->getHookRunner()->onSearchDataForIndex2( $fieldData, $this, $page, $output, $engine, $revision );
|
||||
|
||||
return $fieldData;
|
||||
}
|
||||
|
||||
|
|
@ -1426,10 +1445,15 @@ abstract class ContentHandler {
|
|||
*
|
||||
* @param WikiPage $page
|
||||
* @param ParserCache|null $cache deprecated since 1.38 and won't have any effect
|
||||
* @param RevisionRecord|null $revision
|
||||
* @return ParserOutput|null null when the ParserOutput cannot be obtained
|
||||
* @see ParserOutputAccess::getParserOutput() for failure modes
|
||||
*/
|
||||
public function getParserOutputForIndexing( WikiPage $page, ParserCache $cache = null ) {
|
||||
public function getParserOutputForIndexing(
|
||||
WikiPage $page,
|
||||
ParserCache $cache = null,
|
||||
RevisionRecord $revision = null
|
||||
) {
|
||||
// TODO: MCR: ContentHandler should be called per slot, not for the whole page.
|
||||
// See T190066.
|
||||
$parserOptions = $page->makeParserOptions( 'canonical' );
|
||||
|
|
@ -1437,7 +1461,7 @@ abstract class ContentHandler {
|
|||
return $parserOutputAccess->getParserOutput(
|
||||
$page,
|
||||
$parserOptions,
|
||||
null,
|
||||
$revision,
|
||||
ParserOutputAccess::OPT_NO_UPDATE_CACHE
|
||||
)->getValue();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
<?php
|
||||
|
||||
use MediaWiki\MediaWikiServices;
|
||||
use MediaWiki\Revision\RevisionRecord;
|
||||
|
||||
/**
|
||||
* Content handler for File: files
|
||||
|
|
@ -36,7 +37,8 @@ class FileContentHandler extends WikitextContentHandler {
|
|||
public function getDataForSearchIndex(
|
||||
WikiPage $page,
|
||||
ParserOutput $parserOutput,
|
||||
SearchEngine $engine
|
||||
SearchEngine $engine,
|
||||
?RevisionRecord $revision = null
|
||||
) {
|
||||
$fields = [];
|
||||
|
||||
|
|
|
|||
42
includes/content/Hook/SearchDataForIndex2Hook.php
Normal file
42
includes/content/Hook/SearchDataForIndex2Hook.php
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
<?php
|
||||
|
||||
namespace MediaWiki\Content\Hook;
|
||||
|
||||
use ContentHandler;
|
||||
use MediaWiki\Revision\RevisionRecord;
|
||||
use ParserOutput;
|
||||
use SearchEngine;
|
||||
use WikiPage;
|
||||
|
||||
/**
|
||||
* This is a hook handler interface, see docs/Hooks.md.
|
||||
* Use the hook name "SearchDataForIndex2" to register handlers implementing this interface.
|
||||
*
|
||||
* @stable to implement
|
||||
* @ingroup Hooks
|
||||
*/
|
||||
interface SearchDataForIndex2Hook {
|
||||
|
||||
/**
|
||||
* Use this hook to add data to search document. Allows you to add any data to
|
||||
* the field map used to index the document.
|
||||
*
|
||||
* @since 1.40
|
||||
*
|
||||
* @param array &$fields Array of name => value pairs for fields
|
||||
* @param ContentHandler $handler ContentHandler for the content being indexed
|
||||
* @param WikiPage $page WikiPage that is being indexed
|
||||
* @param ParserOutput $output ParserOutput that is produced from the page
|
||||
* @param SearchEngine $engine SearchEngine for which the indexing is intended
|
||||
* @param RevisionRecord $revision RevisionRecord being indexed
|
||||
* @return bool|void True or no return value to continue or false to abort
|
||||
*/
|
||||
public function onSearchDataForIndex2(
|
||||
array &$fields,
|
||||
ContentHandler $handler,
|
||||
WikiPage $page,
|
||||
ParserOutput $output,
|
||||
SearchEngine $engine,
|
||||
RevisionRecord $revision
|
||||
);
|
||||
}
|
||||
|
|
@ -12,6 +12,7 @@ use WikiPage;
|
|||
* Use the hook name "SearchDataForIndex" to register handlers implementing this interface.
|
||||
*
|
||||
* @stable to implement
|
||||
* @deprecated since 1.40, use SearchDataForIndex2Hook instead.
|
||||
* @ingroup Hooks
|
||||
*/
|
||||
interface SearchDataForIndexHook {
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ use MediaWiki\Content\Renderer\ContentParseParams;
|
|||
use MediaWiki\Content\Transform\PreSaveTransformParams;
|
||||
use MediaWiki\MainConfigNames;
|
||||
use MediaWiki\MediaWikiServices;
|
||||
use MediaWiki\Revision\RevisionRecord;
|
||||
|
||||
/**
|
||||
* Base content handler implementation for flat text contents.
|
||||
|
|
@ -157,9 +158,10 @@ class TextContentHandler extends ContentHandler {
|
|||
public function getDataForSearchIndex(
|
||||
WikiPage $page,
|
||||
ParserOutput $output,
|
||||
SearchEngine $engine
|
||||
SearchEngine $engine,
|
||||
?RevisionRecord $revision = null
|
||||
) {
|
||||
$fields = parent::getDataForSearchIndex( $page, $output, $engine );
|
||||
$fields = parent::getDataForSearchIndex( $page, $output, $engine, $revision );
|
||||
$fields['language'] =
|
||||
$this->getPageLanguage( $page->getTitle(), $page->getContent() )->getCode();
|
||||
return $fields;
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ use MediaWiki\Content\Transform\PreSaveTransformParams;
|
|||
use MediaWiki\Languages\LanguageNameUtils;
|
||||
use MediaWiki\MediaWikiServices;
|
||||
use MediaWiki\Parser\ParserOutputFlags;
|
||||
use MediaWiki\Revision\RevisionRecord;
|
||||
|
||||
/**
|
||||
* Content handler for wiki text pages.
|
||||
|
|
@ -157,9 +158,10 @@ class WikitextContentHandler extends TextContentHandler {
|
|||
public function getDataForSearchIndex(
|
||||
WikiPage $page,
|
||||
ParserOutput $parserOutput,
|
||||
SearchEngine $engine
|
||||
SearchEngine $engine,
|
||||
?RevisionRecord $revision = null
|
||||
) {
|
||||
$fields = parent::getDataForSearchIndex( $page, $parserOutput, $engine );
|
||||
$fields = parent::getDataForSearchIndex( $page, $parserOutput, $engine, $revision );
|
||||
|
||||
$structure = new WikiTextStructure( $parserOutput );
|
||||
$fields['heading'] = $structure->headings();
|
||||
|
|
@ -172,7 +174,7 @@ class WikitextContentHandler extends TextContentHandler {
|
|||
// Until we have full first-class content handler for files, we invoke it explicitly here
|
||||
if ( $page->getTitle()->getNamespace() === NS_FILE ) {
|
||||
$fields = array_merge( $fields,
|
||||
$this->getFileHandler()->getDataForSearchIndex( $page, $parserOutput, $engine ) );
|
||||
$this->getFileHandler()->getDataForSearchIndex( $page, $parserOutput, $engine, $revision ) );
|
||||
}
|
||||
return $fields;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -444,9 +444,9 @@ class ContentHandlerTest extends MediaWikiIntegrationTestCase {
|
|||
$fields['testDataField'] = 'test content';
|
||||
} );
|
||||
|
||||
$contentRenderer = $this->getServiceContainer()->getContentRenderer();
|
||||
$output = $contentRenderer->getParserOutput( $page->getContent(), $title );
|
||||
$data = $page->getContentHandler()->getDataForSearchIndex( $page, $output, $mockEngine );
|
||||
$revision = $page->getRevisionRecord();
|
||||
$output = $page->getContentHandler()->getParserOutputForIndexing( $page, null, $revision );
|
||||
$data = $page->getContentHandler()->getDataForSearchIndex( $page, $output, $mockEngine, $revision );
|
||||
$this->assertArrayHasKey( 'text', $data );
|
||||
$this->assertArrayHasKey( 'text_bytes', $data );
|
||||
$this->assertArrayHasKey( 'language', $data );
|
||||
|
|
@ -461,8 +461,9 @@ class ContentHandlerTest extends MediaWikiIntegrationTestCase {
|
|||
public function testParserOutputForIndexing() {
|
||||
$title = Title::newFromText( 'Smithee', NS_MAIN );
|
||||
$page = $this->getServiceContainer()->getWikiPageFactory()->newFromTitle( $title );
|
||||
$revision = $page->getRevisionRecord();
|
||||
|
||||
$out = $page->getContentHandler()->getParserOutputForIndexing( $page );
|
||||
$out = $page->getContentHandler()->getParserOutputForIndexing( $page, null, $revision );
|
||||
$this->assertInstanceOf( ParserOutput::class, $out );
|
||||
$this->assertStringContainsString( 'one who smiths', $out->getRawText() );
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue