wiki.techinc.nl/includes/content/WikitextContentHandler.php

<?php
/**
 * Content handler for wiki text pages.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @since 1.21
 *
 * @file
 * @ingroup Content
 */

namespace MediaWiki\Content;

use MediaWiki\Content\Renderer\ContentParseParams;
use MediaWiki\Content\Transform\PreloadTransformParams;
use MediaWiki\Content\Transform\PreSaveTransformParams;
use MediaWiki\Languages\LanguageNameUtils;
use MediaWiki\Linker\LinkRenderer;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\Parser\MagicWordFactory;
use MediaWiki\Parser\ParserOutput;
use MediaWiki\Parser\ParserOutputFlags;
use MediaWiki\Parser\Parsoid\ParsoidParserFactory;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Title\Title;
use MediaWiki\Title\TitleFactory;
use ParserFactory;
use SearchEngine;
use SearchIndexField;
use Wikimedia\UUID\GlobalIdGenerator;
use WikiPage;

/**
 * Content handler for wiki text pages.
 *
 * @ingroup Content
 */
class WikitextContentHandler extends TextContentHandler {

	/** Used to build Title objects for redirect targets and for the page being parsed. */
	private TitleFactory $titleFactory;
	/** Source of the legacy parser used for pre-save, preload and non-Parsoid parsing. */
	private ParserFactory $parserFactory;
	/** Not used directly in this class; forwarded to the FileContentHandler in getFileHandler(). */
	private GlobalIdGenerator $globalIdGenerator;
	/** Used by makeRedirectContent() to check whether an interwiki prefix has a language name. */
	private LanguageNameUtils $languageNameUtils;
	/** Used by fillParserOutput() to render the redirect header. */
	private LinkRenderer $linkRenderer;
	/** Source of the 'redirect' magic word. */
	private MagicWordFactory $magicWordFactory;
	/** Source of the Parsoid parser used when the parser options request Parsoid. */
	private ParsoidParserFactory $parsoidParserFactory;

	/**
	 * Stores the injected services and registers CONTENT_FORMAT_WIKITEXT as the
	 * only format passed to the parent constructor.
	 *
	 * @param string $modelId Content model ID handled by this instance.
	 * @param TitleFactory $titleFactory
	 * @param ParserFactory $parserFactory
	 * @param GlobalIdGenerator $globalIdGenerator
	 * @param LanguageNameUtils $languageNameUtils
	 * @param LinkRenderer $linkRenderer
	 * @param MagicWordFactory $magicWordFactory
	 * @param ParsoidParserFactory $parsoidParserFactory
	 */
	public function __construct(
		string $modelId,
		TitleFactory $titleFactory,
		ParserFactory $parserFactory,
		GlobalIdGenerator $globalIdGenerator,
		LanguageNameUtils $languageNameUtils,
		LinkRenderer $linkRenderer,
		MagicWordFactory $magicWordFactory,
		ParsoidParserFactory $parsoidParserFactory
	) {
		// $modelId should always be CONTENT_MODEL_WIKITEXT
		parent::__construct( $modelId, [ CONTENT_FORMAT_WIKITEXT ] );
		$this->titleFactory = $titleFactory;
		$this->parserFactory = $parserFactory;
		$this->globalIdGenerator = $globalIdGenerator;
		$this->languageNameUtils = $languageNameUtils;
		$this->linkRenderer = $linkRenderer;
		$this->magicWordFactory = $magicWordFactory;
		$this->parsoidParserFactory = $parsoidParserFactory;
	}

	/**
	 * Names the Content class this handler instantiates.
	 *
	 * makeRedirectContent(), preSaveTransform() and preloadTransform() use the
	 * returned class name to construct new content objects.
	 *
	 * @return class-string<WikitextContent>
	 */
	protected function getContentClass() {
		return WikitextContent::class;
	}

	/**
	 * Builds the wikitext of a redirect page pointing at the given destination.
	 *
	 * The link target is prefixed with a colon when the destination is in the
	 * category namespace, or when LanguageNameUtils::getLanguageName() returns a
	 * truthy name for the destination's interwiki prefix.
	 *
	 * @param Title $destination The page to redirect to.
	 * @param string $text Extra text appended on its own line after the redirect
	 *  link; skipped when it compares loosely equal to the empty string.
	 *
	 * @return Content Instance of the class named by getContentClass().
	 *
	 * @see ContentHandler::makeRedirectContent
	 */
	public function makeRedirectContent( Title $destination, $text = '' ) {
		$needsColon = $destination->getNamespace() === NS_CATEGORY;

		if ( !$needsColon ) {
			$interwiki = $destination->getInterwiki();
			$needsColon = $interwiki && $this->languageNameUtils->getLanguageName(
				$interwiki,
				LanguageNameUtils::AUTONYMS,
				LanguageNameUtils::DEFINED
			);
		}

		$linkPrefix = $needsColon ? ':' : '';
		$redirectKeyword = $this->magicWordFactory->get( 'redirect' )->getSynonym( 0 );
		$wikitext = $redirectKeyword . ' [[' . $linkPrefix . $destination->getFullText() . ']]';

		if ( $text != '' ) {
			$wikitext .= "\n" . $text;
		}

		$contentClass = $this->getContentClass();

		return new $contentClass( $wikitext );
	}

	/**
	 * Reports that this content model supports redirects.
	 *
	 * Redirect wikitext is produced by makeRedirectContent() and recognised by
	 * extractRedirectTargetAndText().
	 *
	 * @return bool Always true.
	 *
	 * @see ContentHandler::supportsRedirects
	 */
	public function supportsRedirects() {
		return true;
	}

	/**
	 * Reports that this content model supports sections.
	 *
	 * @return bool Always true.
	 *
	 * @see ContentHandler::supportsSections
	 */
	public function supportsSections() {
		return true;
	}

	/**
	 * Reports that output for this content model may be cached using the
	 * ParserCache mechanism.
	 *
	 * @since 1.21
	 *
	 * @return bool Always true.
	 *
	 * @see ContentHandler::isParserCacheSupported
	 */
	public function isParserCacheSupported() {
		return true;
	}

	/**
	 * Reports that this handler supports preload content; the transformation
	 * itself is implemented by preloadTransform().
	 *
	 * @inheritDoc
	 * @return bool Always true.
	 */
	public function supportsPreloadContent(): bool {
		return true;
	}

	/**
	 * Creates a FileContentHandler configured with this handler's model ID and
	 * the same services this handler was constructed with.
	 *
	 * A new instance is built on every call; nothing is cached here.
	 *
	 * @return FileContentHandler
	 */
	protected function getFileHandler() {
		return new FileContentHandler(
			$this->getModelID(),
			$this->titleFactory,
			$this->parserFactory,
			$this->globalIdGenerator,
			$this->languageNameUtils,
			$this->linkRenderer,
			$this->magicWordFactory,
			$this->parsoidParserFactory
		);
	}

	/**
	 * Declares the search index fields produced for wikitext pages.
	 *
	 * On top of the fields inherited from the parent handler this adds the text
	 * fields 'heading', 'auxiliary_text' and 'opening_text', then merges in the
	 * fields declared by the file content handler.
	 *
	 * @param SearchEngine $engine Engine used to create the field mappings.
	 *
	 * @return array Search field mappings; the ones added here are keyed by field name.
	 */
	public function getFieldsForSearchIndex( SearchEngine $engine ) {
		$mappings = parent::getFieldsForSearchIndex( $engine );

		// Section headings take part in scoring.
		$heading = $engine->makeSearchFieldMapping( 'heading', SearchIndexField::INDEX_TYPE_TEXT );
		$mappings['heading'] = $heading;
		$heading->setFlag( SearchIndexField::FLAG_SCORING );

		$mappings['auxiliary_text'] =
			$engine->makeSearchFieldMapping( 'auxiliary_text', SearchIndexField::INDEX_TYPE_TEXT );

		// Opening text takes part in scoring but is flagged as not highlighted.
		$openingText = $engine->makeSearchFieldMapping(
			'opening_text',
			SearchIndexField::INDEX_TYPE_TEXT
		);
		$mappings['opening_text'] = $openingText;
		$openingText->setFlag(
			SearchIndexField::FLAG_SCORING | SearchIndexField::FLAG_NO_HIGHLIGHT
		);

		// Files do not have a first-class content handler yet, so the file
		// handler's fields are pulled in explicitly.
		$fileMappings = $this->getFileHandler()->getFieldsForSearchIndex( $engine );

		return array_merge( $mappings, $fileMappings );
	}

	/**
	 * Collects the search index data for a wikitext page.
	 *
	 * Starts from the parent handler's data and sets 'heading', 'opening_text',
	 * 'text' (replacing the inherited value), 'auxiliary_text' and 'defaultsort'
	 * from a WikiTextStructure built on the parser output; 'file_text' defaults
	 * to null. For pages in the file namespace, the file content handler's data
	 * is merged on top.
	 *
	 * @param WikiPage $page
	 * @param ParserOutput $parserOutput
	 * @param SearchEngine $engine
	 * @param RevisionRecord|null $revision
	 *
	 * @return array Index data keyed by field name.
	 */
	public function getDataForSearchIndex(
		WikiPage $page,
		ParserOutput $parserOutput,
		SearchEngine $engine,
		?RevisionRecord $revision = null
	) {
		$data = parent::getDataForSearchIndex( $page, $parserOutput, $engine, $revision );

		$wikitextStructure = new WikiTextStructure( $parserOutput );
		$data['heading'] = $wikitextStructure->headings();
		// Text fields; 'text' replaces the value inherited from ContentHandler.
		$data['opening_text'] = $wikitextStructure->getOpeningText();
		$data['text'] = $wikitextStructure->getMainText();
		$data['auxiliary_text'] = $wikitextStructure->getAuxiliaryText();
		$data['defaultsort'] = $wikitextStructure->getDefaultSort();
		$data['file_text'] = null;

		if ( $page->getTitle()->getNamespace() !== NS_FILE ) {
			return $data;
		}

		// Files do not have a first-class content handler yet, so the file
		// handler is invoked explicitly for pages in the file namespace.
		$fileData = $this->getFileHandler()
			->getDataForSearchIndex( $page, $parserOutput, $engine, $revision );

		return array_merge( $data, $fileData );
	}

	/**
	 * Serializes the given content after validating the requested format.
	 *
	 * checkFormat() is called with $format first; the serialization itself is
	 * delegated to the parent class.
	 * NOTE(review): the parent presumably returns the content's text as-is and
	 * performs its own format check — confirm against TextContentHandler.
	 *
	 * @param Content $content
	 * @param string|null $format The serialization format to check
	 *
	 * @return mixed
	 */
	public function serializeContent( Content $content, $format = null ) {
		$this->checkFormat( $format );
		return parent::serializeContent( $content, $format );
	}

	/**
	 * Applies the parser's pre-save transform to the content's text.
	 *
	 * When the transform leaves the text unchanged, the given $content object is
	 * returned as-is. Otherwise a new content object is created from the
	 * transformed text and tagged with all flags from the parser's output.
	 *
	 * @param Content $content
	 * @param PreSaveTransformParams $pstParams Supplies the page, user and parser options.
	 *
	 * @return Content
	 */
	public function preSaveTransform(
		Content $content,
		PreSaveTransformParams $pstParams
	): Content {
		'@phan-var WikitextContent $content';
		$original = $content->getText();
		$legacyParser = $this->parserFactory->getInstance();

		$transformed = $legacyParser->preSaveTransform(
			$original,
			$pstParams->getPage(),
			$pstParams->getUser(),
			$pstParams->getParserOptions()
		);

		if ( $transformed !== $original ) {
			$contentClass = $this->getContentClass();
			$result = new $contentClass( $transformed );
			$result->setPreSaveTransformFlags( $legacyParser->getOutput()->getAllFlags() );

			return $result;
		}

		return $content;
	}

	/**
	 * Returns a new Content object holding the preload-transformed text.
	 *
	 * The text is passed through the parser's getPreloadText() and the result is
	 * always wrapped in a newly constructed content object, even when the text
	 * did not change.
	 *
	 * @param Content $content
	 * @param PreloadTransformParams $pltParams Supplies the page, parser options
	 *  and preload parameters.
	 *
	 * @return Content
	 */
	public function preloadTransform(
		Content $content,
		PreloadTransformParams $pltParams
	): Content {
		'@phan-var WikitextContent $content';
		$rawText = $content->getText();
		$parser = $this->parserFactory->getInstance();

		$preloaded = $parser->getPreloadText(
			$rawText,
			$pltParams->getPage(),
			$pltParams->getParserOptions(),
			$pltParams->getParams()
		);

		$contentClass = $this->getContentClass();
		$result = new $contentClass( $preloaded );

		return $result;
	}

	/**
	 * Splits redirect wikitext into its target and the text that follows it.
	 *
	 * The text (with leading whitespace trimmed) must start with the redirect
	 * magic word, directly followed by a [[link]] that may be preceded by a
	 * colon. If any step fails, or the link does not name a valid redirect
	 * target, the result is [ null, $content ] with the original content object.
	 *
	 * @since 1.41 (used to be a method on WikitextContent since 1.23)
	 *
	 * @param WikitextContent $content
	 *
	 * @return array List of two elements: LinkTarget|null and WikitextContent object.
	 */
	public function extractRedirectTargetAndText( WikitextContent $content ): array {
		$noRedirect = [ null, $content ];

		$redirectWord = $this->magicWordFactory->get( 'redirect' );
		$remainder = ltrim( $content->getText() );

		// On success the magic word is removed from the start of $remainder.
		if ( !$redirectWord->matchStartAndRemove( $remainder ) ) {
			return $noRedirect;
		}

		// The first link must come directly after #REDIRECT; some older
		// redirects put a colon in front of the link, which is tolerated.
		$matches = [];
		if ( !preg_match( '!^\s*:?\s*\[{2}(.*?)(?:\|.*?)?\]{2}\s*!', $remainder, $matches ) ) {
			return $noRedirect;
		}

		[ $matchedPrefix, $linkTarget ] = $matches;

		// Percent-encoded targets are decoded (after dropping leading colons)
		// to match the behavior of inline link parsing.
		if ( strpos( $linkTarget, '%' ) !== false ) {
			$linkTarget = rawurldecode( ltrim( $linkTarget, ':' ) );
		}

		// TODO: Move isValidRedirectTarget() out Title, so we can use a TitleValue here.
		$target = $this->titleFactory->newFromText( $linkTarget );

		// Invalid titles and invalid redirect targets are treated as "no redirect".
		if ( !( $target instanceof Title && $target->isValidRedirectTarget() ) ) {
			return $noRedirect;
		}

		$textAfterLink = substr( $remainder, strlen( $matchedPrefix ) );

		return [ $target, new WikitextContent( $textAfterLink ) ];
	}

	/**
	 * Fills the given ParserOutput by parsing the content's wikitext, using
	 * either Parsoid or the legacy parser depending on the parser options.
	 *
	 * Redirect pages additionally get their target registered as a link. When
	 * HTML generation is requested they also get a redirect header and its style
	 * module; otherwise the raw text of the output is set to null.
	 *
	 * @since 1.38
	 *
	 * @param Content $content
	 * @param ContentParseParams $cpoParams Supplies the page, parser options,
	 *  revision ID, previous output (passed to Parsoid only) and whether HTML
	 *  should be generated.
	 * @param ParserOutput &$parserOutput The output object to fill (reference);
	 *  it is replaced by the object returned from the parser.
	 */
	protected function fillParserOutput(
		Content $content,
		ContentParseParams $cpoParams,
		ParserOutput &$parserOutput
	) {
		'@phan-var WikitextContent $content';
		$title = $this->titleFactory->newFromPageReference( $cpoParams->getPage() );
		$parserOptions = $cpoParams->getParserOptions();
		$revId = $cpoParams->getRevId();

		[ $redir, $contentWithoutRedirect ] = $this->extractRedirectTargetAndText( $content );
		if ( $parserOptions->getUseParsoid() ) {
			$parser = $this->parsoidParserFactory->create();
			// Parsoid renders the #REDIRECT magic word as an invisible
			// <link> tag and doesn't require it to be stripped.
			// T349087: ...and in fact, RESTBase relies on getting
			// redirect information from this <link> tag, so it needs
			// to be present.
			// Further, Parsoid can accept a Content in place of a string.
			$text = $content;
			$extraArgs = [ $cpoParams->getPreviousOutput() ];
		} else {
			// The legacy parser requires the #REDIRECT magic word to
			// be stripped from the content before parsing.
			$parser = $this->parserFactory->getInstance();
			$text = $contentWithoutRedirect->getText();
			$extraArgs = [];
		}

		// Measure wall-clock parse time: start negative, add the end timestamp.
		$time = -microtime( true );

		$parserOutput = $parser
			->parse( $text, $title, $parserOptions, true, true, $revId, ...$extraArgs );
		$time += microtime( true );

		// Timing hack: log parses that took longer than three seconds
		if ( $time > 3 ) {
			// TODO: Use Parser's logger (once it has one)
			$channel = $parserOptions->getUseParsoid() ? 'slow-parsoid' : 'slow-parse';
			$logger = LoggerFactory::getInstance( $channel );
			$logger->info( 'Parsing {title} was slow, took {time} seconds', [
				'time' => number_format( $time, 2 ),
				'title' => (string)$title,
				'trigger' => $parserOptions->getRenderReason(),
			] );
		}

		// T330667: Record the fact that we used the value of
		// 'useParsoid' to influence this parse.  Note that
		// ::getUseParsoid() has a side-effect on $parserOutput here
		// which didn't occur when we called ::getUseParsoid() earlier
		// because $parserOutput didn't exist at that time.
		$parserOptions->getUseParsoid();

		// Add redirect indicator at the top
		if ( $redir ) {
			// Make sure to include the redirect link in pagelinks
			$parserOutput->addLink( $redir );
			if ( $cpoParams->getGenerateHtml() ) {
				$parserOutput->setRedirectHeader(
					$this->linkRenderer->makeRedirectHeader(
						$title->getPageLanguage(), $redir, false
					)
				);
				$parserOutput->addModuleStyles( [ 'mediawiki.action.view.redirectPage' ] );
			} else {
				$parserOutput->setRawText( null );
			}
		}

		// Pass along user-signature flag. The comparison is strict so that only
		// the exact string flag matches, independent of loose-comparison rules.
		if ( in_array( 'user-signature', $content->getPreSaveTransformFlags(), true ) ) {
			$parserOutput->setOutputFlag( ParserOutputFlags::USER_SIGNATURE );
		}
	}
}

/**
 * Registers the un-namespaced name 'WikitextContentHandler' as an alias of the
 * namespaced class.
 *
 * @deprecated class alias since 1.43
 */
class_alias( WikitextContentHandler::class, 'WikitextContentHandler' );