wiki.techinc.nl/includes/parser/Parsoid/PageBundleParserOutputConverter.php
daniel e7f21f6e64 HtmlOutputRendererHelper: fall back to page language
HtmlOutputRendererHelper should not crash hard if the ParserOutput has
no language set. ParserOutput may come from a variety of places, we
should be lenient about it not having a language.

However, we should try harder to actually set a language on ParserOutput
if we have one available. So this also updates
PageBundleParserOutputConverter to keep the ParserOutput's language in
sync with the language header in the PageBundle.

Bug: T349868
Bug: T353689
Bug: T359426
Change-Id: I2edf20dc3b199e22cda2f32bc858c21ca7d8f4bd
2024-03-06 17:18:16 +00:00

108 lines
3.1 KiB
PHP

<?php
namespace MediaWiki\Parser\Parsoid;
use LanguageCode;
use MediaWiki\Parser\ParserOutput;
use Wikimedia\Parsoid\Core\PageBundle;
/**
* Provides methods for conversion between PageBundle and ParserOutput
* TODO: Convert to a trait once we drop support for PHP < 8.2 since
* support for constants in traits was added in PHP 8.2
* @since 1.40
* @internal
*/
final class PageBundleParserOutputConverter {
	/**
	 * @var string Key used to store parsoid page bundle data in ParserOutput
	 */
	public const PARSOID_PAGE_BUNDLE_KEY = 'parsoid-page-bundle';

	/**
	 * This class only offers static helpers, so instances must never be created.
	 */
	private function __construct() {
	}

	/**
	 * Creates a ParserOutput object containing the relevant data from
	 * the given PageBundle object.
	 *
	 * The bundle's parsoid, mw, version, headers and contentmodel properties are
	 * stored as extension data under self::PARSOID_PAGE_BUNDLE_KEY, so they are
	 * cached together with the ParserOutput and can be used for VE edits and
	 * transformations.
	 *
	 * If the bundle carries a 'content-language' header, the language of the
	 * new ParserOutput is set from it.
	 *
	 * @param PageBundle $pageBundle
	 * @param ?ParserOutput $originalParserOutput Any non-parsoid metadata
	 *   from $originalParserOutput will be copied into the new ParserOutput object.
	 *
	 * @return ParserOutput
	 */
	public static function parserOutputFromPageBundle(
		PageBundle $pageBundle, ?ParserOutput $originalParserOutput = null
	): ParserOutput {
		$parserOutput = new ParserOutput( $pageBundle->html );

		if ( $originalParserOutput ) {
			$parserOutput->mergeHtmlMetaDataFrom( $originalParserOutput );
			$parserOutput->mergeTrackingMetaDataFrom( $originalParserOutput );
			$parserOutput->mergeInternalMetaDataFrom( $originalParserOutput );
		}

		$parserOutput->setExtensionData(
			self::PARSOID_PAGE_BUNDLE_KEY,
			[
				'parsoid' => $pageBundle->parsoid,
				'mw' => $pageBundle->mw,
				'version' => $pageBundle->version,
				'headers' => $pageBundle->headers,
				'contentmodel' => $pageBundle->contentmodel,
			]
		);

		// Keep the ParserOutput's language in sync with the bundle's language header.
		$contentLanguage = $pageBundle->headers['content-language'] ?? null;
		if ( $contentLanguage !== null ) {
			$parserOutput->setLanguage(
				LanguageCode::normalizeNonstandardCodeAndWarn( $contentLanguage )
			);
		}

		return $parserOutput;
	}

	/**
	 * Returns a Parsoid PageBundle equivalent to the given ParserOutput.
	 *
	 * Page bundle data is read from the extension data stored under
	 * self::PARSOID_PAGE_BUNDLE_KEY. Missing pieces fall back to defaults:
	 * empty arrays for 'parsoid', 'mw' and 'headers', '0.0.0' for 'version'
	 * and null for 'contentmodel'.
	 *
	 * If the ParserOutput has a language, it overrides the 'content-language'
	 * header of the resulting PageBundle, so both stay in sync.
	 *
	 * @param ParserOutput $parserOutput
	 *
	 * @return PageBundle
	 */
	public static function pageBundleFromParserOutput( ParserOutput $parserOutput ): PageBundle {
		$pageBundleData = $parserOutput->getExtensionData( self::PARSOID_PAGE_BUNDLE_KEY );
		$lang = $parserOutput->getLanguage();

		$headers = $pageBundleData['headers'] ?? [];

		if ( $lang ) {
			// Headers carry plain string codes. ParserOutput::getLanguage() is
			// expected to return a Bcp47Code object, so convert it; a plain
			// string is passed through unchanged.
			$headers['content-language'] = is_string( $lang ) ? $lang : $lang->toBcp47Code();
		}

		return new PageBundle(
			$parserOutput->getRawText(),
			$pageBundleData['parsoid'] ?? [],
			$pageBundleData['mw'] ?? [],
			// It would be nice to have this be "null", but PageBundle::responseData()
			// chokes on that: T325137.
			$pageBundleData['version'] ?? '0.0.0',
			// Always pass the merged headers: falling back to them only when no
			// headers were stored would silently drop the language set above.
			$headers,
			$pageBundleData['contentmodel'] ?? null
		);
	}

	/**
	 * Checks whether the given ParserOutput has page bundle data attached.
	 *
	 * @param ParserOutput $parserOutput
	 *
	 * @return bool True if extension data is present under
	 *   self::PARSOID_PAGE_BUNDLE_KEY, false otherwise.
	 */
	public static function hasPageBundle( ParserOutput $parserOutput ): bool {
		return $parserOutput->getExtensionData( self::PARSOID_PAGE_BUNDLE_KEY ) !== null;
	}
}