This is an initial quick-and-dirty implementation. The ParsoidParser class will eventually inherit from \Parser, but this is an initial placeholder to unblock other Parsoid read views work. Currently Parsoid does not fully implement all the ParserOutput metadata set by the legacy parser, but we're working on it. This patch also addresses T300325 by ensuring the Page HTML APIs use ParserOutput::getRawText(), which will return the entire Parsoid HTML document without post-processing. This is what the Parsoid team refers to as "edit mode" HTML. The ParserOutput::getText() method returns only the <body> contents of the HTML, and applies several transformations, including inserting Table of Contents and style deduplication; this is the "read views" flavor of the Parsoid HTML. We need to be careful of the interaction of the `useParsoid` flag with the ParserCacheMetadata. Effectively `useParsoid` should *always* be marked as "used" or else the ParserCache will assume its value doesn't matter and will serve legacy content for parsoid requests and vice-versa. T330677 is a follow-up to address this more thoroughly by splitting the parser cache in ParserOutputAccess; the stopgap in this patch is fragile and, because it doesn't fork the ParserCacheMetadata cache, may corrupt the ParserCacheMetadata in the case when Parsoid and the legacy parser consult different sets of options to render a page. Bug: T300191 Bug: T330677 Bug: T300325 Change-Id: Ica09a4284c00d7917f8b6249e946232b2fb38011
77 lines
1.9 KiB
PHP
77 lines
1.9 KiB
PHP
<?php
|
|
|
|
namespace MediaWiki\Parser\Parsoid;
|
|
|
|
use MediaWiki\Languages\LanguageConverterFactory;
|
|
use MediaWiki\Parser\Parsoid\Config\PageConfigFactory;
|
|
use ParserFactory;
|
|
use Wikimedia\Parsoid\Config\DataAccess;
|
|
use Wikimedia\Parsoid\Config\SiteConfig;
|
|
|
|
/**
 * Factory for ParsoidParser instances.
 *
 * Modelled on \ParserFactory, but simpler: every call to create() builds a
 * brand-new parser, so there is no parser-reuse logic here. The intent is
 * to eventually simplify \ParserFactory in the same way.
 *
 * @since 1.41
 * @internal May be combined with \ParserFactory or otherwise refactored
 *
 * @file
 * @ingroup Parser
 */
class ParsoidParserFactory /* eventually this may extend \ParserFactory */ {

	/** @var SiteConfig Passed to every ParsoidParser built by create() */
	private $siteConfig;

	/** @var DataAccess Passed to every ParsoidParser built by create() */
	private $dataAccess;

	/** @var PageConfigFactory Passed to every ParsoidParser built by create() */
	private $pageConfigFactory;

	/** @var LanguageConverterFactory Passed to every ParsoidParser built by create() */
	private $languageConverterFactory;

	/** @var ParserFactory Legacy parser factory, passed to every ParsoidParser built by create() */
	private $legacyParserFactory;

	/**
	 * Store the collaborators that are later forwarded, unchanged, to each
	 * ParsoidParser this factory constructs.
	 *
	 * @param SiteConfig $siteConfig
	 * @param DataAccess $dataAccess
	 * @param PageConfigFactory $pageConfigFactory
	 * @param LanguageConverterFactory $languageConverterFactory
	 * @param ParserFactory $legacyParserFactory
	 */
	public function __construct(
		SiteConfig $siteConfig,
		DataAccess $dataAccess,
		PageConfigFactory $pageConfigFactory,
		LanguageConverterFactory $languageConverterFactory,
		ParserFactory $legacyParserFactory
	) {
		// Plain assignments only; nothing is validated or derived here.
		$this->legacyParserFactory = $legacyParserFactory;
		$this->languageConverterFactory = $languageConverterFactory;
		$this->pageConfigFactory = $pageConfigFactory;
		$this->dataAccess = $dataAccess;
		$this->siteConfig = $siteConfig;
	}

	/**
	 * Build a fresh ParsoidParser wired with this factory's dependencies.
	 *
	 * A new object is constructed on each call; nothing is cached or reused.
	 *
	 * @return ParsoidParser
	 * @since 1.41
	 * @unstable
	 */
	public function create(): ParsoidParser {
		$parser = new ParsoidParser(
			$this->siteConfig,
			$this->dataAccess,
			$this->pageConfigFactory,
			$this->languageConverterFactory,
			$this->legacyParserFactory
		);

		return $parser;
	}
}
|