diff --git a/includes/Rest/Handler/ParsoidFormatHelper.php b/includes/Rest/Handler/ParsoidFormatHelper.php new file mode 100644 index 00000000000..38d5ddd5cce --- /dev/null +++ b/includes/Rest/Handler/ParsoidFormatHelper.php @@ -0,0 +1,132 @@ + 'plain', + self::FORMAT_HTML => 'html', + self::FORMAT_PAGEBUNDLE => 'json', + self::FORMAT_LINT => 'json', + ]; + + public const VALID_PAGE = [ + self::FORMAT_WIKITEXT, self::FORMAT_HTML, self::FORMAT_PAGEBUNDLE, self::FORMAT_LINT + ]; + + public const VALID_TRANSFORM = [ + self::FORMAT_WIKITEXT => [ self::FORMAT_HTML, self::FORMAT_PAGEBUNDLE, self::FORMAT_LINT ], + self::FORMAT_HTML => [ self::FORMAT_WIKITEXT ], + self::FORMAT_PAGEBUNDLE => [ self::FORMAT_WIKITEXT, self::FORMAT_PAGEBUNDLE ], + ]; + + /** + * Get the content type appropriate for a given response format. + * @param string $format One of the FORMAT_* constants + * @param ?string $contentVersion Output version, only for HTML and pagebundle + * formats. See Env::getcontentVersion(). + * @return string + */ + public static function getContentType( + string $format, ?string $contentVersion = null + ): string { + if ( $format !== self::FORMAT_WIKITEXT && !$contentVersion ) { + throw new InvalidArgumentException( '$contentVersion is required for this format' ); + } + + switch ( $format ) { + case self::FORMAT_WIKITEXT: + $contentType = 'text/plain'; + // PORT-FIXME in the original the version number is from MWParserEnvironment.wikitextVersion + // but it did not seem to be used anywhere + $profile = 'https://www.mediawiki.org/wiki/Specs/wikitext/1.0.0'; + break; + case self::FORMAT_HTML: + $contentType = 'text/html'; + $profile = 'https://www.mediawiki.org/wiki/Specs/HTML/' . $contentVersion; + break; + case self::FORMAT_PAGEBUNDLE: + $contentType = 'application/json'; + $profile = 'https://www.mediawiki.org/wiki/Specs/pagebundle/' . $contentVersion; + break; + default: + throw new InvalidArgumentException( "Invalid format $format" ); + } + return "$contentType; charset=utf-8; profile=\"$profile\""; + } + + /** + * Set the Content-Type header appropriate for a given response format. + * @param ResponseInterface $response + * @param string $format One of the FORMAT_* constants + * @param ?string $contentVersion Output version, only for HTML and pagebundle + * formats. See Env::getcontentVersion(). + */ + public static function setContentType( + ResponseInterface $response, string $format, + ?string $contentVersion = null + ): void { + $response->setHeader( 'Content-Type', self::getContentType( $format, $contentVersion ) ); + } + + /** + * Parse a Content-Type header and return the format type and version. + * Mostly the inverse of getContentType() but also accounts for legacy formats. + * @param string $contentTypeHeader The value of the Content-Type header. + * @param ?string &$format Format type will be set here (as a FORMAT_* constant). + * @return ?string Format version, or null if it couldn't be identified. 
+ * @see Env::getInputContentVersion() + */ + public static function parseContentTypeHeader( + string $contentTypeHeader, ?string &$format = null + ): ?string { + $newProfileSyntax = 'https://www.mediawiki.org/wiki/Specs/(HTML|pagebundle)/'; + $oldProfileSyntax = 'mediawiki.org/specs/(html)/'; + $profileRegex = "#\bprofile=\"(?:$newProfileSyntax|$oldProfileSyntax)(\d+\.\d+\.\d+)\"#"; + preg_match( $profileRegex, $contentTypeHeader, $m ); + if ( $m ) { + switch ( $m[1] ?: $m[2] ) { + case 'HTML': + case 'html': + $format = self::FORMAT_HTML; + break; + case 'pagebundle': + $format = self::FORMAT_PAGEBUNDLE; + break; + } + return $m[3]; + } + return null; + } + +} diff --git a/includes/Rest/Handler/ParsoidHandler.php b/includes/Rest/Handler/ParsoidHandler.php new file mode 100644 index 00000000000..044829421eb --- /dev/null +++ b/includes/Rest/Handler/ParsoidHandler.php @@ -0,0 +1,1097 @@ +getMainConfig()->get( 'ParsoidSettings' ), + $services->getParsoidSiteConfig(), + $services->getParsoidPageConfigFactory(), + $services->getParsoidDataAccess() + ); + } + + /** + * @param array $parsoidSettings + * @param SiteConfig $siteConfig + * @param PageConfigFactory $pageConfigFactory + * @param DataAccess $dataAccess + */ + public function __construct( + array $parsoidSettings, + SiteConfig $siteConfig, + PageConfigFactory $pageConfigFactory, + DataAccess $dataAccess + ) { + $this->parsoidSettings = $parsoidSettings; + $this->siteConfig = $siteConfig; + $this->pageConfigFactory = $pageConfigFactory; + $this->dataAccess = $dataAccess; + $this->extensionRegistry = ExtensionRegistry::getInstance(); + $this->metrics = $siteConfig->metrics(); + } + + /** @inheritDoc */ + public function checkPreconditions() { + // Execute this since this sets up state + // needed for other functionality. + parent::checkPreconditions(); + + // Disable precondition checks by ignoring the return value above. + // Parsoid/JS doesn't implement these checks. + // See https://phabricator.wikimedia.org/T238849#5683035 for a discussion. + return null; + } + + /** + * Verify that the {domain} path parameter matches the actual domain. + * @param string $domain Domain name parameter to validate + */ + protected function assertDomainIsCorrect( $domain ): void { + // We are cutting some corners here (IDN, non-ASCII casing) + // since domain name support is provisional. + // TODO use a proper validator instead + $server = \RequestContext::getMain()->getConfig()->get( 'Server' ); + $expectedDomain = wfParseUrl( $server )['host'] ?? null; + if ( !$expectedDomain ) { + throw new LogicException( 'Cannot parse $wgServer' ); + } + if ( strcasecmp( (string)$expectedDomain, $domain ) === 0 ) { + return; + } + + // TODO probably the better + if ( $this->extensionRegistry->isLoaded( 'MobileFrontend' ) ) { + // @phan-suppress-next-line PhanUndeclaredClassMethod + $mobileServer = MobileContext::singleton()->getMobileUrl( $server ); + $expectedMobileDomain = wfParseUrl( $mobileServer )['host'] ?? null; + if ( strcasecmp( (string)$expectedMobileDomain, $domain ) === 0 ) { + return; + } + } + + throw new ValidationException( + new DataMessageValue( 'mwparsoid-invalid-domain', [], 'invalid-domain', [ + 'expected' => $expectedDomain, + 'actual' => $domain, + ] ), + 'domain', $domain, [] + ); + } + + /** + * Get the parsed body by content-type + * + * @return array + */ + protected function getParsedBody(): array { + $request = $this->getRequest(); + list( $contentType ) = explode( ';', $request->getHeader( 'Content-Type' )[0] ?? 
'', 2 ); + switch ( $contentType ) { + case 'application/x-www-form-urlencoded': + case 'multipart/form-data': + return $request->getPostParams(); + case 'application/json': + $json = json_decode( $request->getBody()->getContents(), true ); + if ( !is_array( $json ) ) { + throw new HttpException( 'Payload does not JSON decode to an array.', 400 ); + } + return $json; + default: + throw new HttpException( 'Unsupported Media Type', 415 ); + } + } + + /** + * Rough equivalent of req.local from Parsoid-JS. + * FIXME most of these should be replaced with more native ways of handling the request. + * @return array + */ + protected function &getRequestAttributes(): array { + if ( $this->requestAttributes ) { + return $this->requestAttributes; + } + + $request = $this->getRequest(); + $body = ( $request->getMethod() === 'POST' ) ? $this->getParsedBody() : []; + $opts = array_merge( $body, array_intersect_key( $request->getPathParams(), + [ 'from' => true, 'format' => true ] ) ); + '@phan-var array $opts'; // @var array $opts + $attribs = [ + 'titleMissing' => empty( $request->getPathParams()['title'] ), + 'pageName' => $request->getPathParam( 'title' ) ?? '', + 'oldid' => $request->getPathParam( 'revision' ), + // "body_only" flag to return just the body (instead of the entire HTML doc) + // We would like to deprecate use of this flag: T181657 + 'body_only' => $request->getQueryParams()['body_only'] ?? $body['body_only'] ?? null, + 'errorEnc' => ParsoidFormatHelper::ERROR_ENCODING[$opts['format']] ?? 'plain', + 'iwp' => WikiMap::getCurrentWikiId(), // PORT-FIXME verify + 'subst' => (bool)( $request->getQueryParams()['subst'] ?? $body['subst'] ?? null ), + 'offsetType' => $body['offsetType'] + ?? $request->getQueryParams()['offsetType'] + // Lint requests should return UCS2 offsets by default + ?? ( $opts['format'] === ParsoidFormatHelper::FORMAT_LINT ? 'ucs2' : 'byte' ), + 'pagelanguage' => $request->getHeaderLine( 'Content-Language' ) ?: null, + ]; + + if ( $request->getMethod() === 'POST' ) { + if ( isset( $opts['original']['revid'] ) ) { + $attribs['oldid'] = $opts['original']['revid']; + } + if ( isset( $opts['original']['title'] ) ) { + $attribs['titleMissing'] = false; + $attribs['pageName'] = $opts['original']['title']; + } + } + if ( $attribs['oldid'] !== null ) { + if ( $attribs['oldid'] === '' ) { + $attribs['oldid'] = null; + } else { + $attribs['oldid'] = (int)$attribs['oldid']; + } + } + + $attribs['envOptions'] = [ + // We use `prefix` but ought to use `domain` (T206764) + 'prefix' => $attribs['iwp'], + 'domain' => $request->getPathParam( 'domain' ), + 'pageName' => $attribs['pageName'], + 'offsetType' => $attribs['offsetType'], + 'cookie' => $request->getHeaderLine( 'Cookie' ), + 'reqId' => $request->getHeaderLine( 'X-Request-Id' ), + 'userAgent' => $request->getHeaderLine( 'User-Agent' ), + 'htmlVariantLanguage' => $request->getHeaderLine( 'Accept-Language' ) ?: null, + // Semver::satisfies checks below expect a valid outputContentVersion value. + // Better to set it here instead of adding the default value at every check. 
+ 'outputContentVersion' => Parsoid::defaultHTMLVersion(), + ]; + $attribs['opts'] = $opts; + + if ( empty( $this->parsoidSettings['debugApi'] ) ) { + $this->assertDomainIsCorrect( $attribs['envOptions']['domain'] ); + } + + $this->requestAttributes = $attribs; + return $this->requestAttributes; + } + + /** + * FIXME: Combine with ParsoidFormatHelper::parseContentTypeHeader + */ + private const NEW_SPEC = + '#^https://www.mediawiki.org/wiki/Specs/(HTML|pagebundle)/(\d+\.\d+\.\d+)$#D'; + + /** + * This method checks if we support the requested content formats + * As a side-effect, it updates $attribs to set outputContentVersion + * that Parsoid should generate based on request headers. + * + * @param array &$attribs Request attributes from getRequestAttributes() + * @return bool + */ + protected function acceptable( array &$attribs ): bool { + $request = $this->getRequest(); + $format = $attribs['opts']['format']; + + if ( $format === ParsoidFormatHelper::FORMAT_WIKITEXT ) { + return true; + } + + $acceptHeader = $request->getHeader( 'Accept' ); + if ( !$acceptHeader ) { + return true; + } + + $parser = new HttpAcceptParser(); + $acceptableTypes = $parser->parseAccept( $acceptHeader[0] ); // FIXME: Multiple headers valid? + if ( !$acceptableTypes ) { + return true; + } + + // `acceptableTypes` is already sorted by quality. + foreach ( $acceptableTypes as $t ) { + $type = "{$t['type']}/{$t['subtype']}"; + $profile = $t['params']['profile'] ?? null; + if ( + ( $format === ParsoidFormatHelper::FORMAT_HTML && $type === 'text/html' ) || + ( $format === ParsoidFormatHelper::FORMAT_PAGEBUNDLE && $type === 'application/json' ) + ) { + if ( $profile ) { + preg_match( self::NEW_SPEC, $profile, $matches ); + if ( $matches && strtolower( $matches[1] ) === $format ) { + $contentVersion = Parsoid::resolveContentVersion( $matches[2] ); + if ( $contentVersion ) { + // $attribs mutated here! + $attribs['envOptions']['outputContentVersion'] = $contentVersion; + return true; + } else { + continue; + } + } else { + continue; + } + } else { + return true; + } + } elseif ( + ( $type === '*/*' ) || + ( $format === ParsoidFormatHelper::FORMAT_HTML && $type === 'text/*' ) + ) { + return true; + } + } + + return false; + } + + /** + * @param string $title The page to be transformed + * @param ?int $revision The revision to be transformed + * @param ?string $wikitextOverride + * Custom wikitext to use instead of the real content of the page. + * @param ?string $pagelanguageOverride + * @return PageConfig + */ + protected function createPageConfig( + string $title, ?int $revision, ?string $wikitextOverride = null, + ?string $pagelanguageOverride = null + ): PageConfig { + $title = $title ? Title::newFromText( $title ) : Title::newMainPage(); + if ( !$title ) { + // TODO use proper validation + throw new LogicException( 'Title not found!' ); + } + $user = RequestContext::getMain()->getUser(); + // Note: Parsoid by design isn't supposed to use the user + // context right now, and all user state is expected to be + // introduced as a post-parse transform. So although we pass a + // User here, it only currently affects the output in obscure + // corner cases; see PageConfigFactory::create() for more. + // @phan-suppress-next-line PhanUndeclaredMethod method defined in subtype + return $this->pageConfigFactory->create( + $title, $user, $revision, $wikitextOverride, $pagelanguageOverride, + $this->parsoidSettings + ); + } + + /** + * Redirect to another Parsoid URL (e.g. 
canonization) + * @param string $path Target URL + * @param array $queryParams Query parameters + * @return Response + */ + protected function createRedirectResponse( + string $path, array $queryParams = [] + ): Response { + // FIXME this should not be necessary in the REST entry point + unset( $queryParams['title'] ); + + $url = $this->getRouter()->getRouteUrl( $path, [], $queryParams ); + if ( $this->getRequest()->getMethod() === 'POST' ) { + $response = $this->getResponseFactory()->createTemporaryRedirect( $url ); + } else { + $response = $this->getResponseFactory()->createLegacyTemporaryRedirect( $url ); + } + $response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' ); + return $response; + } + + /** + * Try to create a PageConfig object. If we get an exception (because content + * may be missing or inaccessible), throw an appropriate HTTP response object + * for callers to handle. + * + * @param array $attribs + * @param ?string $wikitext + * @param bool $html2WtMode + * @return PageConfig + * @throws HttpException + */ + protected function tryToCreatePageConfig( + array $attribs, ?string $wikitext = null, bool $html2WtMode = false + ): PageConfig { + $oldid = $attribs['oldid']; + + try { + $pageConfig = $this->createPageConfig( + $attribs['pageName'], $oldid, $wikitext, + $attribs['pagelanguage'] + ); + } catch ( RevisionAccessException $exception ) { + throw new HttpException( 'The specified revision is deleted or suppressed.', 404 ); + } + + $hasOldId = ( $attribs['oldid'] !== null ); + if ( ( !$html2WtMode || $hasOldId ) && $pageConfig->getRevisionContent() === null ) { + // T234549 + throw new HttpException( + 'The specified revision does not exist.', 404 + ); + } + + if ( !$html2WtMode && $wikitext === null && !$hasOldId ) { + // Redirect to the latest revid + throw new ResponseException( + $this->createRedirectToOldidResponse( $pageConfig, $attribs ) + ); + } + + // All good! + return $pageConfig; + } + + /** + * Expand the current URL with the latest revision number and redirect there. + * + * @param PageConfig $pageConfig + * @param array $attribs Request attributes from getRequestAttributes() + * @return Response + */ + protected function createRedirectToOldidResponse( + PageConfig $pageConfig, array $attribs + ): Response { + $domain = $attribs['envOptions']['domain']; + $format = $this->getRequest()->getPathParam( 'format' ); + $target = $pageConfig->getTitle(); + $encodedTarget = PHPUtils::encodeURIComponent( $target ); + $revid = $pageConfig->getRevisionId(); + + if ( $revid === null ) { + throw new LogicException( 'Expected page to have a revision id.' ); + } + + $this->metrics->increment( 'redirectToOldid.' . $format ); + + if ( $this->getRequest()->getMethod() === 'POST' ) { + $from = $this->getRequest()->getPathParam( 'from' ); + $newPath = "/coredev/v0/$domain/transform/$from/to/$format/$encodedTarget/$revid"; + } else { + // TODO: Change this to the /v1/ revision endpoint + $newPath = "/$domain/v3/page/$format/$encodedTarget/$revid"; + } + return $this->createRedirectResponse( $newPath, $this->getRequest()->getQueryParams() ); + } + + /** + * Wikitext -> HTML helper. + * Spec'd in https://phabricator.wikimedia.org/T75955 and the API tests. + * + * @param PageConfig $pageConfig + * @param array $attribs Request attributes from getRequestAttributes() + * @param ?string $wikitext Wikitext to transform (or null to use the + * page specified in the request attributes). 
+	 * @return Response
+	 */
+	protected function wt2html(
+		PageConfig $pageConfig, array $attribs, ?string $wikitext = null
+	) {
+		$request = $this->getRequest();
+		$opts = $attribs['opts'];
+		$format = $opts['format'];
+		$oldid = $attribs['oldid'];
+
+		$needsPageBundle = ( $format === ParsoidFormatHelper::FORMAT_PAGEBUNDLE );
+		$doSubst = ( $wikitext !== null && $attribs['subst'] );
+
+		// Performance Timing options
+		// init refers to time elapsed before parsing begins
+		$metrics = $this->metrics;
+		$timing = Timing::start( $metrics );
+
+		if ( Semver::satisfies( $attribs['envOptions']['outputContentVersion'],
+			'!=' . Parsoid::defaultHTMLVersion() ) ) {
+			$metrics->increment( 'wt2html.parse.version.notdefault' );
+		}
+
+		$parsoid = new Parsoid( $this->siteConfig, $this->dataAccess );
+
+		if ( $doSubst ) {
+			if ( $format !== ParsoidFormatHelper::FORMAT_HTML ) {
+				throw new HttpException(
+					'Substitution is only supported for the HTML format.', 501
+				);
+			}
+			$wikitext = $parsoid->substTopLevelTemplates(
+				$pageConfig, (string)$wikitext
+			);
+			$pageConfig = $this->createPageConfig(
+				$attribs['pageName'], $attribs['oldid'], $wikitext
+			);
+		}
+
+		if (
+			!empty( $this->parsoidSettings['devAPI'] ) &&
+			( $request->getQueryParams()['follow_redirects'] ?? false )
+		) {
+			$content = $pageConfig->getRevisionContent();
+			$redirectTarget = $content ? $content->getRedirectTarget() : null;
+			if ( $redirectTarget ) {
+				$redirectInfo = $this->dataAccess->getPageInfo(
+					$pageConfig, [ $redirectTarget ]
+				);
+				$encodedTarget = PHPUtils::encodeURIComponent( $redirectTarget );
+				$redirectPath =
+					"/{$attribs['envOptions']['domain']}/v3/page/$encodedTarget/wikitext";
+				if ( $redirectInfo['revId'] ) {
+					$redirectPath .= '/' . $redirectInfo['revId'];
+				}
+				throw new ResponseException(
+					$this->createRedirectResponse( $redirectPath, $request->getQueryParams() )
+				);
+			}
+		}
+
+		$reqOpts = $attribs['envOptions'] + [
+			'pageBundle' => $needsPageBundle,
+			// When substing, set data-parsoid to be discarded, so that the subst'ed
+			// content is considered new when it comes back.
+			'discardDataParsoid' => $doSubst,
+			'contentmodel' => $opts['contentmodel'] ?? null,
+		];
+
+		// VE, the only client using body_only property,
+		// doesn't want section tags when this flag is set.
+ // (T181226) + if ( $attribs['body_only'] ) { + $reqOpts['wrapSections'] = false; + $reqOpts['body_only'] = true; + } + + if ( $wikitext === null && $oldid !== null ) { + $reqOpts['logLinterData'] = true; + $mstr = 'pageWithOldid'; + } else { + $mstr = 'wt'; + } + + // XXX: Not necessary, since it's in the pageConfig + // if ( isset( $attribs['pagelanguage'] ) ) { + // $reqOpts['pagelanguage'] = $attribs['pagelanguage']; + // } + + $timing->end( "wt2html.$mstr.init" ); + $metrics->timing( + "wt2html.$mstr.size.input", + strlen( $pageConfig->getPageMainContent() ) + ); + $parseTiming = Timing::start( $metrics ); + + if ( $format === ParsoidFormatHelper::FORMAT_LINT ) { + try { + $lints = $parsoid->wikitext2lint( $pageConfig, $reqOpts ); + } catch ( ClientError $e ) { + throw new HttpException( $e->getMessage(), 400 ); + } catch ( ResourceLimitExceededException $e ) { + throw new HttpException( $e->getMessage(), 413 ); + } + $response = $this->getResponseFactory()->createJson( $lints ); + } else { + $parserOutput = new ParserOutput(); + try { + $out = $parsoid->wikitext2html( + $pageConfig, $reqOpts, $headers, $parserOutput + ); + } catch ( ClientError $e ) { + throw new HttpException( $e->getMessage(), 400 ); + } catch ( ResourceLimitExceededException $e ) { + throw new HttpException( $e->getMessage(), 413 ); + } + if ( $needsPageBundle ) { + $response = $this->getResponseFactory()->createJson( $out->responseData() ); + ParsoidFormatHelper::setContentType( $response, ParsoidFormatHelper::FORMAT_PAGEBUNDLE, + $out->version ); + } else { + $response = $this->getResponseFactory()->create(); + ParsoidFormatHelper::setContentType( $response, ParsoidFormatHelper::FORMAT_HTML, + $attribs['envOptions']['outputContentVersion'] ); + $response->getBody()->write( $out ); + $response->setHeader( 'Content-Language', $headers['content-language'] ); + $response->addHeader( 'Vary', $headers['vary'] ); + } + if ( $request->getMethod() === 'GET' ) { + $tid = UIDGenerator::newUUIDv1(); + $response->addHeader( 'Etag', "W/\"{$oldid}/{$tid}\"" ); + } + + // FIXME: For pagebundle requests, this can be somewhat inflated + // because of pagebundle json-encoding overheads + $outSize = $response->getBody()->getSize(); + $parseTime = $parseTiming->end( "wt2html.$mstr.parse" ); + $timing->end( 'wt2html.total' ); + $metrics->timing( "wt2html.$mstr.size.output", $outSize ); + + // Ignore slow parse metrics for non-oldid parses + if ( $mstr === 'pageWithOldid' ) { + if ( $parseTime > 3000 ) { + LoggerFactory::getInstance( 'slow-parsoid' ) + ->info( 'Parsing {title} was slow, took {time} seconds', [ + 'time' => number_format( $parseTime / 1000, 2 ), + 'title' => $pageConfig->getTitle(), + ] ); + } + + if ( $parseTime > 10 && $outSize > 100 ) { + // * Don't bother with this metric for really small parse times + // p99 for initialization time is ~7ms according to grafana. + // So, 10ms ensures that startup overheads don't skew the metrics + // * For body_only=false requests, section isn't generated + // and if the output is small, per-request overheads can skew + // the timePerKB metrics. + + // FIXME: This is slightly misleading since there are fixed costs + // for generating output like the section and should be factored in, + // but this is good enough for now as a useful first degree of approxmation. + $timePerKB = $parseTime * 1024 / $outSize; + $metrics->timing( 'wt2html.timePerKB', $timePerKB ); + + if ( $timePerKB > 500 ) { + // At 100ms/KB, even a 100KB page which isn't that large will take 10s. 
+ // So, we probably want to shoot for a threshold under 100ms. + // But, let's start with 500ms+ outliers first and see what we uncover. + LoggerFactory::getInstance( 'slow-parsoid' ) + ->info( 'Parsing {title} was slow, timePerKB took {timePerKB} ms, total: {time} seconds', [ + 'time' => number_format( $parseTime / 1000, 2 ), + 'timePerKB' => number_format( $timePerKB, 1 ), + 'title' => $pageConfig->getTitle(), + ] ); + } + } + } + } + + if ( $wikitext !== null ) { + // Don't cache requests when wt is set in case somebody uses + // GET for wikitext parsing + $response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' ); + } elseif ( $oldid !== null ) { + // FIXME this should be handled in core (cf OutputPage::sendCacheControl) + if ( $request->getHeaderLine( 'Cookie' ) || + $request->getHeaderLine( 'Authorization' ) ) { + // Don't cache requests with a session. + $response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' ); + } + // Indicate the MediaWiki revision in a header as well for + // ease of extraction in clients. + $response->setHeader( 'Content-Revision-Id', $oldid ); + } else { + throw new LogicException( 'Should be unreachable' ); + } + return $response; + } + + /** + * HTML -> wikitext helper. + * + * @param PageConfig $pageConfig + * @param array $attribs Request attributes from getRequestAttributes() + * @param string $html HTML to transform + * @return Response + */ + protected function html2wt( + PageConfig $pageConfig, array $attribs, string $html + ) { + $request = $this->getRequest(); + $opts = $attribs['opts']; + $envOptions = $attribs['envOptions']; + $metrics = $this->metrics; + + // Performance Timing options + $timing = Timing::start( $metrics ); + + try { + $doc = DOMUtils::parseHTML( $html, true ); + } catch ( ClientError $e ) { + throw new HttpException( $e->getMessage(), 400 ); + } + + // FIXME: Should perhaps be strlen instead + $htmlSize = mb_strlen( $html ); + + // Send input size to statsd/Graphite + $metrics->timing( 'html2wt.size.input', $htmlSize ); + + $original = $opts['original'] ?? null; + $oldBody = null; + $origPb = null; + + // Get the content version of the edited doc, if available + $vEdited = DOMUtils::extractInlinedContentVersion( $doc ); + + // Check for version mismatches between original & edited doc + if ( !isset( $original['html'] ) ) { + $envOptions['inputContentVersion'] = $vEdited ?? Parsoid::defaultHTMLVersion(); + } else { + $vOriginal = ParsoidFormatHelper::parseContentTypeHeader( + $original['html']['headers']['content-type'] ?? '' ); + if ( $vOriginal === null ) { + throw new HttpException( + 'Content-type of original html is missing.', 400 + ); + } + if ( $vEdited === null ) { + // If version of edited doc is unavailable we assume + // the edited doc is derived from the original doc. + // No downgrade necessary + $envOptions['inputContentVersion'] = $vOriginal; + } elseif ( $vEdited === $vOriginal ) { + // No downgrade necessary + $envOptions['inputContentVersion'] = $vOriginal; + } else { + $envOptions['inputContentVersion'] = $vEdited; + // We need to downgrade the original to match the edited doc's version. + $downgrade = Parsoid::findDowngrade( $vOriginal, $vEdited ); + // Downgrades are only for pagebundle + if ( $downgrade && $opts['from'] === ParsoidFormatHelper::FORMAT_PAGEBUNDLE ) { + $metrics->increment( + "downgrade.from.{$downgrade['from']}.to.{$downgrade['to']}" + ); + $origPb = new PageBundle( + $original['html']['body'], + $original['data-parsoid']['body'] ?? 
null, + $original['data-mw']['body'] ?? null + ); + $this->validatePb( $origPb, $vOriginal ); + $downgradeTiming = Timing::start( $metrics ); + Parsoid::downgrade( $downgrade, $origPb ); + $downgradeTiming->end( 'downgrade.time' ); + $oldBody = DOMCompat::getBody( DOMUtils::parseHTML( $origPb->html ) ); + } else { + throw new HttpException( + "Modified ({$vEdited}) and original ({$vOriginal}) html are of " + . 'different type, and no path to downgrade.', 400 + ); + } + } + } + + $metrics->increment( + 'html2wt.original.version.' . $envOptions['inputContentVersion'] + ); + if ( !$vEdited ) { + $metrics->increment( 'html2wt.original.version.notinline' ); + } + + // If available, the modified data-mw blob is applied, while preserving + // existing inline data-mw. But, no data-parsoid application, since + // that's internal, we only expect to find it in its original, + // unmodified form. + if ( $opts['from'] === ParsoidFormatHelper::FORMAT_PAGEBUNDLE && isset( $opts['data-mw'] ) + && Semver::satisfies( $envOptions['inputContentVersion'], '^999.0.0' ) + ) { + // `opts` isn't a revision, but we'll find a `data-mw` there. + $pb = new PageBundle( '', + [ 'ids' => [] ], // So it validates + $opts['data-mw']['body'] ?? null ); + $this->validatePb( $pb, $envOptions['inputContentVersion'] ); + PageBundle::apply( $doc, $pb ); + } + + $oldhtml = null; + + if ( $original ) { + if ( $opts['from'] === ParsoidFormatHelper::FORMAT_PAGEBUNDLE ) { + // Apply the pagebundle to the parsed doc. This supports the + // simple edit scenarios where data-mw might not necessarily + // have been retrieved. + if ( !$origPb ) { + $origPb = new PageBundle( '', $original['data-parsoid']['body'] ?? null, + $original['data-mw']['body'] ?? null ); + } + + // Verify that the top-level parsoid object either doesn't contain + // offsetType, or that it matches the conversion that has been + // explicitly requested. + if ( isset( $origPb->parsoid['offsetType'] ) ) { + $offsetType = $envOptions['offsetType'] ?? 'byte'; + // @phan-suppress-next-line PhanTypeArraySuspiciousNullable + $origOffsetType = $origPb->parsoid['offsetType']; + if ( $origOffsetType !== $offsetType ) { + throw new HttpException( + 'DSR offsetType mismatch: ' . + $origOffsetType . ' vs ' . $offsetType, 406 + ); + } + } + + $pb = $origPb; + // However, if a modified data-mw was provided, + // original data-mw is omitted to avoid losing deletions. + if ( isset( $opts['data-mw'] ) + && Semver::satisfies( $envOptions['inputContentVersion'], '^999.0.0' ) + ) { + // Don't modify `origPb`, it's used below. 
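+					// Keep the original data-parsoid, but replace data-mw with an empty
+					// 'ids' map so the original data-mw is not re-applied here; the
+					// modified data-mw from the request was already applied to $doc above.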
+ $pb = new PageBundle( '', $pb->parsoid, [ 'ids' => [] ] ); + } + $this->validatePb( $pb, $envOptions['inputContentVersion'] ); + PageBundle::apply( $doc, $pb ); + } + + // If we got original html, parse it + if ( isset( $original['html'] ) ) { + if ( !$oldBody ) { + $oldBody = DOMCompat::getBody( DOMUtils::parseHTML( $original['html']['body'] ) ); + } + if ( $opts['from'] === ParsoidFormatHelper::FORMAT_PAGEBUNDLE && $origPb !== null ) { + $this->validatePb( $origPb, $envOptions['inputContentVersion'] ); + // @phan-suppress-next-line PhanTypeMismatchArgumentSuperType + PageBundle::apply( $oldBody->ownerDocument, $origPb ); + } + // @phan-suppress-next-line PhanTypeMismatchArgumentNullable + $oldhtml = ContentUtils::toXML( $oldBody ); + } + } + + // As per https://www.mediawiki.org/wiki/Parsoid/API#v1_API_entry_points + // "Both it and the oldid parameter are needed for + // clean round-tripping of HTML retrieved earlier with" + // So, no oldid => no selser + $hasOldId = ( $attribs['oldid'] !== null ); + + if ( $hasOldId && !empty( $this->parsoidSettings['useSelser'] ) ) { + if ( !$pageConfig->getRevisionContent() ) { + throw new HttpException( + 'Could not find previous revision. Has the page been locked / deleted?', 409 + ); + } + + // FIXME: T234548/T234549 - $pageConfig->getPageMainContent() is deprecated: + // should use $env->topFrame->getSrcText() + $selserData = new SelserData( $pageConfig->getPageMainContent(), $oldhtml ); + } else { + $selserData = null; + } + + $parsoid = new Parsoid( $this->siteConfig, $this->dataAccess ); + + $timing->end( 'html2wt.init' ); + + try { + $wikitext = $parsoid->dom2wikitext( $pageConfig, $doc, [ + 'inputContentVersion' => $envOptions['inputContentVersion'], + 'offsetType' => $envOptions['offsetType'], + 'contentmodel' => $opts['contentmodel'] ?? null, + 'htmlSize' => $htmlSize, + ], $selserData ); + } catch ( ClientError $e ) { + throw new HttpException( $e->getMessage(), 400 ); + } catch ( ResourceLimitExceededException $e ) { + throw new HttpException( $e->getMessage(), 413 ); + } + + if ( $html ) { // Avoid division by zero + $total = $timing->end( 'html2wt.total' ); + $metrics->timing( 'html2wt.size.output', strlen( $wikitext ) ); + $metrics->timing( 'html2wt.timePerInputKB', $total * 1024 / strlen( $html ) ); + } + + $response = $this->getResponseFactory()->create(); + ParsoidFormatHelper::setContentType( $response, ParsoidFormatHelper::FORMAT_WIKITEXT ); + $response->getBody()->write( $wikitext ); + return $response; + } + + /** + * Pagebundle -> pagebundle helper. + * + * @param array $attribs + * @return Response + * @throws HttpException + */ + protected function pb2pb( array $attribs ) { + $request = $this->getRequest(); + $opts = $attribs['opts']; + + $revision = $opts['previous'] ?? $opts['original'] ?? null; + if ( !isset( $revision['html'] ) ) { + throw new HttpException( + 'Missing revision html.', 400 + ); + } + + $vOriginal = ParsoidFormatHelper::parseContentTypeHeader( + $revision['html']['headers']['content-type'] ?? '' ); + if ( $vOriginal === null ) { + throw new HttpException( + 'Content-type of revision html is missing.', 400 + ); + } + $attribs['envOptions']['inputContentVersion'] = $vOriginal; + '@phan-var array $attribs'; // @var array $attribs + + $this->metrics->increment( + 'pb2pb.original.version.' . 
$attribs['envOptions']['inputContentVersion'] + ); + + if ( !empty( $opts['updates'] ) ) { + // FIXME: Handling missing revisions uniformly for all update types + // is not probably the right thing to do but probably okay for now. + // This might need revisiting as we add newer types. + $pageConfig = $this->tryToCreatePageConfig( $attribs, null, true ); + // If we're only updating parts of the original version, it should + // satisfy the requested content version, since we'll be returning + // that same one. + // FIXME: Since this endpoint applies the acceptable middleware, + // `getOutputContentVersion` is not what's been passed in, but what + // can be produced. Maybe that should be selectively applied so + // that we can update older versions where it makes sense? + // Uncommenting below implies that we can only update the latest + // version, since carrot semantics is applied in both directions. + // if ( !Semver::satisfies( + // $attribs['envOptions']['inputContentVersion'], + // "^{$attribs['envOptions']['outputContentVersion']}" + // ) ) { + // throw new HttpException( + // 'We do not know how to do this conversion.', 415 + // ); + // } + if ( !empty( $opts['updates']['redlinks'] ) ) { + // Q(arlolra): Should redlinks be more complex than a bool? + // See gwicke's proposal at T114413#2240381 + return $this->updateRedLinks( $pageConfig, $attribs, $revision ); + } elseif ( isset( $opts['updates']['variant'] ) ) { + return $this->languageConversion( $pageConfig, $attribs, $revision ); + } else { + throw new HttpException( + 'Unknown transformation.', 400 + ); + } + } + + // TODO(arlolra): subbu has some sage advice in T114413#2365456 that + // we should probably be more explicit about the pb2pb conversion + // requested rather than this increasingly complex fallback logic. + $downgrade = Parsoid::findDowngrade( + $attribs['envOptions']['inputContentVersion'], + $attribs['envOptions']['outputContentVersion'] + ); + if ( $downgrade ) { + $pb = new PageBundle( + $revision['html']['body'], + $revision['data-parsoid']['body'] ?? null, + $revision['data-mw']['body'] ?? null + ); + $this->validatePb( $pb, $attribs['envOptions']['inputContentVersion'] ); + Parsoid::downgrade( $downgrade, $pb ); + + if ( !empty( $attribs['body_only'] ) ) { + $doc = DOMUtils::parseHTML( $pb->html ); + $body = DOMCompat::getBody( $doc ); + $pb->html = ContentUtils::toXML( $body, [ + 'innerXML' => true, + ] ); + } + + $response = $this->getResponseFactory()->createJson( $pb->responseData() ); + ParsoidFormatHelper::setContentType( + $response, ParsoidFormatHelper::FORMAT_PAGEBUNDLE, $pb->version + ); + return $response; + // Ensure we only reuse from semantically similar content versions. + } elseif ( Semver::satisfies( $attribs['envOptions']['outputContentVersion'], + '^' . $attribs['envOptions']['inputContentVersion'] ) ) { + $pageConfig = $this->tryToCreatePageConfig( $attribs ); + return $this->wt2html( $pageConfig, $attribs ); + } else { + throw new HttpException( + 'We do not know how to do this conversion.', 415 + ); + } + } + + /** + * Update red links on a document. + * + * @param PageConfig $pageConfig + * @param array $attribs + * @param array $revision + * @return Response + */ + protected function updateRedLinks( + PageConfig $pageConfig, array $attribs, array $revision + ) { + $parsoid = new Parsoid( $this->siteConfig, $this->dataAccess ); + + $pb = new PageBundle( + $revision['html']['body'], + $revision['data-parsoid']['body'] ?? null, + $revision['data-mw']['body'] ?? 
null, + $attribs['envOptions']['inputContentVersion'], + $revision['html']['headers'] ?? null, + $revision['contentmodel'] ?? null + ); + + $out = $parsoid->pb2pb( + $pageConfig, 'redlinks', $pb, [] + ); + + $this->validatePb( $out, $attribs['envOptions']['inputContentVersion'] ); + + $response = $this->getResponseFactory()->createJson( $out->responseData() ); + ParsoidFormatHelper::setContentType( + $response, ParsoidFormatHelper::FORMAT_PAGEBUNDLE, $out->version + ); + return $response; + } + + /** + * Do variant conversion on a document. + * + * @param PageConfig $pageConfig + * @param array $attribs + * @param array $revision + * @return Response + * @throws HttpException + */ + protected function languageConversion( + PageConfig $pageConfig, array $attribs, array $revision + ) { + $opts = $attribs['opts']; + $source = $opts['updates']['variant']['source'] ?? null; + $target = $opts['updates']['variant']['target'] ?? + $attribs['envOptions']['htmlVariantLanguage']; + + if ( !$target ) { + throw new HttpException( + 'Target variant is required.', 400 + ); + } + + if ( !$this->siteConfig->langConverterEnabledForLanguage( + $pageConfig->getPageLanguage() + ) ) { + throw new HttpException( + 'LanguageConversion is not enabled on this article.', 400 + ); + } + + $parsoid = new Parsoid( $this->siteConfig, $this->dataAccess ); + + $pb = new PageBundle( + $revision['html']['body'], + $revision['data-parsoid']['body'] ?? null, + $revision['data-mw']['body'] ?? null, + $attribs['envOptions']['inputContentVersion'], + $revision['html']['headers'] ?? null, + $revision['contentmodel'] ?? null + ); + $out = $parsoid->pb2pb( + $pageConfig, 'variant', $pb, + [ + 'variant' => [ + 'source' => $source, + 'target' => $target, + ] + ] + ); + + $response = $this->getResponseFactory()->createJson( $out->responseData() ); + ParsoidFormatHelper::setContentType( + $response, ParsoidFormatHelper::FORMAT_PAGEBUNDLE, $out->version + ); + return $response; + } + + /** @inheritDoc */ + abstract public function execute(): Response; + + /** + * Validate a PageBundle against the given contentVersion, and throw + * an HttpException if it does not match. + * @param PageBundle $pb + * @param string $contentVersion + * @throws HttpException + */ + private function validatePb( PageBundle $pb, string $contentVersion ): void { + $errorMessage = ''; + if ( !$pb->validate( $contentVersion, $errorMessage ) ) { + throw new HttpException( $errorMessage, 400 ); + } + } + +} diff --git a/includes/Rest/Handler/TransformHandler.php b/includes/Rest/Handler/TransformHandler.php new file mode 100644 index 00000000000..77b5a4304c8 --- /dev/null +++ b/includes/Rest/Handler/TransformHandler.php @@ -0,0 +1,129 @@ + [ self::PARAM_SOURCE => 'path', + ParamValidator::PARAM_TYPE => 'string', + ParamValidator::PARAM_REQUIRED => true, ], + 'from' => [ self::PARAM_SOURCE => 'path', + ParamValidator::PARAM_TYPE => 'string', + ParamValidator::PARAM_REQUIRED => true, ], + 'format' => [ self::PARAM_SOURCE => 'path', + ParamValidator::PARAM_TYPE => 'string', + ParamValidator::PARAM_REQUIRED => true, ], + 'title' => [ self::PARAM_SOURCE => 'path', + ParamValidator::PARAM_TYPE => 'string', + ParamValidator::PARAM_REQUIRED => false, ], + 'revision' => [ self::PARAM_SOURCE => 'path', + ParamValidator::PARAM_TYPE => 'string', + ParamValidator::PARAM_REQUIRED => false, ], ]; + } + + /** + * Transform content given in the request from or to wikitext. 
+ * + * @return Response + * @throws HttpException + */ + public function execute(): Response { + $request = $this->getRequest(); + $from = $request->getPathParam( 'from' ); + $format = $request->getPathParam( 'format' ); + if ( !isset( ParsoidFormatHelper::VALID_TRANSFORM[$from] ) || !in_array( $format, + ParsoidFormatHelper::VALID_TRANSFORM[$from], + true ) ) { + throw new HttpException( "Invalid transform: ${from}/to/${format}", + 404 ); + } + $attribs = &$this->getRequestAttributes(); + if ( !$this->acceptable( $attribs ) ) { // mutates $attribs + throw new HttpException( 'Not acceptable', + 406 ); + } + if ( $from === ParsoidFormatHelper::FORMAT_WIKITEXT ) { + // Accept wikitext as a string or object{body,headers} + $wikitext = $attribs['opts']['wikitext'] ?? null; + if ( is_array( $wikitext ) ) { + $wikitext = $wikitext['body']; + // We've been given a pagelanguage for this page. + if ( isset( $attribs['opts']['wikitext']['headers']['content-language'] ) ) { + $attribs['pagelanguage'] = $attribs['opts']['wikitext']['headers']['content-language']; + } + } + // We've been given source for this page + if ( $wikitext === null && isset( $attribs['opts']['original']['wikitext'] ) ) { + $wikitext = $attribs['opts']['original']['wikitext']['body']; + // We've been given a pagelanguage for this page. + if ( isset( $attribs['opts']['original']['wikitext']['headers']['content-language'] ) ) { + $attribs['pagelanguage'] = $attribs['opts']['original']['wikitext']['headers']['content-language']; + } + } + // Abort if no wikitext or title. + if ( $wikitext === null && $attribs['titleMissing'] ) { + throw new HttpException( 'No title or wikitext was provided.', + 400 ); + } + $pageConfig = $this->tryToCreatePageConfig( $attribs, + $wikitext ); + + return $this->wt2html( $pageConfig, + $attribs, + $wikitext ); + } elseif ( $format === ParsoidFormatHelper::FORMAT_WIKITEXT ) { + $html = $attribs['opts']['html'] ?? null; + // Accept html as a string or object{body,headers} + if ( is_array( $html ) ) { + $html = $html['body']; + } + if ( $html === null ) { + throw new HttpException( 'No html was supplied.', + 400 ); + } + $wikitext = $attribs['opts']['original']['wikitext']['body'] ?? 
null; + $pageConfig = $this->tryToCreatePageConfig( $attribs, + $wikitext, + true ); + + return $this->html2wt( $pageConfig, + $attribs, + $html ); + } else { + return $this->pb2pb( $attribs ); + } + } +} diff --git a/includes/Rest/coreDevelopmentRoutes.json b/includes/Rest/coreDevelopmentRoutes.json index 70a5b0358dc..9f13e643485 100644 --- a/includes/Rest/coreDevelopmentRoutes.json +++ b/includes/Rest/coreDevelopmentRoutes.json @@ -34,5 +34,23 @@ "UserNameUtils" ], "mode": "user" + }, + { + "method": "POST", + "path": "/coredev/v0/{domain}/transform/{from}/to/{format}/{title}/{revision}", + "class": "MediaWiki\\Rest\\Handler\\TransformHandler", + "factory": "MediaWiki\\Rest\\Handler\\TransformHandler::factory" + }, + { + "method": "POST", + "path": "/coredev/v0/{domain}/transform/{from}/to/{format}", + "class": "MediaWiki\\Rest\\Handler\\TransformHandler", + "factory": "MediaWiki\\Rest\\Handler\\TransformHandler::factory" + }, + { + "method": "POST", + "path": "/coredev/v0/{domain}/transform/{from}/to/{format}/{title}", + "class": "MediaWiki\\Rest\\Handler\\TransformHandler", + "factory": "MediaWiki\\Rest\\Handler\\TransformHandler::factory" } ] diff --git a/package-lock.json b/package-lock.json index 7af5ea70ce7..13dc6f9853b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,6 +12,7 @@ "@wdio/mocha-framework": "7.16.13", "@wdio/spec-reporter": "7.16.13", "api-testing": "1.5.0", + "domino": "2.1.0", "dotenv": "8.2.0", "eslint-config-wikimedia": "0.22.1", "grunt": "1.5.2", @@ -2782,6 +2783,12 @@ "url": "https://github.com/fb55/domhandler?sponsor=1" } }, + "node_modules/domino": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/domino/-/domino-2.1.0.tgz", + "integrity": "sha512-xINSODvrnuQcm3eXJN4IkBR+JxqLrJN8Ge4fd00y1b7HsY0A4huKN5BflSS/oo8quBWmocTfWdFvrw2H8TjGqQ==", + "dev": true + }, "node_modules/domutils": { "version": "2.8.0", "resolved": "https://registry.npmjs.org/domutils/-/domutils-2.8.0.tgz", @@ -10289,7 +10296,7 @@ } }, "tests/selenium/wdio-mediawiki": { - "version": "2.0.0", + "version": "2.1.0", "dev": true, "license": "MIT", "dependencies": { @@ -12507,6 +12514,12 @@ "domelementtype": "^2.2.0" } }, + "domino": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/domino/-/domino-2.1.0.tgz", + "integrity": "sha512-xINSODvrnuQcm3eXJN4IkBR+JxqLrJN8Ge4fd00y1b7HsY0A4huKN5BflSS/oo8quBWmocTfWdFvrw2H8TjGqQ==", + "dev": true + }, "domutils": { "version": "2.8.0", "resolved": "https://registry.npmjs.org/domutils/-/domutils-2.8.0.tgz", diff --git a/package.json b/package.json index 83de4776a27..48636c2c16a 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ "@wdio/mocha-framework": "7.16.13", "@wdio/spec-reporter": "7.16.13", "api-testing": "1.5.0", + "domino": "2.1.0", "dotenv": "8.2.0", "eslint-config-wikimedia": "0.22.1", "grunt": "1.5.2", diff --git a/tests/api-testing/REST/Transform.js b/tests/api-testing/REST/Transform.js new file mode 100644 index 00000000000..8b7ac562e8c --- /dev/null +++ b/tests/api-testing/REST/Transform.js @@ -0,0 +1,2192 @@ +/** Cases for testing the Parsoid API through HTTP */ + +'use strict'; + +const { action, assert, REST, utils } = require( 'api-testing' ); + +const domino = require( 'domino' ); +const should = require( 'chai' ).should(); +const semver = require( 'semver' ); +const url = require( 'url' ); + +const parsoidOptions = { + limits: { + wt2html: { maxWikitextSize: 20000 }, + html2wt: { maxHTMLSize: 10000 } + } +}; + +// FIXME(T283875): These should all be re-enabled +const skipForNow = true; + 
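+// Content version Parsoid emits by default (see Parsoid::defaultHTMLVersion());
+// responses are checked against this when no specific version is requested.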
+const defaultContentVersion = '2.4.0'; + +// section wrappers are a distraction from the main business of +// this file which is to verify functionality of API end points +// independent of what they are returning and computing. +// +// Verifying the correctness of content is actually the job of +// parser tests and other tests. +// +// So, hide most of that that distraction in a helper. +// +// Right now, all uses of this helper have empty lead sections. +// But, maybe in the future, this may change. So, retain the option. +function validateDoc( doc, nodeName, emptyLead ) { + const leadSection = doc.body.firstChild; + leadSection.nodeName.should.equal( 'SECTION' ); + if ( emptyLead ) { + // Could have whitespace and comments + leadSection.childElementCount.should.equal( 0 ); + } + const nonEmptySection = emptyLead ? leadSection.nextSibling : leadSection; + nonEmptySection.firstChild.nodeName.should.equal( nodeName ); +} + +function status200( res ) { + assert.strictEqual( res.status, 200, res.text ); +} + +// TODO: Replace all occurrences of (Lint Page/Lint_Page) with `page`. +describe( '/transform/ endpoint', function () { + const client = new REST(); + const parsedUrl = new url.URL( client.req.app ); + const PARSOID_URL = parsedUrl.href; + const hostname = parsedUrl.hostname; + const endpointPrefix = client.pathPrefix = `rest.php/coredev/v0/${hostname}`; + const page = utils.title( 'TransformSource ' ); + const pageEncoded = encodeURIComponent( page ); + let revid; + + before( async function () { + this.timeout( 10000 ); + + const alice = await action.alice(); + + // Create pages + let edit = await alice.edit( page, { text: '{|\nhi\n|ho\n|}' } ); + edit.result.should.equal( 'Success' ); + revid = edit.newrevid; + + edit = await alice.edit( 'JSON Page', { + text: '[1]', contentmodel: 'json' + } ); + edit.result.should.equal( 'Success' ); + } ); + + describe( 'formats', function () { + + it( 'should accept application/x-www-form-urlencoded', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .type( 'form' ) + .send( { + wikitext: '== h2 ==' + } ) + .expect( status200 ) + .expect( function ( res ) { + validateDoc( domino.createDocument( res.text ), 'H2', true ); + } ) + .end( done ); + } ); + + it( 'should accept application/json', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .type( 'json' ) + .send( { + wikitext: '== h2 ==' + } ) + .expect( status200 ) + .expect( function ( res ) { + validateDoc( domino.createDocument( res.text ), 'H2', true ); + } ) + .end( done ); + } ); + + it( 'should accept multipart/form-data', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .field( 'wikitext', '== h2 ==' ) + .expect( status200 ) + .expect( function ( res ) { + validateDoc( domino.createDocument( res.text ), 'H2', true ); + } ) + .end( done ); + } ); + } ); // formats + + const acceptableHtmlResponse = function ( contentVersion, expectFunc ) { + return function ( res ) { + res.statusCode.should.equal( 200 ); + res.headers.should.have.property( 'content-type' ); + res.headers[ 'content-type' ].should.equal( + 'text/html; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/HTML/' + contentVersion + '"' + ); + res.text.should.not.equal( '' ); + if ( expectFunc ) { + return expectFunc( res.text ); + } + }; + }; + + const acceptablePageBundleResponse = function ( contentVersion, expectFunc ) { + return function ( res ) { + res.statusCode.should.equal( 200 ); 
+ res.headers.should.have.property( 'content-type' ); + res.headers[ 'content-type' ].should.equal( + 'application/json; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/pagebundle/' + contentVersion + '"' + ); + res.body.should.have.property( 'html' ); + res.body.html.should.have.property( 'headers' ); + res.body.html.headers.should.have.property( 'content-type' ); + res.body.html.headers[ 'content-type' ].should.equal( + 'text/html; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/HTML/' + contentVersion + '"' + ); + res.body.html.should.have.property( 'body' ); + res.body.should.have.property( 'data-parsoid' ); + res.body[ 'data-parsoid' ].should.have.property( 'headers' ); + res.body[ 'data-parsoid' ].headers.should.have.property( 'content-type' ); + res.body[ 'data-parsoid' ].headers[ 'content-type' ].should.equal( + 'application/json; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/data-parsoid/' + contentVersion + '"' + ); + res.body[ 'data-parsoid' ].should.have.property( 'body' ); + if ( semver.gte( contentVersion, '999.0.0' ) ) { + res.body.should.have.property( 'data-mw' ); + res.body[ 'data-mw' ].should.have.property( 'headers' ); + res.body[ 'data-mw' ].headers.should.have.property( 'content-type' ); + res.body[ 'data-mw' ].headers[ 'content-type' ].should.equal( + 'application/json; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/data-mw/' + contentVersion + '"' + ); + res.body[ 'data-mw' ].should.have.property( 'body' ); + } + if ( expectFunc ) { + return expectFunc( res.body.html.body ); + } + }; + }; + + describe( 'accepts', function () { + + it( 'should not accept requests for older content versions (html)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .set( 'Accept', 'text/html; profile="https://www.mediawiki.org/wiki/Specs/HTML/0.0.0"' ) + .send( { wikitext: '== h2 ==' } ) + .expect( 406 ) + .expect( function ( res ) { + // FIXME: See skipped html error test above + JSON.parse( res.error.text ).message.should.equal( + 'Not acceptable' + ); + } ) + .end( done ); + } ); + + it( 'should not accept requests for older content versions (pagebundle)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .set( 'Accept', 'application/json; profile="https://www.mediawiki.org/wiki/Specs/HTML/0.0.0"' ) + .send( { wikitext: '== h2 ==' } ) + .expect( 406 ) + .expect( function ( res ) { + JSON.parse( res.error.text ).message.should.equal( + 'Not acceptable' + ); + } ) + .end( done ); + } ); + + it( 'should not accept requests for other profiles (html)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .set( 'Accept', 'text/html; profile="something different"' ) + .send( { wikitext: '== h2 ==' } ) + .expect( 406 ) + .end( done ); + } ); + + it( 'should not accept requests for other profiles (pagebundle)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .set( 'Accept', 'application/json; profile="something different"' ) + .send( { wikitext: '== h2 ==' } ) + .expect( 406 ) + .end( done ); + } ); + + it( 'should accept wildcards (html)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .set( 'Accept', '*/*' ) + .send( { wikitext: '== h2 ==' } ) + .expect( status200 ) + .expect( acceptableHtmlResponse( defaultContentVersion ) ) + .end( done ); + } ); + + it( 'should accept wildcards (pagebundle)', function 
( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .set( 'Accept', '*/*' ) + .send( { wikitext: '== h2 ==' } ) + .expect( status200 ) + .expect( acceptablePageBundleResponse( defaultContentVersion ) ) + .end( done ); + } ); + + it( 'should prefer higher quality (html)', function ( done ) { + const contentVersion = '999.0.0'; + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .set( 'Accept', + 'text/html; profile="https://www.mediawiki.org/wiki/Specs/HTML/2.4.0"; q=0.5,' + + 'text/html; profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"; q=0.8' ) + .send( { wikitext: '== h2 ==' } ) + .expect( status200 ) + .expect( acceptableHtmlResponse( contentVersion ) ) + .end( done ); + } ); + + it( 'should prefer higher quality (pagebundle)', function ( done ) { + const contentVersion = '999.0.0'; + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .set( 'Accept', + 'application/json; profile="https://www.mediawiki.org/wiki/Specs/pagebundle/2.4.0"; q=0.5,' + + 'application/json; profile="https://www.mediawiki.org/wiki/Specs/pagebundle/999.0.0"; q=0.8' ) + .send( { wikitext: '== h2 ==' } ) + .expect( status200 ) + .expect( acceptablePageBundleResponse( contentVersion ) ) + .end( done ); + } ); + + it( 'should accept requests for the latest content version (html)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .send( { wikitext: '== h2 ==' } ) + .expect( status200 ) + .expect( acceptableHtmlResponse( defaultContentVersion ) ) + .end( done ); + } ); + + it( 'should accept requests for the latest content version (pagebundle)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .send( { wikitext: '== h2 ==' } ) + .expect( status200 ) + .expect( acceptablePageBundleResponse( defaultContentVersion ) ) + .end( done ); + } ); + + it( 'should accept requests for content version 2.x (html)', function ( done ) { + const contentVersion = '2.4.0'; + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .set( 'Accept', 'text/html; profile="https://www.mediawiki.org/wiki/Specs/HTML/' + contentVersion + '"' ) + .send( { wikitext: '{{1x|hi}}' } ) + .expect( status200 ) + .expect( acceptableHtmlResponse( contentVersion ) ) + .end( done ); + } ); + + it( 'should accept requests for content version 2.x (pagebundle)', function ( done ) { + const contentVersion = '2.4.0'; + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .set( 'Accept', 'application/json; profile="https://www.mediawiki.org/wiki/Specs/pagebundle/' + contentVersion + '"' ) + .send( { wikitext: '{{1x|hi}}' } ) + .expect( status200 ) + .expect( acceptablePageBundleResponse( contentVersion, function ( html ) { + // In < 999.x, data-mw is still inline. 
+ html.should.match( /\s+data-mw\s*=\s*['"]/ ); + } ) ) + .end( done ); + } ); + + // Note that these tests aren't that useful directly after a major version bump + + it( 'should accept requests for older content version 2.x (html)', function ( done ) { + const contentVersion = '2.4.0'; + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .set( 'Accept', 'text/html; profile="https://www.mediawiki.org/wiki/Specs/HTML/2.0.0"' ) // Keep this on the older version + .send( { wikitext: '{{1x|hi}}' } ) + .expect( status200 ) + .expect( acceptableHtmlResponse( contentVersion ) ) + .end( done ); + } ); + + it( 'should accept requests for older content version 2.x (pagebundle)', function ( done ) { + const contentVersion = '2.4.0'; + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .set( 'Accept', 'application/json; profile="https://www.mediawiki.org/wiki/Specs/pagebundle/2.0.0"' ) // Keep this on the older version + .send( { wikitext: '{{1x|hi}}' } ) + .expect( status200 ) + .expect( acceptablePageBundleResponse( contentVersion, function ( html ) { + // In < 999.x, data-mw is still inline. + html.should.match( /\s+data-mw\s*=\s*['"]/ ); + } ) ) + .end( done ); + } ); + + it( 'should sanity check 2.x content (pagebundle)', function ( done ) { + if ( skipForNow ) { + return this.skip(); + } // Missing files in wiki + const contentVersion = '2.4.0'; + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .set( 'Accept', 'application/json; profile="https://www.mediawiki.org/wiki/Specs/pagebundle/' + contentVersion + '"' ) + .send( { wikitext: '[[File:Audio.oga]]' } ) + .expect( status200 ) + .expect( acceptablePageBundleResponse( contentVersion, function ( html ) { + const doc = domino.createDocument( html ); + doc.querySelectorAll( 'audio' ).length.should.equal( 1 ); + doc.querySelectorAll( 'video' ).length.should.equal( 0 ); + } ) ) + .end( done ); + } ); + + it( 'should accept requests for content version 999.x (html)', function ( done ) { + const contentVersion = '999.0.0'; + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .set( 'Accept', 'text/html; profile="https://www.mediawiki.org/wiki/Specs/HTML/' + contentVersion + '"' ) + .send( { wikitext: '{{1x|hi}}' } ) + .expect( status200 ) + .expect( acceptableHtmlResponse( contentVersion ) ) + .end( done ); + } ); + + it( 'should accept requests for content version 999.x (pagebundle)', function ( done ) { + const contentVersion = '999.0.0'; + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .set( 'Accept', 'application/json; profile="https://www.mediawiki.org/wiki/Specs/pagebundle/' + contentVersion + '"' ) + .send( { wikitext: '{{1x|hi}}' } ) + .expect( status200 ) + .expect( acceptablePageBundleResponse( contentVersion, function ( html ) { + // In 999.x, data-mw is in the pagebundle. 
+ html.should.not.match( /\s+data-mw\s*=\s*['"]/ ); + } ) ) + .end( done ); + } ); + + } ); // accepts + + const validWikitextResponse = function ( expected ) { + return function ( res ) { + res.statusCode.should.equal( 200 ); + res.headers.should.have.property( 'content-type' ); + res.headers[ 'content-type' ].should.equal( + // note that express does some reordering + 'text/plain; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/wikitext/1.0.0"' + ); + if ( expected !== undefined ) { + res.text.should.equal( expected ); + } else { + res.text.should.not.equal( '' ); + } + }; + }; + + const validHtmlResponse = function ( expectFunc ) { + return function ( res ) { + res.statusCode.should.equal( 200 ); + res.headers.should.have.property( 'content-type' ); + res.headers[ 'content-type' ].should.equal( + 'text/html; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + ); + const doc = domino.createDocument( res.text ); + if ( expectFunc ) { + return expectFunc( doc ); + } else { + res.text.should.not.equal( '' ); + } + }; + }; + + const validPageBundleResponse = function ( expectFunc ) { + return function ( res ) { + res.statusCode.should.equal( 200 ); + res.body.should.have.property( 'html' ); + res.body.html.should.have.property( 'headers' ); + res.body.html.headers.should.have.property( 'content-type' ); + res.body.html.headers[ 'content-type' ].should.equal( + 'text/html; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + ); + res.body.html.should.have.property( 'body' ); + res.body.should.have.property( 'data-parsoid' ); + res.body[ 'data-parsoid' ].should.have.property( 'headers' ); + res.body[ 'data-parsoid' ].headers.should.have.property( 'content-type' ); + res.body[ 'data-parsoid' ].headers[ 'content-type' ].should.equal( + 'application/json; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/data-parsoid/' + defaultContentVersion + '"' + ); + res.body[ 'data-parsoid' ].should.have.property( 'body' ); + // TODO: Check data-mw when 999.x is the default. 
+ console.assert( !semver.gte( defaultContentVersion, '999.0.0' ) ); + const doc = domino.createDocument( res.body.html.body ); + if ( expectFunc ) { + return expectFunc( doc, res.body[ 'data-parsoid' ].body ); + } + }; + }; + + describe( 'wt2lint', function () { + + it( 'should lint the given wikitext', function ( done ) { + if ( skipForNow ) { + return this.skip(); + } // Enable linting config + client.req + .post( endpointPrefix + '/transform/wikitext/to/lint/' ) + .send( { + wikitext: { + headers: { + 'content-type': 'text/plain;profile="https://www.mediawiki.org/wiki/Specs/wikitext/1.0.0"' + }, + body: '{|\nhi\n|ho\n|}' + } + } ) + .expect( status200 ) + .expect( function ( res ) { + res.body.should.be.instanceof( Array ); + res.body.length.should.equal( 1 ); + res.body[ 0 ].type.should.equal( 'fostered' ); + } ) + .end( done ); + } ); + + it( 'should lint the given page, transform', function ( done ) { + if ( skipForNow ) { + return this.skip(); + } // Enable linting config + client.req + .post( endpointPrefix + '/transform/wikitext/to/lint/Lint_Page/102' ) + .send( {} ) + .expect( status200 ) + .expect( function ( res ) { + res.body.should.be.instanceof( Array ); + res.body.length.should.equal( 1 ); + res.body[ 0 ].type.should.equal( 'fostered' ); + } ) + .end( done ); + } ); + + it( 'should redirect title to latest revision (lint)', function ( done ) { + if ( skipForNow ) { + return this.skip(); + } // Enable linting config + client.req + .post( endpointPrefix + '/transform/wikitext/to/lint/' ) + .send( { + original: { + title: 'Lint_Page' + } + } ) + .expect( 307 ) // no revid or wikitext source provided + .expect( function ( res ) { + res.headers.should.have.property( 'location' ); + res.headers.location.should.equal( + PARSOID_URL + endpointPrefix + + '/transform/wikitext/to/lint/Lint%20Page/102' + ); + } ) + .end( done ); + } ); + + } ); + + describe( 'wt2html', function () { + + it( 'should accept wikitext as a string for html', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .send( { + wikitext: '== h2 ==' + } ) + .expect( validHtmlResponse( function ( doc ) { + validateDoc( doc, 'H2', true ); + } ) ) + .end( done ); + } ); + + it( 'should accept json contentmodel as a string for html', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .send( { + wikitext: '{"1":2}', + contentmodel: 'json' + } ) + .expect( validHtmlResponse( function ( doc ) { + doc.body.firstChild.nodeName.should.equal( 'TABLE' ); + } ) ) + .end( done ); + } ); + + it( 'should accept wikitext as a string for pagebundle', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .send( { + wikitext: '== h2 ==' + } ) + .expect( validPageBundleResponse( function ( doc ) { + validateDoc( doc, 'H2', true ); + } ) ) + .end( done ); + } ); + + it( 'should accept json contentmodel as a string for pagebundle', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .send( { + wikitext: '{"1":2}', + contentmodel: 'json' + } ) + .expect( validPageBundleResponse( function ( doc ) { + doc.body.firstChild.nodeName.should.equal( 'TABLE' ); + should.not.exist( doc.querySelector( '*[typeof="mw:Error"]' ) ); + } ) ) + .end( done ); + } ); + + it( 'should accept wikitext with headers', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .send( { + wikitext: { + headers: { + 'content-type': 
'text/plain;profile="https://www.mediawiki.org/wiki/Specs/wikitext/1.0.0"' + }, + body: '== h2 ==' + } + } ) + .expect( validHtmlResponse( function ( doc ) { + validateDoc( doc, 'H2', true ); + } ) ) + .end( done ); + } ); + + it( 'should require a title when no wikitext is provided (html)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .send( {} ) + .expect( 400 ) + .end( done ); + } ); + + it( 'should require a title when no wikitext is provided (pagebundle)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .send( {} ) + .expect( 400 ) + .end( done ); + } ); + + it( 'should error when revision not found (transform, wt2html)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/Doesnotexist' ) + .send( {} ) + .expect( 404 ) + .end( done ); + } ); + + it( 'should error when revision not found (transform, wt2pb)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/Doesnotexist' ) + .send( {} ) + .expect( 404 ) + .end( done ); + } ); + + it( 'should accept an original title (html)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .send( { + original: { + title: page + } + } ) + .expect( 307 ) // no revid or wikitext source provided + .expect( function ( res ) { + res.headers.should.have.property( 'location' ); + res.headers.location.should.equal( + PARSOID_URL + endpointPrefix + `/transform/wikitext/to/html/${pageEncoded}/${revid}` + ); + } ) + .end( done ); + } ); + + it( 'should accept an original title (pagebundle)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .send( { + original: { + title: page + } + } ) + .expect( 307 ) // no revid or wikitext source provided + .expect( function ( res ) { + res.headers.should.have.property( 'location' ); + res.headers.location.should.equal( + PARSOID_URL + endpointPrefix + `/transform/wikitext/to/pagebundle/${pageEncoded}/${revid}` + ); + } ) + .end( done ); + } ); + + it( 'should accept an original title, other than main', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .send( { + original: { + title: page + } + } ) + .expect( 307 ) // no revid or wikitext source provided + .expect( function ( res ) { + res.headers.should.have.property( 'location' ); + assert.strictEqual( res.headers.location.startsWith( PARSOID_URL + endpointPrefix + `/transform/wikitext/to/html/${pageEncoded}/` ), true ); + } ) + .end( done ); + } ); + + it( 'should not require a title when empty wikitext is provided (html)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .send( { + wikitext: '' + } ) + .expect( validHtmlResponse( function ( doc ) { + doc.body.children.length.should.equal( 1 ); // empty lead section + doc.body.firstChild.nodeName.should.equal( 'SECTION' ); + doc.body.firstChild.children.length.should.equal( 0 ); + } ) ) + .end( done ); + } ); + + it( 'should not require a title when empty wikitext is provided (pagebundle)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .send( { + wikitext: '' + } ) + .expect( validPageBundleResponse() ) + .end( done ); + } ); + + it( 'should not require a title when wikitext is provided', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .send( { + wikitext: '== h2 ==' 
+ } ) + .expect( validHtmlResponse( function ( doc ) { + validateDoc( doc, 'H2', true ); + } ) ) + .end( done ); + } ); + + it( 'should not require a rev id when wikitext and a title is provided', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/Main_Page' ) + .send( { + wikitext: '== h2 ==' + } ) + .expect( validHtmlResponse( function ( doc ) { + validateDoc( doc, 'H2', true ); + } ) ) + .end( done ); + } ); + + it( 'should accept the wikitext source as original data', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/Main_Page/1' ) + .send( { + original: { + wikitext: { + headers: { + 'content-type': 'text/plain;profile="https://www.mediawiki.org/wiki/Specs/wikitext/1.0.0"' + }, + body: '== h2 ==' + } + } + } ) + .expect( validHtmlResponse( function ( doc ) { + validateDoc( doc, 'H2', true ); + } ) ) + .end( done ); + } ); + + it( 'should use the proper source text', function ( done ) { + if ( skipForNow ) { + return this.skip(); + } // Missing template 1x + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/Main_Page/1' ) + .send( { + original: { + wikitext: { + headers: { + 'content-type': 'text/plain;profile="https://www.mediawiki.org/wiki/Specs/wikitext/1.0.0"' + }, + body: '{{1x|foo|bar=bat}}' + } + } + } ) + .expect( validHtmlResponse( function ( doc ) { + validateDoc( doc, 'P', false ); + const span = doc.querySelector( 'span[typeof="mw:Transclusion"]' ); + const dmw = JSON.parse( span.getAttribute( 'data-mw' ) ); + const template = dmw.parts[ 0 ].template; + template.target.wt.should.equal( '1x' ); + template.params[ 1 ].wt.should.equal( 'foo' ); + template.params.bar.wt.should.equal( 'bat' ); + } ) ) + .end( done ); + } ); + + it( 'should accept the wikitext source as original without a title or revision', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .send( { + original: { + wikitext: { + headers: { + 'content-type': 'text/plain;profile="https://www.mediawiki.org/wiki/Specs/wikitext/1.0.0"' + }, + body: '== h2 ==' + } + } + } ) + .expect( validHtmlResponse( function ( doc ) { + validateDoc( doc, 'H2', true ); + } ) ) + .end( done ); + } ); + + it( 'should respect body parameter in wikitext->html (body_only)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .send( { + wikitext: "''foo''", + body_only: 1 + } ) + .expect( validHtmlResponse() ) + .expect( function ( res ) { + // v3 only returns children of + res.text.should.not.match( /pagebundle requests (body_only)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .send( { + wikitext: "''foo''", + body_only: 1 + } ) + .expect( validPageBundleResponse() ) + .expect( function ( res ) { + // v3 only returns children of + res.body.html.body.should.not.match( / should have one child,
, the lead section + body.childElementCount.should.equal( 1 ); + const p = body.firstChild.firstChild; + p.nodeName.should.equal( 'P' ); + p.innerHTML.should.equal( 'foo' ); + // The

 shouldn't be a template expansion, just a plain ol' one + p.hasAttribute( 'typeof' ).should.equal( false ); + // and it shouldn't have any data-parsoid in it + p.hasAttribute( 'data-parsoid' ).should.equal( false ); + } ) ) + .end( done ); + } ); + + it( 'should implement subst - internal transclusion', function ( done ) { + if ( skipForNow ) { + return this.skip(); + } // Missing template 1x + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .send( { wikitext: '{{1x|foo {{1x|bar}} baz}}', subst: 'true' } ) + .expect( validHtmlResponse( function ( doc ) { + const body = doc.body; + // should have one child,

, the lead section + body.childElementCount.should.equal( 1 ); + const p = body.firstChild.firstChild; + p.nodeName.should.equal( 'P' ); + // The

shouldn't be a template expansion, just a plain ol' one + p.hasAttribute( 'typeof' ).should.equal( false ); + // and it shouldn't have any data-parsoid in it + p.hasAttribute( 'data-parsoid' ).should.equal( false ); + // The internal tranclusion should be presented as such + const tplp = p.firstChild.nextSibling; + tplp.nodeName.should.equal( 'SPAN' ); + tplp.getAttribute( 'typeof' ).should.equal( 'mw:Transclusion' ); + // And not have data-parsoid, so it's used as new content + tplp.hasAttribute( 'data-parsoid' ).should.equal( false ); + } ) ) + .end( done ); + } ); + + it( 'should not allow subst with pagebundle', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .send( { wikitext: '{{1x|foo}}', subst: 'true' } ) + .expect( 501 ) + .end( done ); + } ); + + it( 'should return a request too large error (post wt)', function ( done ) { + if ( skipForNow ) { + return this.skip(); + } // Set limits in config + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .send( { + original: { + title: 'Large_Page' + }, + wikitext: 'a'.repeat( parsoidOptions.limits.wt2html.maxWikitextSize + 1 ) + } ) + .expect( 413 ) + .end( done ); + } ); + + it( 'should add redlinks for transform (html)', function ( done ) { + if ( skipForNow ) { + return this.skip(); + } // Fix redlinks count, by creating pages + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .send( { + wikitext: '[[Special:Version]] [[Doesnotexist]] [[Redirected]]' + } ) + .expect( validHtmlResponse( function ( doc ) { + doc.body.querySelectorAll( 'a' ).length.should.equal( 3 ); + const redLinks = doc.body.querySelectorAll( '.new' ); + redLinks.length.should.equal( 1 ); + redLinks[ 0 ].getAttribute( 'title' ).should.equal( 'Doesnotexist' ); + const redirects = doc.body.querySelectorAll( '.mw-redirect' ); + redirects.length.should.equal( 1 ); + redirects[ 0 ].getAttribute( 'title' ).should.equal( 'Redirected' ); + } ) ) + .end( done ); + } ); + + it( 'should add redlinks for transform (pagebundle)', function ( done ) { + if ( skipForNow ) { + return this.skip(); + } // Fix redlinks count, by creating pages + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .send( { + wikitext: '[[Special:Version]] [[Doesnotexist]] [[Redirected]]' + } ) + .expect( validPageBundleResponse( function ( doc ) { + doc.body.querySelectorAll( 'a' ).length.should.equal( 3 ); + const redLinks = doc.body.querySelectorAll( '.new' ); + redLinks.length.should.equal( 1 ); + redLinks[ 0 ].getAttribute( 'title' ).should.equal( 'Doesnotexist' ); + const redirects = doc.body.querySelectorAll( '.mw-redirect' ); + redirects.length.should.equal( 1 ); + redirects[ 0 ].getAttribute( 'title' ).should.equal( 'Redirected' ); + } ) ) + .end( done ); + } ); + + ( skipForNow ? 
describe.skip : describe )( 'Variant conversion', function () { + + it( 'should perform variant conversion for transform given pagelanguage in HTTP header (html)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .set( 'Accept-Language', 'sr-el' ) + .set( 'Content-Language', 'sr' ) + .send( { + wikitext: 'абвг abcd x' + } ) + .expect( 'Content-Language', 'sr-el' ) + .expect( 'Vary', /\bAccept-Language\b/i ) + .expect( validHtmlResponse( ( doc ) => { + doc.body.textContent.should.equal( 'abvg abcd x' ); + } ) ) + .end( done ); + } ); + + it( 'should perform variant conversion for transform given pagelanguage in HTTP header (pagebundle)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .set( 'Accept-Language', 'sr-el' ) + .set( 'Content-Language', 'sr' ) + .send( { + wikitext: 'абвг abcd x' + } ) + .expect( validPageBundleResponse( ( doc ) => { + doc.body.textContent.should.equal( 'abvg abcd x' ); + } ) ) + .expect( ( res ) => { + const headers = res.body.html.headers; + headers.should.have.property( 'content-language' ); + headers.should.have.property( 'vary' ); + headers[ 'content-language' ].should.equal( 'sr-el' ); + headers.vary.should.match( /\bAccept-Language\b/i ); + } ) + .end( done ); + } ); + + it( 'should perform variant conversion for transform given pagelanguage in JSON header (html)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .set( 'Accept-Language', 'sr-el' ) + .send( { + wikitext: { + headers: { + 'content-language': 'sr' + }, + body: 'абвг abcd x' + } + } ) + .expect( 'Content-Language', 'sr-el' ) + .expect( 'Vary', /\bAccept-Language\b/i ) + .expect( validHtmlResponse( ( doc ) => { + doc.body.textContent.should.equal( 'abvg abcd x' ); + } ) ) + .end( done ); + } ); + + it( 'should perform variant conversion for transform given pagelanguage in JSON header (pagebundle)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .set( 'Accept-Language', 'sr-el' ) + .send( { + wikitext: { + headers: { + 'content-language': 'sr' + }, + body: 'абвг abcd' + } + } ) + .expect( validPageBundleResponse( ( doc ) => { + doc.body.textContent.should.equal( 'abvg abcd' ); + } ) ) + .expect( ( res ) => { + const headers = res.body.html.headers; + headers.should.have.property( 'content-language' ); + headers.should.have.property( 'vary' ); + headers[ 'content-language' ].should.equal( 'sr-el' ); + headers.vary.should.match( /\bAccept-Language\b/i ); + } ) + .end( done ); + } ); + + it( 'should perform variant conversion for transform given pagelanguage from oldid (html)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/html/' ) + .set( 'Accept-Language', 'sr-el' ) + .send( { + original: { revid: 104 }, + wikitext: { + body: 'абвг abcd x' + } + } ) + .expect( 'Content-Language', 'sr-el' ) + .expect( 'Vary', /\bAccept-Language\b/i ) + .expect( validHtmlResponse( ( doc ) => { + doc.body.textContent.should.equal( 'abvg abcd x' ); + } ) ) + .end( done ); + } ); + + it( 'should perform variant conversion for transform given pagelanguage from oldid (pagebundle)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/wikitext/to/pagebundle/' ) + .set( 'Accept-Language', 'sr-el' ) + .send( { + original: { revid: 104 }, + wikitext: 'абвг abcd' + } ) + .expect( validPageBundleResponse( ( doc ) => { + doc.body.textContent.should.equal( 'abvg abcd' ); + } ) ) + 
.expect( ( res ) => { + const headers = res.body.html.headers; + headers.should.have.property( 'content-language' ); + headers.should.have.property( 'vary' ); + headers[ 'content-language' ].should.equal( 'sr-el' ); + headers.vary.should.match( /\bAccept-Language\b/i ); + } ) + .end( done ); + } ); + + } ); + + } ); // end wt2html + + describe( 'html2wt', function () { + + it( 'should require html when serializing', function ( done ) { + client.req + .post( endpointPrefix + '/transform/html/to/wikitext/' ) + .send( {} ) + .expect( 400 ) + .end( done ); + } ); + + it( 'should error when revision not found (transform, html2wt)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/html/to/wikitext/Doesnotexist/2020' ) + .send( { + html: '

<pre>hi ho</pre>
' + } ) + .expect( 404 ) + .end( done ); + } ); + + it( 'should not error when oldid not supplied (transform, html2wt)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/html/to/wikitext/Doesnotexist' ) + .send( { + html: '
<pre>hi ho</pre>
' + } ) + .expect( validWikitextResponse( ' hi ho\n' ) ) + .end( done ); + } ); + + it( 'should accept html as a string', function ( done ) { + client.req + .post( endpointPrefix + '/transform/html/to/wikitext/' ) + .send( { + html: '\nMain_Page

MediaWiki has been successfully installed.

\n\n

Consult the User\'s Guide for information on using the wiki software.

\n\n

Getting started

\n' + } ) + .expect( validWikitextResponse() ) + .end( done ); + } ); + + it( 'should accept html for json contentmodel as a string', function ( done ) { + client.req + .post( endpointPrefix + '/transform/html/to/wikitext/' ) + .send( { + html: '\n
a4
b3
', + contentmodel: 'json' + } ) + .expect( validWikitextResponse( '{"a":4,"b":3}' ) ) + .end( done ); + } ); + + it( 'should accept html with headers', function ( done ) { + client.req + .post( endpointPrefix + '/transform/html/to/wikitext/' ) + .send( { + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + }, + body: '\nMain_Page

MediaWiki has been successfully installed.

\n\n

Consult the User\'s Guide for information on using the wiki software.

\n\n

Getting started

\n' + } + } ) + .expect( validWikitextResponse() ) + .end( done ); + } ); + + it( 'should allow a title in the url', function ( done ) { + client.req + .post( endpointPrefix + '/transform/html/to/wikitext/Main_Page' ) + .send( { + html: '\nMain_Page

MediaWiki has been successfully installed.

\n\n

Consult the User\'s Guide for information on using the wiki software.

\n\n

Getting started

\n' + } ) + .expect( validWikitextResponse() ) + .end( done ); + } ); + + it( 'should allow a title in the original data', function ( done ) { + client.req + .post( endpointPrefix + '/transform/html/to/wikitext/' ) + .send( { + html: '\nMain_Page

MediaWiki has been successfully installed.

\n\n

Consult the User\'s Guide for information on using the wiki software.

\n\n

Getting started

\n', + original: { + title: 'Main_Page' + } + } ) + .expect( validWikitextResponse() ) + .end( done ); + } ); + + it( 'should allow a revision id in the url', function ( done ) { + client.req + .post( endpointPrefix + '/transform/html/to/wikitext/Main_Page/1' ) + .send( { + html: '\nMain_Page

MediaWiki has been successfully installed.

\n\n

Consult the User\'s Guide for information on using the wiki software.

\n\n

Getting started

\n' + } ) + .expect( validWikitextResponse() ) + .end( done ); + } ); + + it( 'should allow a revision id in the original data', function ( done ) { + client.req + .post( endpointPrefix + '/transform/html/to/wikitext/' ) + .send( { + html: '\nMain_Page

MediaWiki has been successfully installed.

\n\n

Consult the User\'s Guide for information on using the wiki software.

\n\n

Getting started

\n', + original: { + revid: 1 + } + } ) + .expect( validWikitextResponse() ) + .end( done ); + } ); + + it( 'should accept original wikitext as src', function ( done ) { + client.req + .post( endpointPrefix + '/transform/html/to/wikitext/' ) + .send( { + html: '\nMain_Page

MediaWiki has been successfully installed.

\n\n

Consult the User\'s Guide for information on using the wiki software.

\n\n

Getting started

\n', + original: { + wikitext: { + headers: { + 'content-type': 'text/plain;profile="https://www.mediawiki.org/wiki/Specs/wikitext/1.0.0"' + }, + body: 'MediaWiki has been successfully installed.\n\nConsult the [//meta.wikimedia.org/wiki/Help:Contents User\'s Guide] for information on using the wiki software.\n\n== Getting started ==\n* [//www.mediawiki.org/wiki/Special:MyLanguage/Manual:Configuration_settings Configuration settings list]\n* [//www.mediawiki.org/wiki/Special:MyLanguage/Manual:FAQ MediaWiki FAQ]\n* [https://lists.wikimedia.org/mailman/listinfo/mediawiki-announce MediaWiki release mailing list]\n* [//www.mediawiki.org/wiki/Special:MyLanguage/Localisation#Translation_resources Localise MediaWiki for your language]\n' + } + } + } ) + .expect( validWikitextResponse() ) + .end( done ); + } ); + + it( 'should accept original html for selser (default)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '\nMain_Page

MediaWiki has been successfully installed.

\n\n

Consult the User\'s Guide for information on using the wiki software.

\n\n

Getting started

\n', + original: { + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + }, + body: "\nMain_Page

MediaWiki has been successfully installed.

\n\n

Consult the User's Guide for information on using the wiki software.

\n\n

Getting started

\n" + }, + 'data-parsoid': { + headers: { + 'content-type': 'application/json;profile="https://www.mediawiki.org/wiki/Specs/data-parsoid/' + defaultContentVersion + '"' + }, + body: { + counter: 14, + ids: { + mwAA: { dsr: [ 0, 592, 0, 0 ] }, + mwAQ: { dsr: [ 0, 59, 0, 0 ] }, + mwAg: { stx: 'html', dsr: [ 0, 59, 8, 9 ] }, + mwAw: { dsr: [ 61, 171, 0, 0 ] }, + mwBA: { dsr: [ 73, 127, 41, 1 ] }, + mwBQ: { dsr: [ 173, 194, 2, 2 ] }, + mwBg: { dsr: [ 195, 592, 0, 0 ] }, + mwBw: { dsr: [ 195, 300, 1, 0 ] }, + mwCA: { dsr: [ 197, 300, 75, 1 ] }, + mwCQ: { dsr: [ 301, 373, 1, 0 ] }, + mwCg: { dsr: [ 303, 373, 56, 1 ] }, + mwCw: { dsr: [ 374, 472, 1, 0 ] }, + mwDA: { dsr: [ 376, 472, 65, 1 ] }, + mwDQ: { dsr: [ 473, 592, 1, 0 ] }, + mwDg: { dsr: [ 475, 592, 80, 1 ] } + } + } + } + } + } ) + .expect( validWikitextResponse() ) + .end( done ); + } ); + + it( 'should accept original html for selser (1.1.1, meta)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '\nMain_Page

MediaWiki has been successfully installed.

\n\n

Consult the User\'s Guide for information on using the wiki software.

\n\n

Getting started

\n', + original: { + html: { + headers: { + 'content-type': 'text/html; profile="mediawiki.org/specs/html/1.1.1"' + }, + body: "\nMain_Page

MediaWiki has been successfully installed.

\n\n

Consult the User's Guide for information on using the wiki software.

\n\n

Getting started

\n" + }, + 'data-parsoid': { + headers: { + 'content-type': 'application/json;profile="https://www.mediawiki.org/wiki/Specs/data-parsoid/0.0.1"' + }, + body: { + counter: 14, + ids: { + mwAA: { dsr: [ 0, 592, 0, 0 ] }, + mwAQ: { dsr: [ 0, 59, 0, 0 ] }, + mwAg: { stx: 'html', dsr: [ 0, 59, 8, 9 ] }, + mwAw: { dsr: [ 61, 171, 0, 0 ] }, + mwBA: { dsr: [ 73, 127, 41, 1 ] }, + mwBQ: { dsr: [ 173, 194, 2, 2 ] }, + mwBg: { dsr: [ 195, 592, 0, 0 ] }, + mwBw: { dsr: [ 195, 300, 1, 0 ] }, + mwCA: { dsr: [ 197, 300, 75, 1 ] }, + mwCQ: { dsr: [ 301, 373, 1, 0 ] }, + mwCg: { dsr: [ 303, 373, 56, 1 ] }, + mwCw: { dsr: [ 374, 472, 1, 0 ] }, + mwDA: { dsr: [ 376, 472, 65, 1 ] }, + mwDQ: { dsr: [ 473, 592, 1, 0 ] }, + mwDg: { dsr: [ 475, 592, 80, 1 ] } + } + } + } + } + } ) + .expect( validWikitextResponse() ) + .end( done ); + } ); + + it( 'should accept original html for selser (1.1.1, headers)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + // Don't set the mw:html:version so that we get it from the original/headers + html: '\nMain_Page

MediaWiki has been successfully installed.

\n\n

Consult the User\'s Guide for information on using the wiki software.

\n\n

Getting started

\n', + original: { + html: { + headers: { + 'content-type': 'text/html; profile="mediawiki.org/specs/html/1.1.1"' + }, + body: "\nMain_Page

MediaWiki has been successfully installed.

\n\n

Consult the User's Guide for information on using the wiki software.

\n\n

Getting started

\n" + }, + 'data-parsoid': { + headers: { + 'content-type': 'application/json;profile="https://www.mediawiki.org/wiki/Specs/data-parsoid/0.0.1"' + }, + body: { + counter: 14, + ids: { + mwAA: { dsr: [ 0, 592, 0, 0 ] }, + mwAQ: { dsr: [ 0, 59, 0, 0 ] }, + mwAg: { stx: 'html', dsr: [ 0, 59, 8, 9 ] }, + mwAw: { dsr: [ 61, 171, 0, 0 ] }, + mwBA: { dsr: [ 73, 127, 41, 1 ] }, + mwBQ: { dsr: [ 173, 194, 2, 2 ] }, + mwBg: { dsr: [ 195, 592, 0, 0 ] }, + mwBw: { dsr: [ 195, 300, 1, 0 ] }, + mwCA: { dsr: [ 197, 300, 75, 1 ] }, + mwCQ: { dsr: [ 301, 373, 1, 0 ] }, + mwCg: { dsr: [ 303, 373, 56, 1 ] }, + mwCw: { dsr: [ 374, 472, 1, 0 ] }, + mwDA: { dsr: [ 376, 472, 65, 1 ] }, + mwDQ: { dsr: [ 473, 592, 1, 0 ] }, + mwDg: { dsr: [ 475, 592, 80, 1 ] } + } + } + } + } + } ) + .expect( validWikitextResponse() ) + .end( done ); + } ); + + it( 'should return http 400 if supplied data-parsoid is empty', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '

hi

', + original: { + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + }, + body: '

ho

' + }, + 'data-parsoid': { + headers: { + 'content-type': 'application/json;profile="https://www.mediawiki.org/wiki/Specs/data-parsoid/' + defaultContentVersion + '"' + }, + body: {} + } + } + } ) + .expect( 400 ) + .end( done ); + } ); + + // FIXME: Pagebundle validation in general is needed + it.skip( 'should return http 400 if supplied data-parsoid is a string', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '

hi

', + original: { + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + }, + body: '

ho

' + }, + 'data-parsoid': { + headers: { + 'content-type': 'application/json;profile="https://www.mediawiki.org/wiki/Specs/data-parsoid/' + defaultContentVersion + '"' + }, + body: 'Garbled text from RESTBase.' + } + } + } ) + .expect( 400 ) + .end( done ); + } ); + + // The following three tests should all serialize as: + // "
Selser test" + // However, we're deliberately setting the original wikitext in + // the first two to garbage so that when selser doesn't detect any + // difference between the new and old html, it'll just reuse that + // string and we have a reliable way of determining that selser + // was used. + + it( 'should use selser with supplied wikitext', function ( done ) { + if ( skipForNow ) { + return this.skip(); + } // Create Junk Page + // New and old html are identical, which should produce no diffs + // and reuse the original wikitext. + client.req + // Need to provide an oldid so that selser mode is enabled + // Without an oldid, serialization falls back to non-selser wts. + // The oldid is used to fetch wikitext, but if wikitext is provided + // (as in this test), it is not used. So, for testing purposes, + // we can use any old random id, as long as something is present. + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '
Selser test
', + original: { + title: 'Junk Page', + revid: 1234, + wikitext: { + body: '1. This is just some junk. See the comment above.' + }, + html: { + body: '
Selser test
', + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + } + }, + 'data-parsoid': { + body: { + ids: { + mwAA: {}, + mwBB: { autoInsertedEnd: true, stx: 'html' } + } + } + } + } + } ) + .expect( validWikitextResponse( + '1. This is just some junk. See the comment above.' + ) ) + .end( done ); + } ); + + it( 'should use selser with wikitext fetched from the mw api', function ( done ) { + if ( skipForNow ) { + return this.skip(); + } // Create Junk Page + // New and old html are identical, which should produce no diffs + // and reuse the original wikitext. + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '
Selser test
', + original: { + revid: 2, + title: 'Junk Page', + html: { + body: '
Selser test
', + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + } + }, + 'data-parsoid': { + body: { + ids: { + mwAA: {}, + mwBB: { autoInsertedEnd: true, stx: 'html' } + } + } + } + } + } ) + .expect( validWikitextResponse( + '2. This is just some junk. See the comment above.' + ) ) + .end( done ); + } ); + + it( 'should fallback to non-selective serialization', function ( done ) { + // Without the original wikitext and an unavailable + // TemplateFetch for the source (no revision id provided), + // it should fallback to non-selective serialization. + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '
Selser test
', + original: { + title: 'Junk Page', + html: { + body: '
Selser test
', + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + } + }, + 'data-parsoid': { + body: { + ids: { + mwAA: {}, + mwBB: { autoInsertedEnd: true, stx: 'html' } + } + } + } + } + } ) + .expect( validWikitextResponse( + '
Selser test' + ) ) + .end( done ); + } ); + + it( 'should apply data-parsoid to duplicated ids', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '
data-parsoid test
data-parsoid test
', + original: { + title: 'Doesnotexist', + html: { + body: '
data-parsoid test
', + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + } + }, + 'data-parsoid': { + body: { + ids: { + mwAA: {}, + mwBB: { autoInsertedEnd: true, stx: 'html' } + } + } + } + } + } ) + .expect( validWikitextResponse( + '
data-parsoid test
data-parsoid test' + ) ) + .end( done ); + } ); + + it( 'should return a 400 for missing inline data-mw (2.x)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '

hi

', + original: { + title: 'Doesnotexist', + 'data-parsoid': { + body: { + ids: { mwAQ: { pi: [ [ { k: '1' } ] ] } } + } + }, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/2.4.0"' + }, + body: '

ho

' + } + } + } ) + .expect( 400 ) + .end( done ); + } ); + + it( 'should return a 400 for not supplying data-mw', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '

hi

', + original: { + title: 'Doesnotexist', + 'data-parsoid': { + body: { + ids: { mwAQ: { pi: [ [ { k: '1' } ] ] } } + } + }, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"' + }, + body: '

ho

' + } + } + } ) + .expect( 400 ) + .end( done ); + } ); + + it( 'should apply original data-mw', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '

hi

', + original: { + title: 'Doesnotexist', + 'data-parsoid': { + body: { + ids: { mwAQ: { pi: [ [ { k: '1' } ] ] } } + } + }, + 'data-mw': { + body: { + ids: { + mwAQ: { + parts: [ { + template: { + target: { wt: '1x', href: './Template:1x' }, + params: { 1: { wt: 'hi' } }, + i: 0 + } + } ] + } + } + } + }, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"' + }, + body: '

ho

' + } + } + } ) + .expect( validWikitextResponse( '{{1x|hi}}' ) ) + .end( done ); + } ); + + // Sanity check data-mw was applied in the previous test + it( 'should return a 400 for missing modified data-mw', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '

hi

', + original: { + title: 'Doesnotexist', + 'data-parsoid': { + body: { + ids: { mwAQ: { pi: [ [ { k: '1' } ] ] } } + } + }, + 'data-mw': { + body: { + ids: { mwAQ: {} } // Missing data-mw.parts! + } + }, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"' + }, + body: '

ho

' + } + } + } ) + .expect( 400 ) + .end( done ); + } ); + + it( 'should give precedence to inline data-mw over original', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '

hi

', + original: { + title: 'Doesnotexist', + 'data-parsoid': { + body: { + ids: { mwAQ: { pi: [ [ { k: '1' } ] ] } } + } + }, + 'data-mw': { + body: { + ids: { mwAQ: {} } // Missing data-mw.parts! + } + }, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"' + }, + body: '

ho

' + } + } + } ) + .expect( validWikitextResponse( '{{1x|hi}}' ) ) + .end( done ); + } ); + + it( 'should not apply original data-mw if modified is supplied', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '

hi

', + 'data-mw': { + body: { + ids: { + mwAQ: { + parts: [ { + template: { + target: { wt: '1x', href: './Template:1x' }, + params: { 1: { wt: 'hi' } }, + i: 0 + } + } ] + } + } + } + }, + original: { + title: 'Doesnotexist', + 'data-parsoid': { + body: { + ids: { mwAQ: { pi: [ [ { k: '1' } ] ] } } + } + }, + 'data-mw': { + body: { + ids: { mwAQ: {} } // Missing data-mw.parts! + } + }, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"' + }, + body: '

ho

' + } + } + } ) + .expect( validWikitextResponse( '{{1x|hi}}' ) ) + .end( done ); + } ); + + // The next three tests, although redundant with the above precedence + // tests, are an attempt to show clients the semantics of separate + // data-mw in the API. The main idea is, + // + // non-inline-data-mw = modified || original + // inline-data-mw > non-inline-data-mw + + it( 'should apply original data-mw when modified is absent (captions 1)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '

', + original: { + title: 'Doesnotexist', + 'data-parsoid': { + body: { + ids: { + mwAg: { optList: [ { ck: 'caption', ak: 'Testing 123' } ] }, + mwAw: { a: { href: './File:Foobar.jpg' }, sa: {} }, + mwBA: { + a: { resource: './File:Foobar.jpg', height: '28', width: '240' }, + sa: { resource: 'File:Foobar.jpg' } + } + } + } + }, + 'data-mw': { + body: { + ids: { + mwAg: { caption: 'Testing 123' } + } + } + }, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"' + }, + body: '

' + } + } + } ) + .expect( validWikitextResponse( '[[File:Foobar.jpg|Testing 123]]' ) ) + .end( done ); + } ); + + it( 'should give precedence to inline data-mw over modified (captions 2)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '

', + 'data-mw': { + body: { + ids: { + mwAg: { caption: 'Testing 123' } + } + } + }, + original: { + title: 'Doesnotexist', + 'data-parsoid': { + body: { + ids: { + mwAg: { optList: [ { ck: 'caption', ak: 'Testing 123' } ] }, + mwAw: { a: { href: './File:Foobar.jpg' }, sa: {} }, + mwBA: { + a: { resource: './File:Foobar.jpg', height: '28', width: '240' }, + sa: { resource: 'File:Foobar.jpg' } + } + } + } + }, + 'data-mw': { + body: { + ids: { + mwAg: { caption: 'Testing 123' } + } + } + }, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"' + }, + body: '

' + } + } + } ) + .expect( validWikitextResponse( '[[File:Foobar.jpg]]' ) ) + .end( done ); + } ); + + it( 'should give precedence to modified data-mw over original (captions 3)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '

', + 'data-mw': { + body: { + ids: { + mwAg: {} + } + } + }, + original: { + title: 'Doesnotexist', + 'data-parsoid': { + body: { + ids: { + mwAg: { optList: [ { ck: 'caption', ak: 'Testing 123' } ] }, + mwAw: { a: { href: './File:Foobar.jpg' }, sa: {} }, + mwBA: { + a: { resource: './File:Foobar.jpg', height: '28', width: '240' }, + sa: { resource: 'File:Foobar.jpg' } + } + } + } + }, + 'data-mw': { + body: { + ids: { + mwAg: { caption: 'Testing 123' } + } + } + }, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"' + }, + body: '

' + } + } + } ) + .expect( validWikitextResponse( '[[File:Foobar.jpg]]' ) ) + .end( done ); + } ); + + it( 'should apply extra normalizations', function ( done ) { + client.req + .post( endpointPrefix + '/transform/html/to/wikitext/' ) + .send( { + html: '

', + original: { title: 'Doesnotexist' } + } ) + .expect( validWikitextResponse( + '' + ) ) + .end( done ); + } ); + + it( 'should return a request too large error', function ( done ) { + if ( skipForNow ) { + return this.skip(); + } // Set limits in config + client.req + .post( endpointPrefix + '/transform/html/to/wikitext/' ) + .send( { + original: { + title: 'Large_Page' + }, + html: 'a'.repeat( parsoidOptions.limits.html2wt.maxHTMLSize + 1 ) + } ) + .expect( 413 ) + .end( done ); + } ); + + it( 'should fail to downgrade the original version for an unknown transition', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/wikitext/' ) + .send( { + html: '\n123', + original: { + title: 'Doesnotexist', + 'data-parsoid': { body: { ids: {} } }, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/2222.0.0"' + }, + body: '\n123' + } + } + } ) + .expect( 400 ) + .end( done ); + } ); + + } ); // end html2wt + + describe( 'pb2pb', function () { + + it( 'should require an original or previous version', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/pagebundle/Reuse_Page/100' ) + .send( {} ) + .expect( 400 ) + .end( done ); + } ); + + const previousRevHTML = { + revid: 99, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + }, + body: '

pink

' + }, + 'data-parsoid': { + headers: { + 'content-type': 'application/json;profile="https://www.mediawiki.org/wiki/Specs/data-parsoid/' + defaultContentVersion + '"' + }, + body: { + counter: 2, + ids: { + mwAg: { pi: [ [] ], src: '{{colours of the rainbow}}' } // artificially added src + } + } + } + }; + + it( 'should error when revision not found (transform, pb2pb)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/pagebundle/Doesnotexist' ) + .send( { + previous: previousRevHTML + } ) + .expect( 404 ) + .end( done ); + } ); + + // FIXME: Expansion reuse wasn't ported, see T98995 + it.skip( 'should accept the previous revision to reuse expansions', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/pagebundle/Reuse_Page/100' ) + .send( { + previous: previousRevHTML + } ) + .expect( validPageBundleResponse( function ( doc ) { + doc.body.firstChild.textContent.should.match( /pink/ ); + } ) ) + .end( done ); + } ); + + const origHTML = JSON.parse( JSON.stringify( previousRevHTML ) ); + origHTML.revid = 100; + + // FIXME: Expansion reuse wasn't ported, see T98995 + it.skip( 'should accept the original and reuse certain expansions', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/pagebundle/Reuse_Page/100' ) + .send( { + updates: { + transclusions: true + }, + original: origHTML + } ) + .expect( validPageBundleResponse( function ( doc ) { + doc.body.firstChild.textContent.should.match( /purple/ ); + } ) ) + .end( done ); + } ); + + it( 'should refuse an unknown conversion (2.x -> 999.x)', function ( done ) { + previousRevHTML.html.headers[ 'content-type' ].should.equal( 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/2.4.0"' ); + client.req + .post( endpointPrefix + '/transform/pagebundle/to/pagebundle/Reuse_Page/100' ) + .set( 'Accept', 'application/json; profile="https://www.mediawiki.org/wiki/Specs/pagebundle/999.0.0"' ) + .send( { + previous: previousRevHTML + } ) + .expect( 415 ) + .end( done ); + } ); + + it( 'should downgrade 999.x content to 2.x', function ( done ) { + const contentVersion = '2.4.0'; + client.req + .post( endpointPrefix + '/transform/pagebundle/to/pagebundle/' ) + .set( 'Accept', 'application/json; profile="https://www.mediawiki.org/wiki/Specs/pagebundle/' + contentVersion + '"' ) + .send( { + original: { + title: 'Doesnotexist', + 'data-parsoid': { + body: { + ids: { mwAQ: { pi: [ [ { k: '1' } ] ] } } + } + }, + 'data-mw': { + body: { + ids: { + mwAQ: { + parts: [ { + template: { + target: { wt: '1x', href: './Template:1x' }, + params: { 1: { wt: 'hi' } }, + i: 0 + } + } ] + } + } + } + }, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"' + }, + body: '\n

ho

' + } + } + } ) + .expect( status200 ) + .expect( acceptablePageBundleResponse( contentVersion, function ( html ) { + // In < 999.x, data-mw is still inline. + html.should.match( /\s+data-mw\s*=\s*['"]/ ); + html.should.not.match( /\s+data-parsoid\s*=\s*['"]/ ); + const doc = domino.createDocument( html ); + const meta = doc.querySelector( 'meta[property="mw:html:version"], meta[property="mw:htmlVersion"]' ); + meta.getAttribute( 'content' ).should.equal( contentVersion ); + } ) ) + .end( done ); + } ); + + it( 'should accept the original and update the redlinks', function ( done ) { + if ( skipForNow ) { + return this.skip(); + } // Create pages to fix redlinks count + // NOTE: Keep this on an older version to show that it's preserved + // through the transformation. + const contentVersion = '2.0.0'; + client.req + .post( endpointPrefix + '/transform/pagebundle/to/pagebundle/' ) + .send( { + updates: { + redlinks: true + }, + original: { + title: 'Doesnotexist', + 'data-parsoid': { + body: { + ids: {} + } + }, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/' + contentVersion + '"' + }, + body: '

Special:Version Doesnotexist Redirected

' + } + } + } ) + .expect( acceptablePageBundleResponse( contentVersion, function ( html ) { + const doc = domino.createDocument( html ); + doc.body.querySelectorAll( 'a' ).length.should.equal( 3 ); + const redLinks = doc.body.querySelectorAll( '.new' ); + redLinks.length.should.equal( 1 ); + redLinks[ 0 ].getAttribute( 'title' ).should.equal( 'Doesnotexist' ); + const redirects = doc.body.querySelectorAll( '.mw-redirect' ); + redirects.length.should.equal( 1 ); + redirects[ 0 ].getAttribute( 'title' ).should.equal( 'Redirected' ); + } ) ) + .end( done ); + } ); + + ( skipForNow ? describe.skip : describe )( 'Variant conversion', function () { + + it( 'should refuse variant conversion on en page', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/pagebundle/' ) + .send( { + updates: { + variant: { target: 'sr-el' } + }, + original: { + revid: 1, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + }, + body: '

абвг abcd

' + } + } + } ) + .expect( 400 ) + .end( done ); + } ); + + it( 'should accept the original and do variant conversion (given oldid)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/pagebundle/' ) + .send( { + updates: { + variant: { target: 'sr-el' } + }, + original: { + revid: 104, /* sets the pagelanguage */ + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + }, + body: '

абвг abcd x

' + } + } + } ) + .expect( status200 ) + .expect( ( res ) => { + // We don't actually require the result to have data-parsoid + // if the input didn't have data-parsoid; hack the result + // in order to make validPageBundleResponse() pass. + res.body[ 'data-parsoid' ].body = {}; + } ) + .expect( validPageBundleResponse( function ( doc ) { + doc.body.textContent.should.equal( 'abvg abcd x' ); + } ) ) + .expect( ( res ) => { + const headers = res.body.html.headers; + headers.should.have.property( 'content-language' ); + headers[ 'content-language' ].should.equal( 'sr-el' ); + headers.should.have.property( 'vary' ); + headers.vary.should.match( /\bAccept-Language\b/i ); + } ) + .end( done ); + } ); + + it( 'should accept the original and do variant conversion (given pagelanguage)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/pagebundle/' ) + .set( 'Content-Language', 'sr' ) + .set( 'Accept-Language', 'sr-el' ) + .send( { + updates: { + variant: { /* target implicit from accept-language */} + }, + original: { + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + }, + body: '

абвг abcd

' + } + } + } ) + .expect( status200 ) + .expect( ( res ) => { + // We don't actually require the result to have data-parsoid + // if the input didn't have data-parsoid; hack the result + // in order to make validPageBundleResponse() pass. + res.body[ 'data-parsoid' ].body = {}; + } ) + .expect( validPageBundleResponse( function ( doc ) { + doc.body.textContent.should.equal( 'abvg abcd' ); + } ) ) + .expect( ( res ) => { + const headers = res.body.html.headers; + headers.should.have.property( 'content-language' ); + headers[ 'content-language' ].should.equal( 'sr-el' ); + headers.should.have.property( 'vary' ); + headers.vary.should.match( /\bAccept-Language\b/i ); + } ) + .end( done ); + } ); + + it( 'should not perform variant conversion w/ invalid variant (given pagelanguage)', function ( done ) { + client.req + .post( endpointPrefix + '/transform/pagebundle/to/pagebundle/' ) + .set( 'Content-Language', 'sr' ) + .set( 'Accept-Language', 'sr-BOGUS' ) + .send( { + updates: { + variant: { /* target implicit from accept-language */} + }, + original: { + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/' + defaultContentVersion + '"' + }, + body: '

абвг abcd

' + } + } + } ) + .expect( status200 ) + .expect( ( res ) => { + // We don't actually require the result to have data-parsoid + // if the input didn't have data-parsoid; hack the result + // in order to make validPageBundleResponse() pass. + res.body[ 'data-parsoid' ].body = {}; + } ) + .expect( validPageBundleResponse( function ( doc ) { + doc.body.textContent.should.equal( 'абвг abcd' ); + } ) ) + .expect( ( res ) => { + const headers = res.body.html.headers; + headers.should.have.property( 'content-language' ); + headers[ 'content-language' ].should.equal( 'sr' ); + headers.should.have.property( 'vary' ); + headers.vary.should.match( /\bAccept-Language\b/i ); + } ) + .end( done ); + } ); + + } ); + + } ); // end pb2pb + +} );
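A note for readers of the pagebundle/to/wikitext tests above: the precedence rule those tests assert (inline data-mw wins over a separately supplied bundle, and a modified top-level "data-mw" bundle wins over original["data-mw"]) can be summarised in a short sketch. This is illustrative only, not part of the patch; the helper name and its arguments are hypothetical.

// Minimal sketch (not from this patch) of the data-mw precedence the tests assert.
// Names are hypothetical.
function resolveDataMw( id, inlineDataMw, modifiedDataMw, originalDataMw ) {
	if ( inlineDataMw !== undefined ) {
		// Inline data-mw on the element (e.g. in 2.x HTML) takes precedence over both bundles.
		return inlineDataMw;
	}
	// Otherwise a modified bundle, if supplied, replaces the original wholesale,
	// even when its entry for this id is empty (see the "captions 3" test).
	const bundle = modifiedDataMw || originalDataMw;
	return bundle && bundle.body && bundle.body.ids[ id ];
}

// Usage, roughly: resolveDataMw( 'mwAQ', undefined, requestBody[ 'data-mw' ], requestBody.original[ 'data-mw' ] )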