diff --git a/includes/OutputTransform/Stages/HandleSectionLinks.php b/includes/OutputTransform/Stages/HandleSectionLinks.php index 3e981868c52..9d79f07c85b 100644 --- a/includes/OutputTransform/Stages/HandleSectionLinks.php +++ b/includes/OutputTransform/Stages/HandleSectionLinks.php @@ -155,6 +155,10 @@ class HandleSectionLinks extends ContentTextTransformStage { $link, $fallbackAnchor, string $wrapperType ) { $anchorEscaped = htmlspecialchars( $anchor, ENT_COMPAT ); + $idAttr = " id=\"$anchorEscaped\""; + if ( isset( $attrs['id'] ) ) { + $idAttr = ''; + } $fallback = ''; if ( $fallbackAnchor !== false && $fallbackAnchor !== $anchor ) { $fallbackAnchor = htmlspecialchars( $fallbackAnchor, ENT_COMPAT ); @@ -164,16 +168,16 @@ class HandleSectionLinks extends ContentTextTransformStage { switch ( $wrapperType ) { case 'legacy': return "" - . "$fallback$html" + . "$fallback$html" . $link . ""; case 'mwheading': return "
" - . "$fallback$html" + . "$fallback$html" . $link . "
"; case 'none': - return "$fallback$html" + return "$fallback$html" . $link; default: throw new LogicException( "Bad wrapper type: $wrapperType" ); diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index f3df5a90554..c9a3329e509 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -4361,9 +4361,20 @@ class Parser { // conveniently also giving us a way to handle French spaces (T324763) $safeHeadline = $this->tidy->tidy( $safeHeadline, [ Sanitizer::class, 'armorFrenchSpaces' ] ); + // Wrap the safe headline to parse the heading attributes + // Literal HTML tags should be sanitized at this point + // cleanUpTocLine will strip the headline tag + $wrappedHeadline = ""; + // Parse the heading contents as HTML. This makes it easier to strip out some HTML tags, // and ensures that we generate balanced HTML at the end (T218330). - $headlineDom = DOMUtils::parseHTMLToFragment( $domDocument, $safeHeadline ); + $headlineDom = DOMUtils::parseHTMLToFragment( $domDocument, $wrappedHeadline ); + + // Extract a user defined id on the heading + // A heading is expected as the first child and could be asserted + $h = $headlineDom->firstChild; + $headingId = ( $h instanceof Element && DOMUtils::isHeading( $h ) ) ? + DOMCompat::getAttribute( $h, 'id' ) : null; $this->cleanUpTocLine( $headlineDom ); @@ -4372,12 +4383,17 @@ class Parser { # For the anchor, strip out HTML-y stuff period $safeHeadline = trim( $headlineDom->textContent ); + # Save headline for section edit hint before it's normalized for the link $headlineHint = htmlspecialchars( $safeHeadline ); $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline ); $safeHeadline = self::normalizeSectionName( $safeHeadline ); + if ( $headingId !== null && $headingId !== '' ) { + $safeHeadline = $headingId; + } + $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK ); $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline ); $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY ); diff --git a/tests/parser/headings.txt b/tests/parser/headings.txt index c9f69cf9c42..271747addb0 100644 --- a/tests/parser/headings.txt +++ b/tests/parser/headings.txt @@ -2353,10 +2353,10 @@ wgParserEnableLegacyHeadingDOM=false !! wikitext

odd

!! html/php -

odd

+

odd

!! metadata/php Sections: - h3 index: toclevel:1 number:1 title:NULL off:NULL anchor/linkAnchor:odd line:odd + h3 index: toclevel:1 number:1 title:NULL off:NULL anchor/linkAnchor:asdf line:odd !! html/parsoid

odd

!! metadata/parsoid diff --git a/tests/parser/legacyHeadings.txt b/tests/parser/legacyHeadings.txt index 3a353016039..a4023e6dce1 100644 --- a/tests/parser/legacyHeadings.txt +++ b/tests/parser/legacyHeadings.txt @@ -2350,10 +2350,10 @@ wgParserEnableLegacyHeadingDOM=true !! wikitext

odd

!! html/php -

odd

+

odd

!! metadata/php Sections: - h3 index: toclevel:1 number:1 title:NULL off:NULL anchor/linkAnchor:odd line:odd + h3 index: toclevel:1 number:1 title:NULL off:NULL anchor/linkAnchor:asdf line:odd !! html/parsoid

odd

!! metadata/parsoid