OutputTransform: Fix double IDs on headings

Based on Ifeaaba1d0215e6f67f889a09c02879cc9079aa19

Bug: T366083
Co-Authored-by: Bartosz Dziewoński <dziewonski@fastmail.fm>
Change-Id: I2712e0fa9272106e8cd686980f847ee7f6385b6f
(cherry picked from commit 5757066096a0eac7f722e63aa3722e068915d33a)
This commit is contained in:
Arlo Breault 2024-12-12 16:49:51 +01:00 committed by Reedy
parent a52cd4462c
commit 1f51ebac15
4 changed files with 28 additions and 8 deletions

View file

@ -155,6 +155,10 @@ class HandleSectionLinks extends ContentTextTransformStage {
$link, $fallbackAnchor, string $wrapperType
) {
$anchorEscaped = htmlspecialchars( $anchor, ENT_COMPAT );
+ $idAttr = " id=\"$anchorEscaped\"";
+ if ( isset( $attrs['id'] ) ) {
+ $idAttr = '';
+ }
$fallback = '';
if ( $fallbackAnchor !== false && $fallbackAnchor !== $anchor ) {
$fallbackAnchor = htmlspecialchars( $fallbackAnchor, ENT_COMPAT );
@ -164,16 +168,16 @@ class HandleSectionLinks extends ContentTextTransformStage {
switch ( $wrapperType ) {
case 'legacy':
return "<h$level" . Html::expandAttributes( $attrs ) . ">"
- . "$fallback<span class=\"mw-headline\" id=\"$anchorEscaped\">$html</span>"
+ . "$fallback<span class=\"mw-headline\"$idAttr>$html</span>"
. $link
. "</h$level>";
case 'mwheading':
return "<div class=\"mw-heading mw-heading$level\">"
- . "<h$level id=\"$anchorEscaped\"" . Html::expandAttributes( $attrs ) . ">$fallback$html</h$level>"
+ . "<h$level$idAttr" . Html::expandAttributes( $attrs ) . ">$fallback$html</h$level>"
. $link
. "</div>";
case 'none':
- return "<h$level id=\"$anchorEscaped\"" . Html::expandAttributes( $attrs ) . ">$fallback$html</h$level>"
+ return "<h$level$idAttr" . Html::expandAttributes( $attrs ) . ">$fallback$html</h$level>"
. $link;
default:
throw new LogicException( "Bad wrapper type: $wrapperType" );

View file

@ -4361,9 +4361,20 @@ class Parser {
// conveniently also giving us a way to handle French spaces (T324763)
$safeHeadline = $this->tidy->tidy( $safeHeadline, [ Sanitizer::class, 'armorFrenchSpaces' ] );
+ // Wrap the safe headline to parse the heading attributes
+ // Literal HTML tags should be sanitized at this point
+ // cleanUpTocLine will strip the headline tag
+ $wrappedHeadline = "<h$level" . $matches['attrib'][$headlineCount] . $safeHeadline . "</h$level>";
// Parse the heading contents as HTML. This makes it easier to strip out some HTML tags,
// and ensures that we generate balanced HTML at the end (T218330).
- $headlineDom = DOMUtils::parseHTMLToFragment( $domDocument, $safeHeadline );
+ $headlineDom = DOMUtils::parseHTMLToFragment( $domDocument, $wrappedHeadline );
+ // Extract a user defined id on the heading
+ // A heading is expected as the first child and could be asserted
+ $h = $headlineDom->firstChild;
+ $headingId = ( $h instanceof Element && DOMUtils::isHeading( $h ) ) ?
+ DOMCompat::getAttribute( $h, 'id' ) : null;
$this->cleanUpTocLine( $headlineDom );
@ -4372,12 +4383,17 @@ class Parser {
# For the anchor, strip out HTML-y stuff period
$safeHeadline = trim( $headlineDom->textContent );
# Save headline for section edit hint before it's normalized for the link
$headlineHint = htmlspecialchars( $safeHeadline );
$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
$safeHeadline = self::normalizeSectionName( $safeHeadline );
+ if ( $headingId !== null && $headingId !== '' ) {
+ $safeHeadline = $headingId;
+ }
$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );

View file

@ -2353,10 +2353,10 @@ wgParserEnableLegacyHeadingDOM=false
!! wikitext
<h3 id="asdf">odd</h3>
!! html/php
- <h3 id="odd" id="asdf">odd</h3>
+ <h3 id="asdf">odd</h3>
!! metadata/php
Sections:
- h3 index: toclevel:1 number:1 title:NULL off:NULL anchor/linkAnchor:odd line:odd
+ h3 index: toclevel:1 number:1 title:NULL off:NULL anchor/linkAnchor:asdf line:odd
!! html/parsoid
<section data-mw-section-id="0"></section><section data-mw-section-id="-1"><h3 id="asdf" data-parsoid='{"stx":"html","reusedId":true}'>odd</h3></section>
!! metadata/parsoid

View file

@ -2350,10 +2350,10 @@ wgParserEnableLegacyHeadingDOM=true
!! wikitext
<h3 id="asdf">odd</h3>
!! html/php
- <h3 id="asdf"><span class="mw-headline" id="odd">odd</span></h3>
+ <h3 id="asdf"><span class="mw-headline">odd</span></h3>
!! metadata/php
Sections:
- h3 index: toclevel:1 number:1 title:NULL off:NULL anchor/linkAnchor:odd line:odd
+ h3 index: toclevel:1 number:1 title:NULL off:NULL anchor/linkAnchor:asdf line:odd
!! html/parsoid
<section data-mw-section-id="0"></section><section data-mw-section-id="-1"><h3 id="asdf" data-parsoid='{"stx":"html","reusedId":true}'>odd</h3></section>
!! metadata/parsoid