Remove nbsp and similar characters from section IDs
Bug: T90902 Change-Id: I71bdb7dd43c3e532287290e3c691d9739da45475
This commit is contained in:
parent
6f9738d832
commit
129067c907
3 changed files with 39 additions and 0 deletions
|
|
@ -41,6 +41,7 @@ production.
|
|||
* …
|
||||
|
||||
=== Bug fixes in 1.31 ===
|
||||
* (T90902) Non-breaking space in header ID breaks anchor
|
||||
* …
|
||||
|
||||
=== Action API changes in 1.31 ===
|
||||
|
|
|
|||
|
|
@ -4206,6 +4206,9 @@ class Parser {
|
|||
|
||||
# Decode HTML entities
|
||||
$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );
|
||||
|
||||
$safeHeadline = $this->normalizeSectionName( $safeHeadline );
|
||||
|
||||
$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
|
||||
$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
|
||||
$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
|
||||
|
|
@ -5767,6 +5770,8 @@ class Parser {
|
|||
$text = $this->stripSectionName( $text );
|
||||
$text = Sanitizer::normalizeSectionNameWhitespace( $text );
|
||||
$text = Sanitizer::decodeCharReferences( $text );
|
||||
$text = $this->normalizeSectionName( $text );
|
||||
|
||||
return '#' . Sanitizer::escapeIdForLink( $text );
|
||||
}
|
||||
|
||||
|
|
@ -5786,6 +5791,7 @@ class Parser {
|
|||
$text = $this->stripSectionName( $text );
|
||||
$text = Sanitizer::normalizeSectionNameWhitespace( $text );
|
||||
$text = Sanitizer::decodeCharReferences( $text );
|
||||
$text = $this->normalizeSectionName( $text );
|
||||
|
||||
if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) {
|
||||
// ForAttribute() and ForLink() are the same for legacy encoding
|
||||
|
|
@ -5797,6 +5803,24 @@ class Parser {
|
|||
return "#$id";
|
||||
}
|
||||
|
||||
/**
 * Run a section name through the title parser's fragment handling so that
 * section IDs are normalized the same way as links pointing at them (T90902).
 *
 * @param string $text Section name to normalize
 * @return string The 'fragment' part produced by the title parser, or
 *  $text unchanged when the title parser rejects the input
 */
private function normalizeSectionName( $text ) {
	$services = MediaWikiServices::getInstance();
	$parser = $services->getTitleParser();

	try {
		// Prefix with '#' so the whole string is treated as a fragment
		$split = $parser->splitTitleString( "#$text" );
	} catch ( MalformedTitleException $e ) {
		// Not acceptable as a title: fall back to the input as given
		return $text;
	}

	return $split['fragment'];
}
|
||||
|
||||
/**
|
||||
* Strips a text string of wikitext for use in a section anchor
|
||||
*
|
||||
|
|
|
|||
|
|
@ -29536,3 +29536,17 @@ wgFragmentMode=[ 'html5' ]
|
|||
</p><p><a href="#啤酒">#啤酒</a> <a href="#啤酒">#啤酒</a>
|
||||
</p>
|
||||
!! end
|
||||
|
||||
!! test
|
||||
T90902: Normalize weird characters in section IDs
|
||||
!! config
|
||||
wgFragmentMode=[ 'html5', 'legacy' ]
|
||||
!! wikitext
|
||||
== Foo&nbsp;bar ==
|
||||
[[#Foo&nbsp;bar]]
|
||||
|
||||
!! html/php
|
||||
<h2><span class="mw-headline" id="Foo_bar">Foo&#160;bar</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: Foo bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
|
||||
<p><a href="#Foo_bar">#Foo&#160;bar</a>
|
||||
</p>
|
||||
!! end
|
||||
|
|
|
|||
Loading…
Reference in a new issue