Reduce code duplication correctly this time, again
The test cases I thought up are at: http://www.mediawiki.org/wiki/User:Simetrical/Id_tests All of them pass with the patch, except for some that fail on current code as well: the ones involving templates, multiply-occurring section headers, or numeric ids (there seems to be a weird bug with those that probably involves string and numeric ids being used in the same array). This is true whether $wgEnforceHtmlIds is on or off. (Actually, the problem with numeric keys doesn't happen with $wgEnforceHtmlIds off, because of course numeric ids aren't allowed then.)
This commit is contained in:
parent
a445dd5e11
commit
f7486d8642
1 changed files with 73 additions and 77 deletions
|
|
@ -3448,7 +3448,7 @@ class Parser
|
|||
* @private
|
||||
*/
|
||||
function formatHeadings( $text, $isMain=true ) {
|
||||
global $wgMaxTocLevel, $wgContLang, $wgEnforceHtmlIds;
|
||||
global $wgMaxTocLevel, $wgContLang;
|
||||
|
||||
$doNumberHeadings = $this->mOptions->getNumberHeadings();
|
||||
$showEditLink = $this->mOptions->getEditSection();
|
||||
|
|
@ -3593,71 +3593,17 @@ class Parser
|
|||
}
|
||||
}
|
||||
|
||||
# The safe header is a version of the header text safe to use for links
|
||||
# Avoid insertion of weird stuff like <math> by expanding the relevant sections
|
||||
$safeHeadline = $this->mStripState->unstripBoth( $headline );
|
||||
|
||||
# Remove link placeholders by the link text.
|
||||
# <!--LINK number-->
|
||||
# turns into
|
||||
# link text with suffix
|
||||
$safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );
|
||||
|
||||
# Strip out HTML (other than plain <sup> and <sub>: bug 8393)
|
||||
$tocline = preg_replace(
|
||||
array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ),
|
||||
array( '', '<$1>'),
|
||||
$safeHeadline
|
||||
);
|
||||
$tocline = trim( $tocline );
|
||||
|
||||
# For the anchor, strip out HTML-y stuff period
|
||||
$safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline );
|
||||
$safeHeadline = trim( $safeHeadline );
|
||||
|
||||
# Save headline for section edit hint before it's escaped
|
||||
$headlineHint = $safeHeadline;
|
||||
|
||||
if ( $wgEnforceHtmlIds ) {
|
||||
$legacyHeadline = false;
|
||||
$safeHeadline = Sanitizer::escapeId( $safeHeadline,
|
||||
'noninitial' );
|
||||
} else {
|
||||
# For reverse compatibility, provide an id that's
|
||||
# HTML4-compatible, like we used to.
|
||||
#
|
||||
# It may be worth noting, academically, that it's possible for
|
||||
# the legacy anchor to conflict with a non-legacy headline
|
||||
# anchor on the page. In this case likely the "correct" thing
|
||||
# would be to either drop the legacy anchors or make sure
|
||||
# they're numbered first. However, this would require people
|
||||
# to type in section names like "abc_.D7.93.D7.90.D7.A4"
|
||||
# manually, so let's not bother worrying about it.
|
||||
$legacyHeadline = Sanitizer::escapeId( $safeHeadline,
|
||||
'noninitial' );
|
||||
$safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' );
|
||||
|
||||
if ( $legacyHeadline == $safeHeadline ) {
|
||||
# No reason to have both (in fact, we can't)
|
||||
$legacyHeadline = false;
|
||||
} elseif ( $legacyHeadline != Sanitizer::escapeId(
|
||||
$legacyHeadline, 'xml' ) ) {
|
||||
# The legacy id is invalid XML. We used to allow this, but
|
||||
# there's no reason to do so anymore. Backward
|
||||
# compatibility will fail slightly in this case, but it's
|
||||
# no big deal.
|
||||
$legacyHeadline = false;
|
||||
}
|
||||
}
|
||||
list( $anchor, $legacyAnchor, $tocline, $headlineHint ) =
|
||||
$this->processHeadingText( $headline );
|
||||
|
||||
# HTML names must be case-insensitively unique (bug 10721). FIXME:
|
||||
# Does this apply to Unicode characters? Because we aren't
|
||||
# handling those here.
|
||||
$arrayKey = strtolower( $safeHeadline );
|
||||
if ( $legacyHeadline === false ) {
|
||||
$arrayKey = strtolower( $anchor );
|
||||
if ( $legacyAnchor === false ) {
|
||||
$legacyArrayKey = false;
|
||||
} else {
|
||||
$legacyArrayKey = strtolower( $legacyHeadline );
|
||||
$legacyArrayKey = strtolower( $legacyAnchor );
|
||||
}
|
||||
|
||||
# count how many in assoc. array so we can track dupes in anchors
|
||||
|
|
@ -3679,12 +3625,10 @@ class Parser
|
|||
}
|
||||
|
||||
# Create the anchor for linking from the TOC to the section
|
||||
$anchor = $safeHeadline;
|
||||
$legacyAnchor = $legacyHeadline;
|
||||
if ( $refers[$arrayKey] > 1 ) {
|
||||
$anchor .= '_' . $refers[$arrayKey];
|
||||
}
|
||||
if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) {
|
||||
if ( $legacyAnchor !== false && $refers[$legacyArrayKey] > 1 ) {
|
||||
$legacyAnchor .= '_' . $refers[$legacyArrayKey];
|
||||
}
|
||||
if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
|
||||
|
|
@ -3756,6 +3700,70 @@ class Parser
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Derive the anchor ids, the TOC text and the edit-section hint for one
 * heading.
 *
 * @param $headline String: heading text; strip markers and link
 *   placeholders in it are expanded before any further processing
 * @return Array with four elements, in this order:
 *   - the escaped anchor id
 *   - the legacy (HTML4-compatible) anchor id, or false when there is none
 *   - the TOC line: heading text with every tag except plain <sup>/<sub>
 *     removed, trimmed
 *   - the headline hint: heading text with every tag removed, trimmed,
 *     taken before id escaping
 */
private function processHeadingText( $headline ) {
	global $wgEnforceHtmlIds;

	# The safe header is a version of the header text safe to use for links
	# Avoid insertion of weird stuff like <math> by expanding the relevant sections
	$safeHeadline = $this->mStripState->unstripBoth( $headline );

	# Remove link placeholders by the link text.
	#     <!--LINK number-->
	# turns into
	#     link text with suffix
	$safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );

	# Strip out HTML (other than plain <sup> and <sub>: bug 8393)
	$tocline = preg_replace(
		array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ),
		array( '', '<$1>'),
		$safeHeadline
	);
	$tocline = trim( $tocline );

	# For the anchor, strip out HTML-y stuff period
	$safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline );
	$safeHeadline = trim( $safeHeadline );

	# Save headline for section edit hint before it's escaped
	$headlineHint = $safeHeadline;

	if ( $wgEnforceHtmlIds ) {
		# Only one id is produced in this mode, so there is no legacy anchor.
		$legacyHeadline = false;
		$safeHeadline = Sanitizer::escapeId( $safeHeadline,
			'noninitial' );
	} else {
		# For reverse compatibility, provide an id that's
		# HTML4-compatible, like we used to.
		#
		# It may be worth noting, academically, that it's possible for
		# the legacy anchor to conflict with a non-legacy headline
		# anchor on the page.  In this case likely the "correct" thing
		# would be to either drop the legacy anchors or make sure
		# they're numbered first.  However, this would require people
		# to type in section names like "abc_.D7.93.D7.90.D7.A4"
		# manually, so let's not bother worrying about it.
		$legacyHeadline = Sanitizer::escapeId( $safeHeadline,
			'noninitial' );
		$safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' );

		# Ids are compared as text, so use strict operators: with == / !=
		# PHP compares numeric-looking strings by value, and two different
		# ids could be treated as the same one.
		if ( $legacyHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$legacyHeadline = false;
		} elseif ( $legacyHeadline !== Sanitizer::escapeId(
		$legacyHeadline, 'xml' ) ) {
			# The legacy id is invalid XML.  We used to allow this, but
			# there's no reason to do so anymore.  Backward
			# compatibility will fail slightly in this case, but it's
			# no big deal.
			$legacyHeadline = false;
		}
	}

	return array( $safeHeadline, $legacyHeadline, $tocline,
		$headlineHint );
}
|
||||
|
||||
/**
|
||||
* Transform wiki markup when saving a page by doing \r\n -> \n
|
||||
* conversion, substitting signatures, {{subst:}} templates, etc.
|
||||
|
|
@ -4736,21 +4744,9 @@ class Parser
|
|||
* "== Header ==".
|
||||
*/
|
||||
public function guessSectionNameFromWikiText( $text ) {
|
||||
# Strip out wikitext links(they break the anchor)
|
||||
$text = $this->stripSectionName( $text );
|
||||
$headline = Sanitizer::decodeCharReferences( $text );
|
||||
# strip out HTML
|
||||
$headline = StringUtils::delimiterReplace( '<', '>', '', $headline );
|
||||
$headline = trim( $headline );
|
||||
$sectionanchor = '#' . urlencode( str_replace( ' ', '_', $headline ) );
|
||||
$replacearray = array(
|
||||
'%3A' => ':',
|
||||
'%' => '.'
|
||||
);
|
||||
return str_replace(
|
||||
array_keys( $replacearray ),
|
||||
array_values( $replacearray ),
|
||||
$sectionanchor );
|
||||
list( $text, /* unneeded here */ ) = $this->processHeadingText( $text );
|
||||
return "#$text";
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in a new issue