Parser: Add guessSectionNameFromStrippedText() and refactor
Split up guessSectionNameFromWikiText() into pieces to reduce code duplication, and provide guessSectionNameFromStrippedText(), which doesn't do link stripping.

Really these should be named guessSection*ANCHOR*From... because they return an anchor (with encoding and a '#' prefix) instead of a section name, but I didn't want to rename the existing one.

Also make normalizeSectionName static (it doesn't use $this) so that guessSectionNameFromStrippedText() can be static as well.

Change-Id: I56b9dda805a51517549c5ed709f4bd747ca04577
This commit is contained in:
parent
4dbb6b2d77
commit
6f07389ef2
1 changed file with 51 additions and 34 deletions
|
|
@ -4207,7 +4207,7 @@ class Parser {
|
|||
# Decode HTML entities
|
||||
$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );
|
||||
|
||||
$safeHeadline = $this->normalizeSectionName( $safeHeadline );
|
||||
$safeHeadline = self::normalizeSectionName( $safeHeadline );
|
||||
|
||||
$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
|
||||
$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
|
||||
|
|
@ -5756,43 +5756,19 @@ class Parser {
|
|||
return $this->mDefaultSort;
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to guess the section anchor name based on a wikitext fragment
|
||||
* presumably extracted from a heading, for example "Header" from
|
||||
* "== Header ==".
|
||||
*
|
||||
* @param string $text
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function guessSectionNameFromWikiText( $text ) {
|
||||
# Strip out wikitext links(they break the anchor)
|
||||
$text = $this->stripSectionName( $text );
|
||||
private static function getSectionNameFromStrippedText( $text ) {
|
||||
$text = Sanitizer::normalizeSectionNameWhitespace( $text );
|
||||
$text = Sanitizer::decodeCharReferences( $text );
|
||||
$text = $this->normalizeSectionName( $text );
|
||||
|
||||
return '#' . Sanitizer::escapeIdForLink( $text );
|
||||
$text = self::normalizeSectionName( $text );
|
||||
return $text;
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as guessSectionNameFromWikiText(), but produces legacy anchors
|
||||
* instead, if possible. For use in redirects, since various versions
|
||||
* of Microsoft browsers interpret Location: headers as something other
|
||||
* than UTF-8, resulting in breakage.
|
||||
*
|
||||
* @param string $text The section name
|
||||
* @return string An anchor
|
||||
*/
|
||||
public function guessLegacySectionNameFromWikiText( $text ) {
|
||||
private static function makeAnchor( $sectionName ) {
|
||||
return '#' . Sanitizer::escapeIdForLink( $sectionName );
|
||||
}
|
||||
|
||||
private static function makeLegacyAnchor( $sectionName ) {
|
||||
global $wgFragmentMode;
|
||||
|
||||
# Strip out wikitext links(they break the anchor)
|
||||
$text = $this->stripSectionName( $text );
|
||||
$text = Sanitizer::normalizeSectionNameWhitespace( $text );
|
||||
$text = Sanitizer::decodeCharReferences( $text );
|
||||
$text = $this->normalizeSectionName( $text );
|
||||
|
||||
if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) {
|
||||
// ForAttribute() and ForLink() are the same for legacy encoding
|
||||
$id = Sanitizer::escapeIdForAttribute( $text, Sanitizer::ID_FALLBACK );
|
||||
|
|
@ -5803,13 +5779,54 @@ class Parser {
|
|||
return "#$id";
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to guess the section anchor name based on a wikitext fragment
|
||||
* presumably extracted from a heading, for example "Header" from
|
||||
* "== Header ==".
|
||||
*
|
||||
* @param string $text
|
||||
* @return string Anchor (starting with '#')
|
||||
*/
|
||||
public function guessSectionNameFromWikiText( $text ) {
|
||||
# Strip out wikitext links(they break the anchor)
|
||||
$text = $this->stripSectionName( $text );
|
||||
$sectionName = self::getSectionNameFromStrippedText( $text );
|
||||
return self::makeAnchor( $sectionName );
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as guessSectionNameFromWikiText(), but produces legacy anchors
|
||||
* instead, if possible. For use in redirects, since various versions
|
||||
* of Microsoft browsers interpret Location: headers as something other
|
||||
* than UTF-8, resulting in breakage.
|
||||
*
|
||||
* @param string $text The section name
|
||||
* @return string Anchor (starting with '#')
|
||||
*/
|
||||
public function guessLegacySectionNameFromWikiText( $text ) {
|
||||
# Strip out wikitext links(they break the anchor)
|
||||
$text = $this->stripSectionName( $text );
|
||||
$sectionName = self::getSectionNameFromStrippedText( $text );
|
||||
return self::makeLegacyAnchor( $sectionName );
|
||||
}
|
||||
|
||||
/**
|
||||
* Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
|
||||
* @param string $text Section name (plain text)
|
||||
* @return string Anchor (starting with '#')
|
||||
*/
|
||||
public static function guessSectionNameFromStrippedText( $text ) {
|
||||
$sectionName = self::getSectionNameFromStrippedText( $text );
|
||||
return self::makeAnchor( $sectionName );
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply the same normalization as code making links to this section would
|
||||
*
|
||||
* @param string $text
|
||||
* @return string
|
||||
*/
|
||||
private function normalizeSectionName( $text ) {
|
||||
private static function normalizeSectionName( $text ) {
|
||||
# T90902: ensure the same normalization is applied for IDs as to links
|
||||
$titleParser = MediaWikiServices::getInstance()->getTitleParser();
|
||||
try {
|
||||
|
|
|
|||
Loading…
Reference in a new issue