(bug 34939) Handle mixed-case URL protocols in wikitext

This patch makes the regexes that match URL protocols case
insensitive. From now on, links like [HTTP://ww] will be rendered.

Tests added.

Change-Id: I706acb7a0ae194b50d2318763beae4e5e83671f3
This commit is contained in:
Fran McCrory 2012-07-10 14:49:02 -04:00 committed by Antoine Musso
parent 82af9811ec
commit d6028a1811
8 changed files with 43 additions and 12 deletions

View file

@ -141,6 +141,7 @@ upgrade PHP if you have not done so prior to upgrading MediaWiki.
with auto-hide, multi-message support, and message replacement tags.
* jquery.messageBox which appears to be unused by both core and extensions has
been removed.
* (bug 34939) made link parsing case insensitive ([HttP://])
=== Bug fixes in 1.20 ===
* (bug 30245) Use the correct way to construct a log page title.

View file

@ -786,6 +786,9 @@ function wfParseUrl( $url ) {
return false;
}
// parse_url() incorrectly handles schemes case-sensitively. Convert it to lowercase.
$bits['scheme'] = strtolower( $bits['scheme'] );
// Most protocols are followed by ://, but mailto: and sometimes news: are not, so check for it
if ( in_array( $bits['scheme'] . '://', $wgUrlProtocols ) ) {
$bits['delimiter'] = '://';

View file

@ -1026,7 +1026,7 @@ class Sanitizer {
# Stupid hack
$encValue = preg_replace_callback(
'/(' . wfUrlProtocols() . ')/',
'/((?i)' . wfUrlProtocols() . ')/',
array( 'Sanitizer', 'armorLinksCallback' ),
$encValue );
return $encValue;

View file

@ -1063,7 +1063,7 @@ abstract class Skin extends ContextSource {
* @return String URL
*/
static function makeInternalOrExternalUrl( $name ) {
if ( preg_match( '/^(?:' . wfUrlProtocols() . ')/', $name ) ) {
if ( preg_match( '/^(?i:' . wfUrlProtocols() . ')/', $name ) ) {
return $name;
} else {
return self::makeUrl( $name );
@ -1227,7 +1227,7 @@ abstract class Skin extends ContextSource {
$text = $line[1];
}
if ( preg_match( '/^(?:' . wfUrlProtocols() . ')/', $link ) ) {
if ( preg_match( '/^(?i:' . wfUrlProtocols() . ')/', $link ) ) {
$href = $link;
// Parser::getExternalLinkAttribs won't work here because of the Namespace things

View file

@ -271,7 +271,7 @@ See the <a href='https://www.mediawiki.org/wiki/API'>complete documentation</a>,
// identify URLs
$protos = wfUrlProtocolsWithoutProtRel();
// This regex hacks around bug 13218 (&quot; included in the URL)
$text = preg_replace( "#(($protos).*?)(&quot;)?([ \\'\"<>\n]|&lt;|&gt;|&quot;)#", '<a href="\\1">\\1</a>\\3\\4', $text );
$text = preg_replace( "#(((?i)$protos).*?)(&quot;)?([ \\'\"<>\n]|&lt;|&gt;|&quot;)#", '<a href="\\1">\\1</a>\\3\\4', $text );
// identify requests to api.php
$text = preg_replace( "#api\\.php\\?[^ <\n\t]+#", '<a href="\\0">\\0</a>', $text );
if ( $this->mHelp ) {

View file

@ -207,7 +207,7 @@ class Parser {
public function __construct( $conf = array() ) {
$this->mConf = $conf;
$this->mUrlProtocols = wfUrlProtocols();
$this->mExtLinkBracketedRegex = '/\[((' . $this->mUrlProtocols . ')'.
$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')'.
self::EXT_LINK_URL_CLASS.'+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
if ( isset( $conf['preprocessorClass'] ) ) {
$this->mPreprocessorClass = $conf['preprocessorClass'];
@ -1187,7 +1187,7 @@ class Parser {
'!(?: # Start cases
(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
(<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
(\\b(?:$prots)$urlChar+) | # m[3]: Free external links" . '
(\\b(?i:$prots)$urlChar+) | # m[3]: Free external links" . '
(?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number
ISBN\s+(\b # m[5]: ISBN, capture number
(?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
@ -1853,7 +1853,7 @@ class Parser {
# Don't allow internal links to pages containing
# PROTO: where PROTO is a valid URL protocol; these
# should be external links.
if ( preg_match( '/^(?:' . $this->mUrlProtocols . ')/', $m[1] ) ) {
if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $m[1] ) ) {
$s .= $prefix . '[[' . $line ;
wfProfileOut( __METHOD__."-misc" );
continue;
@ -2090,7 +2090,7 @@ class Parser {
* @return String: less-or-more HTML with NOPARSE bits
*/
function armorLinks( $text ) {
return preg_replace( '/\b(' . $this->mUrlProtocols . ')/',
return preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/',
"{$this->mUniqPrefix}NOPARSE$1", $text );
}
@ -5095,8 +5095,8 @@ class Parser {
$paramName = 'no-link';
$value = true;
$validated = true;
} elseif ( preg_match( "/^$prots/", $value ) ) {
if ( preg_match( "/^($prots)$chars+$/u", $value, $m ) ) {
} elseif ( preg_match( "/^(?i)$prots/", $value ) ) {
if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) {
$paramName = 'link-url';
$this->mOutput->addExternalLink( $value );
if ( $this->mOptions->getExternalLinkTarget() ) {
@ -5622,7 +5622,7 @@ class Parser {
# @todo FIXME: Not tolerant to blank link text
# I.E. [http://www.mediawiki.org] will render as [1] or something depending
# on how many empty links there are on the page - need to figure that out.
$text = preg_replace( '/\[(?:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text );
$text = preg_replace( '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text );
# Parse wikitext quotes (italics & bold)
$text = $this->doQuotes( $text );

View file

@ -226,7 +226,7 @@ class Parser_LinkHooks extends Parser {
# Don't allow internal links to pages containing
# PROTO: where PROTO is a valid URL protocol; these
# should be external links.
if( preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $titleText) ) {
if( preg_match('/^\b(?i:' . wfUrlProtocols() . ')/', $titleText) ) {
wfProfileOut( __METHOD__ );
return $wt;
}

View file

@ -10343,6 +10343,33 @@ abc
</p>
!! end
!!test
Bug 34939 - Case insensitive link parsing ([HttP://])
!! input
[HttP://MediaWiki.Org/]
!! result
<p><a rel="nofollow" class="external autonumber" href="HttP://MediaWiki.Org/">[1]</a>
</p>
!! end
!!test
Bug 34939 - Case insensitive link parsing ([HttP:// title])
!! input
[HttP://MediaWiki.Org/ MediaWiki]
!! result
<p><a rel="nofollow" class="external text" href="HttP://MediaWiki.Org/">MediaWiki</a>
</p>
!! end
!!test
Bug 34939 - Case insensitive link parsing (HttP://)
!! input
HttP://MediaWiki.Org/
!! result
<p><a rel="nofollow" class="external free" href="HttP://MediaWiki.Org/">HttP://MediaWiki.Org/</a>
</p>
!! end
TODO:
more images