Create Parser::stripOuterParagraph to avoid code duplication

We've had the logic for stripping the outer <p/> element in three
separate places. The version in OutputPage was missing the '$' at the
end of the regex, that was most likely a mistake caused by the
duplication.

Also, extend the logic in order not to generate invalid HTML if the
input contains more than one <p/> tag. Added tests for this and the
previous behaviour.

https://www.mail-archive.com/mediawiki-api@lists.wikimedia.org/msg03188.html

Change-Id: I6bb3597898324556df912a23a7ffc9ff250b8f58
This commit is contained in:
Bartosz Dziewoński 2014-02-22 16:21:36 +01:00 committed by Brad Jorsch
parent ee9d875607
commit c3aa5ef597
5 changed files with 60 additions and 15 deletions

View file

@ -1726,10 +1726,7 @@ function wfMsgExt( $key, $options ) {
}
if ( $parseInline ) {
$m = array();
if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $string, $m ) ) {
$string = $m[1];
}
$string = Parser::stripOuterParagraph( $string );
}
} elseif ( in_array( 'parsemag', $options, true ) ) {
$string = $messageCache->transform( $string,

View file

@ -659,10 +659,7 @@ class Message {
# Maybe transform using the full parser
if ( $this->format === 'parse' ) {
$string = $this->parseText( $string );
$m = array();
if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $string, $m ) ) {
$string = $m[1];
}
$string = Parser::stripOuterParagraph( $string );
} elseif ( $this->format === 'block-parse' ) {
$string = $this->parseText( $string );
} elseif ( $this->format === 'text' ) {

View file

@ -1748,13 +1748,7 @@ class OutputPage extends ContextSource {
*/
public function parseInline( $text, $linestart = true, $interface = false ) {
$parsed = $this->parse( $text, $linestart, $interface );
$m = array();
if ( preg_match( '/^<p>(.*)\n?<\/p>\n?/sU', $parsed, $m ) ) {
$parsed = $m[1];
}
return $parsed;
return Parser::stripOuterParagraph( $parsed );
}
/**

View file

@ -6400,4 +6400,25 @@ class Parser {
return $recursiveCheck;
}
/**
* Strip outer <p></p> tag from the HTML source of a single paragraph.
*
* Returns original HTML if the <p/> tag has any attributes, if there's no wrapping <p/> tag,
* or if there is more than one <p/> tag in the input HTML.
*
* @param string $html
* @return string
* @since 1.23
*/
public static function stripOuterParagraph( $html ) {
$m = array();
if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) ) {
if ( strpos( $m[1], '</p>' ) === false ) {
$html = $m[1];
}
}
return $html;
}
}

View file

@ -29,6 +29,42 @@ class ParserMethodsTest extends MediaWikiLangTestCase {
$this->assertEquals( $expected, $text );
}
public static function provideStripOuterParagraph() {
// This mimics the most common use case (stripping paragraphs generated by the parser).
$message = new RawMessage( "Message text." );
return array(
array(
"<p>Text.</p>",
"Text.",
),
array(
"<p class='foo'>Text.</p>",
"<p class='foo'>Text.</p>",
),
array(
"<p>Text.\n</p>\n",
"Text.",
),
array(
"<p>Text.</p><p>More text.</p>",
"<p>Text.</p><p>More text.</p>",
),
array(
$message->parse(),
"Message text.",
),
);
}
/**
* @dataProvider provideStripOuterParagraph
* @covers Parser::stripOuterParagraph
*/
public function testStripOuterParagraph( $text, $expected ) {
$this->assertEquals( $expected, Parser::stripOuterParagraph( $text ) );
}
/**
* @expectedException MWException
* @expectedExceptionMessage Parser state cleared while parsing. Did you call Parser::parse recursively?