2012-09-24 20:51:53 +00:00
|
|
|
<?php
|
|
|
|
|
/**
|
|
|
|
|
* Content object implementation for representing flat text.
|
|
|
|
|
*
|
2012-10-16 18:04:32 +00:00
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
|
|
|
*
|
2012-10-05 13:03:24 +00:00
|
|
|
* @since 1.21
|
2012-10-16 18:04:32 +00:00
|
|
|
*
|
|
|
|
|
* @file
|
|
|
|
|
* @ingroup Content
|
|
|
|
|
*
|
|
|
|
|
* @author Daniel Kinzler
|
2012-09-24 20:51:53 +00:00
|
|
|
*/
|
2012-12-20 19:44:47 +00:00
|
|
|
|
2024-05-19 18:33:58 +00:00
|
|
|
namespace MediaWiki\Content;
|
|
|
|
|
|
|
|
|
|
use InvalidArgumentException;
|
2024-08-08 09:39:26 +00:00
|
|
|
use MediaWiki\Language\Language;
|
2022-04-25 15:19:41 +00:00
|
|
|
use MediaWiki\MainConfigNames;
|
2018-07-29 12:24:54 +00:00
|
|
|
use MediaWiki\MediaWikiServices;
|
2024-05-19 18:33:58 +00:00
|
|
|
use MWUnknownContentModelException;
|
2023-06-20 04:02:04 +00:00
|
|
|
use Wikimedia\Diff\Diff;
|
2018-07-29 12:24:54 +00:00
|
|
|
|
2012-12-20 19:44:47 +00:00
|
|
|
/**
|
|
|
|
|
* Content object implementation for representing flat text.
|
|
|
|
|
*
|
|
|
|
|
* TextContent instances are immutable
|
|
|
|
|
*
|
2020-06-26 12:56:03 +00:00
|
|
|
* @newable
|
2020-07-13 09:00:30 +00:00
|
|
|
* @stable to extend
|
2012-12-20 19:44:47 +00:00
|
|
|
* @ingroup Content
|
|
|
|
|
*/
|
2012-10-10 10:42:42 +00:00
|
|
|
class TextContent extends AbstractContent {
|
2014-03-03 17:08:05 +00:00
|
|
|
|
2017-10-10 16:02:11 +00:00
|
|
|
/**
|
|
|
|
|
* @var string
|
|
|
|
|
*/
|
|
|
|
|
// Raw text of this content object; assigned in __construct() and returned by getText().
protected $mText;
|
|
|
|
|
|
2014-03-03 17:08:05 +00:00
|
|
|
/**
|
2020-07-13 08:53:06 +00:00
|
|
|
* @stable to call
|
2014-03-03 17:08:05 +00:00
|
|
|
* @param string $text
|
|
|
|
|
* @param string $model_id
|
|
|
|
|
*/
|
2012-10-10 10:42:42 +00:00
|
|
|
public function __construct( $text, $model_id = CONTENT_MODEL_TEXT ) {
	// Hand the model ID to the parent constructor before the text is looked at.
	parent::__construct( $model_id );

	if ( $text === null || $text === false ) {
		// null and false are not rejected: a warning is emitted and the
		// empty string is stored in their place.
		wfWarn( "TextContent constructed with \$text = " . var_export( $text, true ) . "! "
			. "This may indicate an error in the caller's scope.", 2 );

		$text = '';
	} elseif ( !is_string( $text ) ) {
		// Any other non-string value is a caller error.
		throw new InvalidArgumentException( "TextContent expects a string in the constructor." );
	}

	$this->mText = $text;
}
|
|
|
|
|
|
2014-03-03 17:08:05 +00:00
|
|
|
/**
|
|
|
|
|
* @note Mutable subclasses MUST override this to return a copy!
|
|
|
|
|
*
|
|
|
|
|
* @return Content $this
|
|
|
|
|
*/
|
2012-09-24 20:51:53 +00:00
|
|
|
public function copy() {
	// TextContent objects are immutable, so returning the same instance
	// is acceptable; no clone is made.
	return $this;
}
|
|
|
|
|
|
2020-06-30 16:53:40 +00:00
|
|
|
/**
|
2020-07-13 08:57:12 +00:00
|
|
|
* @stable to override
|
2020-06-30 16:53:40 +00:00
|
|
|
*
|
|
|
|
|
* @param int $maxlength
|
|
|
|
|
*
|
|
|
|
|
* @return string
|
|
|
|
|
*/
|
2012-09-24 20:51:53 +00:00
|
|
|
public function getTextForSummary( $maxlength = 250 ) {
	$rawText = $this->getText();
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	// Flatten to a single line: every "\n" and "\r" becomes a space.
	$singleLine = preg_replace( "/[\n\r]/", ' ', $rawText );

	// A negative $maxlength is clamped to 0 before truncating.
	return $contentLanguage->truncateForDatabase( $singleLine, max( 0, $maxlength ) );
}
|
|
|
|
|
|
|
|
|
|
/**
|
2014-05-05 15:04:10 +00:00
|
|
|
* Returns the text's size in bytes.
|
2012-09-24 20:51:53 +00:00
|
|
|
*
|
2020-07-13 08:57:12 +00:00
|
|
|
* @stable to override
|
2020-06-30 16:53:40 +00:00
|
|
|
*
|
2014-05-05 15:04:10 +00:00
|
|
|
* @return int
|
2012-09-24 20:51:53 +00:00
|
|
|
*/
|
2013-03-17 15:13:22 +00:00
|
|
|
public function getSize() {
	// strlen() counts bytes, not characters.
	return strlen( $this->getText() );
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns true if this content is not a redirect, and $wgArticleCountMethod
|
|
|
|
|
* is "any".
|
|
|
|
|
*
|
2020-07-13 08:57:12 +00:00
|
|
|
* @stable to override
|
2020-06-30 16:53:40 +00:00
|
|
|
*
|
2016-02-25 13:13:22 +00:00
|
|
|
* @param bool|null $hasLinks If it is known whether this content contains links,
|
2012-09-24 20:51:53 +00:00
|
|
|
* provide this information here, to avoid redundant parsing to find out.
|
|
|
|
|
*
|
2014-03-03 17:08:05 +00:00
|
|
|
* @return bool
|
2012-09-24 20:51:53 +00:00
|
|
|
*/
|
|
|
|
|
public function isCountable( $hasLinks = null ) {
	$mainConfig = MediaWikiServices::getInstance()->getMainConfig();
	$countMethod = $mainConfig->get( MainConfigNames::ArticleCountMethod );

	// Redirects are never countable; anything else counts only when the
	// configured method is 'any'. $hasLinks is not consulted here.
	return !$this->isRedirect() && $countMethod === 'any';
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns the text represented by this Content object, as a string.
|
|
|
|
|
*
|
2018-11-08 15:19:23 +00:00
|
|
|
* @deprecated since 1.33 use getText() instead.
|
|
|
|
|
*
|
|
|
|
|
* @return string The raw text. Subclasses may guarantee a specific syntax here.
|
2012-09-24 20:51:53 +00:00
|
|
|
*/
|
2013-03-17 15:13:22 +00:00
|
|
|
public function getNativeData() {
	// Thin alias of getText().
	$text = $this->getText();

	return $text;
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns the text represented by this Content object, as a string.
|
|
|
|
|
*
|
|
|
|
|
* @since 1.33
|
2020-06-30 16:53:40 +00:00
|
|
|
* @note This method should not be overridden by subclasses. If a subclass finds itself in
|
|
|
|
|
* need to override this method, it should probably not be based on TextContent, but
|
|
|
|
|
* should rather extend AbstractContent instead.
|
2018-11-08 15:19:23 +00:00
|
|
|
*
|
|
|
|
|
* @return string The raw text.
|
|
|
|
|
*/
|
|
|
|
|
public function getText() {
	// The string stored by the constructor is returned unchanged.
	$text = $this->mText;

	return $text;
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns the text represented by this Content object, as a string.
|
|
|
|
|
*
|
2020-07-13 08:57:12 +00:00
|
|
|
* @stable to override
|
2020-06-30 16:53:40 +00:00
|
|
|
*
|
2014-03-03 17:08:05 +00:00
|
|
|
* @return string The raw text.
|
2012-09-24 20:51:53 +00:00
|
|
|
*/
|
2013-03-17 15:13:22 +00:00
|
|
|
public function getTextForSearchIndex() {
	// The raw text is returned untransformed.
	$indexText = $this->getText();

	return $indexText;
}
|
|
|
|
|
|
|
|
|
|
/**
|
2012-11-05 15:53:48 +00:00
|
|
|
* Attempts to convert this content object to wikitext,
|
|
|
|
|
* and then returns the text string. The conversion may be lossy.
|
2012-09-24 20:51:53 +00:00
|
|
|
*
|
2020-07-13 08:57:12 +00:00
|
|
|
* @stable to override
|
2020-06-30 16:53:40 +00:00
|
|
|
*
|
2014-07-24 09:30:25 +00:00
|
|
|
* @note this allows any text-based content to be transcluded as if it was wikitext.
|
2012-11-05 15:53:48 +00:00
|
|
|
*
|
2022-07-31 00:02:18 +00:00
|
|
|
* @return string|false The raw text, or false if the conversion failed.
|
2012-09-24 20:51:53 +00:00
|
|
|
*/
|
2013-03-17 15:13:22 +00:00
|
|
|
public function getWikitextForTransclusion() {
	// Lossy conversion is explicitly permitted for this call.
	/** @var WikitextContent $wikitext */
	$wikitext = $this->convert( CONTENT_MODEL_WIKITEXT, 'lossy' );
	'@phan-var WikitextContent $wikitext';

	// A falsy result from convert() is reported to the caller as false.
	if ( !$wikitext ) {
		return false;
	}

	return $wikitext->getText();
}
|
|
|
|
|
|
2016-08-16 21:58:15 +00:00
|
|
|
/**
|
|
|
|
|
* Do a "\r\n" -> "\n" and "\r" -> "\n" transformation
|
|
|
|
|
* as well as trim trailing whitespace
|
|
|
|
|
*
|
|
|
|
|
* This was formerly part of Parser::preSaveTransform, but
|
|
|
|
|
* for non-wikitext content models they probably still want
|
|
|
|
|
* to normalize line endings without all of the other PST
|
|
|
|
|
* changes.
|
|
|
|
|
*
|
|
|
|
|
* @since 1.28
|
2017-08-11 15:46:31 +00:00
|
|
|
* @param string $text
|
2016-08-16 21:58:15 +00:00
|
|
|
* @return string
|
|
|
|
|
*/
|
|
|
|
|
public static function normalizeLineEndings( $text ) {
	// Step 1: strip trailing whitespace (rtrim() with its default character set).
	$trimmed = rtrim( $text );

	// Step 2: turn "\r\n" and any remaining lone "\r" into "\n".
	$lineBreaks = [ "\r\n", "\r" ];

	return str_replace( $lineBreaks, "\n", $trimmed );
}
|
|
|
|
|
|
2012-09-24 20:51:53 +00:00
|
|
|
/**
|
2012-10-22 10:29:52 +00:00
|
|
|
* Diff this content object with another content object.
|
2012-09-24 20:51:53 +00:00
|
|
|
*
|
2020-07-13 08:57:12 +00:00
|
|
|
* @stable to override
|
2013-11-16 18:53:30 +00:00
|
|
|
* @since 1.21
|
2012-09-24 20:51:53 +00:00
|
|
|
*
|
2014-05-05 16:04:43 +00:00
|
|
|
* @param Content $that The other content object to compare this content object to.
|
2018-06-26 21:14:43 +00:00
|
|
|
* @param Language|null $lang The language object to use for text segmentation.
|
2018-07-29 12:24:54 +00:00
|
|
|
* If not given, the content language is used.
|
2012-09-24 20:51:53 +00:00
|
|
|
*
|
2013-11-16 18:53:30 +00:00
|
|
|
* @return Diff A diff representing the changes that would have to be
|
2012-09-24 20:51:53 +00:00
|
|
|
* made to this content object to make it equal to $that.
|
|
|
|
|
*/
|
2024-10-16 18:58:33 +00:00
|
|
|
public function diff( Content $that, ?Language $lang = null ) {
	// checkModelID() is defined outside this view; it is handed the other
	// object's model (presumably to reject a mismatch - confirm in AbstractContent).
	$this->checkModelID( $that->getModel() );
	/** @var self $that */
	'@phan-var self $that';
	// @todo could implement this in DifferenceEngine and just delegate here?

	// Fall back to the content language when the caller supplied none.
	if ( $lang === null ) {
		$lang = MediaWikiServices::getInstance()->getContentLanguage();
	}

	$oldText = $this->getText();
	$newText = $that->getText();

	// Note: Use native PHP diff, external engines don't give us abstract output
	$oldLines = explode( "\n", $lang->segmentForDiff( $oldText ) );
	$newLines = explode( "\n", $lang->segmentForDiff( $newText ) );

	return new Diff( $oldLines, $newLines );
}
|
|
|
|
|
|
2012-11-05 15:53:48 +00:00
|
|
|
/**
|
|
|
|
|
* This implementation provides lossless conversion between content models based
|
|
|
|
|
* on TextContent.
|
|
|
|
|
*
|
2020-07-13 08:57:12 +00:00
|
|
|
* @stable to override
|
2020-06-30 16:53:40 +00:00
|
|
|
*
|
2014-05-05 16:04:43 +00:00
|
|
|
* @param string $toModel The desired content model, use the CONTENT_MODEL_XXX flags.
|
|
|
|
|
* @param string $lossy Flag, set to "lossy" to allow lossy conversion. If lossy conversion is not
|
|
|
|
|
* allowed, full round-trip conversion is expected to work without losing information.
|
2012-11-05 15:53:48 +00:00
|
|
|
*
|
2022-07-31 00:02:18 +00:00
|
|
|
* @return Content|false A content object with the content model $toModel, or false if that
|
2014-05-05 16:04:43 +00:00
|
|
|
* conversion is not supported.
|
2020-01-18 20:25:04 +00:00
|
|
|
* @throws MWUnknownContentModelException
|
2014-03-03 17:08:05 +00:00
|
|
|
*
|
|
|
|
|
* @see Content::convert()
|
2012-11-05 15:53:48 +00:00
|
|
|
*/
|
|
|
|
|
public function convert( $toModel, $lossy = '' ) {
	// The parent implementation gets the first attempt.
	$result = parent::convert( $toModel, $lossy );

	if ( $result === false ) {
		// Fallback: when the target model's handler is a TextContentHandler,
		// build the target content from this object's text.
		$targetHandler = $this->getContentHandlerFactory()->getContentHandler( $toModel );

		if ( $targetHandler instanceof TextContentHandler ) {
			// NOTE: ignore content serialization format - it's just text anyway.
			$result = $targetHandler->unserializeContent( $this->getText() );
		}
	}

	// Still false here when neither path produced a conversion.
	return $result;
}
|
2014-03-03 17:08:05 +00:00
|
|
|
|
2012-10-10 10:42:42 +00:00
|
|
|
}
|
2024-05-19 18:33:58 +00:00
|
|
|
/** @deprecated class alias since 1.43 */
|
|
|
|
|
// Registers the un-namespaced name 'TextContent' as an alias of MediaWiki\Content\TextContent.
class_alias( TextContent::class, 'TextContent' );
|