diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 0873d8722de..361ac9dfe6a 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -1059,6 +1059,15 @@ $wgAutoloadLocalClasses = array( 'UserloginTemplate' => 'includes/templates/Userlogin.php', 'UsercreateTemplate' => 'includes/templates/Usercreate.php', + # includes/title + 'PageLinkRenderer' => 'includes/title/PageLinkRenderer.php', + 'TitleFormatter' => 'includes/title/TitleFormatter.php', + 'TitleParser' => 'includes/title/TitleParser.php', + 'TitleValue' => 'includes/title/TitleValue.php', + 'MalformedTitleException' => 'includes/title/MalformedTitleException.php', + 'MediaWikiPageLinkRenderer' => 'includes/title/MediaWikiPageLinkRenderer.php', + 'MediaWikiTitleCodec' => 'includes/title/MediaWikiTitleCodec.php', + # includes/upload 'UploadBase' => 'includes/upload/UploadBase.php', 'UploadFromFile' => 'includes/upload/UploadFromFile.php', diff --git a/includes/Linker.php b/includes/Linker.php index 23bfd61cb0a..033926ac2ed 100644 --- a/includes/Linker.php +++ b/includes/Linker.php @@ -25,6 +25,8 @@ * for primarily page content: links, embedded images, table of contents. Links * are also used in the skin. * + * @todo: turn this into a legacy interface for HtmlPageLinkRenderer and similar services. + * * @ingroup Skins */ class Linker { diff --git a/includes/Title.php b/includes/Title.php index c85a0068fcf..995deeb26d6 100644 --- a/includes/Title.php +++ b/includes/Title.php @@ -27,6 +27,8 @@ * Optionally may contain an interwiki designation or namespace. * @note This class can fetch various kinds of data from the database; * however, it does so inefficiently. + * @note Consider using a TitleValue object instead. TitleValue is more lightweight + * and does not rely on global state or the database. 
* * @internal documentation reviewed 15 Mar 2010 */ @@ -85,8 +87,55 @@ class Title { var $mNotificationTimestamp = array(); // /< Associative array of user ID -> timestamp/false var $mHasSubpage; // /< Whether a page has any subpages private $mPageLanguage = false; // /< The (string) language code of the page's language and content code. + private $mTitleValue = null; // /< A corresponding TitleValue object // @} + /** + * B/C kludge: provide a TitleParser for use by Title. + * Ideally, Title would have no methods that need this. + * Avoid usage of this singleton by using TitleValue + * and the associated services when possible. + * + * @return TitleParser + */ + private static function getTitleParser() { + global $wgContLang, $wgLocalInterwikis; + + static $titleCodec = null; + static $titleCodecFingerprint = null; + + // $wgContLang and $wgLocalInterwikis may change (especially while testing), + // make sure we are using the right one. To detect changes over the course + // of a request, we remember a fingerprint of the config used to create the + // codec singleton, and re-create it if the fingerprint doesn't match. + $fingerprint = spl_object_hash( $wgContLang ) . '|' . join( '+', $wgLocalInterwikis ); + + if ( $fingerprint !== $titleCodecFingerprint ) { + $titleCodec = null; + } + + if ( !$titleCodec ) { + $titleCodec = new MediaWikiTitleCodec( $wgContLang, GenderCache::singleton(), $wgLocalInterwikis ); + $titleCodecFingerprint = $fingerprint; + } + + return $titleCodec; + } + + /** + * B/C kludge: provide a TitleFormatter for use by Title. + * Ideally, Title would have no methods that need this. + * Avoid usage of this singleton by using TitleValue + * and the associated services when possible. + * + * @return TitleFormatter + */ + private static function getTitleFormatter() { + //NOTE: we know that getTitleParser() returns a MediaWikiTitleCodec, + // which implements TitleFormatter. 
+ return self::getTitleParser(); + } + /** * Constructor */ @@ -110,6 +159,20 @@ class Title { } } + /** + * Create a new Title from a TitleValue + * + * @param TitleValue $titleValue, assumed to be safe. + * + * @return Title + */ + public static function newFromTitleValue( TitleValue $titleValue ) { + return self::makeTitle( + $titleValue->getNamespace(), + $titleValue->getText(), + $titleValue->getFragment() ); + } + /** * Create a new Title from text, such as what one would find in a link. De- * codes any HTML entities in the text. @@ -147,7 +210,7 @@ class Title { $t = new Title(); $t->mDbkeyform = str_replace( ' ', '_', $filteredText ); - $t->mDefaultNamespace = $defaultNamespace; + $t->mDefaultNamespace = intval( $defaultNamespace ); if ( $t->secureAndSplit() ) { if ( $defaultNamespace == NS_MAIN ) { @@ -473,6 +536,8 @@ class Title { * Note that this doesn't pick up many things that could be wrong with titles, but that * replacing this regex with something valid will make many titles valid. * + * @todo: move this into MediaWikiTitleCodec + * * @return String regex string */ static function getTitleInvalidRegex() { @@ -741,6 +806,31 @@ class Title { return Interwiki::fetch( $this->mInterwiki )->getWikiID(); } + /** + * Get a TitleValue object representing this Title. + * + * @note: Not all valid Titles have a corresponding valid TitleValue + * (e.g. TitleValues cannot represent page-local links that have a + * fragment but no title text). + * + * @return TitleValue|null + */ + public function getTitleValue() { + if ( $this->mTitleValue === null ) { + try { + $this->mTitleValue = new TitleValue( + $this->getNamespace(), + $this->getDBkey(), + $this->getFragment() ); + } catch ( InvalidArgumentException $ex ) { + wfDebug( __METHOD__ . ': Can\'t create a TitleValue for [[' . + $this->getPrefixedText() . ']]: ' . $ex->getMessage() . 
"\n" ); + } + } + + return $this->mTitleValue; + } + /** * Get the text form (spaces not underscores) of the main part * @@ -830,8 +920,6 @@ class Title { * @return String: Namespace text */ public function getNsText() { - global $wgContLang; - if ( $this->isExternal() ) { // This probably shouldn't even happen. ohh man, oh yuck. // But for interwiki transclusion it sometimes does. @@ -844,13 +932,13 @@ class Title { } } - if ( $wgContLang->needsGenderDistinction() && - MWNamespace::hasGenderDistinction( $this->mNamespace ) ) { - $gender = GenderCache::singleton()->getGenderOf( $this->getText(), __METHOD__ ); - return $wgContLang->getGenderNsText( $this->mNamespace, $gender ); + try { + $formatter = $this->getTitleFormatter(); + return $formatter->getNamespaceName( $this->mNamespace, $this->mDbkeyform ); + } catch ( InvalidArgumentException $ex ) { + wfDebug( __METHOD__ . ': ' . $ex->getMessage() . "\n" ); + return false; } - - return $wgContLang->getNsText( $this->mNamespace ); } /** @@ -3200,8 +3288,6 @@ class Title { * @return Bool true on success */ private function secureAndSplit() { - global $wgContLang, $wgLocalInterwikis; - # Initialisation $this->mInterwiki = ''; $this->mFragment = ''; @@ -3209,179 +3295,25 @@ class Title { $dbkey = $this->mDbkeyform; - # Strip Unicode bidi override characters. - # Sometimes they slip into cut-n-pasted page titles, where the - # override chars get included in list displays. - $dbkey = preg_replace( '/\xE2\x80[\x8E\x8F\xAA-\xAE]/S', '', $dbkey ); - - # Clean up whitespace - # Note: use of the /u option on preg_replace here will cause - # input with invalid UTF-8 sequences to be nullified out in PHP 5.2.x, - # conveniently disabling them. - $dbkey = preg_replace( '/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u', '_', $dbkey ); - $dbkey = trim( $dbkey, '_' ); - - if ( strpos( $dbkey, UTF8_REPLACEMENT ) !== false ) { - # Contained illegal UTF-8 sequences or forbidden Unicode chars. 
- return false; - } - - $this->mDbkeyform = $dbkey; - - # Initial colon indicates main namespace rather than specified default - # but should not create invalid {ns,title} pairs such as {0,Project:Foo} - if ( $dbkey !== '' && ':' == $dbkey[0] ) { - $this->mNamespace = NS_MAIN; - $dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing - $dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace - } - - if ( $dbkey == '' ) { - return false; - } - - # Namespace or interwiki prefix - $firstPass = true; - $prefixRegexp = "/^(.+?)_*:_*(.*)$/S"; - do { - $m = array(); - if ( preg_match( $prefixRegexp, $dbkey, $m ) ) { - $p = $m[1]; - if ( ( $ns = $wgContLang->getNsIndex( $p ) ) !== false ) { - # Ordinary namespace - $dbkey = $m[2]; - $this->mNamespace = $ns; - # For Talk:X pages, check if X has a "namespace" prefix - if ( $ns == NS_TALK && preg_match( $prefixRegexp, $dbkey, $x ) ) { - if ( $wgContLang->getNsIndex( $x[1] ) ) { - # Disallow Talk:File:x type titles... - return false; - } elseif ( Interwiki::isValidInterwiki( $x[1] ) ) { - # Disallow Talk:Interwiki:x type titles... - return false; - } - } - } elseif ( Interwiki::isValidInterwiki( $p ) ) { - if ( !$firstPass ) { - # Can't make a local interwiki link to an interwiki link. - # That's just crazy! - return false; - } - - # Interwiki link - $dbkey = $m[2]; - $this->mInterwiki = $wgContLang->lc( $p ); - - # Redundant interwiki prefix to the local wiki - foreach ( $wgLocalInterwikis as $localIW ) { - if ( 0 == strcasecmp( $this->mInterwiki, $localIW ) ) { - if ( $dbkey == '' ) { - # Can't have an empty self-link - return false; - } - $this->mInterwiki = ''; - $firstPass = false; - # Do another namespace split... 
- continue 2; - } - } - - # If there's an initial colon after the interwiki, that also - # resets the default namespace - if ( $dbkey !== '' && $dbkey[0] == ':' ) { - $this->mNamespace = NS_MAIN; - $dbkey = substr( $dbkey, 1 ); - } - } - # If there's no recognized interwiki or namespace, - # then let the colon expression be part of the title. - } - break; - } while ( true ); - - $fragment = strstr( $dbkey, '#' ); - if ( false !== $fragment ) { - $this->setFragment( $fragment ); - $dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) ); - # remove whitespace again: prevents "Foo_bar_#" - # becoming "Foo_bar_" - $dbkey = preg_replace( '/_*$/', '', $dbkey ); - } - - # Reject illegal characters. - $rxTc = self::getTitleInvalidRegex(); - if ( preg_match( $rxTc, $dbkey ) ) { - return false; - } - - # Pages with "/./" or "/../" appearing in the URLs will often be un- - # reachable due to the way web browsers deal with 'relative' URLs. - # Also, they conflict with subpage syntax. Forbid them explicitly. - if ( - strpos( $dbkey, '.' ) !== false && - ( - $dbkey === '.' || $dbkey === '..' || - strpos( $dbkey, './' ) === 0 || - strpos( $dbkey, '../' ) === 0 || - strpos( $dbkey, '/./' ) !== false || - strpos( $dbkey, '/../' ) !== false || - substr( $dbkey, -2 ) == '/.' || - substr( $dbkey, -3 ) == '/..' - ) - ) { - return false; - } - - # Magic tilde sequences? Nu-uh! - if ( strpos( $dbkey, '~~~' ) !== false ) { - return false; - } - - # Limit the size of titles to 255 bytes. This is typically the size of the - # underlying database field. We make an exception for special pages, which - # don't need to be stored in the database, and may edge over 255 bytes due - # to subpage syntax for long titles, e.g. 
[[Special:Block/Long name]] - if ( - ( $this->mNamespace != NS_SPECIAL && strlen( $dbkey ) > 255 ) - || strlen( $dbkey ) > 512 - ) { - return false; - } - - # Normally, all wiki links are forced to have an initial capital letter so [[foo]] - # and [[Foo]] point to the same place. Don't force it for interwikis, since the - # other site might be case-sensitive. - $this->mUserCaseDBKey = $dbkey; - if ( !$this->isExternal() ) { - $dbkey = self::capitalize( $dbkey, $this->mNamespace ); - } - - # Can't make a link to a namespace alone... "empty" local links can only be - # self-links with a fragment identifier. - if ( $dbkey == '' && !$this->isExternal() && $this->mNamespace != NS_MAIN ) { - return false; - } - - // Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles. - // IP names are not allowed for accounts, and can only be referring to - // edits from the IP. Given '::' abbreviations and caps/lowercaps, - // there are numerous ways to present the same IP. Having sp:contribs scan - // them all is silly and having some show the edits and others not is - // inconsistent. Same for talk/userpages. Keep them normalized instead. - if ( $this->mNamespace == NS_USER || $this->mNamespace == NS_USER_TALK ) { - $dbkey = IP::sanitizeIP( $dbkey ); - } - - // Any remaining initial :s are illegal. - if ( $dbkey !== '' && ':' == $dbkey[0] ) { + try { + // @note: splitTitleString() is a temporary hack to allow MediaWikiTitleCodec to share + // the parsing code with Title, while avoiding massive refactoring. + // @todo: get rid of secureAndSplit, refactor parsing code. + $parser = $this->getTitleParser(); + $parts = $parser->splitTitleString( $dbkey, $this->getDefaultNamespace() ); + } catch ( MalformedTitleException $ex ) { return false; } # Fill fields - $this->mDbkeyform = $dbkey; - $this->mUrlform = wfUrlencode( $dbkey ); + $this->setFragment( '#' . 
$parts['fragment'] ); + $this->mInterwiki = $parts['interwiki']; + $this->mNamespace = $parts['namespace']; + $this->mUserCaseDBKey = $parts['user_case_dbkey']; - $this->mTextform = str_replace( '_', ' ', $dbkey ); + $this->mDbkeyform = $parts['dbkey']; + $this->mUrlform = wfUrlencode( $this->mDbkeyform ); + $this->mTextform = str_replace( '_', ' ', $this->mDbkeyform ); # We already know that some pages won't be in the database! if ( $this->isExternal() || $this->mNamespace == NS_SPECIAL ) { diff --git a/includes/specials/SpecialCategories.php b/includes/specials/SpecialCategories.php index d01bfd7d3ca..b0705dc6fbd 100644 --- a/includes/specials/SpecialCategories.php +++ b/includes/specials/SpecialCategories.php @@ -26,18 +26,55 @@ */ class SpecialCategories extends SpecialPage { + /** + * @var PageLinkRenderer + */ + protected $linkRenderer = null; + function __construct() { parent::__construct( 'Categories' ); + + // Since we don't control the constructor parameters, we can't inject services that way. + // Instead, we initialize services in the execute() method, and allow them to be overridden + // using the setPageLinkRenderer() method. + } + + /** + * Initialize or override the PageLinkRenderer SpecialCategories collaborates with. + * Useful mainly for testing. + * + * @todo: the pager should also be injected, and de-coupled from the rendering logic. + * + * @param PageLinkRenderer $linkRenderer + */ + public function setPageLinkRenderer( + PageLinkRenderer $linkRenderer + ) { + $this->linkRenderer = $linkRenderer; + } + + /** + * Initialize any services we'll need (unless it has already been provided via a setter). + * This allows for dependency injection even though we don't control object creation. 
+ */ + private function initServices() { + if ( !$this->linkRenderer ) { + $lang = $this->getContext()->getLanguage(); + $titleFormatter = new MediaWikiTitleCodec( $lang, GenderCache::singleton() ); + $this->linkRenderer = new MediaWikiPageLinkRenderer( $titleFormatter ); + } } function execute( $par ) { + $this->initServices(); + $this->setHeaders(); $this->outputHeader(); $this->getOutput()->allowClickjacking(); $from = $this->getRequest()->getText( 'from', $par ); - $cap = new CategoryPager( $this->getContext(), $from ); + $cap = new CategoryPager( $this->getContext(), $from, $this->linkRenderer ); $cap->doQuery(); $this->getOutput()->addHTML( @@ -63,7 +100,19 @@ class SpecialCategories extends SpecialPage { * @ingroup SpecialPage Pager */ class CategoryPager extends AlphabeticPager { - function __construct( IContextSource $context, $from ) { + + /** + * @var PageLinkRenderer + */ + protected $linkRenderer; + + /** + * @param IContextSource $context + * @param string $from + * @param PageLinkRenderer $linkRenderer + */ + function __construct( IContextSource $context, $from, PageLinkRenderer $linkRenderer + ) { parent::__construct( $context ); $from = str_replace( ' ', '_', $from ); if ( $from !== '' ) { @@ -71,6 +120,8 @@ class CategoryPager extends AlphabeticPager { $this->setOffset( $from ); $this->setIncludeOffset( true ); } + + $this->linkRenderer = $linkRenderer; } function getQueryInfo() { @@ -120,11 +171,12 @@ class CategoryPager extends AlphabeticPager { } function formatRow( $result ) { - $title = Title::makeTitle( NS_CATEGORY, $result->cat_title ); - $titleText = Linker::link( $title, htmlspecialchars( $title->getText() ) ); - $count = $this->msg( 'nmembers' )->numParams( $result->cat_pages )->escaped(); + $title = new TitleValue( NS_CATEGORY, $result->cat_title ); + $text = $title->getText(); + $link = $this->linkRenderer->renderHtmlLink( $title, $text ); - return Xml::tags( 'li', null, $this->getLanguage()->specialList( $titleText, $count ) ) . 
"\n"; + $count = $this->msg( 'nmembers' )->numParams( $result->cat_pages )->escaped(); + return Html::rawElement( 'li', null, $this->getLanguage()->specialList( $link, $count ) ) . "\n"; } public function getStartForm( $from ) { diff --git a/includes/specials/SpecialLinkSearch.php b/includes/specials/SpecialLinkSearch.php index 0b5b8257138..59953473506 100644 --- a/includes/specials/SpecialLinkSearch.php +++ b/includes/specials/SpecialLinkSearch.php @@ -27,6 +27,12 @@ * @ingroup SpecialPage */ class LinkSearchPage extends QueryPage { + + /** + * @var PageLinkRenderer + */ + protected $linkRenderer = null; + function setParams( $params ) { $this->mQuery = $params['query']; $this->mNs = $params['namespace']; @@ -35,6 +41,36 @@ class LinkSearchPage extends QueryPage { function __construct( $name = 'LinkSearch' ) { parent::__construct( $name ); + + // Since we don't control the constructor parameters, we can't inject services that way. + // Instead, we initialize services in the execute() method, and allow them to be overridden + // using the setPageLinkRenderer() method. + } + + /** + * Initialize or override the PageLinkRenderer LinkSearchPage collaborates with. + * Useful mainly for testing. + * + * @todo: query logic and rendering logic should be split and also injected + * + * @param PageLinkRenderer $linkRenderer + */ + public function setPageLinkRenderer( + PageLinkRenderer $linkRenderer + ) { + $this->linkRenderer = $linkRenderer; + } + + /** + * Initialize any services we'll need (unless it has already been provided via a setter). + * This allows for dependency injection even though we don't control object creation. 
+ */ + private function initServices() { + if ( !$this->linkRenderer ) { + $lang = $this->getContext()->getLanguage(); + $titleFormatter = new MediaWikiTitleCodec( $lang, GenderCache::singleton() ); + $this->linkRenderer = new MediaWikiPageLinkRenderer( $titleFormatter ); + } } function isCacheable() { @@ -44,6 +80,8 @@ class LinkSearchPage extends QueryPage { function execute( $par ) { global $wgUrlProtocols, $wgMiserMode, $wgScript; + $this->initServices(); + $this->setHeaders(); $this->outputHeader(); @@ -219,9 +257,10 @@ class LinkSearchPage extends QueryPage { * @return string */ function formatResult( $skin, $result ) { - $title = Title::makeTitle( $result->namespace, $result->title ); + $title = new TitleValue( (int)$result->namespace, $result->title ); + $pageLink = $this->linkRenderer->renderHtmlLink( $title ); + $url = $result->url; - $pageLink = Linker::linkKnown( $title ); $urlLink = Linker::makeExternalLink( $url, $url ); return $this->msg( 'linksearch-line' )->rawParams( $urlLink, $pageLink )->escaped(); diff --git a/includes/title/MalformedTitleException.php b/includes/title/MalformedTitleException.php new file mode 100644 index 00000000000..4b14a195f42 --- /dev/null +++ b/includes/title/MalformedTitleException.php @@ -0,0 +1,33 @@ +formatter = $formatter; + $this->baseUrl = $baseUrl; + } + + /** + * Returns the (partial) URL for the given page (including any section identifier). + * + * @param TitleValue $page The link's target + * @param array $params any additional URL parameters. + * + * @return string + */ + public function getPageUrl( TitleValue $page, $params = array() ) { + //TODO: move the code from Linker::linkUrl here! + //The below is just a rough estimation! + + $name = $this->formatter->getPrefixedText( $page ); + $name = str_replace( ' ', '_', $name ); + $name = wfUrlencode( $name ); + + $url = $this->baseUrl . $name; + + if ( $params ) { + $separator = ( strpos( $url, '?' ) !== false ) ? '&' : '?'; + $url .= $separator . 
wfArrayToCgi( $params ); + } + + $fragment = $page->getFragment(); + if ( $fragment !== '' ) { + $url = $url . '#' . wfUrlencode( $fragment ); + } + + return $url; + } + + /** + * Returns an HTML link to the given page, using the given surface text. + * + * @param TitleValue $page The link's target + * @param string $text The link's surface text (will be derived from $page if not given). + * + * @return string + */ + public function renderHtmlLink( TitleValue $page, $text = null ) { + if ( $text === null ) { + $text = $this->formatter->getFullText( $page ); + } + + // TODO: move the logic implemented by Linker here, + // using $this->formatter and $this->baseUrl, and + // re-implement Linker to use a HtmlPageLinkRenderer. + $title = Title::newFromTitleValue( $page ); + $link = Linker::link( $title, htmlspecialchars( $text ) ); + return $link; + } + + /** + * Returns a wikitext link to the given page, using the given surface text. + * + * @param TitleValue $page The link's target + * @param string $text The link's surface text (will be derived from $page if not given). + * + * @return string + */ + public function renderWikitextLink( TitleValue $page, $text = null ) { + if ( $text === null ) { + $text = $this->formatter->getFullText( $page ); + } + + $name = $this->formatter->getFullText( $page ); + + return '[[:' . $name . '|' . wfEscapeWikiText( $text ) . 
']]'; + } +} diff --git a/includes/title/MediaWikiTitleCodec.php b/includes/title/MediaWikiTitleCodec.php new file mode 100644 index 00000000000..878f95d5e89 --- /dev/null +++ b/includes/title/MediaWikiTitleCodec.php @@ -0,0 +1,391 @@ +language = $language; + $this->genderCache = $genderCache; + $this->localInterwikis = (array)$localInterwikis; + } + + /** + * @see TitleFormatter::getNamespaceName() + * + * @param int $namespace + * @param string $text + * + * @throws InvalidArgumentException if the namespace is invalid + * @return String + */ + public function getNamespaceName( $namespace, $text ) { + if ( $this->language->needsGenderDistinction() && + MWNamespace::hasGenderDistinction( $namespace ) ) { + + //NOTE: we are assuming here that the title text is a user name! + $gender = $this->genderCache->getGenderOf( $text, __METHOD__ ); + $name = $this->language->getGenderNsText( $namespace, $gender ); + } else { + $name = $this->language->getNsText( $namespace ); + } + + if ( $name === false ) { + throw new InvalidArgumentException( 'Unknown namespace ID: ' . $namespace ); + } + + return $name; + } + + /** + * @see TitleFormatter::formatTitle() + * + * @param int|bool $namespace The namespace ID (or false, if the namespace should be ignored) + * @param string $text The page title. Should be valid. Only minimal normalization is applied. + * Underscores will be replaced. + * @param string $fragment The fragment name (may be empty). + * + * @throws InvalidArgumentException if the namespace is invalid + * @return string + */ + public function formatTitle( $namespace, $text, $fragment = '' ) { + if ( $namespace !== false ) { + $namespace = $this->getNamespaceName( $namespace, $text ); + + if ( $namespace !== '' ) { + $text = $namespace . ':' . $text; + } + } + + if ( $fragment !== '' ) { + $text = $text . '#' . $fragment; + } + + $text = str_replace( '_', ' ', $text ); + + return $text; + } + + /** + * Parses the given text and constructs a TitleValue. 
Normalization + * is applied as implemented by splitTitleString(). + * + * @param string $text the text to parse + * @param int $defaultNamespace namespace to assume per default (usually NS_MAIN) + * + * @throws MalformedTitleException + * @return TitleValue + */ + public function parseTitle( $text, $defaultNamespace ) { + // NOTE: this is an ugly kludge that allows this class to share the + // code for parsing with the old Title class. The parser code should + // be refactored to avoid this. + $parts = $this->splitTitleString( $text, $defaultNamespace ); + + // Interwiki links are not supported by TitleValue + if ( $parts['interwiki'] !== '' ) { + throw new MalformedTitleException( 'Title must not contain an interwiki prefix: ' . $text ); + } + + // Relative fragment links are not supported by TitleValue + if ( $parts['dbkey'] === '' ) { + throw new MalformedTitleException( 'Title must not be empty: ' . $text ); + } + + return new TitleValue( $parts['namespace'], $parts['dbkey'], $parts['fragment'] ); + } + + /** + * @see TitleFormatter::getText() + * + * @param TitleValue $title + * + * @return string $title->getText() + */ + public function getText( TitleValue $title ) { + return $this->formatTitle( false, $title->getText(), '' ); + } + + /** + * @see TitleFormatter::getPrefixedText() + * + * @param TitleValue $title + * + * @return string + */ + public function getPrefixedText( TitleValue $title ) { + return $this->formatTitle( $title->getNamespace(), $title->getText(), '' ); + } + + /** + * @see TitleFormatter::getFullText() + * + * @param TitleValue $title + * + * @return string + */ + public function getFullText( TitleValue $title ) { + return $this->formatTitle( $title->getNamespace(), $title->getText(), $title->getFragment() ); + } + + /** + * Normalizes and splits a title string. + * + * This function removes illegal characters, splits off the interwiki and + * namespace prefixes, sets the other forms, and canonicalizes + * everything. 
+ * + * @todo: this method is only exposed as a temporary measure to ease refactoring. + * It was copied with minimal changes from Title::secureAndSplit(). + * + * @todo: This method should be split up and an appropriate interface + * defined for use by the Title class. + * + * @param string $text + * @param int $defaultNamespace + * + * @throws MalformedTitleException If $text is not a valid title string. + * @return array A map with the fields 'interwiki', 'fragment', 'namespace', + * 'user_case_dbkey', and 'dbkey'. + */ + public function splitTitleString( $text, $defaultNamespace = NS_MAIN ) { + $dbkey = str_replace( ' ', '_', $text ); + + # Initialisation + $parts = array( + 'interwiki' => '', + 'fragment' => '', + 'namespace' => $defaultNamespace, + 'dbkey' => $dbkey, + 'user_case_dbkey' => $dbkey, + ); + + # Strip Unicode bidi override characters. + # Sometimes they slip into cut-n-pasted page titles, where the + # override chars get included in list displays. + $dbkey = preg_replace( '/\xE2\x80[\x8E\x8F\xAA-\xAE]/S', '', $dbkey ); + + # Clean up whitespace + # Note: use of the /u option on preg_replace here will cause + # input with invalid UTF-8 sequences to be nullified out in PHP 5.2.x, + # conveniently disabling them. + $dbkey = preg_replace( '/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u', '_', $dbkey ); + $dbkey = trim( $dbkey, '_' ); + + if ( strpos( $dbkey, UTF8_REPLACEMENT ) !== false ) { + # Contained illegal UTF-8 sequences or forbidden Unicode chars. + throw new MalformedTitleException( 'Bad UTF-8 sequences found in title: ' . 
$text ); + } + + $parts['dbkey'] = $dbkey; + + # Initial colon indicates main namespace rather than specified default + # but should not create invalid {ns,title} pairs such as {0,Project:Foo} + if ( $dbkey !== '' && ':' == $dbkey[0] ) { + $parts['namespace'] = NS_MAIN; + $dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing + $dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace + } + + if ( $dbkey == '' ) { + throw new MalformedTitleException( 'Empty title: ' . $text ); + } + + # Namespace or interwiki prefix + $firstPass = true; + $prefixRegexp = "/^(.+?)_*:_*(.*)$/S"; + do { + $m = array(); + if ( preg_match( $prefixRegexp, $dbkey, $m ) ) { + $p = $m[1]; + if ( ( $ns = $this->language->getNsIndex( $p ) ) !== false ) { + # Ordinary namespace + $dbkey = $m[2]; + $parts['namespace'] = $ns; + # For Talk:X pages, check if X has a "namespace" prefix + if ( $ns == NS_TALK && preg_match( $prefixRegexp, $dbkey, $x ) ) { + if ( $this->language->getNsIndex( $x[1] ) ) { + # Disallow Talk:File:x type titles... + throw new MalformedTitleException( 'Bad namespace prefix: ' . $text ); + } elseif ( Interwiki::isValidInterwiki( $x[1] ) ) { + //TODO: get rid of global state! + # Disallow Talk:Interwiki:x type titles... + throw new MalformedTitleException( 'Interwiki prefix found in title: ' . $text ); + } + } + } elseif ( Interwiki::isValidInterwiki( $p ) ) { + if ( !$firstPass ) { + //TODO: get rid of global state! + # Can't make a local interwiki link to an interwiki link. + # That's just crazy! + throw new MalformedTitleException( 'Interwiki prefix found in title: ' . 
$text ); + } + + # Interwiki link + $dbkey = $m[2]; + $parts['interwiki'] = $this->language->lc( $p ); + + # Redundant interwiki prefix to the local wiki + foreach ( $this->localInterwikis as $localIW ) { + if ( 0 == strcasecmp( $parts['interwiki'], $localIW ) ) { + if ( $dbkey == '' ) { + # Can't have an empty self-link + throw new MalformedTitleException( 'Local interwiki with empty title: ' . $text ); + } + $parts['interwiki'] = ''; + $firstPass = false; + + # Do another namespace split... + continue 2; + } + } + + # If there's an initial colon after the interwiki, that also + # resets the default namespace + if ( $dbkey !== '' && $dbkey[0] == ':' ) { + $parts['namespace'] = NS_MAIN; + $dbkey = substr( $dbkey, 1 ); + } + } + # If there's no recognized interwiki or namespace, + # then let the colon expression be part of the title. + } + break; + } while ( true ); + + $fragment = strstr( $dbkey, '#' ); + if ( false !== $fragment ) { + $parts['fragment'] = str_replace( '_', ' ', substr( $fragment, 1 ) ); + $dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) ); + # remove whitespace again: prevents "Foo_bar_#" + # becoming "Foo_bar_" + $dbkey = preg_replace( '/_*$/', '', $dbkey ); + } + + # Reject illegal characters. + $rxTc = Title::getTitleInvalidRegex(); + if ( preg_match( $rxTc, $dbkey ) ) { + throw new MalformedTitleException( 'Illegal characters found in title: ' . $text ); + } + + # Pages with "/./" or "/../" appearing in the URLs will often be un- + # reachable due to the way web browsers deal with 'relative' URLs. + # Also, they conflict with subpage syntax. Forbid them explicitly. + if ( + strpos( $dbkey, '.' ) !== false && + ( + $dbkey === '.' || $dbkey === '..' || + strpos( $dbkey, './' ) === 0 || + strpos( $dbkey, '../' ) === 0 || + strpos( $dbkey, '/./' ) !== false || + strpos( $dbkey, '/../' ) !== false || + substr( $dbkey, -2 ) == '/.' || + substr( $dbkey, -3 ) == '/..' + ) + ) { + throw new MalformedTitleException( 'Bad title: ' . 
$text ); + } + + # Magic tilde sequences? Nu-uh! + if ( strpos( $dbkey, '~~~' ) !== false ) { + throw new MalformedTitleException( 'Bad title: ' . $text ); + } + + # Limit the size of titles to 255 bytes. This is typically the size of the + # underlying database field. We make an exception for special pages, which + # don't need to be stored in the database, and may edge over 255 bytes due + # to subpage syntax for long titles, e.g. [[Special:Block/Long name]] + if ( + ( $parts['namespace'] != NS_SPECIAL && strlen( $dbkey ) > 255 ) + || strlen( $dbkey ) > 512 + ) { + throw new MalformedTitleException( 'Title too long: ' . substr( $dbkey, 0, 255 ) . '...' ); + } + + # Normally, all wiki links are forced to have an initial capital letter so [[foo]] + # and [[Foo]] point to the same place. Don't force it for interwikis, since the + # other site might be case-sensitive. + $parts['user_case_dbkey'] = $dbkey; + if ( $parts['interwiki'] === '' ) { + $dbkey = Title::capitalize( $dbkey, $parts['namespace'] ); + } + + # Can't make a link to a namespace alone... "empty" local links can only be + # self-links with a fragment identifier. + if ( $dbkey == '' && $parts['interwiki'] === '' ) { + if ( $parts['namespace'] != NS_MAIN ) { + throw new MalformedTitleException( 'Empty title: ' . $text ); + } + } + + // Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles. + // IP names are not allowed for accounts, and can only be referring to + // edits from the IP. Given '::' abbreviations and caps/lowercaps, + // there are numerous ways to present the same IP. Having sp:contribs scan + // them all is silly and having some show the edits and others not is + // inconsistent. Same for talk/userpages. Keep them normalized instead. + if ( $parts['namespace'] == NS_USER || $parts['namespace'] == NS_USER_TALK ) { + $dbkey = IP::sanitizeIP( $dbkey ); + } + + // Any remaining initial :s are illegal. 
+ if ( $dbkey !== '' && ':' == $dbkey[0] ) { + throw new MalformedTitleException( 'Title must not start with a colon: ' . $text ); + } + + # Fill fields + $parts['dbkey'] = $dbkey; + return $parts; + } + +} diff --git a/includes/title/PageLinkRenderer.php b/includes/title/PageLinkRenderer.php new file mode 100644 index 00000000000..240a9618949 --- /dev/null +++ b/includes/title/PageLinkRenderer.php @@ -0,0 +1,69 @@ +namespace = $namespace; + $this->dbkey = $dbkey; + $this->fragment = $fragment; + } + + /** + * @return int + */ + public function getNamespace() { + return $this->namespace; + } + + /** + * @return string + */ + public function getFragment() { + return $this->fragment; + } + + /** + * Returns the title's DB key, as supplied to the constructor, + * without namespace prefix or fragment. + * + * @return string + */ + public function getDBkey() { + return $this->dbkey; + } + + /** + * Returns the title in text form, + * without namespace prefix or fragment. + * + * This is computed from the DB key by replacing any underscores with spaces. + * + * @note: To get a title string that includes the namespace and/or fragment, + * use a TitleFormatter. + * + * @return string + */ + public function getText() { + return str_replace( '_', ' ', $this->getDBkey() ); + } + + /** + * Creates a new TitleValue for a different fragment of the same page. + * + * @param string $fragment The fragment name, or "" for the entire page. + * + * @return TitleValue + */ + public function createFragmentTitle( $fragment ) { + return new TitleValue( $this->namespace, $this->dbkey, $fragment ); + } + + /** + * Returns a string representation of the title, for logging. This is purely informative + * and must not be used programmatically. Use the appropriate TitleFormatter to generate + * the correct string representation for a given use. + * + * @return string + */ + public function __toString() { + $name = $this->namespace . ':' . 
$this->dbkey; + + if ( $this->fragment !== '' ) { + $name .= '#' . $this->fragment; + } + + return $name; + } +} diff --git a/tests/phpunit/includes/TitleTest.php b/tests/phpunit/includes/TitleTest.php index ba3c6c07750..809382488df 100644 --- a/tests/phpunit/includes/TitleTest.php +++ b/tests/phpunit/includes/TitleTest.php @@ -3,6 +3,8 @@ /** * @group Database * ^--- needed for language cache stuff + * + * @group Title */ class TitleTest extends MediaWikiTestCase { protected function setUp() { @@ -38,6 +40,7 @@ class TitleTest extends MediaWikiTestCase { * See also mediawiki.Title.test.js * @covers Title::secureAndSplit * @todo This method should be split into 2 separate tests each with a provider + * @note: This mainly tests MediaWikiTitleCodec::parseTitle(). */ public function testSecureAndSplit() { $this->setMwGlobals( array( @@ -62,6 +65,7 @@ class TitleTest extends MediaWikiTestCase { 'A \'B\'', '.com', '~', + '#', '"', '\'', 'Talk:Sandbox', @@ -130,6 +134,7 @@ class TitleTest extends MediaWikiTestCase { str_repeat( 'x', 256 ), // Namespace prefix without actual title 'Talk:', + 'Talk:#', 'Category: ', 'Category: #bar', // interwiki prefix @@ -512,4 +517,74 @@ class TitleTest extends MediaWikiTestCase { array( 'User:John_Doe/subOne', 'subOne' ), ); } + + public function provideNewFromTitleValue() { + return array( + array( new TitleValue( NS_MAIN, 'Foo' ) ), + array( new TitleValue( NS_MAIN, 'Foo', 'bar' ) ), + array( new TitleValue( NS_USER, 'Hansi_Maier' ) ), + ); + } + + /** + * @dataProvider provideNewFromTitleValue + */ + public function testNewFromTitleValue( TitleValue $value ) { + $title = Title::newFromTitleValue( $value ); + + $dbkey = str_replace( ' ', '_', $value->getText() ); + $this->assertEquals( $dbkey, $title->getDBkey() ); + $this->assertEquals( $value->getNamespace(), $title->getNamespace() ); + $this->assertEquals( $value->getFragment(), $title->getFragment() ); + } + + public function provideGetTitleValue() { + return array( + array( 
'Foo' ), + array( 'Foo#bar' ), + array( 'User:Hansi_Maier' ), + ); + } + + /** + * @dataProvider provideGetTitleValue + */ + public function testGetTitleValue( $text ) { + $title = Title::newFromText( $text ); + $value = $title->getTitleValue(); + + $dbkey = str_replace( ' ', '_', $value->getText() ); + $this->assertEquals( $title->getDBkey(), $dbkey ); + $this->assertEquals( $title->getNamespace(), $value->getNamespace() ); + $this->assertEquals( $title->getFragment(), $value->getFragment() ); + } + + public function provideGetFragment() { + return array( + array( 'Foo', '' ), + array( 'Foo#bar', 'bar' ), + array( 'Foo#bär', 'bär' ), + + // Inner whitespace is normalized + array( 'Foo#bar_bar', 'bar bar' ), + array( 'Foo#bar bar', 'bar bar' ), + array( 'Foo#bar bar', 'bar bar' ), + + // Leading whitespace is kept, trailing whitespace is trimmed. + // XXX: Is this really what we want? + array( 'Foo#_bar_bar_', ' bar bar' ), + array( 'Foo# bar bar ', ' bar bar' ), + ); + } + + /** + * @dataProvider provideGetFragment + * + * @param $full + * @param $fragment + */ + public function testGetFragment( $full, $fragment ) { + $title = Title::newFromText( $full ); + $this->assertEquals( $fragment, $title->getFragment() ); + } } diff --git a/tests/phpunit/includes/title/MediaWikiPageLinkRendererTest.php b/tests/phpunit/includes/title/MediaWikiPageLinkRendererTest.php new file mode 100644 index 00000000000..73d7ff9572a --- /dev/null +++ b/tests/phpunit/includes/title/MediaWikiPageLinkRendererTest.php @@ -0,0 +1,160 @@ +getMockBuilder( 'GenderCache' ) + ->disableOriginalConstructor() + ->getMock(); + + $genderCache->expects( $this->any() ) + ->method( 'getGenderOf' ) + ->will( $this->returnValue( 'female' ) ); + + return $genderCache; + } + + public function provideGetPageUrl() { + return array( + array( + new TitleValue( NS_MAIN, 'Foo_Bar' ), + array(), + '/Foo_Bar' + ), + array( + new TitleValue( NS_USER, 'Hansi_Maier', 'stuff' ), + array( 'foo' => 'bar' ), + 
'/User:Hansi_Maier?foo=bar#stuff' + ), + ); + } + + /** + * @dataProvider provideGetPageUrl + */ + public function testGetPageUrl( TitleValue $title, $params, $url ) { + // NOTE: as of Feb 2014, MediaWikiPageLinkRenderer *ignores* the + // WikitextTitleFormatter we pass here, and relies on the Linker + // class for generating the link! This may break the test e.g. + // if Linker uses a different language for the namespace names. + + $lang = Language::factory( 'en' ); + + $formatter = new MediaWikiTitleCodec( $lang, $this->getGenderCache() ); + $renderer = new MediaWikiPageLinkRenderer( $formatter, '/' ); + $actual = $renderer->getPageUrl( $title, $params ); + + $this->assertEquals( $url, $actual ); + } + + public function provideRenderHtmlLink() { + return array( + array( + new TitleValue( NS_MAIN, 'Foo_Bar' ), + 'Foo Bar', + '!Foo Bar!' + ), + array( + //NOTE: Linker doesn't include fragments in "broken" links + //NOTE: once this no longer uses Linker, we will get "2" instead of "User" for the namespace. + new TitleValue( NS_USER, 'Hansi_Maier', 'stuff' ), + 'Hansi Maier\'s Stuff', + '!renderWikitextLink( $title, $text ); + + $this->assertEquals( $expected, $actual ); + } +} diff --git a/tests/phpunit/includes/title/MediaWikiTitleCodecTest.php b/tests/phpunit/includes/title/MediaWikiTitleCodecTest.php new file mode 100644 index 00000000000..e4f9396b6f3 --- /dev/null +++ b/tests/phpunit/includes/title/MediaWikiTitleCodecTest.php @@ -0,0 +1,372 @@ +setMwGlobals( array( + 'wgLanguageCode' => 'en', + 'wgContLang' => Language::factory( 'en' ), + // User language + 'wgLang' => Language::factory( 'en' ), + 'wgAllowUserJs' => false, + 'wgDefaultLanguageVariant' => false, + 'wgLocalInterwikis' => array( 'localtestiw' ), + + // NOTE: this is why global state is evil. + // TODO: refactor access to the interwiki codes so it can be injected. 
+ 'wgHooks' => array( + 'InterwikiLoadPrefix' => array( + function ( $prefix, &$data ) { + if ( $prefix === 'localtestiw' ) { + $data = array( 'iw_url' => 'localtestiw' ); + } elseif ( $prefix === 'remotetestiw' ) { + $data = array( 'iw_url' => 'remotetestiw' ); + } + return false; + } + ) + ) + ) ); + } + + /** + * Returns a mock GenderCache that will consider a user "female" if the + * first part of the user name ends with "a". + * + * @return GenderCache + */ + private function getGenderCache() { + $genderCache = $this->getMockBuilder( 'GenderCache' ) + ->disableOriginalConstructor() + ->getMock(); + + $genderCache->expects( $this->any() ) + ->method( 'getGenderOf' ) + ->will( $this->returnCallback( function( $userName ) { + return preg_match( '/^[^- _]+a( |_|$)/u', $userName ) ? 'female' : 'male'; + } ) ); + + return $genderCache; + } + + protected function makeCodec( $lang ) { + $gender = $this->getGenderCache(); + $lang = Language::factory( $lang ); + return new MediaWikiTitleCodec( $lang, $gender ); + } + + public function provideFormat() { + return array( + array( NS_MAIN, 'Foo_Bar', '', 'en', 'Foo Bar' ), + array( NS_USER, 'Hansi_Maier', 'stuff_and_so_on', 'en', 'User:Hansi Maier#stuff and so on' ), + array( false, 'Hansi_Maier', '', 'en', 'Hansi Maier' ), + array( NS_USER_TALK, 'hansi__maier', '', 'en', 'User talk:hansi maier', 'User talk:Hansi maier' ), + + // getGenderCache() provides a mock that considers first + // names ending in "a" to be female. 
+ array( NS_USER, 'Lisa_Müller', '', 'de', 'Benutzerin:Lisa Müller' ), + ); + } + + /** + * @dataProvider provideFormat + */ + public function testFormat( $namespace, $text, $fragment, $lang, $expected, $normalized = null ) { + if ( $normalized === null ) { + $normalized = $expected; + } + + $codec = $this->makeCodec( $lang ); + $actual = $codec->formatTitle( $namespace, $text, $fragment ); + + $this->assertEquals( $expected, $actual, 'formatted' ); + + // test round trip + $parsed = $codec->parseTitle( $actual, NS_MAIN ); + $actual2 = $codec->formatTitle( $parsed->getNamespace(), $parsed->getText(), $parsed->getFragment() ); + + $this->assertEquals( $normalized, $actual2, 'normalized after round trip' ); + } + + public function provideGetText() { + return array( + array( NS_MAIN, 'Foo_Bar', '', 'en', 'Foo Bar' ), + array( NS_USER, 'Hansi_Maier', 'stuff_and_so_on', 'en', 'Hansi Maier' ), + ); + } + + /** + * @dataProvider provideGetText + */ + public function testGetText( $namespace, $dbkey, $fragment, $lang, $expected ) { + $codec = $this->makeCodec( $lang ); + $title = new TitleValue( $namespace, $dbkey, $fragment ); + + $actual = $codec->getText( $title ); + + $this->assertEquals( $expected, $actual ); + } + + public function provideGetPrefixedText() { + return array( + array( NS_MAIN, 'Foo_Bar', '', 'en', 'Foo Bar' ), + array( NS_USER, 'Hansi_Maier', 'stuff_and_so_on', 'en', 'User:Hansi Maier' ), + + // No capitalization or normalization is applied while formatting! + array( NS_USER_TALK, 'hansi__maier', '', 'en', 'User talk:hansi maier' ), + + // getGenderCache() provides a mock that considers first + // names ending in "a" to be female. 
+ array( NS_USER, 'Lisa_Müller', '', 'de', 'Benutzerin:Lisa Müller' ), + ); + } + + /** + * @dataProvider provideGetPrefixedText + */ + public function testGetPrefixedText( $namespace, $dbkey, $fragment, $lang, $expected ) { + $codec = $this->makeCodec( $lang ); + $title = new TitleValue( $namespace, $dbkey, $fragment ); + + $actual = $codec->getPrefixedText( $title ); + + $this->assertEquals( $expected, $actual ); + } + + public function provideGetFullText() { + return array( + array( NS_MAIN, 'Foo_Bar', '', 'en', 'Foo Bar' ), + array( NS_USER, 'Hansi_Maier', 'stuff_and_so_on', 'en', 'User:Hansi Maier#stuff and so on' ), + + // No capitalization or normalization is applied while formatting! + array( NS_USER_TALK, 'hansi__maier', '', 'en', 'User talk:hansi maier' ), + ); + } + + /** + * @dataProvider provideGetFullText + */ + public function testGetFullText( $namespace, $dbkey, $fragment, $lang, $expected ) { + $codec = $this->makeCodec( $lang ); + $title = new TitleValue( $namespace, $dbkey, $fragment ); + + $actual = $codec->getFullText( $title ); + + $this->assertEquals( $expected, $actual ); + } + + public function provideParseTitle() { + //TODO: test capitalization and trimming + //TODO: test unicode normalization + + return array( + array( ' : Hansi_Maier _ ', NS_MAIN, 'en', + new TitleValue( NS_MAIN, 'Hansi_Maier', '' ) ), + array( 'User:::1', NS_MAIN, 'de', + new TitleValue( NS_USER, '0:0:0:0:0:0:0:1', '' ) ), + array( ' lisa Müller', NS_USER, 'de', + new TitleValue( NS_USER, 'Lisa_Müller', '' ) ), + array( 'benutzerin:lisa Müller#stuff', NS_MAIN, 'de', + new TitleValue( NS_USER, 'Lisa_Müller', 'stuff' ) ), + + array( ':Category:Quux', NS_MAIN, 'en', + new TitleValue( NS_CATEGORY, 'Quux', '' ) ), + array( 'Category:Quux', NS_MAIN, 'en', + new TitleValue( NS_CATEGORY, 'Quux', '' ) ), + array( 'Category:Quux', NS_CATEGORY, 'en', + new TitleValue( NS_CATEGORY, 'Quux', '' ) ), + array( 'Quux', NS_CATEGORY, 'en', + new TitleValue( NS_CATEGORY, 'Quux', '' ) ), + 
array( ':Quux', NS_CATEGORY, 'en', + new TitleValue( NS_MAIN, 'Quux', '' ) ), + + // getGenderCache() provides a mock that considers first + // names ending in "a" to be female. + + array( 'a b c', NS_MAIN, 'en', + new TitleValue( NS_MAIN, 'A_b_c' ) ), + array( ' a b c ', NS_MAIN, 'en', + new TitleValue( NS_MAIN, 'A_b_c' ) ), + array( ' _ Foo __ Bar_ _', NS_MAIN, 'en', + new TitleValue( NS_MAIN, 'Foo_Bar' ) ), + + //NOTE: cases copied from TitleTest::testSecureAndSplit. Keep in sync. + array( 'Sandbox', NS_MAIN, 'en', ), + array( 'A "B"', NS_MAIN, 'en', ), + array( 'A \'B\'', NS_MAIN, 'en', ), + array( '.com', NS_MAIN, 'en', ), + array( '~', NS_MAIN, 'en', ), + array( '"', NS_MAIN, 'en', ), + array( '\'', NS_MAIN, 'en', ), + + array( 'Talk:Sandbox', NS_MAIN, 'en', + new TitleValue( NS_TALK, 'Sandbox' ) ), + array( 'Talk:Foo:Sandbox', NS_MAIN, 'en', + new TitleValue( NS_TALK, 'Foo:Sandbox' ) ), + array( 'File:Example.svg', NS_MAIN, 'en', + new TitleValue( NS_FILE, 'Example.svg' ) ), + array( 'File_talk:Example.svg', NS_MAIN, 'en', + new TitleValue( NS_FILE_TALK, 'Example.svg' ) ), + array( 'Foo/.../Sandbox', NS_MAIN, 'en', + 'Foo/.../Sandbox' ), + array( 'Sandbox/...', NS_MAIN, 'en', + 'Sandbox/...' ), + array( 'A~~', NS_MAIN, 'en', + 'A~~' ), + // Length is 256 total, but only title part matters + array( 'Category:' . str_repeat( 'x', 248 ), NS_MAIN, 'en', + new TitleValue( NS_CATEGORY, + 'X' . str_repeat( 'x', 247 ) ) ), + array( str_repeat( 'x', 252 ), NS_MAIN, 'en', + 'X' . 
str_repeat( 'x', 251 ) ) + ); + } + + /** + * @dataProvider provideParseTitle + */ + public function testParseTitle( $text, $ns, $lang, $title = null ) { + if ( $title === null ) { + $title = str_replace( ' ', '_', trim( $text ) ); + } + + if ( is_string( $title ) ) { + $title = new TitleValue( NS_MAIN, $title, '' ); + } + + $codec = $this->makeCodec( $lang ); + $actual = $codec->parseTitle( $text, $ns ); + + $this->assertEquals( $title, $actual ); + } + + public function provideParseTitle_invalid() { + //TODO: test unicode errors + + return array( + array( '#' ), + array( '::' ), + array( '::xx' ), + array( '::##' ), + array( ' :: x' ), + + array( 'Talk:File:Foo.jpg' ), + array( 'Talk:localtestiw:Foo' ), + array( 'remotetestiw:Foo' ), + array( '::1' ), // only valid in user namespace + array( 'User::x' ), // leading ":" in a user name is only valid for IPv6 addresses + + //NOTE: cases copied from TitleTest::testSecureAndSplit. Keep in sync. + array( '' ), + array( ':' ), + array( '__ __' ), + array( ' __ ' ), + // Bad characters forbidden regardless of wgLegalTitleChars + array( 'A [ B' ), + array( 'A ] B' ), + array( 'A { B' ), + array( 'A } B' ), + array( 'A < B' ), + array( 'A > B' ), + array( 'A | B' ), + // URL encoding + array( 'A%20B' ), + array( 'A%23B' ), + array( 'A%2523B' ), + // XML/HTML character entity references + // Note: Commented out because they are not marked invalid by the PHP test as + // Title::newFromText runs Sanitizer::decodeCharReferencesAndNormalize first. + //array( 'A é B' ), + //array( 'A é B' ), + //array( 'A é B' ), + // Subject of NS_TALK does not roundtrip to NS_MAIN + array( 'Talk:File:Example.svg' ), + // Directory navigation + array( '.' ), + array( '..' ), + array( './Sandbox' ), + array( '../Sandbox' ), + array( 'Foo/./Sandbox' ), + array( 'Foo/../Sandbox' ), + array( 'Sandbox/.' ), + array( 'Sandbox/..' 
), + // Tilde + array( 'A ~~~ Name' ), + array( 'A ~~~~ Signature' ), + array( 'A ~~~~~ Timestamp' ), + array( str_repeat( 'x', 256 ) ), + // Namespace prefix without actual title + array( 'Talk:' ), + array( 'Category: ' ), + array( 'Category: #bar' ) + ); + } + + /** + * @dataProvider provideParseTitle_invalid + */ + public function testParseTitle_invalid( $text ) { + $this->setExpectedException( 'MalformedTitleException' ); + + $codec = $this->makeCodec( 'en' ); + $codec->parseTitle( $text, NS_MAIN ); + } + + public function provideGetNamespaceName() { + return array( + array( NS_MAIN, 'Foo', 'en', '' ), + array( NS_USER, 'Foo', 'en', 'User' ), + array( NS_USER, 'Hansi Maier', 'de', 'Benutzer' ), + + // getGenderCache() provides a mock that considers first + // names ending in "a" to be female. + array( NS_USER, 'Lisa Müller', 'de', 'Benutzerin' ), + ); + } + + /** + * @dataProvider provideGetNamespaceName + * + * @param $namespace + * @param $text + * @param $lang + * @param $expected + * + * @internal param \TitleValue $title + */ + public function testGetNamespaceName( $namespace, $text, $lang, $expected ) { + $codec = $this->makeCodec( $lang ); + $name = $codec->getNamespaceName( $namespace, $text ); + + $this->assertEquals( $expected, $name ); + } +} diff --git a/tests/phpunit/includes/title/TitleValueTest.php b/tests/phpunit/includes/title/TitleValueTest.php new file mode 100644 index 00000000000..3ba008d6c4c --- /dev/null +++ b/tests/phpunit/includes/title/TitleValueTest.php @@ -0,0 +1,100 @@ +assertEquals( NS_USER, $title->getNamespace() ); + $this->assertEquals( 'TestThis', $title->getText() ); + $this->assertEquals( 'stuff', $title->getFragment() ); + } + + public function badConstructorProvider() { + return array( + array( 'foo', 'title', 'fragment' ), + array( null, 'title', 'fragment' ), + array( 2.3, 'title', 'fragment' ), + + array( NS_MAIN, 5, 'fragment' ), + array( NS_MAIN, null, 'fragment' ), + array( NS_MAIN, '', 'fragment' ), + array( 
NS_MAIN, 'foo bar', '' ), + array( NS_MAIN, 'bar_', '' ), + array( NS_MAIN, '_foo', '' ), + array( NS_MAIN, ' eek ', '' ), + + array( NS_MAIN, 'title', 5 ), + array( NS_MAIN, 'title', null ), + array( NS_MAIN, 'title', array() ), + ); + } + + /** + * @dataProvider badConstructorProvider + */ + public function testConstructionErrors( $ns, $text, $fragment ) { + $this->setExpectedException( 'InvalidArgumentException' ); + new TitleValue( $ns, $text, $fragment ); + } + + public function fragmentTitleProvider() { + return array( + array( new TitleValue( NS_MAIN, 'Test' ), 'foo' ), + array( new TitleValue( NS_TALK, 'Test', 'foo' ), '' ), + array( new TitleValue( NS_CATEGORY, 'Test', 'foo' ), 'bar' ), + ); + } + + /** + * @dataProvider fragmentTitleProvider + */ + public function testCreateFragmentTitle( TitleValue $title, $fragment ) { + $fragmentTitle = $title->createFragmentTitle( $fragment ); + + $this->assertEquals( $title->getNamespace(), $fragmentTitle->getNamespace() ); + $this->assertEquals( $title->getText(), $fragmentTitle->getText() ); + $this->assertEquals( $fragment, $fragmentTitle->getFragment() ); + } + + public function getTextProvider() { + return array( + array( 'Foo', 'Foo' ), + array( 'Foo_Bar', 'Foo Bar' ), + ); + } + + /** + * @dataProvider getTextProvider + */ + public function testGetText( $dbkey, $text ) { + $title = new TitleValue( NS_MAIN, $dbkey ); + + $this->assertEquals( $text, $title->getText() ); + } +}