2005-10-22 10:40:49 +00:00
|
|
|
<?php
|
|
|
|
|
/**
 * Creates a sitemap for the site.
 *
 * Copyright © 2005, Ævar Arnfjörð Bjarmason, Jens Frank <jeluf@gmx.de> and
 * Brion Vibber <brion@pobox.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 * @see http://www.sitemaps.org/
 * @see http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd
 */
|
|
|
|
|
|
2013-05-17 00:16:59 +00:00
|
|
|
require_once __DIR__ . '/Maintenance.php';
|
2009-08-02 19:35:17 +00:00
|
|
|
|
2012-07-17 05:40:40 +00:00
|
|
|
/**
 * Maintenance script that generates a sitemap for the site.
 *
 * For every namespace it writes one or more sitemap files (optionally
 * gzip-compressed) and records each of them in a single sitemap index file.
 *
 * @ingroup Maintenance
 */
class GenerateSitemap extends Maintenance {
	/**
	 * Key in $priorities holding the fallback priority for subject
	 * namespaces that have no explicit entry (see guessPriority()).
	 */
	const GS_MAIN = -2;

	/**
	 * Key in $priorities holding the fallback priority for talk
	 * namespaces that have no explicit entry (see guessPriority()).
	 */
	const GS_TALK = -1;

	/**
	 * The maximum amount of urls in a sitemap file
	 *
	 * @link http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd
	 *
	 * @var int
	 */
	public $url_limit;

	/**
	 * The maximum size of a sitemap file, in bytes
	 *
	 * @link http://www.sitemaps.org/faq.php#faq_sitemap_size
	 *
	 * @var int
	 */
	public $size_limit;

	/**
	 * The path to prepend to the filename
	 *
	 * @var string
	 */
	public $fspath;

	/**
	 * The URL path to prepend to filenames in the index; should resolve to the same directory as $fspath
	 *
	 * @var string
	 */
	public $urlpath;

	/**
	 * Whether or not to use compression
	 *
	 * @var bool
	 */
	public $compress;

	/**
	 * Whether or not to leave redirect pages out of the sitemap
	 *
	 * @var bool
	 */
	public $skipRedirects;

	/**
	 * Byte lengths computed by generateLimit() for the current namespace:
	 * [0] the sitemap file header, [1] an entry for the longest possible
	 * title, [2] the sitemap file footer.
	 *
	 * @var array
	 */
	public $limit = array();

	/**
	 * Key => value entries of namespaces and their priorities
	 *
	 * @var array
	 */
	public $priorities = array();

	/**
	 * A one-dimensional array of namespaces in the wiki
	 *
	 * @var array
	 */
	public $namespaces = array();

	/**
	 * When this sitemap batch was generated (ISO 8601)
	 *
	 * @var string
	 */
	public $timestamp;

	/**
	 * The database connection returned by wfGetDB( DB_SLAVE )
	 *
	 * @var object
	 */
	public $dbr;

	/**
	 * A resource pointing to the sitemap index file
	 *
	 * @var resource
	 */
	public $findex;

	/**
	 * A resource pointing to the sitemap file currently being written,
	 * or false while no sitemap file is open
	 *
	 * @var resource|bool
	 */
	public $file;

	/**
	 * Identifier to use in filenames, default $wgDBname
	 *
	 * @var string
	 */
	private $identifier;

	/**
	 * Constructor: registers the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Creates a sitemap for the site";
		$this->addOption( 'fspath', 'The file system path to save to, e.g. /tmp/sitemap; defaults to current directory', false, true );
		$this->addOption( 'urlpath', 'The URL path corresponding to --fspath, prepended to filenames in the index; defaults to an empty string', false, true );
		$this->addOption( 'compress', 'Compress the sitemap files, can take value yes|no, default yes', false, true );
		$this->addOption( 'skip-redirects', 'Do not include redirecting articles in the sitemap' );
		$this->addOption( 'identifier', 'What site identifier to use for the wiki, defaults to $wgDBname', false, true );
	}

	/**
	 * Execute: read the options, open the index file and run the main loop.
	 */
	public function execute() {
		$this->setNamespacePriorities();
		$this->url_limit = 50000;
		$this->size_limit = pow( 2, 20 ) * 10;
		$this->fspath = self::init_path( $this->getOption( 'fspath', getcwd() ) );
		$this->urlpath = $this->getOption( 'urlpath', "" );
		// A non-empty URL path must end in a slash so filenames can simply be appended.
		if ( $this->urlpath !== "" && substr( $this->urlpath, -1 ) !== '/' ) {
			$this->urlpath .= '/';
		}
		$this->identifier = $this->getOption( 'identifier', wfWikiID() );
		$this->compress = $this->getOption( 'compress', 'yes' ) !== 'no';
		$this->skipRedirects = $this->getOption( 'skip-redirects', false ) !== false;
		$this->dbr = wfGetDB( DB_SLAVE );
		$this->generateNamespaces();
		$this->timestamp = wfTimestamp( TS_ISO_8601, wfTimestampNow() );

		$indexPath = "{$this->fspath}sitemap-index-{$this->identifier}.xml";
		$this->findex = fopen( $indexPath, 'wb' );
		// Fail the same way open() does instead of passing false to fwrite() later on.
		if ( $this->findex === false ) {
			wfDebugDieBacktrace( __METHOD__ . " error opening file $indexPath with flags wb. Check permissions?" );
		}

		$this->main();
	}

	/**
	 * Fill $this->priorities with the built-in defaults, then apply the
	 * overrides from $wgSitemapNamespacesPriorities (clamped to 0.0 - 1.0).
	 */
	private function setNamespacePriorities() {
		global $wgSitemapNamespacesPriorities;

		// Custom main namespaces
		$this->priorities[self::GS_MAIN] = '0.5';
		// Custom talk namespaces
		$this->priorities[self::GS_TALK] = '0.1';
		// MediaWiki standard namespaces
		$this->priorities[NS_MAIN] = '1.0';
		$this->priorities[NS_TALK] = '0.1';
		$this->priorities[NS_USER] = '0.5';
		$this->priorities[NS_USER_TALK] = '0.1';
		$this->priorities[NS_PROJECT] = '0.5';
		$this->priorities[NS_PROJECT_TALK] = '0.1';
		$this->priorities[NS_FILE] = '0.5';
		$this->priorities[NS_FILE_TALK] = '0.1';
		$this->priorities[NS_MEDIAWIKI] = '0.0';
		$this->priorities[NS_MEDIAWIKI_TALK] = '0.1';
		$this->priorities[NS_TEMPLATE] = '0.0';
		$this->priorities[NS_TEMPLATE_TALK] = '0.1';
		$this->priorities[NS_HELP] = '0.5';
		$this->priorities[NS_HELP_TALK] = '0.1';
		$this->priorities[NS_CATEGORY] = '0.5';
		$this->priorities[NS_CATEGORY_TALK] = '0.1';

		// Custom priorities. Only an array can be iterated; anything else
		// (false, null, a stray scalar) means "no overrides".
		if ( is_array( $wgSitemapNamespacesPriorities ) ) {
			foreach ( $wgSitemapNamespacesPriorities as $namespace => $priority ) {
				$float = floatval( $priority );
				if ( $float > 1.0 ) {
					$priority = '1.0';
				} elseif ( $float < 0.0 ) {
					$priority = '0.0';
				}
				$this->priorities[$namespace] = $priority;
			}
		}
	}

	/**
	 * Create directory if it does not exist and return pathname with a trailing slash
	 *
	 * Terminates the script if the directory can neither be created nor resolved.
	 *
	 * @param $fspath string
	 * @return null|string null when $fspath is not set
	 */
	private static function init_path( $fspath ) {
		if ( !isset( $fspath ) ) {
			return null;
		}
		# Create directory if needed
		if ( $fspath && !is_dir( $fspath ) ) {
			wfMkdirParents( $fspath, null, __METHOD__ ) or die( "Can not create directory $fspath.\n" );
		}

		$resolved = realpath( $fspath );
		// realpath() returns false on failure; concatenating that would yield just
		// the directory separator and silently redirect all output to the root.
		if ( $resolved === false ) {
			die( "Can not resolve directory $fspath.\n" );
		}

		return $resolved . DIRECTORY_SEPARATOR;
	}

	/**
	 * Generate a one-dimensional array of existing namespaces
	 *
	 * Uses $wgSitemapNamespaces verbatim when it is an array; otherwise
	 * collects the distinct page_namespace values from the page table.
	 */
	public function generateNamespaces() {
		// Only generate for specific namespaces if $wgSitemapNamespaces is an array.
		global $wgSitemapNamespaces;
		if ( is_array( $wgSitemapNamespaces ) ) {
			$this->namespaces = $wgSitemapNamespaces;
			return;
		}

		$res = $this->dbr->select( 'page',
			array( 'page_namespace' ),
			array(),
			__METHOD__,
			array(
				'GROUP BY' => 'page_namespace',
				'ORDER BY' => 'page_namespace',
			)
		);

		foreach ( $res as $row ) {
			$this->namespaces[] = $row->page_namespace;
		}
	}

	/**
	 * Get the priority of a given namespace
	 *
	 * @param $namespace Integer: the namespace to get the priority for
	 * @return String
	 */
	public function priority( $namespace ) {
		return isset( $this->priorities[$namespace] ) ? $this->priorities[$namespace] : $this->guessPriority( $namespace );
	}

	/**
	 * If the namespace isn't listed on the priority list return the
	 * default priority for the namespace, varies depending on whether it's
	 * a talkpage or not.
	 *
	 * @param $namespace Integer: the namespace to get the priority for
	 * @return String
	 */
	public function guessPriority( $namespace ) {
		return MWNamespace::isSubject( $namespace ) ? $this->priorities[self::GS_MAIN] : $this->priorities[self::GS_TALK];
	}

	/**
	 * Return a database result of all the pages in a given namespace
	 *
	 * @param $namespace Integer: limit the query to this namespace
	 * @return Resource
	 */
	public function getPageRes( $namespace ) {
		return $this->dbr->select( 'page',
			array(
				'page_namespace',
				'page_title',
				'page_touched',
				'page_is_redirect'
			),
			array( 'page_namespace' => $namespace ),
			__METHOD__
		);
	}

	/**
	 * Main loop
	 *
	 * Walks every namespace, writes its pages (and their language variants)
	 * into sitemap files and adds each file to the index. A new sitemap file
	 * is started whenever adding the next page would push the current file
	 * past $url_limit URLs or $size_limit bytes.
	 */
	public function main() {
		global $wgContLang;

		fwrite( $this->findex, $this->openIndex() );

		// The variant list does not depend on the page, so look it up once.
		$defaultVariant = $wgContLang->getCode();
		$variants = $wgContLang->hasVariants() ? $wgContLang->getVariants() : array();

		foreach ( $this->namespaces as $namespace ) {
			$res = $this->getPageRes( $namespace );
			$this->file = false;
			$this->generateLimit( $namespace );
			$length = $this->limit[0]; // bytes in the current file (header included)
			$urlCount = 0; // <url> entries in the current file
			$smcount = 0; // sitemap files started for this namespace

			$fns = $wgContLang->getFormattedNsText( $namespace );
			$this->output( "$namespace ($fns)\n" );
			$priority = $this->priority( $namespace );
			$skippedRedirects = 0; // Number of redirects skipped for that namespace
			foreach ( $res as $row ) {
				if ( $this->skipRedirects && $row->page_is_redirect ) {
					$skippedRedirects++;
					continue;
				}

				$title = Title::makeTitle( $row->page_namespace, $row->page_title );
				$date = wfTimestamp( TS_ISO_8601, $row->page_touched );

				// Build every entry for this page up front (the page itself plus
				// one per non-default language variant) so that both limits are
				// checked against what is really about to be written.
				$entries = array( $this->fileEntry( $title->getCanonicalURL(), $date, $priority ) );
				foreach ( $variants as $vCode ) {
					if ( $vCode == $defaultVariant ) {
						continue; // we don't want default variant
					}
					$entries[] = $this->fileEntry( $title->getCanonicalURL( '', $vCode ), $date, $priority );
				}
				$chunk = implode( '', $entries );
				$chunkLength = strlen( $chunk );
				$chunkCount = count( $entries );

				if ( $this->file === false
					|| $urlCount + $chunkCount > $this->url_limit
					|| $length + $chunkLength + $this->limit[2] > $this->size_limit
				) {
					if ( $this->file !== false ) {
						$this->write( $this->file, $this->closeFile() );
						$this->close( $this->file );
					}
					$filename = $this->sitemapFilename( $namespace, $smcount++ );
					$this->file = $this->open( $this->fspath . $filename, 'wb' );
					$this->write( $this->file, $this->openFile() );
					fwrite( $this->findex, $this->indexEntry( $filename ) );
					$this->output( "\t$this->fspath$filename\n" );
					$length = $this->limit[0];
					$urlCount = 0;
				}

				$length += $chunkLength;
				$urlCount += $chunkCount;
				$this->write( $this->file, $chunk );
			}

			if ( $this->skipRedirects && $skippedRedirects > 0 ) {
				$this->output( "  skipped $skippedRedirects redirect(s)\n" );
			}

			if ( $this->file ) {
				$this->write( $this->file, $this->closeFile() );
				$this->close( $this->file );
			}
		}
		fwrite( $this->findex, $this->closeIndex() );
		fclose( $this->findex );
	}

	/**
	 * gzopen() / fopen() wrapper
	 *
	 * Uses gzopen() when compression is enabled and aborts via
	 * wfDebugDieBacktrace() when the file cannot be opened.
	 *
	 * @param $file String: path of the file to open
	 * @param $flags String: mode passed on to gzopen() / fopen()
	 * @return Resource
	 */
	public function open( $file, $flags ) {
		$resource = $this->compress ? gzopen( $file, $flags ) : fopen( $file, $flags );
		if ( $resource === false ) {
			wfDebugDieBacktrace( __METHOD__ . " error opening file $file with flags $flags. Check permissions?" );
		}
		return $resource;
	}

	/**
	 * gzwrite() / fwrite() wrapper
	 *
	 * @param $handle Resource: handle returned by open()
	 * @param $str String: data to write
	 */
	public function write( &$handle, $str ) {
		if ( $handle === true || $handle === false ) {
			wfDebugDieBacktrace( __METHOD__ . " was passed a boolean as a file handle.\n" );
		}
		if ( $this->compress ) {
			gzwrite( $handle, $str );
		} else {
			fwrite( $handle, $str );
		}
	}

	/**
	 * gzclose() / fclose() wrapper
	 *
	 * @param $handle Resource: handle returned by open()
	 */
	public function close( &$handle ) {
		if ( $this->compress ) {
			gzclose( $handle );
		} else {
			fclose( $handle );
		}
	}

	/**
	 * Get a sitemap filename
	 *
	 * @param $namespace Integer: the namespace
	 * @param $count Integer: the count
	 * @return String
	 */
	public function sitemapFilename( $namespace, $count ) {
		$ext = $this->compress ? '.gz' : '';
		return "sitemap-{$this->identifier}-NS_$namespace-$count.xml$ext";
	}

	/**
	 * Return the XML required to open an XML file
	 *
	 * @return string
	 */
	public function xmlHead() {
		return '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
	}

	/**
	 * Return the XML schema being used
	 *
	 * @return String
	 */
	public function xmlSchema() {
		return 'http://www.sitemaps.org/schemas/sitemap/0.9';
	}

	/**
	 * Return the XML required to open a sitemap index file
	 *
	 * @return String
	 */
	public function openIndex() {
		return $this->xmlHead() . '<sitemapindex xmlns="' . $this->xmlSchema() . '">' . "\n";
	}

	/**
	 * Return the XML for a single sitemap indexfile entry
	 *
	 * @param $filename String: the filename of the sitemap file
	 * @return String
	 */
	public function indexEntry( $filename ) {
		return
			"\t<sitemap>\n" .
			// Both the URL path and the identifier inside the filename come from
			// the command line, so escape them just like fileEntry() escapes URLs.
			"\t\t<loc>" . htmlspecialchars( $this->urlpath . $filename ) . "</loc>\n" .
			"\t\t<lastmod>{$this->timestamp}</lastmod>\n" .
			"\t</sitemap>\n";
	}

	/**
	 * Return the XML required to close a sitemap index file
	 *
	 * @return String
	 */
	public function closeIndex() {
		return "</sitemapindex>\n";
	}

	/**
	 * Return the XML required to open a sitemap file
	 *
	 * @return String
	 */
	public function openFile() {
		return $this->xmlHead() . '<urlset xmlns="' . $this->xmlSchema() . '">' . "\n";
	}

	/**
	 * Return the XML for a single sitemap entry
	 *
	 * @param $url String: an RFC 2396 compliant URL
	 * @param $date String: a ISO 8601 date
	 * @param $priority String: a priority indicator, 0.0 - 1.0 inclusive with a 0.1 stepsize
	 * @return String
	 */
	public function fileEntry( $url, $date, $priority ) {
		return
			"\t<url>\n" .
			// bug 34666: $url may contain bad characters such as ampersands.
			"\t\t<loc>" . htmlspecialchars( $url ) . "</loc>\n" .
			"\t\t<lastmod>$date</lastmod>\n" .
			"\t\t<priority>$priority</priority>\n" .
			"\t</url>\n";
	}

	/**
	 * Return the XML required to close sitemap file
	 *
	 * @return String
	 */
	public function closeFile() {
		return "</urlset>\n";
	}

	/**
	 * Populate $this->limit
	 *
	 * @param $namespace Integer: the namespace the lengths are computed for
	 */
	public function generateLimit( $namespace ) {
		// bug 17961: make a title with the longest possible URL in this namespace
		$title = Title::makeTitle( $namespace, str_repeat( "\xf0\xa8\xae\x81", 63 ) . "\xe5\x96\x83" );

		$this->limit = array(
			strlen( $this->openFile() ),
			strlen( $this->fileEntry( $title->getCanonicalURL(), wfTimestamp( TS_ISO_8601, wfTimestamp() ), $this->priority( $namespace ) ) ),
			strlen( $this->closeFile() )
		);
	}
}
|
2005-10-22 10:40:49 +00:00
|
|
|
|
2009-08-02 19:35:17 +00:00
|
|
|
// Name of the maintenance class defined in this file; presumably picked up
// by the script included through RUN_MAINTENANCE_IF_MAIN (defined elsewhere).
$maintClass = 'GenerateSitemap';
require_once RUN_MAINTENANCE_IF_MAIN;
|