2005-10-22 10:40:49 +00:00
|
|
|
<?php
|
|
|
|
|
/**
|
2012-07-17 05:40:40 +00:00
|
|
|
* Creates a sitemap for the site.
|
2005-11-03 00:23:07 +00:00
|
|
|
*
|
2010-09-01 19:36:18 +00:00
|
|
|
* Copyright © 2005, Ævar Arnfjörð Bjarmason, Jens Frank <jeluf@gmx.de> and
|
|
|
|
|
* Brion Vibber <brion@pobox.com>
|
|
|
|
|
*
|
2009-08-02 19:35:17 +00:00
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
|
|
|
*
|
2010-09-01 19:36:18 +00:00
|
|
|
* @file
|
WARNING: HUGE COMMIT
Doxygen documentation update:
* Changed alls @addtogroup to @ingroup. @addtogroup adds the comment to the group description, but doesn't add the file, class, function, ... to the group like @ingroup does. See for example http://svn.wikimedia.org/doc/group__SpecialPage.html where it's impossible to see related files, classes, ... that should belong to that group.
* Added @file to file description, it seems that it should be explicitely decalred for file descriptions, otherwise doxygen will think that the comment document the first class, variabled, function, ... that is in that file.
* Removed some empty comments
* Removed some ?>
Added following groups:
* ExternalStorage
* JobQueue
* MaintenanceLanguage
One more thing: there are still a lot of warnings when generating the doc.
2008-05-20 17:13:28 +00:00
|
|
|
* @ingroup Maintenance
|
2008-04-17 18:13:54 +00:00
|
|
|
* @see http://www.sitemaps.org/
|
|
|
|
|
* @see http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd
|
2005-10-22 10:40:49 +00:00
|
|
|
*/
|
|
|
|
|
|
2018-07-29 12:24:54 +00:00
|
|
|
use MediaWiki\MediaWikiServices;
|
2020-11-12 23:16:01 +00:00
|
|
|
use Wikimedia\Rdbms\IDatabase;
|
2019-06-18 18:28:37 +00:00
|
|
|
use Wikimedia\Rdbms\IResultWrapper;
|
2018-07-29 12:24:54 +00:00
|
|
|
|
2013-05-17 00:16:59 +00:00
|
|
|
require_once __DIR__ . '/Maintenance.php';
|
2009-08-02 19:35:17 +00:00
|
|
|
|
2012-07-17 05:40:40 +00:00
|
|
|
/**
|
|
|
|
|
* Maintenance script that generates a sitemap for the site.
|
|
|
|
|
*
|
|
|
|
|
* @ingroup Maintenance
|
|
|
|
|
*/
|
2009-08-02 19:35:17 +00:00
|
|
|
class GenerateSitemap extends Maintenance {
|
2020-05-09 23:22:50 +00:00
|
|
|
/** Key in $priorities holding the fallback priority for subject namespaces */
private const GS_MAIN = -2;

/** Key in $priorities holding the fallback priority for talk namespaces */
private const GS_TALK = -1;

/**
 * Upper bound on the number of URLs written to one sitemap file.
 * Assigned in execute().
 *
 * @link http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd
 *
 * @var int
 */
public $url_limit;

/**
 * Upper bound, in bytes, on the uncompressed size of one sitemap file.
 * Assigned in execute().
 *
 * @link http://www.sitemaps.org/faq.php#faq_sitemap_size
 *
 * @var int
 */
public $size_limit;

/**
 * Directory the files are written to, with a trailing directory
 * separator, so a file name can be appended directly.
 *
 * @var string
 */
public $fspath;

/**
 * URL path placed in front of file names in the index;
 * it should resolve to the same directory as $fspath.
 *
 * @var string
 */
public $urlpath;

/**
 * True when sitemap files are written through the gz* functions
 * and get a ".gz" suffix.
 *
 * @var bool
 */
public $compress;

/**
 * True when redirect pages are to be left out of the sitemap.
 *
 * @var bool
 */
public $skipRedirects;

/**
 * Byte lengths used for size accounting, as filled in by generateLimit():
 * [ 0 => opening XML, 1 => one entry for the longest title, 2 => closing XML ].
 *
 * @var array
 */
public $limit = [];

/**
 * Map of namespace number => priority string.
 *
 * @var array
 */
public $priorities = [];

/**
 * Flat list of the namespace numbers to generate sitemaps for.
 *
 * @var array
 */
public $namespaces = [];

/**
 * ISO 8601 timestamp of this run, used as <lastmod> in the index.
 *
 * @var string
 */
public $timestamp;

/**
 * Replica database connection used for all queries.
 *
 * @var IDatabase
 */
public $dbr;

/**
 * Handle of the sitemap index file.
 *
 * @var resource
 */
public $findex;

/**
 * Handle of the sitemap file currently being written,
 * or false while no file is open.
 *
 * @var resource|false
 */
public $file;

/**
 * Identifier used in file names; defaults to the current wiki's DB domain ID.
 *
 * @var string
 */
private $identifier;
|
|
|
|
|
|
2009-08-02 19:35:17 +00:00
|
|
|
/**
 * Register the script description and its command line options.
 */
public function __construct() {
	parent::__construct();

	// Every value-taking option below is optional.
	$required = false;
	$withArg = true;

	$this->addDescription( 'Creates a sitemap for the site' );

	$fspathHelp = 'The file system path to save to, e.g. /tmp/sitemap; defaults to current directory';
	$this->addOption( 'fspath', $fspathHelp, $required, $withArg );

	$urlpathHelp = 'The URL path corresponding to --fspath, prepended to filenames in the index; '
		. 'defaults to an empty string';
	$this->addOption( 'urlpath', $urlpathHelp, $required, $withArg );

	$compressHelp = 'Compress the sitemap files, can take value yes|no, default yes';
	$this->addOption( 'compress', $compressHelp, $required, $withArg );

	// Plain flag: no value, registered with the addOption() defaults.
	$this->addOption( 'skip-redirects', 'Do not include redirecting articles in the sitemap' );

	$identifierHelp = 'What site identifier to use for the wiki, defaults to $wgDBname';
	$this->addOption( 'identifier', $identifierHelp, $required, $withArg );
}
|
2005-11-03 04:23:02 +00:00
|
|
|
|
|
|
|
|
/**
 * Entry point: read the command line options, prepare the output
 * directory and the index file, then hand over to main().
 *
 * Aborts through fatalError() when the output directory cannot be
 * created or resolved, or when the index file cannot be opened.
 */
public function execute() {
	$this->setNamespacePriorities();
	$this->url_limit = 50000;
	$this->size_limit = ( 2 ** 20 ) * 10;

	# Create directory if needed
	$fspath = $this->getOption( 'fspath', getcwd() );
	if ( !wfMkdirParents( $fspath, null, __METHOD__ ) ) {
		$this->fatalError( "Can not create directory $fspath." );
	}

	$dbDomain = WikiMap::getCurrentWikiDbDomain()->getId();

	// realpath() returns false on failure; concatenating that with the
	// separator would silently turn the target directory into "/".
	$resolvedPath = realpath( $fspath );
	if ( $resolvedPath === false ) {
		$this->fatalError( "Can not resolve directory $fspath." );
	}
	$this->fspath = $resolvedPath . DIRECTORY_SEPARATOR;

	$this->urlpath = $this->getOption( 'urlpath', "" );
	if ( $this->urlpath !== "" && substr( $this->urlpath, -1 ) !== '/' ) {
		$this->urlpath .= '/';
	}

	$this->identifier = $this->getOption( 'identifier', $dbDomain );
	// Anything other than the literal "no" keeps compression enabled.
	$this->compress = $this->getOption( 'compress', 'yes' ) !== 'no';
	$this->skipRedirects = $this->hasOption( 'skip-redirects' );
	$this->dbr = $this->getDB( DB_REPLICA );
	$this->generateNamespaces();
	$this->timestamp = wfTimestamp( TS_ISO_8601, wfTimestampNow() );

	$encIdentifier = rawurlencode( $this->identifier );
	$indexPath = "{$this->fspath}sitemap-index-{$encIdentifier}.xml";
	$this->findex = fopen( $indexPath, 'wb' );
	if ( $this->findex === false ) {
		// Stop here instead of letting main() call fwrite() on false.
		$this->fatalError( "Can not open sitemap index file $indexPath for writing." );
	}

	$this->main();
}
|
|
|
|
|
|
2009-08-10 17:10:32 +00:00
|
|
|
/**
 * Fill $this->priorities with the built-in defaults, then apply the
 * overrides from $wgSitemapNamespacesPriorities unless it is false.
 * Override values above 1.0 or below 0.0 are replaced by '1.0' / '0.0'.
 */
private function setNamespacePriorities() {
	global $wgSitemapNamespacesPriorities;

	$defaults = [
		// Fallback for subject namespaces that have no entry of their own
		self::GS_MAIN => '0.5',
		// Fallback for talk namespaces that have no entry of their own
		self::GS_TALK => '0.1',
		// MediaWiki standard namespaces
		NS_MAIN => '1.0',
		NS_TALK => '0.1',
		NS_USER => '0.5',
		NS_USER_TALK => '0.1',
		NS_PROJECT => '0.5',
		NS_PROJECT_TALK => '0.1',
		NS_FILE => '0.5',
		NS_FILE_TALK => '0.1',
		NS_MEDIAWIKI => '0.0',
		NS_MEDIAWIKI_TALK => '0.1',
		NS_TEMPLATE => '0.0',
		NS_TEMPLATE_TALK => '0.1',
		NS_HELP => '0.5',
		NS_HELP_TALK => '0.1',
		NS_CATEGORY => '0.5',
		NS_CATEGORY_TALK => '0.1',
	];
	foreach ( $defaults as $ns => $defaultPriority ) {
		$this->priorities[$ns] = $defaultPriority;
	}

	// No custom priorities configured
	if ( $wgSitemapNamespacesPriorities === false ) {
		return;
	}

	/** @var array $wgSitemapNamespacesPriorities */
	foreach ( $wgSitemapNamespacesPriorities as $namespace => $priority ) {
		$numeric = floatval( $priority );
		if ( $numeric > 1.0 ) {
			$this->priorities[$namespace] = '1.0';
			continue;
		}
		// In-range values are stored exactly as configured.
		$this->priorities[$namespace] = $numeric < 0.0 ? '0.0' : $priority;
	}
}
|
|
|
|
|
|
2005-11-03 04:23:02 +00:00
|
|
|
/**
 * Populate $this->namespaces.
 *
 * When $wgSitemapNamespaces is an array it is used as-is; otherwise the
 * list is made of every namespace that occurs in the page table, in
 * ascending order.
 */
private function generateNamespaces() {
	global $wgSitemapNamespaces;

	if ( !is_array( $wgSitemapNamespaces ) ) {
		$queryOptions = [
			'GROUP BY' => 'page_namespace',
			'ORDER BY' => 'page_namespace',
		];
		$rows = $this->dbr->select(
			'page',
			[ 'page_namespace' ],
			[],
			__METHOD__,
			$queryOptions
		);

		foreach ( $rows as $nsRow ) {
			$this->namespaces[] = $nsRow->page_namespace;
		}

		return;
	}

	// Only generate for the explicitly configured namespaces.
	$this->namespaces = $wgSitemapNamespaces;
}
|
|
|
|
|
|
2005-11-03 04:23:02 +00:00
|
|
|
/**
 * Look up the priority of a namespace, falling back to
 * guessPriority() when the namespace has no entry.
 *
 * @param int $namespace The namespace to get the priority for
 * @return string
 */
private function priority( $namespace ) {
	if ( isset( $this->priorities[$namespace] ) ) {
		return $this->priorities[$namespace];
	}

	return $this->guessPriority( $namespace );
}
|
|
|
|
|
|
2005-11-03 04:23:02 +00:00
|
|
|
/**
 * Pick a default priority for a namespace that is not on the priority
 * list: the GS_MAIN entry for subject namespaces, the GS_TALK entry
 * for everything else.
 *
 * @param int $namespace The namespace to get the priority for
 * @return string
 */
private function guessPriority( $namespace ) {
	$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();

	if ( $nsInfo->isSubject( $namespace ) ) {
		return $this->priorities[self::GS_MAIN];
	}

	return $this->priorities[self::GS_TALK];
}
|
|
|
|
|
|
2005-11-03 04:23:02 +00:00
|
|
|
/**
 * Query all pages of one namespace.
 *
 * page_props is LEFT JOINed on the 'noindex' property only, so
 * pp_propname is 'noindex' for pages carrying that property and
 * NULL for all other pages.
 *
 * @param int $namespace Limit the query to this namespace
 * @return IResultWrapper
 */
private function getPageRes( $namespace ) {
	$tables = [ 'page', 'page_props' ];
	$fields = [
		'page_namespace',
		'page_title',
		'page_touched',
		'page_is_redirect',
		'pp_propname',
	];
	$conds = [ 'page_namespace' => $namespace ];
	$noindexJoin = [
		'LEFT JOIN',
		[
			'page_id = pp_page',
			'pp_propname' => 'noindex'
		]
	];

	return $this->dbr->select(
		$tables,
		$fields,
		$conds,
		__METHOD__,
		[],
		[ 'page_props' => $noindexJoin ]
	);
}
|
|
|
|
|
|
2005-11-03 04:23:02 +00:00
|
|
|
/**
 * Main loop: write one or more sitemap files per namespace and list
 * each of them in the sitemap index.
 *
 * A new sitemap file is started for the first page of a namespace and
 * whenever adding the next page would push the current file over
 * $this->url_limit URLs or $this->size_limit bytes. Each page
 * contributes one entry for its canonical URL plus one entry per
 * non-default language variant, and all of them count toward both limits.
 *
 * Pages with the 'noindex' page property are skipped, as are redirects
 * when $this->skipRedirects is set.
 */
public function main() {
	$services = MediaWikiServices::getInstance();
	$contLang = $services->getContentLanguage();
	$langConverter = $services->getLanguageConverterFactory()->getLanguageConverter( $contLang );

	// The extra variants are the same for every page, so collect them once.
	$variantCodes = [];
	if ( $langConverter->hasVariants() ) {
		$defaultCode = $contLang->getCode();
		foreach ( $langConverter->getVariants() as $vCode ) {
			if ( $vCode == $defaultCode ) {
				continue; // we don't want default variant
			}
			$variantCodes[] = $vCode;
		}
	}
	// Number of <url> elements every page adds to a sitemap file.
	$entriesPerPage = 1 + count( $variantCodes );

	fwrite( $this->findex, $this->openIndex() );

	foreach ( $this->namespaces as $namespace ) {
		$res = $this->getPageRes( $namespace );
		$this->file = false;
		$this->generateLimit( $namespace );
		$length = $this->limit[0];
		$urlCount = 0; // <url> elements written to the current file
		$smcount = 0; // sitemap files started for this namespace
		$priority = $this->priority( $namespace );

		$fns = $contLang->getFormattedNsText( $namespace );
		$this->output( "$namespace ($fns)\n" );
		$skippedRedirects = 0; // Number of redirects skipped for that namespace
		$skippedNoindex = 0; // Number of pages with __NOINDEX__ switch for that NS

		foreach ( $res as $row ) {
			if ( $row->pp_propname === 'noindex' ) {
				$skippedNoindex++;
				continue;
			}

			if ( $this->skipRedirects && $row->page_is_redirect ) {
				$skippedRedirects++;
				continue;
			}

			// Build everything this page will add before deciding where it
			// goes, so the limit checks see its real footprint.
			$title = Title::makeTitle( $row->page_namespace, $row->page_title );
			$date = wfTimestamp( TS_ISO_8601, $row->page_touched );
			$pageXml = $this->fileEntry( $title->getCanonicalURL(), $date, $priority );
			// generate pages for language variants
			foreach ( $variantCodes as $vCode ) {
				$pageXml .= $this->fileEntry(
					$title->getCanonicalURL( '', $vCode ),
					$date,
					$priority
				);
			}
			$pageLength = strlen( $pageXml );

			// Reserve at least the worst-case single entry size, which keeps
			// the split points unchanged on wikis without variants, and the
			// real size when variants make the page larger than that.
			$reserved = max( $this->limit[1], $pageLength );

			if ( $this->file === false
				|| $urlCount + $entriesPerPage > $this->url_limit
				|| $length + $reserved + $this->limit[2] > $this->size_limit
			) {
				if ( $this->file !== false ) {
					$this->write( $this->file, $this->closeFile() );
					$this->close( $this->file );
				}
				$filename = $this->sitemapFilename( $namespace, $smcount++ );
				$this->file = $this->open( $this->fspath . $filename, 'wb' );
				$this->write( $this->file, $this->openFile() );
				fwrite( $this->findex, $this->indexEntry( $filename ) );
				$this->output( "\t$this->fspath$filename\n" );
				$length = $this->limit[0];
				$urlCount = 0;
			}

			$this->write( $this->file, $pageXml );
			$length += $pageLength;
			$urlCount += $entriesPerPage;
		}

		if ( $skippedNoindex > 0 ) {
			$this->output( "  skipped $skippedNoindex page(s) with __NOINDEX__ switch\n" );
		}

		if ( $this->skipRedirects && $skippedRedirects > 0 ) {
			$this->output( "  skipped $skippedRedirects redirect(s)\n" );
		}

		if ( $this->file ) {
			$this->write( $this->file, $this->closeFile() );
			$this->close( $this->file );
		}
	}

	fwrite( $this->findex, $this->closeIndex() );
	fclose( $this->findex );
}
|
|
|
|
|
|
2005-11-03 06:06:50 +00:00
|
|
|
/**
 * Open a file with gzopen() when compression is enabled,
 * with fopen() otherwise.
 *
 * @param string $file
 * @param string $flags
 * @return resource
 * @throws MWException When the file cannot be opened
 */
private function open( $file, $flags ) {
	if ( $this->compress ) {
		$resource = gzopen( $file, $flags );
	} else {
		$resource = fopen( $file, $flags );
	}

	if ( $resource !== false ) {
		return $resource;
	}

	throw new MWException( __METHOD__
		. " error opening file $file with flags $flags. Check permissions?" );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2005-11-03 06:06:50 +00:00
|
|
|
/**
 * Write a string with gzwrite() when compression is enabled,
 * with fwrite() otherwise.
 *
 * @param resource &$handle
 * @param string $str
 * @throws MWException When $handle is a boolean instead of a file handle
 */
private function write( &$handle, $str ) {
	if ( is_bool( $handle ) ) {
		throw new MWException( __METHOD__ . " was passed a boolean as a file handle.\n" );
	}

	if ( !$this->compress ) {
		fwrite( $handle, $str );
		return;
	}

	gzwrite( $handle, $str );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Close a handle with gzclose() when compression is enabled,
 * with fclose() otherwise.
 *
 * @param resource &$handle
 */
private function close( &$handle ) {
	if ( !$this->compress ) {
		fclose( $handle );
		return;
	}

	gzclose( $handle );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Build the name of a sitemap file from the site identifier, the
 * namespace and a running number; ".gz" is appended when compressing.
 *
 * @param int $namespace
 * @param int $count
 * @return string
 */
private function sitemapFilename( $namespace, $count ) {
	$name = 'sitemap-' . $this->identifier . '-NS_' . $namespace . '-' . $count . '.xml';

	if ( $this->compress ) {
		$name .= '.gz';
	}

	return $name;
}
|
|
|
|
|
|
2005-11-03 04:23:02 +00:00
|
|
|
/**
 * Return the XML declaration line that starts every generated file.
 *
 * @return string
 */
private function xmlHead() {
	$declaration = '<?xml version="1.0" encoding="UTF-8"?>';

	return $declaration . "\n";
}
|
|
|
|
|
|
2005-11-03 04:23:02 +00:00
|
|
|
/**
 * Return the XML namespace URI used for the root elements.
 *
 * @return string
 */
private function xmlSchema() {
	$namespaceUri = 'http://www.sitemaps.org/schemas/sitemap/0.9';

	return $namespaceUri;
}
|
|
|
|
|
|
2005-11-03 04:23:02 +00:00
|
|
|
/**
 * Return the XML that opens a sitemap index file: the XML declaration
 * followed by the opening <sitemapindex> tag.
 *
 * @return string
 */
private function openIndex() {
	$head = $this->xmlHead();
	$schema = $this->xmlSchema();

	return "{$head}<sitemapindex xmlns=\"{$schema}\">\n";
}
|
|
|
|
|
|
2005-11-03 04:23:02 +00:00
|
|
|
/**
 * Return the XML for a single sitemap index entry.
 *
 * The location is the canonical server URL, followed by $this->urlpath
 * (with a "/" inserted when urlpath does not start with one) and the
 * file name. It is XML-escaped because urlpath and the identifier
 * embedded in the file name come from the command line and may contain
 * characters such as ampersands.
 *
 * @param string $filename The filename of the sitemap file
 * @return string
 */
private function indexEntry( $filename ) {
	$separator = substr( $this->urlpath, 0, 1 ) === "/" ? "" : "/";
	$location = wfGetServerUrl( PROTO_CANONICAL ) . $separator . $this->urlpath . $filename;

	return "\t<sitemap>\n" .
		"\t\t<loc>" . htmlspecialchars( $location ) . "</loc>\n" .
		"\t\t<lastmod>{$this->timestamp}</lastmod>\n" .
		"\t</sitemap>\n";
}
|
2005-10-22 10:40:49 +00:00
|
|
|
|
2005-11-03 04:23:02 +00:00
|
|
|
/**
 * Return the closing tag of a sitemap index file.
 *
 * @return string
 */
private function closeIndex() {
	$closingTag = '</sitemapindex>';

	return $closingTag . "\n";
}
|
2005-11-03 04:23:02 +00:00
|
|
|
|
|
|
|
|
/**
 * Return the XML that opens a sitemap file: the XML declaration
 * followed by the opening <urlset> tag.
 *
 * @return string
 */
private function openFile() {
	$head = $this->xmlHead();
	$schema = $this->xmlSchema();

	return "{$head}<urlset xmlns=\"{$schema}\">\n";
}
|
2005-11-03 04:23:02 +00:00
|
|
|
|
|
|
|
|
/**
 * Return the <url> element for a single page.
 *
 * @param string $url An RFC 2396 compliant URL
 * @param string $date A ISO 8601 date
 * @param string $priority A priority indicator, 0.0 - 1.0 inclusive with a 0.1 stepsize
 * @return string
 */
private function fileEntry( $url, $date, $priority ) {
	// T36666: $url may contain bad characters such as ampersands.
	$safeUrl = htmlspecialchars( $url );

	$lines = [
		"\t<url>",
		"\t\t<loc>" . $safeUrl . "</loc>",
		"\t\t<lastmod>" . $date . "</lastmod>",
		"\t\t<priority>" . $priority . "</priority>",
		"\t</url>",
	];

	return implode( "\n", $lines ) . "\n";
}
|
|
|
|
|
|
2005-11-03 04:23:02 +00:00
|
|
|
/**
 * Return the closing tag of a sitemap file.
 *
 * @return string
 */
private function closeFile() {
	$closingTag = '</urlset>';

	return $closingTag . "\n";
}
|
2005-11-03 04:23:02 +00:00
|
|
|
|
|
|
|
|
/**
 * Populate $this->limit with three byte lengths for the given
 * namespace: the opening XML, one entry for a maximum-length title,
 * and the closing XML.
 *
 * @param int $namespace
 */
private function generateLimit( $namespace ) {
	// T19961: make a title with the longest possible URL in this namespace.
	// 63 four-byte characters plus one three-byte character: 255 bytes of UTF-8.
	$longestName = str_repeat( "\u{28B81}", 63 ) . "\u{5583}";
	$title = Title::makeTitle( $namespace, $longestName );

	$openLength = strlen( $this->openFile() );

	$sampleEntry = $this->fileEntry(
		$title->getCanonicalURL(),
		wfTimestamp( TS_ISO_8601, wfTimestamp() ),
		$this->priority( $namespace )
	);
	$entryLength = strlen( $sampleEntry );

	$closeLength = strlen( $this->closeFile() );

	$this->limit = [ $openLength, $entryLength, $closeLength ];
}
|
2005-11-03 00:23:07 +00:00
|
|
|
}
|
2005-10-22 10:40:49 +00:00
|
|
|
|
2018-01-13 00:02:09 +00:00
|
|
|
// Name this script's class in $maintClass, then include the file that
// the RUN_MAINTENANCE_IF_MAIN constant points to.
$maintClass = GenerateSitemap::class;
require_once RUN_MAINTENANCE_IF_MAIN;
|