2005-10-22 10:40:49 +00:00
|
|
|
<?php
|
2005-11-03 06:06:50 +00:00
|
|
|
// Pseudo-namespace key: priority slot used for custom (non-standard) main namespaces.
define( 'GS_MAIN', -2 );
|
|
|
|
|
// Pseudo-namespace key: priority slot used for custom (non-standard) talk namespaces.
define( 'GS_TALK', -1 );
|
2005-10-22 10:40:49 +00:00
|
|
|
/**
|
2008-04-17 18:13:54 +00:00
|
|
|
* Creates a sitemap for the site
|
2005-11-03 00:23:07 +00:00
|
|
|
*
|
2009-08-02 19:35:17 +00:00
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
|
|
|
*
|
WARNING: HUGE COMMIT
Doxygen documentation update:
* Changed all @addtogroup to @ingroup. @addtogroup adds the comment to the group description, but doesn't add the file, class, function, ... to the group like @ingroup does. See for example http://svn.wikimedia.org/doc/group__SpecialPage.html where it's impossible to see related files, classes, ... that should belong to that group.
* Added @file to file description, it seems that it should be explicitly declared for file descriptions, otherwise doxygen will think that the comment documents the first class, variable, function, ... that is in that file.
* Removed some empty comments
* Removed some ?>
Added following groups:
* ExternalStorage
* JobQueue
* MaintenanceLanguage
One more thing: there are still a lot of warnings when generating the doc.
2008-05-20 17:13:28 +00:00
|
|
|
* @ingroup Maintenance
|
2005-10-22 10:40:49 +00:00
|
|
|
*
|
2005-11-03 00:23:07 +00:00
|
|
|
* @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason
|
|
|
|
|
* @copyright Copyright © 2005, Jens Frank <jeluf@gmx.de>
|
|
|
|
|
* @copyright Copyright © 2005, Brion Vibber <brion@pobox.com>
|
|
|
|
|
*
|
2008-04-17 18:13:54 +00:00
|
|
|
* @see http://www.sitemaps.org/
|
|
|
|
|
* @see http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd
|
2005-11-03 00:23:07 +00:00
|
|
|
*
|
|
|
|
|
* @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
|
2005-10-22 10:40:49 +00:00
|
|
|
*/
|
|
|
|
|
|
2009-08-03 21:56:41 +00:00
|
|
|
require_once( dirname(__FILE__) . '/Maintenance.php' );
|
2009-08-02 19:35:17 +00:00
|
|
|
|
|
|
|
|
/**
 * Maintenance script that writes one or more sitemap files per namespace,
 * plus a sitemap index file listing them, into a directory on disk.
 */
class GenerateSitemap extends Maintenance {
	/**
	 * The maximum amount of urls in a sitemap file
	 *
	 * @link http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd
	 *
	 * @var int
	 */
	public $url_limit;

	/**
	 * The maximum size of a sitemap file, in bytes
	 *
	 * @link http://www.sitemaps.org/faq.php#faq_sitemap_size
	 *
	 * @var int
	 */
	public $size_limit;

	/**
	 * The file system path to prepend to the filename
	 * (ends with a directory separator once set by execute())
	 *
	 * @var string
	 */
	public $fspath;

	/**
	 * The path to append to the domain name
	 * (not referenced anywhere else in this class)
	 *
	 * @var string
	 */
	public $path;

	/**
	 * Whether or not to use compression
	 *
	 * @var bool
	 */
	public $compress;

	/**
	 * Byte lengths used for size accounting, populated by generateLimit():
	 * [0] the sitemap file header, [1] one sample entry built from a
	 * 255-byte title, [2] the sitemap file footer.
	 *
	 * @var array
	 */
	public $limit = array();

	/**
	 * Key => value entries of namespaces and their priorities
	 *
	 * @var array
	 */
	public $priorities = array();

	/**
	 * A one-dimensional array of namespaces in the wiki
	 *
	 * @var array
	 */
	public $namespaces = array();

	/**
	 * When this sitemap batch was generated
	 *
	 * @var string
	 */
	public $timestamp;

	/**
	 * A database slave object
	 *
	 * @var object
	 */
	public $dbr;

	/**
	 * A resource pointing to the sitemap index file
	 *
	 * @var resource
	 */
	public $findex;

	/**
	 * A resource pointing to a sitemap file, or false when none is open
	 *
	 * @var resource
	 */
	public $file;

	/**
	 * Constructor: sets the script description and registers its options.
	 *
	 * NOTE(review): the 'server' option is registered here but nothing in this
	 * class reads it; confirm whether the Maintenance base class consumes it.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Creates a sitemap for the site";
		$this->addOption( 'fspath', 'The file system path to save to, e.g. /tmp/sitemap' .
			"\n\t\tdefaults to current directory", false, true );
		$this->addOption( 'server', "The protocol and server name to use in URLs, e.g.\n" .
			"\t\thttp://en.wikipedia.org. This is sometimes necessary because\n" .
			"\t\tserver name detection may fail in command line scripts.", false, true );
		$this->addOption( 'compress', 'Compress the sitemap files, can take value yes|no, default yes' );
	}

	/**
	 * Execute: initialise limits, paths and the database handle, open the
	 * sitemap index file and run the main loop.
	 *
	 * Terminates the script if the index file cannot be opened for writing.
	 */
	public function execute() {
		$this->setNamespacePriorities();
		$this->url_limit = 50000;
		$this->size_limit = pow( 2, 20 ) * 10;
		$this->fspath = self::init_path( $this->getOption( 'fspath', getcwd() ) );
		// Any value other than the literal string 'no' enables compression.
		$this->compress = $this->getOption( 'compress', 'yes' ) !== 'no';
		$this->dbr = wfGetDB( DB_SLAVE );
		$this->generateNamespaces();
		$this->timestamp = wfTimestamp( TS_ISO_8601, wfTimestampNow() );

		$indexPath = "{$this->fspath}sitemap-index-" . wfWikiID() . ".xml";
		$this->findex = fopen( $indexPath, 'wb' );
		if ( $this->findex === false ) {
			// Without this guard every later fwrite()/fclose() would receive false.
			die( "Can not open sitemap index file $indexPath for writing.\n" );
		}

		$this->main();
	}

	/**
	 * Fill $this->priorities with the priority string for each known
	 * namespace, plus the GS_MAIN / GS_TALK fallbacks used by guessPriority().
	 */
	private function setNamespacePriorities() {
		// Custom main namespaces
		$this->priorities[GS_MAIN] = '0.5';
		// Custom talk namespaces
		$this->priorities[GS_TALK] = '0.1';
		// MediaWiki standard namespaces
		$this->priorities[NS_MAIN] = '1.0';
		$this->priorities[NS_TALK] = '0.1';
		$this->priorities[NS_USER] = '0.5';
		$this->priorities[NS_USER_TALK] = '0.1';
		$this->priorities[NS_PROJECT] = '0.5';
		$this->priorities[NS_PROJECT_TALK] = '0.1';
		$this->priorities[NS_FILE] = '0.5';
		$this->priorities[NS_FILE_TALK] = '0.1';
		$this->priorities[NS_MEDIAWIKI] = '0.0';
		$this->priorities[NS_MEDIAWIKI_TALK] = '0.1';
		$this->priorities[NS_TEMPLATE] = '0.0';
		$this->priorities[NS_TEMPLATE_TALK] = '0.1';
		$this->priorities[NS_HELP] = '0.5';
		$this->priorities[NS_HELP_TALK] = '0.1';
		$this->priorities[NS_CATEGORY] = '0.5';
		$this->priorities[NS_CATEGORY_TALK] = '0.1';
	}

	/**
	 * Create directory if it does not exist and return pathname with a trailing slash
	 *
	 * Terminates the script if the directory cannot be created or resolved.
	 *
	 * @param string $fspath Directory to write sitemap files into
	 *
	 * @return string|null Resolved path ending in DIRECTORY_SEPARATOR, or null
	 *  when $fspath is not set
	 */
	private static function init_path( $fspath ) {
		if ( !isset( $fspath ) ) {
			return null;
		}

		# Create directory if needed
		if ( $fspath && !is_dir( $fspath ) ) {
			wfMkdirParents( $fspath ) or die( "Can not create directory $fspath.\n" );
		}

		$resolved = realpath( $fspath );
		if ( $resolved === false ) {
			// false . DIRECTORY_SEPARATOR would silently become the filesystem root.
			die( "Can not resolve directory $fspath.\n" );
		}

		return $resolved . DIRECTORY_SEPARATOR;
	}

	/**
	 * Generate a one-dimensional array of existing namespaces
	 *
	 * Uses $wgSitemapNamespaces verbatim when it is an array; otherwise
	 * collects the distinct page_namespace values from the page table.
	 */
	public function generateNamespaces() {
		// Only generate for specific namespaces if $wgSitemapNamespaces is an array.
		global $wgSitemapNamespaces;
		if ( is_array( $wgSitemapNamespaces ) ) {
			$this->namespaces = $wgSitemapNamespaces;
			return;
		}

		$res = $this->dbr->select( 'page',
			array( 'page_namespace' ),
			array(),
			__METHOD__,
			array(
				'GROUP BY' => 'page_namespace',
				'ORDER BY' => 'page_namespace',
			)
		);

		foreach ( $res as $row ) {
			$this->namespaces[] = $row->page_namespace;
		}
	}

	/**
	 * Get the priority of a given namespace
	 *
	 * @param int $namespace The namespace to get the priority for
	 *
	 * @return string
	 */
	public function priority( $namespace ) {
		return isset( $this->priorities[$namespace] )
			? $this->priorities[$namespace]
			: $this->guessPriority( $namespace );
	}

	/**
	 * If the namespace isn't listed on the priority list return the
	 * default priority for the namespace, varies depending on whether it's
	 * a talkpage or not.
	 *
	 * @param int $namespace The namespace to get the priority for
	 *
	 * @return string
	 */
	public function guessPriority( $namespace ) {
		return MWNamespace::isMain( $namespace )
			? $this->priorities[GS_MAIN]
			: $this->priorities[GS_TALK];
	}

	/**
	 * Return a database resolution of all the pages in a given namespace
	 *
	 * Selects page_namespace, page_title and page_touched for each row.
	 *
	 * @param int $namespace Limit the query to this namespace
	 *
	 * @return resource
	 */
	public function getPageRes( $namespace ) {
		return $this->dbr->select( 'page',
			array(
				'page_namespace',
				'page_title',
				'page_touched',
			),
			array( 'page_namespace' => $namespace ),
			__METHOD__
		);
	}

	/**
	 * Main loop
	 *
	 * For every namespace, writes its pages into numbered sitemap files and
	 * records each file in the index. A new file is started on the first row,
	 * after url_limit rows, or when one more sample-sized entry plus the
	 * footer would exceed size_limit.
	 *
	 * Terminates the script if a sitemap file cannot be opened for writing.
	 */
	public function main() {
		global $wgContLang;

		fwrite( $this->findex, $this->openIndex() );

		foreach ( $this->namespaces as $namespace ) {
			$res = $this->getPageRes( $namespace );
			$this->file = false;
			$this->generateLimit( $namespace );
			$length = $this->limit[0];
			$i = $smcount = 0;
			// Depends only on the namespace, so compute it once per namespace.
			$priority = $this->priority( $namespace );

			$fns = $wgContLang->getFormattedNsText( $namespace );
			$this->output( "$namespace ($fns)" );

			foreach ( $res as $row ) {
				if ( $i++ === 0 || $i === $this->url_limit + 1 || $length + $this->limit[1] + $this->limit[2] > $this->size_limit ) {
					if ( $this->file !== false ) {
						$this->write( $this->file, $this->closeFile() );
						$this->close( $this->file );
					}
					$filename = $this->sitemapFilename( $namespace, $smcount++ );
					$this->file = $this->open( $this->fspath . $filename, 'wb' );
					if ( $this->file === false ) {
						die( "Can not open sitemap file {$this->fspath}$filename for writing.\n" );
					}
					$this->write( $this->file, $this->openFile() );
					fwrite( $this->findex, $this->indexEntry( $filename ) );
					$this->output( "\t$this->fspath$filename" );
					$length = $this->limit[0];
					$i = 1;
				}

				$title = Title::makeTitle( $row->page_namespace, $row->page_title );
				$date = wfTimestamp( TS_ISO_8601, $row->page_touched );
				$entry = $this->fileEntry( $title->getFullURL(), $date, $priority );
				$length += strlen( $entry );
				$this->write( $this->file, $entry );

				// generate pages for language variants
				// NOTE(review): variant entries add to $length but not to $i, so a
				// file can hold more than url_limit <url> elements when variants exist.
				if ( $wgContLang->hasVariants() ) {
					$variants = $wgContLang->getVariants();
					foreach ( $variants as $vCode ) {
						if ( $vCode == $wgContLang->getCode() ) {
							continue; // we don't want default variant
						}
						$entry = $this->fileEntry( $title->getFullURL( '', $vCode ), $date, $priority );
						$length += strlen( $entry );
						$this->write( $this->file, $entry );
					}
				}
			}

			if ( $this->file ) {
				$this->write( $this->file, $this->closeFile() );
				$this->close( $this->file );
			}
		}

		fwrite( $this->findex, $this->closeIndex() );
		fclose( $this->findex );
	}

	/**
	 * gzopen() / fopen() wrapper
	 *
	 * @param string $file Path of the file to open
	 * @param string $flags Mode string passed to gzopen() / fopen()
	 *
	 * @return resource
	 */
	public function open( $file, $flags ) {
		return $this->compress ? gzopen( $file, $flags ) : fopen( $file, $flags );
	}

	/**
	 * gzwrite() / fwrite() wrapper
	 *
	 * @param resource $handle Handle returned by open()
	 * @param string $str Data to write
	 */
	public function write( &$handle, $str ) {
		if ( $this->compress ) {
			gzwrite( $handle, $str );
		} else {
			fwrite( $handle, $str );
		}
	}

	/**
	 * gzclose() / fclose() wrapper
	 *
	 * @param resource $handle Handle returned by open()
	 */
	public function close( &$handle ) {
		if ( $this->compress ) {
			gzclose( $handle );
		} else {
			fclose( $handle );
		}
	}

	/**
	 * Get a sitemap filename
	 *
	 * @param int $namespace The namespace
	 * @param int $count The count
	 *
	 * @return string
	 */
	public function sitemapFilename( $namespace, $count ) {
		$ext = $this->compress ? '.gz' : '';
		return "sitemap-" . wfWikiID() . "-NS_$namespace-$count.xml$ext";
	}

	/**
	 * Return the XML required to open an XML file
	 *
	 * @return string
	 */
	public function xmlHead() {
		return '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
	}

	/**
	 * Return the XML schema being used
	 *
	 * @return string
	 */
	public function xmlSchema() {
		return 'http://www.sitemaps.org/schemas/sitemap/0.9';
	}

	/**
	 * Return the XML required to open a sitemap index file
	 *
	 * @return string
	 */
	public function openIndex() {
		return $this->xmlHead() . '<sitemapindex xmlns="' . $this->xmlSchema() . '">' . "\n";
	}

	/**
	 * Return the XML for a single sitemap indexfile entry
	 *
	 * NOTE(review): <loc> receives the bare filename, not a full URL —
	 * confirm whether consumers of the index expect an absolute location.
	 *
	 * @param string $filename The filename of the sitemap file
	 *
	 * @return string
	 */
	public function indexEntry( $filename ) {
		return
			"\t<sitemap>\n" .
			"\t\t<loc>$filename</loc>\n" .
			"\t\t<lastmod>{$this->timestamp}</lastmod>\n" .
			"\t</sitemap>\n";
	}

	/**
	 * Return the XML required to close a sitemap index file
	 *
	 * @return string
	 */
	public function closeIndex() {
		return "</sitemapindex>\n";
	}

	/**
	 * Return the XML required to open a sitemap file
	 *
	 * @return string
	 */
	public function openFile() {
		return $this->xmlHead() . '<urlset xmlns="' . $this->xmlSchema() . '">' . "\n";
	}

	/**
	 * Return the XML for a single sitemap entry
	 *
	 * @param string $url An RFC 2396 compliant URL
	 * @param string $date A ISO 8601 date
	 * @param string $priority A priority indicator, 0.0 - 1.0 inclusive with a 0.1 stepsize
	 *
	 * @return string
	 */
	public function fileEntry( $url, $date, $priority ) {
		return
			"\t<url>\n" .
			// The URL may contain characters such as '&' (e.g. in query strings)
			// that must be entity-escaped to keep the XML well-formed.
			"\t\t<loc>" . htmlspecialchars( $url ) . "</loc>\n" .
			"\t\t<lastmod>$date</lastmod>\n" .
			"\t\t<priority>$priority</priority>\n" .
			"\t</url>\n";
	}

	/**
	 * Return the XML required to close sitemap file
	 *
	 * @return string
	 */
	public function closeFile() {
		return "</urlset>\n";
	}

	/**
	 * Populate $this->limit
	 *
	 * Measures the byte length of the file header, of one entry built from a
	 * 255-byte title (63 four-byte characters plus one three-byte character)
	 * in the given namespace, and of the file footer.
	 *
	 * @param int $namespace The namespace the sample entry is built for
	 */
	public function generateLimit( $namespace ) {
		$title = Title::makeTitle( $namespace, str_repeat( "\xf0\xa8\xae\x81", 63 ) . "\xe5\x96\x83" );

		$this->limit = array(
			strlen( $this->openFile() ),
			strlen( $this->fileEntry( $title->getFullURL(), wfTimestamp( TS_ISO_8601, wfTimestamp() ), $this->priority( $namespace ) ) ),
			strlen( $this->closeFile() )
		);
	}
}
|
2005-10-22 10:40:49 +00:00
|
|
|
|
2009-08-02 19:35:17 +00:00
|
|
|
// Name of the maintenance class defined in this file — presumably read by the
// script required via DO_MAINTENANCE; verify against Maintenance.php.
$maintClass = "GenerateSitemap";
|
|
|
|
|
// Load the file named by the DO_MAINTENANCE constant (defined outside this file);
// presumably this instantiates $maintClass and calls execute() — TODO confirm.
require_once( DO_MAINTENANCE );
|