wiki.techinc.nl/maintenance/dumpHTML.php
Ævar Arnfjörð Bjarmason a26d5a49d7 * s~\t+$~~
2006-01-07 13:31:29 +00:00

126 lines
2.9 KiB
PHP

<?php
/**
* @todo document
* @package MediaWiki
* @subpackage Maintenance
*/
/**
 * Usage:
 * php dumpHTML.php [options...]
 *
 * -d <dest>      destination directory (default: "static")
 * -s <start>     start page ID (default: 1)
 * -e <end>       end page ID (default: highest page_id in the page table)
 * --images       only do image description pages
 * --categories   only do category pages
 * --redirects    only do redirects
 * --special      only do miscellaneous stuff
 * --force-copy   copy commons instead of symlink, needed for Wikimedia
 * --interlang    allow interlanguage links
 * --debug        print_r() all global variables once the run has finished
 */
// Options that take a value (-s, -d, -e). Assigned before commandLine.inc is
// loaded; presumably that include reads this list while parsing arguments.
$optionsWithArgs = array( 's', 'd', 'e' );

// Developer switch: set to true to profile a dump run.
$profiling = false;

if ( $profiling ) {
	/**
	 * Enable the profiler for every request (sample rate 1) and keep its
	 * results out of the database. Registered as MW_CMDLINE_CALLBACK below.
	 */
	function wfSetupDump() {
		global $wgProfiling, $wgProfileToDatabase, $wgProfileSampleRate;
		$wgProfileSampleRate = 1;
		$wgProfileToDatabase = false;
		$wgProfiling = true;
	}

	// NOTE(review): presumably commandLine.inc calls the function named by
	// this constant during setup - confirm against that file.
	define( 'MW_CMDLINE_CALLBACK', 'wfSetupDump' );
}
require_once( "commandLine.inc" );
require_once( "dumpHTML.inc" );

// Report everything except notices.
error_reporting( E_ALL & (~E_NOTICE) );

// Not read by any active statement visible in this script.
define( 'CHUNK_SIZE', 50 );

// -s: first page_id to dump; falls back to 1 when missing or empty.
$start = !empty( $options['s'] ) ? $options['s'] : 1;

// -e: last page_id to dump; falls back to the highest page_id in the page table.
if ( empty( $options['e'] ) ) {
	$dbr =& wfGetDB( DB_SLAVE );
	$end = $dbr->selectField( 'page', 'max(page_id)', false );
} else {
	$end = $options['e'];
}

// -d: output directory; falls back to "static".
$dest = !empty( $options['d'] ) ? $options['d'] : 'static';
// Build the dumper. The --force-copy and --interlang switches may be absent
// from $options, so they are read through isset(): a missing switch still
// yields null, exactly as before, but no longer triggers an undefined-index
// notice (a warning on PHP 8, which the E_NOTICE mask does not hide).
// Both 'alternateScriptPath' and 'interwiki' are fed from --interlang.
$wgHTMLDump = new DumpHTML( array(
	'dest' => $dest,
	'forceCopy' => isset( $options['force-copy'] ) ? $options['force-copy'] : null,
	'alternateScriptPath' => isset( $options['interlang'] ) ? $options['interlang'] : null,
	'interwiki' => isset( $options['interlang'] ) ? $options['interlang'] : null,
));
// Pick what to dump. The first matching "only" switch wins; with none given,
// the article range is dumped. Switches are tested with !empty() so that a
// switch that was not passed does not raise an undefined-index notice/warning;
// the truthiness of each test is unchanged.
if ( !empty( $options['special'] ) ) {
	$wgHTMLDump->doSpecials();
} elseif ( !empty( $options['images'] ) ) {
	$wgHTMLDump->doImageDescriptions();
} elseif ( !empty( $options['categories'] ) ) {
	$wgHTMLDump->doCategories();
} elseif ( !empty( $options['redirects'] ) ) {
	$wgHTMLDump->doRedirects();
} else {
	print("Creating static HTML dump in directory $dest. \n".
		"Starting from page_id $start of $end.\n");
	$dbr =& wfGetDB( DB_SLAVE );
	print "Using database {$dbr->mServer}\n";
	$wgHTMLDump->doArticles( $start, $end );

	// When no usable end ID was supplied the run extends to the last page, so
	// finish with the image, category and special passes. empty() mirrors the
	// test used when $end was computed; the previous isset() check skipped
	// these passes for a falsy -e even though $end had been defaulted.
	if ( empty( $options['e'] ) ) {
		$wgHTMLDump->doImageDescriptions();
		$wgHTMLDump->doCategories();
		$wgHTMLDump->doSpecials();
	}

	// A commented-out alternative used to live here: for ranges larger than
	// CHUNK_SIZE * 2 it re-ran this script in separate PHP instances, one
	// CHUNK_SIZE-sized slice of page IDs at a time, as a memory/resource leak
	// workaround. It referred to a $d object that this script does not define,
	// so the dead code was dropped.
}
// --debug: dump every global variable once the run is over.
if ( isset( $options['debug'] ) ) {
	print_r( $GLOBALS );
}

// With profiling switched on at the top of the script, emit the profiler report.
if ( $profiling ) {
	print $wgProfiler->getOutput();
}
?>