Rewrite of XML Dump Processing:
* Full rewrite of the WikiImporter class to a new class, XMLDumpImporter, which uses XMLReader instead of the deprecated PHP SAX interface.
* Fixes for the following bugs:
** XML Dump Processor chokes on unrecognised tags.
** Special:Import does not allow you to import a dump including only log entries.
* Added hooks to allow extensions to handle tags in any part of the XML document.
* Fully source-compatible with the previous interface.
This commit is contained in:
parent
1061d519f8
commit
c9ebbcd09b
8 changed files with 727 additions and 659 deletions
|
|
@ -605,7 +605,7 @@ $wgAutoloadLocalClasses = array(
|
|||
'WantedPagesPage' => 'includes/specials/SpecialWantedpages.php',
|
||||
'WantedTemplatesPage' => 'includes/specials/SpecialWantedtemplates.php',
|
||||
'WhatLinksHerePage' => 'includes/specials/SpecialWhatlinkshere.php',
|
||||
'WikiImporter' => 'includes/Import.php',
|
||||
'ImportXMLReader' => 'includes/ImportXMLReader.php',
|
||||
'WikiRevision' => 'includes/Import.php',
|
||||
'WithoutInterwikiPage' => 'includes/specials/SpecialWithoutinterwiki.php',
|
||||
|
||||
|
|
|
|||
|
|
@ -370,658 +370,6 @@ class WikiRevision {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* implements Special:Import
|
||||
* @ingroup SpecialPage
|
||||
*/
|
||||
class WikiImporter {
|
||||
var $mDebug = false;
|
||||
var $mSource = null;
|
||||
var $mPageCallback = null;
|
||||
var $mPageOutCallback = null;
|
||||
var $mRevisionCallback = null;
|
||||
var $mLogItemCallback = null;
|
||||
var $mUploadCallback = null;
|
||||
var $mTargetNamespace = null;
|
||||
var $mXmlNamespace = false;
|
||||
var $lastfield;
|
||||
var $tagStack = array();
|
||||
|
||||
function __construct( $source ) {
|
||||
$this->setRevisionCallback( array( $this, "importRevision" ) );
|
||||
$this->setUploadCallback( array( $this, "importUpload" ) );
|
||||
$this->setLogItemCallback( array( $this, "importLogItem" ) );
|
||||
$this->mSource = $source;
|
||||
}
|
||||
|
||||
function throwXmlError( $err ) {
|
||||
$this->debug( "FAILURE: $err" );
|
||||
wfDebug( "WikiImporter XML error: $err\n" );
|
||||
}
|
||||
|
||||
/**
 * Namespace-declaration handler registered with the SAX parser in doImport().
 *
 * When the value received in $prefix mentions www.mediawiki.org, this stores
 * in $this->mXmlNamespace a regex matching "<that value>:" at the start of a
 * string; stripXmlNamespace() applies it to element names.
 *
 * NOTE(review): PHP documents this callback as handler( $parser, $prefix, $uri ),
 * so the argument named $prefix here presumably receives the namespace URI and
 * $data the declared prefix -- confirm before renaming anything.
 *
 * @param $parser resource XML parser invoking the handler (unused)
 * @param $data mixed unused
 * @param $prefix string|false value tested and turned into the strip pattern
 * @param $uri string|false unused
 */
function handleXmlNamespace ( $parser, $data, $prefix=false, $uri=false ) {
	if( preg_match( '/www\.mediawiki\.org/', $prefix ) ) {
		// Quote every regex metacharacter (not only "/") so that dots, dashes
		// and the like in the namespace are matched literally.
		$this->mXmlNamespace = '/^' . preg_quote( $prefix, '/' ) . ':/';
	}
}
|
||||
|
||||
/**
 * Remove the recorded namespace prefix from an element name.
 *
 * @param $name string element name as reported by the parser
 * @return string $name with the first match of $this->mXmlNamespace removed,
 *   or $name unchanged when no namespace pattern has been recorded
 */
function stripXmlNamespace($name) {
	if( !$this->mXmlNamespace ) {
		// No pattern recorded: nothing to strip.
		return $name;
	}
	// Limit of 1: at most one occurrence is removed.
	return preg_replace( $this->mXmlNamespace, '', $name, 1 );
}
|
||||
|
||||
# --------------
|
||||
|
||||
function doImport() {
|
||||
if( empty( $this->mSource ) ) {
|
||||
return new WikiErrorMsg( "importnotext" );
|
||||
}
|
||||
|
||||
$parser = xml_parser_create_ns( "UTF-8" );
|
||||
|
||||
# case folding violates XML standard, turn it off
|
||||
xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
|
||||
|
||||
xml_set_object( $parser, $this );
|
||||
xml_set_element_handler( $parser, "in_start", "" );
|
||||
xml_set_start_namespace_decl_handler( $parser, "handleXmlNamespace" );
|
||||
|
||||
$offset = 0; // for context extraction on error reporting
|
||||
do {
|
||||
$chunk = $this->mSource->readChunk();
|
||||
if( !xml_parse( $parser, $chunk, $this->mSource->atEnd() ) ) {
|
||||
wfDebug( "WikiImporter::doImport encountered XML parsing error\n" );
|
||||
return new WikiXmlError( $parser, wfMsgHtml( 'import-parse-failure' ), $chunk, $offset );
|
||||
}
|
||||
$offset += strlen( $chunk );
|
||||
} while( $chunk !== false && !$this->mSource->atEnd() );
|
||||
xml_parser_free( $parser );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
function debug( $data ) {
|
||||
if( $this->mDebug ) {
|
||||
wfDebug( "IMPORT: $data\n" );
|
||||
}
|
||||
}
|
||||
|
||||
function notice( $data ) {
|
||||
global $wgCommandLineMode;
|
||||
if( $wgCommandLineMode ) {
|
||||
print "$data\n";
|
||||
} else {
|
||||
global $wgOut;
|
||||
$wgOut->addHTML( "<li>" . htmlspecialchars( $data ) . "</li>\n" );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set debug mode...
|
||||
*/
|
||||
function setDebug( $debug ) {
|
||||
$this->mDebug = $debug;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the action to perform as each new page in the stream is reached.
|
||||
* @param $callback callback
|
||||
* @return callback
|
||||
*/
|
||||
function setPageCallback( $callback ) {
|
||||
$previous = $this->mPageCallback;
|
||||
$this->mPageCallback = $callback;
|
||||
return $previous;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the action to perform as each page in the stream is completed.
|
||||
* Callback accepts the page title (as a Title object), a second object
|
||||
* with the original title form (in case it's been overridden into a
|
||||
* local namespace), and a count of revisions.
|
||||
*
|
||||
* @param $callback callback
|
||||
* @return callback
|
||||
*/
|
||||
function setPageOutCallback( $callback ) {
|
||||
$previous = $this->mPageOutCallback;
|
||||
$this->mPageOutCallback = $callback;
|
||||
return $previous;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the action to perform as each page revision is reached.
|
||||
* @param $callback callback
|
||||
* @return callback
|
||||
*/
|
||||
function setRevisionCallback( $callback ) {
|
||||
$previous = $this->mRevisionCallback;
|
||||
$this->mRevisionCallback = $callback;
|
||||
return $previous;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the action to perform as each file upload version is reached.
|
||||
* @param $callback callback
|
||||
* @return callback
|
||||
*/
|
||||
function setUploadCallback( $callback ) {
|
||||
$previous = $this->mUploadCallback;
|
||||
$this->mUploadCallback = $callback;
|
||||
return $previous;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the action to perform as each log item reached.
|
||||
* @param $callback callback
|
||||
* @return callback
|
||||
*/
|
||||
function setLogItemCallback( $callback ) {
|
||||
$previous = $this->mLogItemCallback;
|
||||
$this->mLogItemCallback = $callback;
|
||||
return $previous;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a target namespace to override the defaults
|
||||
*/
|
||||
function setTargetNamespace( $namespace ) {
|
||||
if( is_null( $namespace ) ) {
|
||||
// Don't override namespaces
|
||||
$this->mTargetNamespace = null;
|
||||
} elseif( $namespace >= 0 ) {
|
||||
// FIXME: Check for validity
|
||||
$this->mTargetNamespace = intval( $namespace );
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default per-revision callback, performs the import.
|
||||
* @param $revision WikiRevision
|
||||
* @private
|
||||
*/
|
||||
function importRevision( $revision ) {
|
||||
$dbw = wfGetDB( DB_MASTER );
|
||||
return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Default per-revision callback, performs the import.
|
||||
* @param $rev WikiRevision
|
||||
* @private
|
||||
*/
|
||||
function importLogItem( $rev ) {
|
||||
$dbw = wfGetDB( DB_MASTER );
|
||||
return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Dummy for now...
|
||||
*/
|
||||
function importUpload( $revision ) {
|
||||
//$dbw = wfGetDB( DB_MASTER );
|
||||
//return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Alternate per-revision callback, for debugging.
|
||||
* @param $revision WikiRevision
|
||||
* @private
|
||||
*/
|
||||
function debugRevisionHandler( &$revision ) {
|
||||
$this->debug( "Got revision:" );
|
||||
if( is_object( $revision->title ) ) {
|
||||
$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
|
||||
} else {
|
||||
$this->debug( "-- Title: <invalid>" );
|
||||
}
|
||||
$this->debug( "-- User: " . $revision->user_text );
|
||||
$this->debug( "-- Timestamp: " . $revision->timestamp );
|
||||
$this->debug( "-- Comment: " . $revision->comment );
|
||||
$this->debug( "-- Text: " . $revision->text );
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify the callback function when a new <page> is reached.
|
||||
* @param $title Title
|
||||
* @private
|
||||
*/
|
||||
function pageCallback( $title ) {
|
||||
if( is_callable( $this->mPageCallback ) ) {
|
||||
call_user_func( $this->mPageCallback, $title );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify the callback function when a </page> is closed.
|
||||
* @param $title Title
|
||||
* @param $origTitle Title
|
||||
* @param $revisionCount int
|
||||
* @param $successCount Int: number of revisions for which callback returned true
|
||||
* @private
|
||||
*/
|
||||
function pageOutCallback( $title, $origTitle, $revisionCount, $successCount ) {
|
||||
if( is_callable( $this->mPageOutCallback ) ) {
|
||||
call_user_func( $this->mPageOutCallback, $title, $origTitle,
|
||||
$revisionCount, $successCount );
|
||||
}
|
||||
}
|
||||
|
||||
# XML parser callbacks from here out -- beware!
|
||||
function donothing( $parser, $x, $y="" ) {
|
||||
#$this->debug( "donothing" );
|
||||
}
|
||||
|
||||
function in_start( $parser, $name, $attribs ) {
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
$this->debug( "in_start $name" );
|
||||
if( $name != "mediawiki" ) {
|
||||
return $this->throwXMLerror( "Expected <mediawiki>, got <$name>" );
|
||||
}
|
||||
xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
|
||||
}
|
||||
|
||||
function in_mediawiki( $parser, $name, $attribs ) {
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
$this->debug( "in_mediawiki $name" );
|
||||
if( $name == 'siteinfo' ) {
|
||||
xml_set_element_handler( $parser, "in_siteinfo", "out_siteinfo" );
|
||||
} elseif( $name == 'page' ) {
|
||||
$this->push( $name );
|
||||
$this->workRevisionCount = 0;
|
||||
$this->workSuccessCount = 0;
|
||||
$this->uploadCount = 0;
|
||||
$this->uploadSuccessCount = 0;
|
||||
xml_set_element_handler( $parser, "in_page", "out_page" );
|
||||
} elseif( $name == 'logitem' ) {
|
||||
$this->push( $name );
|
||||
$this->workRevision = new WikiRevision;
|
||||
xml_set_element_handler( $parser, "in_logitem", "out_logitem" );
|
||||
} else {
|
||||
return $this->throwXMLerror( "Expected <page>, got <$name>" );
|
||||
}
|
||||
}
|
||||
function out_mediawiki( $parser, $name ) {
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
$this->debug( "out_mediawiki $name" );
|
||||
if( $name != "mediawiki" ) {
|
||||
return $this->throwXMLerror( "Expected </mediawiki>, got </$name>" );
|
||||
}
|
||||
xml_set_element_handler( $parser, "donothing", "donothing" );
|
||||
}
|
||||
|
||||
|
||||
function in_siteinfo( $parser, $name, $attribs ) {
|
||||
// no-ops for now
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
$this->debug( "in_siteinfo $name" );
|
||||
switch( $name ) {
|
||||
case "sitename":
|
||||
case "base":
|
||||
case "generator":
|
||||
case "case":
|
||||
case "namespaces":
|
||||
case "namespace":
|
||||
break;
|
||||
default:
|
||||
return $this->throwXMLerror( "Element <$name> not allowed in <siteinfo>." );
|
||||
}
|
||||
}
|
||||
|
||||
function out_siteinfo( $parser, $name ) {
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
if( $name == "siteinfo" ) {
|
||||
xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
function in_page( $parser, $name, $attribs ) {
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
$this->debug( "in_page $name" );
|
||||
switch( $name ) {
|
||||
case "id":
|
||||
case "title":
|
||||
case "redirect":
|
||||
case "restrictions":
|
||||
$this->appendfield = $name;
|
||||
$this->appenddata = "";
|
||||
xml_set_element_handler( $parser, "in_nothing", "out_append" );
|
||||
xml_set_character_data_handler( $parser, "char_append" );
|
||||
break;
|
||||
case "revision":
|
||||
$this->push( "revision" );
|
||||
if( is_object( $this->pageTitle ) ) {
|
||||
$this->workRevision = new WikiRevision;
|
||||
$this->workRevision->setTitle( $this->pageTitle );
|
||||
$this->workRevisionCount++;
|
||||
} else {
|
||||
// Skipping items due to invalid page title
|
||||
$this->workRevision = null;
|
||||
}
|
||||
xml_set_element_handler( $parser, "in_revision", "out_revision" );
|
||||
break;
|
||||
case "upload":
|
||||
$this->push( "upload" );
|
||||
if( is_object( $this->pageTitle ) ) {
|
||||
$this->workRevision = new WikiRevision;
|
||||
$this->workRevision->setTitle( $this->pageTitle );
|
||||
$this->uploadCount++;
|
||||
} else {
|
||||
// Skipping items due to invalid page title
|
||||
$this->workRevision = null;
|
||||
}
|
||||
xml_set_element_handler( $parser, "in_upload", "out_upload" );
|
||||
break;
|
||||
default:
|
||||
return $this->throwXMLerror( "Element <$name> not allowed in a <page>." );
|
||||
}
|
||||
}
|
||||
|
||||
function out_page( $parser, $name ) {
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
$this->debug( "out_page $name" );
|
||||
$this->pop();
|
||||
if( $name != "page" ) {
|
||||
return $this->throwXMLerror( "Expected </page>, got </$name>" );
|
||||
}
|
||||
xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
|
||||
|
||||
$this->pageOutCallback( $this->pageTitle, $this->origTitle,
|
||||
$this->workRevisionCount, $this->workSuccessCount );
|
||||
|
||||
$this->workTitle = null;
|
||||
$this->workRevision = null;
|
||||
$this->workRevisionCount = 0;
|
||||
$this->workSuccessCount = 0;
|
||||
$this->pageTitle = null;
|
||||
$this->origTitle = null;
|
||||
}
|
||||
|
||||
function in_nothing( $parser, $name, $attribs ) {
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
$this->debug( "in_nothing $name" );
|
||||
return $this->throwXMLerror( "No child elements allowed here; got <$name>" );
|
||||
}
|
||||
|
||||
function char_append( $parser, $data ) {
|
||||
$this->debug( "char_append '$data'" );
|
||||
$this->appenddata .= $data;
|
||||
}
|
||||
|
||||
function out_append( $parser, $name ) {
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
$this->debug( "out_append $name" );
|
||||
if( $name != $this->appendfield ) {
|
||||
return $this->throwXMLerror( "Expected </{$this->appendfield}>, got </$name>" );
|
||||
}
|
||||
|
||||
switch( $this->appendfield ) {
|
||||
case "title":
|
||||
$this->workTitle = $this->appenddata;
|
||||
$this->origTitle = Title::newFromText( $this->workTitle );
|
||||
if( !is_null( $this->mTargetNamespace ) && !is_null( $this->origTitle ) ) {
|
||||
$this->pageTitle = Title::makeTitle( $this->mTargetNamespace,
|
||||
$this->origTitle->getDBkey() );
|
||||
} else {
|
||||
$this->pageTitle = Title::newFromText( $this->workTitle );
|
||||
}
|
||||
if( is_null( $this->pageTitle ) ) {
|
||||
// Invalid page title? Ignore the page
|
||||
$this->notice( "Skipping invalid page title '$this->workTitle'" );
|
||||
} elseif( $this->pageTitle->getInterwiki() != '' ) {
|
||||
$this->notice( "Skipping interwiki page title '$this->workTitle'" );
|
||||
$this->pageTitle = null;
|
||||
} else {
|
||||
$this->pageCallback( $this->workTitle );
|
||||
}
|
||||
break;
|
||||
case "id":
|
||||
if ( $this->parentTag() == 'revision' || $this->parentTag() == 'logitem' ) {
|
||||
if( $this->workRevision )
|
||||
$this->workRevision->setID( $this->appenddata );
|
||||
}
|
||||
break;
|
||||
case "text":
|
||||
if( $this->workRevision )
|
||||
$this->workRevision->setText( $this->appenddata );
|
||||
break;
|
||||
case "username":
|
||||
if( $this->workRevision )
|
||||
$this->workRevision->setUsername( $this->appenddata );
|
||||
break;
|
||||
case "ip":
|
||||
if( $this->workRevision )
|
||||
$this->workRevision->setUserIP( $this->appenddata );
|
||||
break;
|
||||
case "timestamp":
|
||||
if( $this->workRevision )
|
||||
$this->workRevision->setTimestamp( $this->appenddata );
|
||||
break;
|
||||
case "comment":
|
||||
if( $this->workRevision )
|
||||
$this->workRevision->setComment( $this->appenddata );
|
||||
break;
|
||||
case "type":
|
||||
if( $this->workRevision )
|
||||
$this->workRevision->setType( $this->appenddata );
|
||||
break;
|
||||
case "action":
|
||||
if( $this->workRevision )
|
||||
$this->workRevision->setAction( $this->appenddata );
|
||||
break;
|
||||
case "logtitle":
|
||||
if( $this->workRevision )
|
||||
$this->workRevision->setTitle( Title::newFromText( $this->appenddata ) );
|
||||
break;
|
||||
case "params":
|
||||
if( $this->workRevision )
|
||||
$this->workRevision->setParams( $this->appenddata );
|
||||
break;
|
||||
case "minor":
|
||||
if( $this->workRevision )
|
||||
$this->workRevision->setMinor( true );
|
||||
break;
|
||||
case "filename":
|
||||
if( $this->workRevision )
|
||||
$this->workRevision->setFilename( $this->appenddata );
|
||||
break;
|
||||
case "src":
|
||||
if( $this->workRevision )
|
||||
$this->workRevision->setSrc( $this->appenddata );
|
||||
break;
|
||||
case "size":
|
||||
if( $this->workRevision )
|
||||
$this->workRevision->setSize( intval( $this->appenddata ) );
|
||||
break;
|
||||
default:
|
||||
$this->debug( "Bad append: {$this->appendfield}" );
|
||||
}
|
||||
$this->appendfield = "";
|
||||
$this->appenddata = "";
|
||||
|
||||
$parent = $this->parentTag();
|
||||
xml_set_element_handler( $parser, "in_$parent", "out_$parent" );
|
||||
xml_set_character_data_handler( $parser, "donothing" );
|
||||
}
|
||||
|
||||
function in_revision( $parser, $name, $attribs ) {
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
$this->debug( "in_revision $name" );
|
||||
switch( $name ) {
|
||||
case "id":
|
||||
case "timestamp":
|
||||
case "comment":
|
||||
case "minor":
|
||||
case "text":
|
||||
$this->appendfield = $name;
|
||||
xml_set_element_handler( $parser, "in_nothing", "out_append" );
|
||||
xml_set_character_data_handler( $parser, "char_append" );
|
||||
break;
|
||||
case "contributor":
|
||||
$this->push( "contributor" );
|
||||
xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
|
||||
break;
|
||||
default:
|
||||
return $this->throwXMLerror( "Element <$name> not allowed in a <revision>." );
|
||||
}
|
||||
}
|
||||
|
||||
function out_revision( $parser, $name ) {
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
$this->debug( "out_revision $name" );
|
||||
$this->pop();
|
||||
if( $name != "revision" ) {
|
||||
return $this->throwXMLerror( "Expected </revision>, got </$name>" );
|
||||
}
|
||||
xml_set_element_handler( $parser, "in_page", "out_page" );
|
||||
|
||||
if( $this->workRevision ) {
|
||||
$ok = call_user_func_array( $this->mRevisionCallback,
|
||||
array( $this->workRevision, $this ) );
|
||||
if( $ok ) {
|
||||
$this->workSuccessCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Start-element handler used while inside a <logitem>.
 *
 * Plain text fields are routed to the append handlers (char_append /
 * out_append); <contributor> is pushed on the tag stack and gets its own
 * handlers; any other element is reported through throwXmlError().
 *
 * @param $parser resource
 * @param $name string element name, possibly namespace-prefixed
 * @param $attribs array element attributes (unused)
 */
function in_logitem( $parser, $name, $attribs ) {
	$name = $this->stripXmlNamespace($name);
	$this->debug( "in_logitem $name" );
	switch( $name ) {
		case "id":
		case "timestamp":
		case "comment":
		case "type":
		case "action":
		case "logtitle":
		case "params":
			$this->appendfield = $name;
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "contributor":
			$this->push( "contributor" );
			xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
			break;
		default:
			// The message names <logitem>, the element actually being parsed here.
			return $this->throwXMLerror( "Element <$name> not allowed in a <logitem>." );
	}
}
|
||||
|
||||
function out_logitem( $parser, $name ) {
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
$this->debug( "out_logitem $name" );
|
||||
$this->pop();
|
||||
if( $name != "logitem" ) {
|
||||
return $this->throwXMLerror( "Expected </logitem>, got </$name>" );
|
||||
}
|
||||
xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
|
||||
|
||||
if( $this->workRevision ) {
|
||||
$ok = call_user_func_array( $this->mLogItemCallback,
|
||||
array( $this->workRevision, $this ) );
|
||||
if( $ok ) {
|
||||
$this->workSuccessCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function in_upload( $parser, $name, $attribs ) {
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
$this->debug( "in_upload $name" );
|
||||
switch( $name ) {
|
||||
case "timestamp":
|
||||
case "comment":
|
||||
case "text":
|
||||
case "filename":
|
||||
case "src":
|
||||
case "size":
|
||||
$this->appendfield = $name;
|
||||
xml_set_element_handler( $parser, "in_nothing", "out_append" );
|
||||
xml_set_character_data_handler( $parser, "char_append" );
|
||||
break;
|
||||
case "contributor":
|
||||
$this->push( "contributor" );
|
||||
xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
|
||||
break;
|
||||
default:
|
||||
return $this->throwXMLerror( "Element <$name> not allowed in an <upload>." );
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * End-element handler used while inside an <upload>.
 *
 * Pops the tag stack, hands parsing back to the <page> handlers and, when a
 * work revision exists, passes it to the upload callback. Each truthy
 * callback result is counted in $this->uploadSuccessCount, the counter that
 * in_mediawiki() resets when a <page> starts.
 *
 * @param $parser resource
 * @param $name string element name, possibly namespace-prefixed
 */
function out_upload( $parser, $name ) {
	$name = $this->stripXmlNamespace($name);
	$this->debug( "out_upload $name" );
	$this->pop();
	if( $name != "upload" ) {
		return $this->throwXMLerror( "Expected </upload>, got </$name>" );
	}
	xml_set_element_handler( $parser, "in_page", "out_page" );

	if( $this->workRevision ) {
		$ok = call_user_func_array( $this->mUploadCallback,
			array( $this->workRevision, $this ) );
		if( $ok ) {
			$this->uploadSuccessCount++;
		}
	}
}
|
||||
|
||||
function in_contributor( $parser, $name, $attribs ) {
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
$this->debug( "in_contributor $name" );
|
||||
switch( $name ) {
|
||||
case "username":
|
||||
case "ip":
|
||||
case "id":
|
||||
$this->appendfield = $name;
|
||||
xml_set_element_handler( $parser, "in_nothing", "out_append" );
|
||||
xml_set_character_data_handler( $parser, "char_append" );
|
||||
break;
|
||||
default:
|
||||
$this->throwXMLerror( "Invalid tag <$name> in <contributor>" );
|
||||
}
|
||||
}
|
||||
|
||||
function out_contributor( $parser, $name ) {
|
||||
$name = $this->stripXmlNamespace($name);
|
||||
$this->debug( "out_contributor $name" );
|
||||
$this->pop();
|
||||
if( $name != "contributor" ) {
|
||||
return $this->throwXMLerror( "Expected </contributor>, got </$name>" );
|
||||
}
|
||||
$parent = $this->parentTag();
|
||||
xml_set_element_handler( $parser, "in_$parent", "out_$parent" );
|
||||
}
|
||||
|
||||
private function push( $name ) {
|
||||
array_push( $this->tagStack, $name );
|
||||
$this->debug( "PUSH $name" );
|
||||
}
|
||||
|
||||
private function pop() {
|
||||
$name = array_pop( $this->tagStack );
|
||||
$this->debug( "POP $name" );
|
||||
return $name;
|
||||
}
|
||||
|
||||
private function parentTag() {
|
||||
$name = $this->tagStack[count( $this->tagStack ) - 1];
|
||||
$this->debug( "PARENT $name" );
|
||||
return $name;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @todo document (e.g. one-sentence class description).
|
||||
* @ingroup SpecialPage
|
||||
|
|
|
|||
703
includes/ImportXMLReader.php
Normal file
703
includes/ImportXMLReader.php
Normal file
|
|
@ -0,0 +1,703 @@
|
|||
<?php
|
||||
/**
|
||||
* implements Special:Import
|
||||
* @ingroup SpecialPage
|
||||
*/
|
||||
class ImportXMLReader {
|
||||
private $reader = null;
|
||||
private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
|
||||
private $mSiteInfoCallback, $mTargetNamespace, $mPageOutCallback;
|
||||
private $mDebug;
|
||||
|
||||
/**
 * Creates the XMLReader, points it at $source through the "uploadsource"
 * stream wrapper, and installs the default revision, upload and log item
 * callbacks.
 *
 * @param $source mixed import source handed to UploadSourceAdapter::registerSource()
 */
function __construct( $source ) {
	$this->reader = new XMLReader();

	// Registering a protocol that already has a handler fails with a warning,
	// so only register the wrapper the first time an importer is created.
	if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
		stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
	}
	$id = UploadSourceAdapter::registerSource( $source );
	$this->reader->open( "uploadsource://$id" );

	// Default callbacks
	$this->setRevisionCallback( array( $this, "importRevision" ) );
	$this->setUploadCallback( array( $this, 'importUpload' ) );
	$this->setLogItemCallback( array( $this, 'importLogItem' ) );
}
|
||||
|
||||
function throwXmlError( $err ) {
|
||||
$this->debug( "FAILURE: $err" );
|
||||
wfDebug( "WikiImporter XML error: $err\n" );
|
||||
}
|
||||
|
||||
function debug( $data ) {
|
||||
if( $this->mDebug ) {
|
||||
wfDebug( "IMPORT: $data\n" );
|
||||
}
|
||||
}
|
||||
|
||||
function warn( $data ) {
|
||||
wfDebug( "IMPORT: $data\n" );
|
||||
}
|
||||
|
||||
function notice( $data ) {
|
||||
global $wgCommandLineMode;
|
||||
if( $wgCommandLineMode ) {
|
||||
print "$data\n";
|
||||
} else {
|
||||
global $wgOut;
|
||||
$wgOut->addHTML( "<li>" . htmlspecialchars( $data ) . "</li>\n" );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set debug mode...
|
||||
*/
|
||||
function setDebug( $debug ) {
|
||||
$this->mDebug = $debug;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the action to perform as each new page in the stream is reached.
|
||||
* @param $callback callback
|
||||
* @return callback
|
||||
*/
|
||||
function setPageCallback( $callback ) {
|
||||
$previous = $this->mPageCallback;
|
||||
$this->mPageCallback = $callback;
|
||||
return $previous;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the action to perform as each page in the stream is completed.
|
||||
* Callback accepts the page title (as a Title object), a second object
|
||||
* with the original title form (in case it's been overridden into a
|
||||
* local namespace), and a count of revisions.
|
||||
*
|
||||
* @param $callback callback
|
||||
* @return callback
|
||||
*/
|
||||
function setPageOutCallback( $callback ) {
|
||||
$previous = $this->mPageOutCallback;
|
||||
$this->mPageOutCallback = $callback;
|
||||
return $previous;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the action to perform as each page revision is reached.
|
||||
* @param $callback callback
|
||||
* @return callback
|
||||
*/
|
||||
function setRevisionCallback( $callback ) {
|
||||
$previous = $this->mRevisionCallback;
|
||||
$this->mRevisionCallback = $callback;
|
||||
return $previous;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the action to perform as each file upload version is reached.
|
||||
* @param $callback callback
|
||||
* @return callback
|
||||
*/
|
||||
function setUploadCallback( $callback ) {
|
||||
$previous = $this->mUploadCallback;
|
||||
$this->mUploadCallback = $callback;
|
||||
return $previous;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the action to perform as each log item reached.
|
||||
* @param $callback callback
|
||||
* @return callback
|
||||
*/
|
||||
function setLogItemCallback( $callback ) {
|
||||
$previous = $this->mLogItemCallback;
|
||||
$this->mLogItemCallback = $callback;
|
||||
return $previous;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the action to perform when site info is encountered
|
||||
* @param $callback callback
|
||||
* @return callback
|
||||
*/
|
||||
function setSiteInfoCallback( $callback ) {
|
||||
$previous = $this->mSiteInfoCallback;
|
||||
$this->mSiteInfoCallback = $callback;
|
||||
return $previous;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a target namespace to override the defaults
|
||||
*/
|
||||
function setTargetNamespace( $namespace ) {
|
||||
if( is_null( $namespace ) ) {
|
||||
// Don't override namespaces
|
||||
$this->mTargetNamespace = null;
|
||||
} elseif( $namespace >= 0 ) {
|
||||
// FIXME: Check for validity
|
||||
$this->mTargetNamespace = intval( $namespace );
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default per-revision callback, performs the import.
|
||||
* @param $revision WikiRevision
|
||||
* @private
|
||||
*/
|
||||
function importRevision( $revision ) {
|
||||
$dbw = wfGetDB( DB_MASTER );
|
||||
return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Default per-revision callback, performs the import.
|
||||
* @param $rev WikiRevision
|
||||
* @private
|
||||
*/
|
||||
function importLogItem( $rev ) {
|
||||
$dbw = wfGetDB( DB_MASTER );
|
||||
return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Dummy for now...
|
||||
*/
|
||||
function importUpload( $revision ) {
|
||||
//$dbw = wfGetDB( DB_MASTER );
|
||||
//return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Alternate per-revision callback, for debugging.
|
||||
* @param $revision WikiRevision
|
||||
* @private
|
||||
*/
|
||||
function debugRevisionHandler( &$revision ) {
|
||||
$this->debug( "Got revision:" );
|
||||
if( is_object( $revision->title ) ) {
|
||||
$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
|
||||
} else {
|
||||
$this->debug( "-- Title: <invalid>" );
|
||||
}
|
||||
$this->debug( "-- User: " . $revision->user_text );
|
||||
$this->debug( "-- Timestamp: " . $revision->timestamp );
|
||||
$this->debug( "-- Comment: " . $revision->comment );
|
||||
$this->debug( "-- Text: " . $revision->text );
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify the callback function when a new <page> is reached.
|
||||
* @param $title Title
|
||||
* @private
|
||||
*/
|
||||
function pageCallback( $title ) {
|
||||
if( is_callable( $this->mPageCallback ) ) {
|
||||
call_user_func( $this->mPageCallback, $title );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify the callback function when a </page> is closed.
|
||||
* @param $title Title
|
||||
* @param $origTitle Title
|
||||
* @param $revisionCount int
|
||||
* @param $successCount Int: number of revisions for which callback returned true
|
||||
* @private
|
||||
*/
|
||||
function pageOutCallback( $title, $origTitle, $revisionCount, $successCount ) {
|
||||
if( is_callable( $this->mPageOutCallback ) ) {
|
||||
call_user_func_array( $this->mPageOutCallback,
|
||||
array( $title, $origTitle, $revisionCount, $successCount ) );
|
||||
}
|
||||
}
|
||||
|
||||
function revisionCallback( $revision ) {
|
||||
if ( is_callable( $this->mRevisionCallback ) ) {
|
||||
return call_user_func_array( $this->mRevisionCallback,
|
||||
array( $revision, $this ) );
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function logItemCallback( $revision ) {
|
||||
if ( is_callable( $this->mLogItemCallback ) ) {
|
||||
return call_user_func_array( $this->mLogItemCallback,
|
||||
array( $revision, $this ) );
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Shouldn't something like this be built-in to XMLReader?
 * Fetches text contents of the current element, assuming
 * no sub-elements or such scary things.
 *
 * Text, CDATA and significant-whitespace nodes are concatenated until the
 * next end-element node is reached. If the stream ends first, the reader is
 * closed and an empty string is returned.
 *
 * @return string
 * @access private
 */
function nodeContents() {
	if( $this->reader->isEmptyElement ) {
		return "";
	}
	$buffer = "";
	while( $this->reader->read() ) {
		switch( $this->reader->nodeType ) {
			case XmlReader::TEXT:
			case XmlReader::CDATA:
			case XmlReader::SIGNIFICANT_WHITESPACE:
				$buffer .= $this->reader->value;
				break;
			case XmlReader::END_ELEMENT:
				return $buffer;
		}
	}

	// Ran out of input before the closing tag: release the reader and keep
	// the documented string return type.
	$this->reader->close();
	return "";
}
|
||||
|
||||
# --------------
|
||||
|
||||
/**
 * Debugging aid: dumps the type name, name and value of the node the reader
 * is currently positioned on, followed by a blank line.
 */
function dumpElement() {
	// Node-type constant value => constant name, built once and reused.
	static $lookup = null;
	if ( !$lookup ) {
		$lookup = array();
		$typeNames = array(
			"NONE",
			"ELEMENT",
			"ATTRIBUTE",
			"TEXT",
			"CDATA",
			"ENTITY_REF",
			"ENTITY",
			"PI",
			"COMMENT",
			"DOC",
			"DOC_TYPE",
			"DOC_FRAGMENT",
			"NOTATION",
			"WHITESPACE",
			"SIGNIFICANT_WHITESPACE",
			"END_ELEMENT",
			"END_ENTITY",
			"XML_DECLARATION",
		);
		foreach ( $typeNames as $typeName ) {
			$lookup[constant( "XmlReader::$typeName" )] = $typeName;
		}
	}

	// var_dump() writes its output directly, so the trailing newlines are
	// printed as a separate step.
	var_dump(
		$lookup[$this->reader->nodeType],
		$this->reader->name,
		$this->reader->value
	);
	print "\n\n";
}
|
||||
|
||||
/**
 * Runs the import: checks that the document starts with <mediawiki> and
 * then dispatches every top-level node to its handler until the closing
 * </mediawiki> tag or the end of the input.
 *
 * The 'ImportHandleToplevelXMLTag' hook is run for every node first; when it
 * returns false the node is considered handled and the built-in dispatch is
 * skipped for it.
 *
 * @return bool Always true
 * @throws MWException if the first node read is not named 'mediawiki'
 */
function doImport() {
	$this->reader->read();

	if ( $this->reader->name != 'mediawiki' ) {
		throw new MWException( "Expected <mediawiki> tag, got ".
			$this->reader->name );
	}
	$this->debug( "<mediawiki> tag is correct." );

	$this->debug( "Starting primary dump processing loop." );

	$keepReading = $this->reader->read();
	$skip = false;
	while ( $keepReading ) {
		$tag = $this->reader->name;
		$type = $this->reader->nodeType;

		// wfRunHooks() takes the hook arguments as one array, not as the
		// bare object.
		if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this->reader ) ) ) {
			// Do nothing
		} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
			break;
		} elseif ( $tag == 'siteinfo' ) {
			$this->handleSiteInfo();
		} elseif ( $tag == 'page' ) {
			$this->handlePage();
		} elseif ( $tag == 'logitem' ) {
			$this->handleLogItem();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled top-level XML tag $tag" );

			// Jump over the unknown element's whole subtree
			$skip = true;
		}

		if ( $skip ) {
			$keepReading = $this->reader->next();
			$skip = false;
			$this->debug( "Skip" );
		} else {
			$keepReading = $this->reader->read();
		}
	}

	return true;
}
|
||||
|
||||
/**
 * Deals with the <siteinfo> element.
 *
 * Site info is not used for dump imports, so unless a site-info callback
 * has been set the element is simply stepped over with XMLReader::next().
 *
 * @return bool true once the element has been skipped
 * @throws MWException if mSiteInfoCallback is set, as that path is not implemented
 */
function handleSiteInfo() {
	if ( $this->mSiteInfoCallback ) {
		throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
	}

	// Quick short-circuit: nothing wants the data, so don't parse it.
	$this->reader->next();

	return true;
}
|
||||
|
||||
/**
 * Reads one <logitem> element and hands the collected data to
 * processLogItem().
 *
 * For every node inside the element the 'ImportHandleLogItemXMLTag' hook is
 * run first, with the reader and a reference to the collected data. When
 * the hook returns false the node counts as handled and is not processed
 * any further here.
 */
function handleLogItem() {
	$this->debug( "Enter log item handler." );
	$logInfo = array();

	// Fields that can just be stuffed in the logInfo array
	$normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
		'logtitle', 'params' );

	while ( $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'logitem' ) {
			break;
		}

		$tag = $this->reader->name;

		// Hook arguments go into a single array. Putting the reference in the
		// array literal replaces call-time pass-by-reference, which PHP no
		// longer accepts.
		if ( !wfRunHooks( 'ImportHandleLogItemXMLTag',
				array( $this->reader, &$logInfo ) ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$logInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$logInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled log-item XML tag $tag" );
		}
	}

	$this->processLogItem( $logInfo );
}
|
||||
|
||||
/**
 * Builds a WikiRevision from the data collected for one log item and passes
 * it to logItemCallback().
 *
 * @param $logInfo Array of log item fields as gathered by handleLogItem():
 *   'id', 'type', 'action', 'timestamp', 'params' and 'logtitle' are read
 *   unconditionally; 'comment' and the contributor's 'ip' / 'username' only
 *   when present
 * @return mixed The return value of logItemCallback()
 */
function processLogItem( $logInfo ) {
	$logEntry = new WikiRevision;

	// Setters that are always called, in this order, with the matching field
	$plainSetters = array(
		'setID' => 'id',
		'setType' => 'type',
		'setAction' => 'action',
		'setTimestamp' => 'timestamp',
		'setParams' => 'params',
	);
	foreach ( $plainSetters as $setter => $field ) {
		$logEntry->$setter( $logInfo[$field] );
	}
	$logEntry->setTitle( Title::newFromText( $logInfo['logtitle'] ) );

	if ( isset( $logInfo['comment'] ) ) {
		$logEntry->setComment( $logInfo['comment'] );
	}

	// Contributor details are optional as well
	$contributorSetters = array(
		'ip' => 'setUserIP',
		'username' => 'setUserName',
	);
	foreach ( $contributorSetters as $field => $setter ) {
		if ( isset( $logInfo['contributor'][$field] ) ) {
			$logEntry->$setter( $logInfo['contributor'][$field] );
		}
	}

	return $this->logItemCallback( $logEntry );
}
|
||||
|
||||
/**
 * Reads one <page> element: collects the simple page fields, resolves the
 * title and dispatches <revision> and <upload> children to their handlers.
 *
 * For every node the 'ImportHandlePageXMLTag' hook is run first, with the
 * reader and a reference to the page data. When it returns false the node
 * counts as handled. Once the title turns out to be unusable, the remaining
 * children of the page are skipped.
 *
 * pageOutCallback() is called at the end with the target title, the original
 * title and the revision counters, but only if a usable title was found.
 */
function handlePage() {
	// Handle page data.
	$this->debug( "Enter page handler." );
	$pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );

	// Fields that can just be stuffed in the pageInfo object
	$normalFields = array( 'title', 'id', 'redirect', 'restrictions' );

	$skip = false;
	$badTitle = false;
	$origTitle = null;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'page' ) {
			break;
		}

		// A skip request only applies to the node it was raised for
		$skip = false;

		$tag = $this->reader->name;

		if ( $badTitle ) {
			// The title is invalid, bail out of this page
			$skip = true;
		} elseif ( !wfRunHooks( 'ImportHandlePageXMLTag',
				array( $this->reader, &$pageInfo ) ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$pageInfo[$tag] = $this->nodeContents();
			if ( $tag == 'title' ) {
				$titles = $this->processTitle( $pageInfo['title'] );

				$this->pageCallback( $titles );

				// processTitle() returns array( original title, target title );
				// the target title is null when the page has to be ignored.
				list( $origTitle, $targetTitle ) = $titles;

				if ( is_null( $targetTitle ) ) {
					$badTitle = true;
					$skip = true;
				} else {
					$pageInfo['_title'] = $targetTitle;
				}
			}
		} elseif ( $tag == 'revision' ) {
			$this->handleRevision( $pageInfo );
		} elseif ( $tag == 'upload' ) {
			$this->handleUpload( $pageInfo );
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled page XML tag $tag" );
			$skip = true;
		}
	}

	// Without a usable title there is nothing to report
	if ( isset( $pageInfo['_title'] ) ) {
		$this->pageOutCallback( $pageInfo['_title'], $origTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'] );
	}
}
|
||||
|
||||
/**
 * Reads one <revision> element, imports it through processRevision() and
 * updates the page's revision counters.
 *
 * For every node the 'ImportHandleRevisionXMLTag' hook is run first, with
 * the reader and references to the page and revision data. When it returns
 * false the node counts as handled. Unknown child elements are skipped with
 * a warning.
 *
 * @param $pageInfo Array of page data, by reference: 'revisionCount' is
 *   always incremented, 'successfulRevisionCount' only when
 *   processRevision() returns a true value
 */
function handleRevision( &$pageInfo ) {
	$this->debug( "Enter revision handler" );
	$revisionInfo = array();

	// Fields that can just be stuffed in the revisionInfo array
	$normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'text' );

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'revision' ) {
			break;
		}

		// A skip request only applies to the node it was raised for
		$skip = false;

		$tag = $this->reader->name;

		// Hook arguments go into a single array; the references live in the
		// array literal instead of being passed by reference at call time.
		if ( !wfRunHooks( 'ImportHandleRevisionXMLTag',
				array( $this->reader, &$pageInfo, &$revisionInfo ) ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$revisionInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$revisionInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled revision XML tag $tag" );
			$skip = true;
		}
	}

	$pageInfo['revisionCount']++;
	if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
		$pageInfo['successfulRevisionCount']++;
	}
}
|
||||
|
||||
/**
 * Builds a WikiRevision from the collected revision data and passes it to
 * revisionCallback().
 *
 * @param $pageInfo Array of page data; only '_title' is read here
 * @param $revisionInfo Array of revision fields: 'id', 'text' and
 *   'timestamp' are read unconditionally; 'comment', 'minor' and the
 *   contributor's 'ip' / 'username' only when present
 * @return mixed The return value of revisionCallback()
 */
function processRevision( $pageInfo, $revisionInfo ) {
	$rev = new WikiRevision;

	$rev->setID( $revisionInfo['id'] );
	$rev->setText( $revisionInfo['text'] );
	$rev->setTitle( $pageInfo['_title'] );
	$rev->setTimestamp( $revisionInfo['timestamp'] );

	if ( isset( $revisionInfo['comment'] ) ) {
		$rev->setComment( $revisionInfo['comment'] );
	}

	// The mere presence of the 'minor' field marks the edit as minor
	if ( isset( $revisionInfo['minor'] ) ) {
		$rev->setMinor( true );
	}

	$contributorSetters = array(
		'ip' => 'setUserIP',
		'username' => 'setUserName',
	);
	foreach ( $contributorSetters as $field => $setter ) {
		if ( isset( $revisionInfo['contributor'][$field] ) ) {
			$rev->$setter( $revisionInfo['contributor'][$field] );
		}
	}

	return $this->revisionCallback( $rev );
}
|
||||
|
||||
/**
 * Reads one <upload> element and hands the collected data to
 * processUpload().
 *
 * For every node the 'ImportHandleUploadXMLTag' hook is run first, with the
 * reader and references to the page and upload data. When it returns false
 * the node counts as handled. Unknown child elements are skipped with a
 * warning.
 *
 * @param $pageInfo Array of page data, by reference
 * @return mixed The return value of processUpload()
 */
function handleUpload( &$pageInfo ) {
	$this->debug( "Enter upload handler" );
	$uploadInfo = array();

	// Fields that can just be stuffed in the uploadInfo array
	$normalFields = array( 'timestamp', 'comment', 'filename', 'text',
		'src', 'size' );

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'upload' ) {
			break;
		}

		// A skip request only applies to the node it was raised for
		$skip = false;

		$tag = $this->reader->name;

		// The hook gets the upload data being collected here. Arguments go
		// into a single array, with the references in the array literal.
		if ( !wfRunHooks( 'ImportHandleUploadXMLTag',
				array( $this->reader, &$pageInfo, &$uploadInfo ) ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$uploadInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$uploadInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled upload XML tag $tag" );
			$skip = true;
		}
	}

	return $this->processUpload( $pageInfo, $uploadInfo );
}
|
||||
|
||||
/**
 * Builds a WikiRevision from the collected upload data and passes it to
 * uploadCallback().
 *
 * @param $pageInfo Array of page data; only '_title' is read here
 * @param $uploadInfo Array of upload fields: 'timestamp', 'text',
 *   'filename', 'src' and 'size' are read unconditionally; 'id', 'comment'
 *   and the contributor's 'ip' / 'username' only when present
 * @return mixed The return value of uploadCallback()
 */
function processUpload( $pageInfo, $uploadInfo ) {
	$revision = new WikiRevision;

	$revision->setTitle( $pageInfo['_title'] );
	// handleUpload() does not collect an 'id' field, so it is normally
	// absent; setID() still receives null in that case, just without an
	// undefined-index notice.
	$revision->setID( isset( $uploadInfo['id'] ) ? $uploadInfo['id'] : null );
	$revision->setTimestamp( $uploadInfo['timestamp'] );
	$revision->setText( $uploadInfo['text'] );
	$revision->setFilename( $uploadInfo['filename'] );
	$revision->setSrc( $uploadInfo['src'] );
	$revision->setSize( intval( $uploadInfo['size'] ) );

	// The comment is optional, as in processRevision()
	if ( isset( $uploadInfo['comment'] ) ) {
		$revision->setComment( $uploadInfo['comment'] );
	}

	// Contributor details come from the upload data itself
	if ( isset( $uploadInfo['contributor']['ip'] ) ) {
		$revision->setUserIP( $uploadInfo['contributor']['ip'] );
	}
	if ( isset( $uploadInfo['contributor']['username'] ) ) {
		$revision->setUserName( $uploadInfo['contributor']['username'] );
	}

	return $this->uploadCallback( $revision );
}
|
||||
|
||||
/**
 * Reads a <contributor> element and collects its 'id', 'ip' and 'username'
 * children. Any other node inside the element is ignored.
 *
 * @return array Map of field name => text content for the fields found
 */
function handleContributor() {
	$info = array();
	$fields = array( 'id', 'ip', 'username' );

	while ( $this->reader->read() ) {
		$isClosingTag = $this->reader->nodeType == XmlReader::END_ELEMENT
			&& $this->reader->name == 'contributor';
		if ( $isClosingTag ) {
			break;
		}

		$tag = $this->reader->name;
		if ( !in_array( $tag, $fields ) ) {
			continue;
		}

		$info[$tag] = $this->nodeContents();
	}

	return $info;
}
|
||||
|
||||
/**
 * Resolves the title text found in the dump.
 *
 * If a target namespace is set and the text parses as a title, the target
 * title is built from that namespace and the parsed title's DB key;
 * otherwise the text is parsed as it stands. Invalid and interwiki titles
 * are reported through notice() and give a null target title.
 *
 * @param $text Title text as read from the dump
 * @return array array( original title, target title ); the target title is
 *   null when the page should be ignored
 */
function processTitle( $text ) {
	$workTitle = $text;
	$origTitle = Title::newFromText( $workTitle );

	$moveToTargetNamespace = !is_null( $this->mTargetNamespace )
		&& !is_null( $origTitle );

	$title = $moveToTargetNamespace
		? Title::makeTitle( $this->mTargetNamespace, $origTitle->getDBkey() )
		: Title::newFromText( $workTitle );

	if ( is_null( $title ) ) {
		// Invalid page title? Ignore the page
		$this->notice( "Skipping invalid page title '$workTitle'" );
	} elseif ( $title->getInterwiki() != '' ) {
		$this->notice( "Skipping interwiki page title '$workTitle'" );
		$title = null;
	}

	return array( $origTitle, $title );
}
|
||||
}
|
||||
|
||||
/**
 * Read-only stream wrapper class that exposes an import source as a PHP
 * stream.
 *
 * A source is any object offering atEnd() and readChunk(). It is registered
 * with registerSource(), which returns an id; stream_open() looks the source
 * up again using the host part of the opened URL as that id.
 */
class UploadSourceAdapter {
	// Registered sources, keyed by the id handed out by registerSource()
	static $sourceRegistrations = array();

	// Source object this stream reads from (set by stream_open())
	private $mSource;
	// Data already read from the source but not yet returned to the caller
	private $mBuffer = '';
	// Number of bytes returned by stream_read() so far
	private $mPosition = 0;

	/**
	 * Registers a source so that it can be opened as a stream later on.
	 *
	 * @param $source Object providing atEnd() and readChunk()
	 * @return string Id of the registration, to be used as the host part of the stream URL
	 */
	static function registerSource( $source ) {
		$id = wfGenerateToken();

		self::$sourceRegistrations[$id] = $source;

		return $id;
	}

	/**
	 * Opens the stream for the source whose id is the host part of $path.
	 *
	 * @param $path URL being opened
	 * @param $mode Unused
	 * @param $options Unused
	 * @param $opened_path Unused
	 * @return bool false if the URL cannot be parsed, has no host part or
	 *   names an unknown source; true otherwise
	 */
	function stream_open( $path, $mode, $options, &$opened_path ) {
		$url = parse_url( $path );

		// parse_url() returns false for seriously malformed URLs
		if ( !is_array( $url ) || !isset( $url['host'] ) ) {
			return false;
		}

		$id = $url['host'];

		if ( !isset( self::$sourceRegistrations[$id] ) ) {
			return false;
		}

		$this->mSource = self::$sourceRegistrations[$id];

		return true;
	}

	/**
	 * Returns up to $count bytes, pulling chunks from the source until
	 * enough data is buffered, the source is exhausted or it delivers an
	 * empty chunk.
	 *
	 * @param $count Maximum number of bytes to return
	 * @return string The data read; "" when nothing is available
	 */
	function stream_read( $count ) {
		$return = '';
		$leave = false;

		// The buffer is a string, so its size is strlen(), not count()
		while ( !$leave && !$this->mSource->atEnd() &&
				strlen( $this->mBuffer ) < $count ) {
			$read = $this->mSource->readChunk();

			if ( !is_string( $read ) || $read === '' ) {
				// Nothing delivered; give up instead of spinning
				$leave = true;
			} else {
				$this->mBuffer .= $read;
			}
		}

		if ( strlen( $this->mBuffer ) ) {
			$return = substr( $this->mBuffer, 0, $count );
			// substr() may return false once the buffer is used up; the
			// buffer must stay a string
			$this->mBuffer = (string)substr( $this->mBuffer, $count );
		}

		$this->mPosition += strlen( $return );

		return $return;
	}

	/**
	 * Writing is not supported.
	 *
	 * @param $data Ignored
	 * @return bool Always false
	 */
	function stream_write( $data ) {
		return false;
	}

	/**
	 * @return int Number of bytes returned by stream_read() so far
	 */
	function stream_tell() {
		return $this->mPosition;
	}

	/**
	 * @return bool true once the source is exhausted and all buffered data
	 *   has been handed out
	 */
	function stream_eof() {
		return $this->mSource->atEnd() && strlen( $this->mBuffer ) == 0;
	}

	/**
	 * Returns a stat array in which every field is zero.
	 *
	 * @return array The 13 stat fields, under both numeric and named keys
	 */
	function url_stat() {
		$fields = array( 'dev', 'ino', 'mode', 'nlink', 'uid', 'gid', 'rdev',
			'size', 'atime', 'mtime', 'ctime', 'blksize', 'blocks' );

		$result = array();
		foreach ( $fields as $index => $name ) {
			$result[$index] = 0;
			$result[$name] = 0;
		}

		return $result;
	}
}
|
||||
|
|
@ -74,7 +74,7 @@ class ApiImport extends ApiBase {
|
|||
$this->dieUsageMsg( array( 'import-unknownerror', $source->getMessage() ) );
|
||||
}
|
||||
|
||||
$importer = new WikiImporter( $source );
|
||||
$importer = new ImportXMLReader( $source );
|
||||
if ( isset( $params['namespace'] ) ) {
|
||||
$importer->setTargetNamespace( $params['namespace'] );
|
||||
}
|
||||
|
|
@ -193,4 +193,4 @@ class ApiImportReporter extends ImportReporter {
|
|||
function getData() {
|
||||
return $this->mResultArr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -107,7 +107,7 @@ class SpecialImport extends SpecialPage {
|
|||
} else {
|
||||
$wgOut->addWikiMsg( "importstart" );
|
||||
|
||||
$importer = new WikiImporter( $source );
|
||||
$importer = new ImportXMLReader( $source );
|
||||
if( !is_null( $this->namespace ) ) {
|
||||
$importer->setTargetNamespace( $this->namespace );
|
||||
}
|
||||
|
|
@ -274,9 +274,13 @@ class SpecialImport extends SpecialPage {
|
|||
*/
|
||||
class ImportReporter {
|
||||
private $reason=false;
|
||||
private $mOriginalLogCallback = null;
|
||||
private $mLogItemCount = 0;
|
||||
|
||||
function __construct( $importer, $upload, $interwiki , $reason=false ) {
|
||||
$importer->setPageOutCallback( array( $this, 'reportPage' ) );
|
||||
$this->mOriginalLogCallback =
|
||||
$importer->setLogItemCallback( array( $this, 'reportLogItem' ) );
|
||||
$this->mPageCount = 0;
|
||||
$this->mIsUpload = $upload;
|
||||
$this->mInterwiki = $interwiki;
|
||||
|
|
@ -287,6 +291,13 @@ class ImportReporter {
|
|||
global $wgOut;
|
||||
$wgOut->addHTML( "<ul>\n" );
|
||||
}
|
||||
|
||||
/**
 * Log-item callback installed by the reporter: counts the log item and then
 * forwards all arguments unchanged to the callback that was registered
 * before, if that one is callable.
 */
function reportLogItem( /* ... */ ) {
	$this->mLogItemCount++;

	if ( !is_callable( $this->mOriginalLogCallback ) ) {
		return;
	}

	$forwardedArgs = func_get_args();
	call_user_func_array( $this->mOriginalLogCallback, $forwardedArgs );
}
|
||||
|
||||
function reportPage( $title, $origTitle, $revisionCount, $successCount ) {
|
||||
global $wgOut, $wgUser, $wgLang, $wgContLang;
|
||||
|
|
@ -340,7 +351,12 @@ class ImportReporter {
|
|||
|
||||
function close() {
|
||||
global $wgOut;
|
||||
if( $this->mPageCount == 0 ) {
|
||||
|
||||
if ( $this->mLogItemCount > 0 ) {
|
||||
$msg = wfMsgExt( 'imported-log-entries', 'parseinline',
|
||||
$this->mLogItemCount );
|
||||
$wgOut->addHTML( Xml::tags( 'li', null, $msg ) );
|
||||
} elseif( $this->mPageCount == 0 && $this->mLogItemCount == 0 ) {
|
||||
$wgOut->addHTML( "</ul>\n" );
|
||||
return new WikiErrorMsg( "importnopages" );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3203,6 +3203,7 @@ Save it to your computer and upload it here.',
|
|||
'importstart' => 'Importing pages...',
|
||||
'import-revision-count' => '$1 {{PLURAL:$1|revision|revisions}}',
|
||||
'importnopages' => 'No pages to import.',
|
||||
'imported-log-entries' => 'Imported $1 {{PLURAL:$1|log entry|log entries}}.',
|
||||
'importfailed' => 'Import failed: <nowiki>$1</nowiki>',
|
||||
'importunknownsource' => 'Unknown import source type',
|
||||
'importcantopen' => 'Could not open import file',
|
||||
|
|
|
|||
|
|
@ -140,7 +140,7 @@ class BackupReader {
|
|||
$this->startTime = wfTime();
|
||||
|
||||
$source = new ImportStreamSource( $handle );
|
||||
$importer = new WikiImporter( $source );
|
||||
$importer = new ImportXMLReader( $source );
|
||||
|
||||
$importer->setDebug( $this->debug );
|
||||
$importer->setPageCallback( array( &$this, 'reportPage' ) );
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ class DumpRenderer extends Maintenance {
|
|||
$this->startTime = wfTime();
|
||||
|
||||
$source = new ImportStreamSource( $this->getStdin() );
|
||||
$importer = new WikiImporter( $source );
|
||||
$importer = new ImportXMLReader( $source );
|
||||
|
||||
$importer->setRevisionCallback(
|
||||
array( &$this, 'handleRevision' ) );
|
||||
|
|
|
|||
Loading…
Reference in a new issue